8000
Skip to content

Instantly share code, notes, and snippets.

@jesboat
Last active March 16, 2026 17:11
Show Gist options
  • Select an option

  • Save jesboat/a47f090dca2094b5cd36598ebb5e6406 to your computer and use it in GitHub Desktop.

Select an option

Save jesboat/a47f090dca2094b5cd36598ebb5e6406 to your computer and use it in GitHub Desktop.
#lang racket/base
(require racket/format)
;; Target minimum runtime in milliseconds
(define min-runtime-ms 100)
;; Test data
(define test-sizes '(1 2 3 4 5 6 7 8 9 10 100))
;
;
;
; ;
; ;
; ;
; ; ;;; ;;;; ; ;;; ; ;;; ;;; ;;;; ;;;;
; ;; ; ; ; ;; ; ;; ; ; ; ; ; ; ;
; ; ; ; ; ; ; ; ; ; ; ; ;
; ; ; ; ; ; ; ; ;;;;;; ;; ;;
; ; ; ; ; ; ; ; ; ;; ;;
; ; ; ; ; ; ; ; ; ; ;
; ; ; ; ;; ; ; ; ; ; ; ; ;
; ; ; ;;; ; ; ; ; ;;;; ;;;; ;;;;
;
;
;
;
;; Calibration: run a thunk and determine iterations needed for min-runtime-ms
(define (calibrate-iterations thunk)
(collect-garbage)
(collect-garbage)
(collect-garbage)
;; Run 200 iterations to estimate time per iteration
(define start (current-inexact-milliseconds))
(for ([_ (in-range 200)])
(thunk))
(define elapsed (- (current-inexact-milliseconds) start))
(define time-per-iter (/ elapsed 200))
;; Calculate iterations needed for min-runtime-ms
(max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter)))))
;; Timing helper that returns milliseconds
(define (time-expr thunk iterations)
(collect-garbage)
(collect-garbage)
(collect-garbage)
(define start (current-inexact-milliseconds))
(for ([_ (in-range iterations)])
(thunk))
(define end (current-inexact-milliseconds))
(- end start))
(define (benchmark-pair name fst-name fst-thunk snd-name snd-thunk)
;; Calibrate based on optimized version
(define iterations (calibrate-iterations fst-thunk))
(collect-garbage)
(collect-garbage)
(collect-garbage)
(define opt-time (time-expr fst-thunk iterations))
(collect-garbage)
(collect-garbage)
(collect-garbage)
(define kern-time (time-expr snd-thunk iterations))
(define ratio (/ kern-time opt-time))
(printf " ~a: ~a=~ams ~a=~ams iters=~a ratio=~a (~a)\n"
name
fst-name
(~r opt-time #:precision 2)
snd-name
(~r kern-time #:precision 2)
iterations
(~r ratio #:precision 2)
(cond [(> ratio 1.1) "first faster"]
[(< ratio 0.9) "second faster"]
[else "roughly equal"])))
;
;
;
; ; ;;;
; ;
; ;
; ;;; ; ;; ;; ; ;;; ; ;;;;
; ; ;; ;; ; ;; ; ; ; ;
; ; ; ; ; ; ; ; ;
; ; ; ; ; ; ; ; ;;
; ; ; ; ; ; ; ; ;;
; ; ; ; ; ; ; ; ;
; ; ; ; ; ; ; ; ; ;
; ;;; ; ; ; ;;;; ;;; ;;;;
; ;
; ;
; ;
;
; (stx-filter-bound-identifier= id id-list)
; id : identifier?
; id-list : (listof identifier?)
; -> (listof identifier?)
; Returns all ids in `id-list` which are `bound-identifier=?` to `id`
(define-values (stx-filter-bound-identifier=)
(lambda (id l)
(if (null? l)
null
(if (bound-identifier=? id (car l))
(cons (car l) (stx-filter-bound-identifier= id (cdr l)))
(stx-filter-bound-identifier= id (cdr l))))))
; (stx-find-duplicate-identifiers id-list)
; id-list : (listof identifier?)
; -> (values identifier? (listof identifier?))
; or (values #f null)
; Searches id-list for two identifiers which are `bound-identifier=?`.
;
; If two such identifiers are found, then: let `fst` be the first identifier in
; the list which has some duplicate; let `rst` be all identifiers in the rest of
; the list which are `bound-identifier=?` to `i`; and this procedure returns
; `(values fst rst)`.
;
; Otherwise, this procedure returns `(values #f null)`.
;
; This return convention is odd, but enables code such as
;
; (let-values ([(dup-id origs) (stx-find-duplicate-identifiers ids)])
; (when dup-id
; (raise-syntax-error #f "duplicate identifier" stx dup-id origs)))
;
; The choice to use a hash-based for all sizes >= 2 was based on some casual
; benchmarks in March 2026 with no-duplicate lists on CS at 9.1-ish (HEAD at
; c42ebbf7bd761e843de77a8311dc3c3b4faa5bdd); see
; [TODO final gist link here]
(define-values (stx-find-duplicate-identifiers)
(lambda (lst)
(if (if (null? lst) #t (null? (cdr lst)))
(values #f null) ; optimization: size 0 or 1 can have no duplicates
(let-values ([(ht) (make-hasheq)])
(define-values (loop)
(lambda (lst)
(if (null? lst)
(values #f null)
(let-values ([(id) (car lst)])
(let-values ([(potentials) (hash-ref ht (syntax-e id) null)])
(let-values ([(fst) (ormap (lambda (id2)
(if (bound-identifier=? id id2) id2 #f))
potentials)])
; original-lst = (append* processed-head lst)
; = (append* processed-head (list id) (cdr lst))
; where `potentials` is the subset of `processed-head` which
; might be bound-identifier=? to `id`, and `fst` is the
; (unique if it exists) identifier in `potentials` which is
; bound-identifier=? to `id`
(if fst
(values fst (stx-filter-bound-identifier= id lst))
(begin
(hash-set! ht (syn 8000 tax-e id) (cons id potentials))
(loop (cdr lst))))))))))
(loop lst)))))
; (stx-bound-id-member? id id-list)
; id : identifier?
; id-list : (listof identifier?)
; -> (or/c identifier? #f)
; Returns the first id in `id-list` which is `bound-identifier=?` to `id`
(define-values (stx-bound-id-member?)
(lambda (id l)
(if (null? l)
#f
(if (bound-identifier=? id (car l))
(car l)
(stx-bound-id-member? id (cdr l))))))
(define (list-based lst)
(letrec-values ([(check) (lambda (lst)
(if (null? lst)
#f
(let-values ([(alt) (stx-bound-id-member? (car lst) (cdr lst))])
(if alt
alt
(check (cdr lst))))))])
(check lst)))
;
;
;
;
;
;
; ; ;;; ; ; ; ;;;
; ;; ; ; ; ;; ;
; ; ; ; ; ; ;
; ; ; ; ; ;
; ; ; ; ; ;
; ; ; ; ; ;
; ; ; ;; ; ;
; ; ;;; ; ; ;
;
;
;
;
(printf "Racket version: ~a\n\n" (version))
(for ([size test-sizes])
(define lst (build-list size (lambda (idx) (datum->syntax #f (string->symbol (format "~a~a" 'dummy idx))))))
(printf "=== List size: ~a ===\n" size)
;; map with 1 list
(benchmark-pair "dup-ids"
"hash-based"
(lambda () (stx-find-duplicate-identifiers lst))
"list-based"
(lambda () (list-based lst)))
(newline)
)
(printf "Ratio > 1 means hash-based is faster, < 1 means list-based is faster\n")
Racket version: 9.1.0.9
=== List size: 1 ===
dup-ids: hash-based=61.82ms list-based=70.31ms iters=20480000 ratio=1.14 (first faster)
=== List size: 2 ===
dup-ids: hash-based=69.8ms list-based=70.88ms iters=1050257 ratio=1.02 (roughly equal)
=== List size: 3 ===
dup-ids: hash-based=1.87ms list-based=4.3ms iters=17763 ratio=2.3 (first faster)
=== List size: 4 ===
dup-ids: hash-based=55.56ms list-based=165.19ms iters=407563 ratio=2.97 (first faster)
=== List size: 5 ===
dup-ids: hash-based=94.27ms list-based=365.73ms iters=538948 ratio=3.88 (first faster)
=== List size: 6 ===
dup-ids: hash-based=92.27ms list-based=463.9ms iters=452597 ratio=5.03 (first faster)
=== List size: 7 ===
dup-ids: hash-based=93.45ms list-based=514.41ms iters=384601 ratio=5.5 (first faster)
=== List size: 8 ===
dup-ids: hash-based=78.67ms list-based=592.61ms iters=307970 ratio=7.53 (first faster)
=== List size: 9 ===
dup-ids: hash-based=101.68ms list-based=724.94ms iters=303408 ratio=7.13 (first faster)
=== List size: 10 ===
dup-ids: hash-based=86.1ms list-based=716.9ms iters=256000 ratio=8.33 (first faster)
=== List size: 100 ===
dup-ids: hash-based=90.91ms list-based=7477.98ms iters=24244 ratio=82.26 (first faster)
Ratio > 1 means hash-based is faster, < 1 means list-based is faster
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
0