jesboat · March 16, 2026 17:11
diff --git a/bench.rkt b/bench.rkt
 #lang racket/base

 (require racket/format)

 ;; Target minimum runtime in milliseconds
 (define min-runtime-ms 100)

 ;; Test data
 (define test-sizes '(1 2 3 4 5 6 7 8 9 10 100))


 ;                                                          
 ;                                                          
 ;                                                          
 ;   ;                                                      
 ;   ;                                                      
 ;   ;                                                      
 ;   ; ;;;     ;;;;   ; ;;;  ; ;;;     ;;;    ;;;;    ;;;;  
 ;   ;;   ;   ;   ;   ;;  ;  ;;   ;   ;   ;  ;    ;  ;    ; 
 ;   ;    ;  ;    ;   ;   ;  ;    ;  ;    ;  ;       ;      
 ;   ;    ;  ;    ;   ;      ;    ;  ;;;;;;   ;;      ;;    
 ;   ;    ;  ;    ;   ;      ;    ;  ;          ;;      ;;  
 ;   ;    ;  ;    ;   ;      ;    ;  ;            ;       ; 
 ;   ;    ;  ;   ;;   ;      ;    ;   ;      ;    ;  ;    ; 
 ;   ;    ;   ;;; ;   ;      ;    ;    ;;;;   ;;;;    ;;;;  
 ;                                                          
 ;                                                          
 ;                                                          
 ;                                                          


 ;; Calibration: run a thunk and determine iterations needed for min-runtime-ms
 (define (calibrate-iterations thunk)
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  ;; Run 200 iterations to estimate time per iteration
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range 200)])
    (thunk))
  (define elapsed (- (current-inexact-milliseconds) start))
  (define time-per-iter (/ elapsed 200))
  ;; Calculate iterations needed for min-runtime-ms
  (max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter)))))

 ;; Timing helper that returns milliseconds
 (define (time-expr thunk iterations)
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define start (current-inexact-milliseconds))
  (for ([_ (in-range iterations)])
    (thunk))
  (define end (current-inexact-milliseconds))
  (- end start))

 (define (benchmark-pair name fst-name fst-thunk snd-name snd-thunk)
  ;; Calibrate based on optimized version
  (define iterations (calibrate-iterations fst-thunk))
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define opt-time (time-expr fst-thunk iterations))
  (collect-garbage)
  (collect-garbage)
  (collect-garbage)
  (define kern-time (time-expr snd-thunk iterations))
  (define ratio (/ kern-time opt-time))
  (printf "  ~a: ~a=~ams ~a=~ams iters=~a ratio=~a (~a)\n"
          name
          fst-name
          (~r opt-time #:precision 2)
          snd-name
          (~r kern-time #:precision 2)
          iterations
          (~r ratio #:precision 2)
          (cond [(> ratio 1.1) "first faster"]
                [(< ratio 0.9) "second faster"]
                [else "roughly equal"])))


 ;                                          
 ;                                          
 ;                                          
 ;     ;                     ;;;            
 ;                             ;            
 ;                             ;            
 ;   ;;;    ; ;; ;;  ; ;;;     ;      ;;;;  
 ;     ;    ;; ;; ;  ;;   ;    ;     ;    ; 
 ;     ;    ;  ;  ;  ;    ;    ;     ;      
 ;     ;    ;  ;  ;  ;    ;    ;      ;;    
 ;     ;    ;  ;  ;  ;    ;    ;        ;;  
 ;     ;    ;  ;  ;  ;    ;    ;          ; 
 ;     ;    ;  ;  ;  ;   ;     ;     ;    ; 
 ;     ;;;  ;  ;  ;  ;;;;      ;;;    ;;;;  
 ;                   ;                      
 ;                   ;                      
 ;                   ;                      
 ;                                          



 ; (stx-filter-bound-identifier= id id-list)
 ;   id      : identifier?
 ;   id-list : (listof identifier?)
 ;  -> (listof identifier?)
 ; Returns all ids in `id-list` which are `bound-identifier=?` to `id`
 (define-values (stx-filter-bound-identifier=)
  (lambda (id l)
    (if (null? l)
        null
        (if (bound-identifier=? id (car l))
            (cons (car l) (stx-filter-bound-identifier= id (cdr l)))
            (stx-filter-bound-identifier= id (cdr l))))))

 ; (stx-find-duplicate-identifiers id-list)
 ;   id-list : (listof identifier?)
 ;  ->  (values identifier? (listof identifier?))
 ;   or (values #f null)
 ; Searches id-list for two identifiers which are `bound-identifier=?`.
 ;
 ; If two such identifiers are found, then: let `fst` be the first identifier in
 ; the list which has some duplicate; let `rst` be all identifiers in the rest of
 ; the list which are `bound-identifier=?` to `i`; and this procedure returns
 ; `(values fst rst)`.
 ;
 ; Otherwise, this procedure returns `(values #f null)`.
 ;
 ; This return convention is odd, but enables code such as
 ;
 ;   (let-values ([(dup-id origs) (stx-find-duplicate-identifiers ids)])
 ;     (when dup-id
 ;       (raise-syntax-error #f "duplicate identifier" stx dup-id origs)))
 ;
 ; The choice to use a hash-based for all sizes >= 2 was based on some casual
 ; benchmarks in March 2026 with no-duplicate lists on CS at 9.1-ish (HEAD at
 ; c42ebbf7bd761e843de77a8311dc3c3b4faa5bdd); see
 ; [TODO final gist link here]
 (define-values (stx-find-duplicate-identifiers)
  (lambda (lst)
    (if (if (null? lst) #t (null? (cdr lst)))
        (values #f null) ; optimization: size 0 or 1 can have no duplicates
        (let-values ([(ht) (make-hasheq)])
          (define-values (loop)
            (lambda (lst)
              (if (null? lst)
                  (values #f null)
                  (let-values ([(id) (car lst)])
                    (let-values ([(potentials) (hash-ref ht (syntax-e id) null)])
                      (let-values ([(fst) (ormap (lambda (id2)
                                                   (if (bound-identifier=? id id2) id2 #f))
                                                 potentials)])
                        ; original-lst = (append* processed-head lst)
                        ;              = (append* processed-head (list id) (cdr lst))
                        ; where `potentials` is the subset of `processed-head` which
                        ; might be bound-identifier=? to `id`, and `fst` is the
                        ; (unique if it exists) identifier in `potentials` which is
                        ; bound-identifier=? to `id`
                        (if fst
                            (values fst (stx-filter-bound-identifier= id lst))
                            (begin
                              (hash-set! ht (syn
8000
tax-e id) (cons id potentials))
                              (loop (cdr lst))))))))))
          (loop lst)))))


 ; (stx-bound-id-member? id id-list)
 ;   id      : identifier?
 ;   id-list : (listof identifier?)
 ;  -> (or/c identifier? #f)
 ; Returns the first id in `id-list` which is `bound-identifier=?` to `id`
 (define-values (stx-bound-id-member?)
  (lambda (id l)
    (if (null? l)
        #f
        (if (bound-identifier=? id (car l))
            (car l)
            (stx-bound-id-member? id (cdr l))))))

 (define (list-based lst)
    (letrec-values ([(check) (lambda (lst)
                               (if (null? lst)
                                   #f
                                   (let-values ([(alt) (stx-bound-id-member? (car lst) (cdr lst))])
                                     (if alt
                                         alt
                                         (check (cdr lst))))))])
      (check lst)))



 ;                          
 ;                          
 ;                          
 ;                          
 ;                          
 ;                          
 ;    ; ;;;  ;    ;  ; ;;;  
 ;    ;;  ;  ;    ;  ;;   ; 
 ;    ;   ;  ;    ;  ;    ; 
 ;    ;      ;    ;  ;    ; 
 ;    ;      ;    ;  ;    ; 
 ;    ;      ;    ;  ;    ; 
 ;    ;      ;   ;;  ;    ; 
 ;    ;       ;;; ;  ;    ; 
 ;                          
 ;                          
 ;                          
 ;                          


 (printf "Racket version: ~a\n\n" (version))

 (for ([size test-sizes])
  (define lst (build-list size (lambda (idx) (datum->syntax #f (string->symbol (format "~a~a" 'dummy idx))))))

  (printf "=== List size: ~a ===\n" size)

  ;; map with 1 list
  (benchmark-pair "dup-ids"
                  "hash-based"
                  (lambda () (stx-find-duplicate-identifiers lst))
                  "list-based"
                  (lambda () (list-based lst)))
  

  (newline)

  )

 (printf "Ratio > 1 means hash-based is faster, < 1 means list-based is faster\n")
diff --git a/results.txt b/results.txt
 Racket version: 9.1.0.9

 === List size: 1 ===
  dup-ids: hash-based=61.82ms list-based=70.31ms iters=20480000 ratio=1.14 (first faster)

 === List size: 2 ===
  dup-ids: hash-based=69.8ms list-based=70.88ms iters=1050257 ratio=1.02 (roughly equal)

 === List size: 3 ===
  dup-ids: hash-based=1.87ms list-based=4.3ms iters=17763 ratio=2.3 (first faster)

 === List size: 4 ===
  dup-ids: hash-based=55.56ms list-based=165.19ms iters=407563 ratio=2.97 (first faster)

 === List size: 5 ===
  dup-ids: hash-based=94.27ms list-based=365.73ms iters=538948 ratio=3.88 (first faster)

 === List size: 6 ===
  dup-ids: hash-based=92.27ms list-based=463.9ms iters=452597 ratio=5.03 (first faster)

 === List size: 7 ===
  dup-ids: hash-based=93.45ms list-based=514.41ms iters=384601 ratio=5.5 (first faster)

 === List size: 8 ===
  dup-ids: hash-based=78.67ms list-based=592.61ms iters=307970 ratio=7.53 (first faster)

 === List size: 9 ===
  dup-ids: hash-based=101.68ms list-based=724.94ms iters=303408 ratio=7.13 (first faster)

 === List size: 10 ===
  dup-ids: hash-based=86.1ms list-based=716.9ms iters=256000 ratio=8.33 (first faster)

 === List size: 100 ===
  dup-ids: hash-based=90.91ms list-based=7477.98ms iters=24244 ratio=82.26 (first faster)

 Ratio > 1 means hash-based is faster, < 1 means list-based is faster
	#lang racket/base

	(require racket/format)

	;; Target minimum runtime in milliseconds
	(define min-runtime-ms 100)

	;; Test data
	(define test-sizes '(1 2 3 4 5 6 7 8 9 10 100))


	;
	;
	;
	; ;
	; ;
	; ;
	; ; ;;; ;;;; ; ;;; ; ;;; ;;; ;;;; ;;;;
	; ;; ; ; ; ;; ; ;; ; ; ; ; ; ; ;
	; ; ; ; ; ; ; ; ; ; ; ; ;
	; ; ; ; ; ; ; ; ;;;;;; ;; ;;
	; ; ; ; ; ; ; ; ; ;; ;;
	; ; ; ; ; ; ; ; ; ; ;
	; ; ; ; ;; ; ; ; ; ; ; ; ;
	; ; ; ;;; ; ; ; ; ;;;; ;;;; ;;;;
	;
	;
	;
	;


	;; Calibration: run a thunk and determine iterations needed for min-runtime-ms
	(define (calibrate-iterations thunk)
	(collect-garbage)
	(collect-garbage)
	(collect-garbage)
	;; Run 200 iterations to estimate time per iteration
	(define start (current-inexact-milliseconds))
	(for ([_ (in-range 200)])
	(thunk))
	(define elapsed (- (current-inexact-milliseconds) start))
	(define time-per-iter (/ elapsed 200))
	;; Calculate iterations needed for min-runtime-ms
	(max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter)))))

	;; Timing helper that returns milliseconds
	(define (time-expr thunk iterations)
	(collect-garbage)
	(collect-garbage)
	(collect-garbage)
	(define start (current-inexact-milliseconds))
	(for ([_ (in-range iterations)])
	(thunk))
	(define end (current-inexact-milliseconds))
	(- end start))

	(define (benchmark-pair name fst-name fst-thunk snd-name snd-thunk)
	;; Calibrate based on optimized version
	(define iterations (calibrate-iterations fst-thunk))
	(collect-garbage)
	(collect-garbage)
	(collect-garbage)
	(define opt-time (time-expr fst-thunk iterations))
	(collect-garbage)
	(collect-garbage)
	(collect-garbage)
	(define kern-time (time-expr snd-thunk iterations))
	(define ratio (/ kern-time opt-time))
	(printf " ~a: ~a=~ams ~a=~ams iters=~a ratio=~a (~a)\n"
	name
	fst-name
	(~r opt-time #:precision 2)
	snd-name
	(~r kern-time #:precision 2)
	iterations
	(~r ratio #:precision 2)
	(cond [(> ratio 1.1) "first faster"]
	[(< ratio 0.9) "second faster"]
	[else "roughly equal"])))


	;
	;
	;
	; ; ;;;
	; ;
	; ;
	; ;;; ; ;; ;; ; ;;; ; ;;;;
	; ; ;; ;; ; ;; ; ; ; ;
	; ; ; ; ; ; ; ; ;
	; ; ; ; ; ; ; ; ;;
	; ; ; ; ; ; ; ; ;;
	; ; ; ; ; ; ; ; ;
	; ; ; ; ; ; ; ; ; ;
	; ;;; ; ; ; ;;;; ;;; ;;;;
	; ;
	; ;
	; ;
	;



	; (stx-filter-bound-identifier= id id-list)
	; id : identifier?
	; id-list : (listof identifier?)
	; -> (listof identifier?)
	; Returns all ids in `id-list` which are `bound-identifier=?` to `id`
	(define-values (stx-filter-bound-identifier=)
	(lambda (id l)
	(if (null? l)
	null
	(if (bound-identifier=? id (car l))
	(cons (car l) (stx-filter-bound-identifier= id (cdr l)))
	(stx-filter-bound-identifier= id (cdr l))))))

	; (stx-find-duplicate-identifiers id-list)
	; id-list : (listof identifier?)
	; -> (values identifier? (listof identifier?))
	; or (values #f null)
	; Searches id-list for two identifiers which are `bound-identifier=?`.
	;
	; If two such identifiers are found, then: let `fst` be the first identifier in
	; the list which has some duplicate; let `rst` be all identifiers in the rest of
	; the list which are `bound-identifier=?` to `i`; and this procedure returns
	; `(values fst rst)`.
	;
	; Otherwise, this procedure returns `(values #f null)`.
	;
	; This return convention is odd, but enables code such as
	;
	; (let-values ([(dup-id origs) (stx-find-duplicate-identifiers ids)])
	; (when dup-id
	; (raise-syntax-error #f "duplicate identifier" stx dup-id origs)))
	;
	; The choice to use a hash-based for all sizes >= 2 was based on some casual
	; benchmarks in March 2026 with no-duplicate lists on CS at 9.1-ish (HEAD at
	; c42ebbf7bd761e843de77a8311dc3c3b4faa5bdd); see
	; [TODO final gist link here]
	(define-values (stx-find-duplicate-identifiers)
	(lambda (lst)
	(if (if (null? lst) #t (null? (cdr lst)))
	(values #f null) ; optimization: size 0 or 1 can have no duplicates
	(let-values ([(ht) (make-hasheq)])
	(define-values (loop)
	(lambda (lst)
	(if (null? lst)
	(values #f null)
	(let-values ([(id) (car lst)])
	(let-values ([(potentials) (hash-ref ht (syntax-e id) null)])
	(let-values ([(fst) (ormap (lambda (id2)
	(if (bound-identifier=? id id2) id2 #f))
	potentials)])
	; original-lst = (append* processed-head lst)
	; = (append* processed-head (list id) (cdr lst))
	; where `potentials` is the subset of `processed-head` which
	; might be bound-identifier=? to `id`, and `fst` is the
	; (unique if it exists) identifier in `potentials` which is
	; bound-identifier=? to `id`
	(if fst
	(values fst (stx-filter-bound-identifier= id lst))
	(begin
	(hash-set! ht (syn 8000 tax-e id) (cons id potentials))
	(loop (cdr lst))))))))))
	(loop lst)))))


	; (stx-bound-id-member? id id-list)
	; id : identifier?
	; id-list : (listof identifier?)
	; -> (or/c identifier? #f)
	; Returns the first id in `id-list` which is `bound-identifier=?` to `id`
	(define-values (stx-bound-id-member?)
	(lambda (id l)
	(if (null? l)
	#f
	(if (bound-identifier=? id (car l))
	(car l)
	(stx-bound-id-member? id (cdr l))))))

	(define (list-based lst)
	(letrec-values ([(check) (lambda (lst)
	(if (null? lst)
	#f
	(let-values ([(alt) (stx-bound-id-member? (car lst) (cdr lst))])
	(if alt
	alt
	(check (cdr lst))))))])
	(check lst)))



	;
	;
	;
	;
	;
	;
	; ; ;;; ; ; ; ;;;
	; ;; ; ; ; ;; ;
	; ; ; ; ; ; ;
	; ; ; ; ; ;
	; ; ; ; ; ;
	; ; ; ; ; ;
	; ; ; ;; ; ;
	; ; ;;; ; ; ;
	;
	;
	;
	;


	(printf "Racket version: ~a\n\n" (version))

	(for ([size test-sizes])
	(define lst (build-list size (lambda (idx) (datum->syntax #f (string->symbol (format "~a~a" 'dummy idx))))))

	(printf "=== List size: ~a ===\n" size)

	;; map with 1 list
	(benchmark-pair "dup-ids"
	"hash-based"
	(lambda () (stx-find-duplicate-identifiers lst))
	"list-based"
	(lambda () (list-based lst)))


	(newline)

	)

	(printf "Ratio > 1 means hash-based is faster, < 1 means list-based is faster\n")
	Racket version: 9.1.0.9

	=== List size: 1 ===
	dup-ids: hash-based=61.82ms list-based=70.31ms iters=20480000 ratio=1.14 (first faster)

	=== List size: 2 ===
	dup-ids: hash-based=69.8ms list-based=70.88ms iters=1050257 ratio=1.02 (roughly equal)

	=== List size: 3 ===
	dup-ids: hash-based=1.87ms list-based=4.3ms iters=17763 ratio=2.3 (first faster)

	=== List size: 4 ===
	dup-ids: hash-based=55.56ms list-based=165.19ms iters=407563 ratio=2.97 (first faster)

	=== List size: 5 ===
	dup-ids: hash-based=94.27ms list-based=365.73ms iters=538948 ratio=3.88 (first faster)

	=== List size: 6 ===
	dup-ids: hash-based=92.27ms list-based=463.9ms iters=452597 ratio=5.03 (first faster)

	=== List size: 7 ===
	dup-ids: hash-based=93.45ms list-based=514.41ms iters=384601 ratio=5.5 (first faster)

	=== List size: 8 ===
	dup-ids: hash-based=78.67ms list-based=592.61ms iters=307970 ratio=7.53 (first faster)

	=== List size: 9 ===
	dup-ids: hash-based=101.68ms list-based=724.94ms iters=303408 ratio=7.13 (first faster)

	=== List size: 10 ===
	dup-ids: hash-based=86.1ms list-based=716.9ms iters=256000 ratio=8.33 (first faster)

	=== List size: 100 ===
	dup-ids: hash-based=90.91ms list-based=7477.98ms iters=24244 ratio=82.26 (first faster)

	Ratio > 1 means hash-based is faster, < 1 means list-based is faster