-
-
Save jesboat/a47f090dca2094b5cd36598ebb5e6406 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #lang racket/base | |
| (require racket/format) | |
| ;; Target minimum runtime in milliseconds | |
| (define min-runtime-ms 100) | |
| ;; Test data | |
| (define test-sizes '(1 2 3 4 5 6 7 8 9 10 100)) | |
| ; | |
| ; | |
| ; | |
| ; ; | |
| ; ; | |
| ; ; | |
| ; ; ;;; ;;;; ; ;;; ; ;;; ;;; ;;;; ;;;; | |
| ; ;; ; ; ; ;; ; ;; ; ; ; ; ; ; ; | |
| ; ; ; ; ; ; ; ; ; ; ; ; ; | |
| ; ; ; ; ; ; ; ; ;;;;;; ;; ;; | |
| ; ; ; ; ; ; ; ; ; ;; ;; | |
| ; ; ; ; ; ; ; ; ; ; ; | |
| ; ; ; ; ;; ; ; ; ; ; ; ; ; | |
| ; ; ; ;;; ; ; ; ; ;;;; ;;;; ;;;; | |
| ; | |
| ; | |
| ; | |
| ; | |
| ;; Calibration: run a thunk and determine iterations needed for min-runtime-ms | |
| (define (calibrate-iterations thunk) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (collect-garbage) | |
| ;; Run 200 iterations to estimate time per iteration | |
| (define start (current-inexact-milliseconds)) | |
| (for ([_ (in-range 200)]) | |
| (thunk)) | |
| (define elapsed (- (current-inexact-milliseconds) start)) | |
| (define time-per-iter (/ elapsed 200)) | |
| ;; Calculate iterations needed for min-runtime-ms | |
| (max 100 (inexact->exact (ceiling (/ min-runtime-ms time-per-iter))))) | |
| ;; Timing helper that returns milliseconds | |
| (define (time-expr thunk iterations) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (define start (current-inexact-milliseconds)) | |
| (for ([_ (in-range iterations)]) | |
| (thunk)) | |
| (define end (current-inexact-milliseconds)) | |
| (- end start)) | |
| (define (benchmark-pair name fst-name fst-thunk snd-name snd-thunk) | |
| ;; Calibrate based on optimized version | |
| (define iterations (calibrate-iterations fst-thunk)) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (define opt-time (time-expr fst-thunk iterations)) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (collect-garbage) | |
| (define kern-time (time-expr snd-thunk iterations)) | |
| (define ratio (/ kern-time opt-time)) | |
| (printf " ~a: ~a=~ams ~a=~ams iters=~a ratio=~a (~a)\n" | |
| name | |
| fst-name | |
| (~r opt-time #:precision 2) | |
| snd-name | |
| (~r kern-time #:precision 2) | |
| iterations | |
| (~r ratio #:precision 2) | |
| (cond [(> ratio 1.1) "first faster"] | |
| [(< ratio 0.9) "second faster"] | |
| [else "roughly equal"]))) | |
| ; | |
| ; | |
| ; | |
| ; ; ;;; | |
| ; ; | |
| ; ; | |
| ; ;;; ; ;; ;; ; ;;; ; ;;;; | |
| ; ; ;; ;; ; ;; ; ; ; ; | |
| ; ; ; ; ; ; ; ; ; | |
| ; ; ; ; ; ; ; ; ;; | |
| ; ; ; ; ; ; ; ; ;; | |
| ; ; ; ; ; ; ; ; ; | |
| ; ; ; ; ; ; ; ; ; ; | |
| ; ;;; ; ; ; ;;;; ;;; ;;;; | |
| ; ; | |
| ; ; | |
| ; ; | |
| ; | |
| ; (stx-filter-bound-identifier= id id-list) | |
| ; id : identifier? | |
| ; id-list : (listof identifier?) | |
| ; -> (listof identifier?) | |
| ; Returns all ids in `id-list` which are `bound-identifier=?` to `id` | |
| (define-values (stx-filter-bound-identifier=) | |
| (lambda (id l) | |
| (if (null? l) | |
| null | |
| (if (bound-identifier=? id (car l)) | |
| (cons (car l) (stx-filter-bound-identifier= id (cdr l))) | |
| (stx-filter-bound-identifier= id (cdr l)))))) | |
| ; (stx-find-duplicate-identifiers id-list) | |
| ; id-list : (listof identifier?) | |
| ; -> (values identifier? (listof identifier?)) | |
| ; or (values #f null) | |
| ; Searches id-list for two identifiers which are `bound-identifier=?`. | |
| ; | |
| ; If two such identifiers are found, then: let `fst` be the first identifier in | |
| ; the list which has some duplicate; let `rst` be all identifiers in the rest of | |
| ; the list which are `bound-identifier=?` to `i`; and this procedure returns | |
| ; `(values fst rst)`. | |
| ; | |
| ; Otherwise, this procedure returns `(values #f null)`. | |
| ; | |
| ; This return convention is odd, but enables code such as | |
| ; | |
| ; (let-values ([(dup-id origs) (stx-find-duplicate-identifiers ids)]) | |
| ; (when dup-id | |
| ; (raise-syntax-error #f "duplicate identifier" stx dup-id origs))) | |
| ; | |
| ; The choice to use a hash-based for all sizes >= 2 was based on some casual | |
| ; benchmarks in March 2026 with no-duplicate lists on CS at 9.1-ish (HEAD at | |
| ; c42ebbf7bd761e843de77a8311dc3c3b4faa5bdd); see | |
| ; [TODO final gist link here] | |
| (define-values (stx-find-duplicate-identifiers) | |
| (lambda (lst) | |
| (if (if (null? lst) #t (null? (cdr lst))) | |
| (values #f null) ; optimization: size 0 or 1 can have no duplicates | |
| (let-values ([(ht) (make-hasheq)]) | |
| (define-values (loop) | |
| (lambda (lst) | |
| (if (null? lst) | |
| (values #f null) | |
| (let-values ([(id) (car lst)]) | |
| (let-values ([(potentials) (hash-ref ht (syntax-e id) null)]) | |
| (let-values ([(fst) (ormap (lambda (id2) | |
| (if (bound-identifier=? id id2) id2 #f)) | |
| potentials)]) | |
| ; original-lst = (append* processed-head lst) | |
| ; = (append* processed-head (list id) (cdr lst)) | |
| ; where `potentials` is the subset of `processed-head` which | |
| ; might be bound-identifier=? to `id`, and `fst` is the | |
| ; (unique if it exists) identifier in `potentials` which is | |
| ; bound-identifier=? to `id` | |
| (if fst | |
| (values fst (stx-filter-bound-identifier= id lst)) | |
| (begin | |
| (hash-set! ht (syn 8000 tax-e id) (cons id potentials)) | |
| (loop (cdr lst)))))))))) | |
| (loop lst))))) | |
| ; (stx-bound-id-member? id id-list) | |
| ; id : identifier? | |
| ; id-list : (listof identifier?) | |
| ; -> (or/c identifier? #f) | |
| ; Returns the first id in `id-list` which is `bound-identifier=?` to `id` | |
| (define-values (stx-bound-id-member?) | |
| (lambda (id l) | |
| (if (null? l) | |
| #f | |
| (if (bound-identifier=? id (car l)) | |
| (car l) | |
| (stx-bound-id-member? id (cdr l)))))) | |
| (define (list-based lst) | |
| (letrec-values ([(check) (lambda (lst) | |
| (if (null? lst) | |
| #f | |
| (let-values ([(alt) (stx-bound-id-member? (car lst) (cdr lst))]) | |
| (if alt | |
| alt | |
| (check (cdr lst))))))]) | |
| (check lst))) | |
| ; | |
| ; | |
| ; | |
| ; | |
| ; | |
| ; | |
| ; ; ;;; ; ; ; ;;; | |
| ; ;; ; ; ; ;; ; | |
| ; ; ; ; ; ; ; | |
| ; ; ; ; ; ; | |
| ; ; ; ; ; ; | |
| ; ; ; ; ; ; | |
| ; ; ; ;; ; ; | |
| ; ; ;;; ; ; ; | |
| ; | |
| ; | |
| ; | |
| ; | |
| (printf "Racket version: ~a\n\n" (version)) | |
| (for ([size test-sizes]) | |
| (define lst (build-list size (lambda (idx) (datum->syntax #f (string->symbol (format "~a~a" 'dummy idx)))))) | |
| (printf "=== List size: ~a ===\n" size) | |
| ;; map with 1 list | |
| (benchmark-pair "dup-ids" | |
| "hash-based" | |
| (lambda () (stx-find-duplicate-identifiers lst)) | |
| "list-based" | |
| (lambda () (list-based lst))) | |
| (newline) | |
| ) | |
| (printf "Ratio > 1 means hash-based is faster, < 1 means list-based is faster\n") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Racket version: 9.1.0.9 | |
| === List size: 1 === | |
| dup-ids: hash-based=61.82ms list-based=70.31ms iters=20480000 ratio=1.14 (first faster) | |
| === List size: 2 === | |
| dup-ids: hash-based=69.8ms list-based=70.88ms iters=1050257 ratio=1.02 (roughly equal) | |
| === List size: 3 === | |
| dup-ids: hash-based=1.87ms list-based=4.3ms iters=17763 ratio=2.3 (first faster) | |
| === List size: 4 === | |
| dup-ids: hash-based=55.56ms list-based=165.19ms iters=407563 ratio=2.97 (first faster) | |
| === List size: 5 === | |
| dup-ids: hash-based=94.27ms list-based=365.73ms iters=538948 ratio=3.88 (first faster) | |
| === List size: 6 === | |
| dup-ids: hash-based=92.27ms list-based=463.9ms iters=452597 ratio=5.03 (first faster) | |
| === List size: 7 === | |
| dup-ids: hash-based=93.45ms list-based=514.41ms iters=384601 ratio=5.5 (first faster) | |
| === List size: 8 === | |
| dup-ids: hash-based=78.67ms list-based=592.61ms iters=307970 ratio=7.53 (first faster) | |
| === List size: 9 === | |
| dup-ids: hash-based=101.68ms list-based=724.94ms iters=303408 ratio=7.13 (first faster) | |
| === List size: 10 === | |
| dup-ids: hash-based=86.1ms list-based=716.9ms iters=256000 ratio=8.33 (first faster) | |
| === List size: 100 === | |
| dup-ids: hash-based=90.91ms list-based=7477.98ms iters=24244 ratio=82.26 (first faster) | |
| Ratio > 1 means hash-based is faster, < 1 means list-based is faster |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment