From 2c8a0da9de786e48b6ca8e3dbbd14d9a3c26476b Mon Sep 17 00:00:00 2001 From: Shiro Kawai Date: Mon, 15 Jan 2024 11:44:36 -1000 Subject: [PATCH] Completing srfi-194 support --- ChangeLog | 4 + lib/Makefile.in | 3 +- lib/srfi/194.scm | 17 ++-- lib/srfi/194/sphere.scm | 126 ++++++++++++++++++++++++ lib/srfi/194/zipf-zri.scm | 202 ++++++++++++++++++++++++++++++++++++++ test/srfi.scm | 4 +- 6 files changed, 344 insertions(+), 12 deletions(-) create mode 100644 lib/srfi/194/sphere.scm create mode 100644 lib/srfi/194/zipf-zri.scm diff --git a/ChangeLog b/ChangeLog index a38524c49..ae65531e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2024-01-15 Shiro Kawai + + * lib/srfi/194.scm, lib/srfi/194/*.scm: Completing srfi-194 support. + 2024-01-12 Shiro Kawai * src/compile-5.scm (pass5/$DYNENV): Fixed a bug that when KEY or diff --git a/lib/Makefile.in b/lib/Makefile.in index 8127d5b65..fb0503fdd 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -35,7 +35,7 @@ SUBDIRS = gauche gauche/vm gauche/serializer gauche/interactive gauche/mop \ gauche/regexp \ lang lang/asm lang/c \ srfi srfi/160 srfi-29 srfi/29 srfi/146 \ - srfi-159 srfi-159/internal srfi/159 srfi/160 srfi/172 \ + srfi-159 srfi-159/internal srfi/159 srfi/160 srfi/172 srfi/194 \ srfi/226 srfi/227 \ binary control data dbd dbm math util compat file \ parser parser/peg rfc rfc/http \ @@ -52,6 +52,7 @@ SCMFILES = \ srfi-159/internal/*.sld srfi-159/internal/*.scm srfi/159/*.scm \ srfi/160/*.scm \ srfi/172/*.scm \ + srfi/194/*.scm \ srfi/226/*.scm \ srfi/227/*.scm \ slib.scm \ diff --git a/lib/srfi/194.scm b/lib/srfi/194.scm index 45c46c133..6cc98fe17 100644 --- a/lib/srfi/194.scm +++ b/lib/srfi/194.scm @@ -65,10 +65,10 @@ make-exponential-generator make-geometric-generator make-poisson-generator - ;;make-zipf-generator - ;;make-sphere-generator - ;;make-ellipsoid-generator - ;;make-ball-genrator + make-zipf-generator + make-sphere-generator + make-ellipsoid-generator + make-ball-generator gsampling )) (select-module srfi.194) @@ -191,13 +191,10 @@ (define (make-poisson-generator L) (integers-poisson$ L)) -;; make-zipf-generator +(autoload srfi.194.zipf-zri make-zipf-generator) -;; make-sphere-generator - -;; make-ellipsoid-generator - -;; make-ball-generator +(autoload srfi.194.sphere + make-sphere-generator make-ellipsoid-generator make-ball-generator) (define (gsampling . generators) (match generators diff --git a/lib/srfi/194/sphere.scm b/lib/srfi/194/sphere.scm new file mode 100644 index 000000000..e12690b75 --- /dev/null +++ b/lib/srfi/194/sphere.scm @@ -0,0 +1,126 @@ +;;; +;;; Grafted from srfi-194 reference implementation +;;; Original code by Arvydas Silankas +;;; + +(define-module srfi.194.sphere + (use scheme.vector) + (use srfi.194) + (export make-sphere-generator + make-ellipsoid-generator + make-ball-generator) + ) +(select-module srfi.194.sphere) + +; +; sphere.scm +; Uniform distributions on a sphere, and a ball. +; Submitted for inclusion in srfi-194 +; +; Algorithm based on BoxMeuller as described in +; http://extremelearning.com.au/how-to-generate-uniformly-random-points-on-n-spheres-and-n-balls/ +; + +; make-sphere-generator N - return a generator of points uniformly +; distributed on an N-dimensional sphere. +; This implements the BoxMeuller algorithm, that is, of normalizing +; N+1 Gaussian random variables. +(define (make-sphere-generator arg) + (cond + ((integer? arg) (make-ellipsoid-generator* (make-vector (+ 1 arg) 1.0))) + (else (error "expected argument to be an integer dimension")))) + +(define (make-ellipsoid-generator arg) + (cond + ((vector? arg) (make-ellipsoid-generator* arg)) + (else (error "expected argument to be a vector of axis lengths")))) + +; ----------------------------------------------- +; Generator of points uniformly distributed on an N-dimensional ellipsoid. +; +; The `axes` should be a vector of floats, specifying the axes of the +; ellipsoid. The algorithm used is an accept/reject sampling algo, +; wherein the acceptance rate is proportional to the measure of a +; surface element on the ellipsoid. The measure is straight-forward to +; arrive at, and the 3D case is described by `mercio` in detail at +; https://math.stackexchange.com/questions/973101/how-to-generate-points-uniformly-distributed-on-the-surface-of-an-ellipsoid +; +; Note that sampling means that performance goes as +; O(B/A x C/A x D/A x ...) where `A` is the shorest axis, +; and `B`, `C`, `D`, ... are the other axes. Maximum performance +; achieved on spheres. +; +(define (make-ellipsoid-generator* axes) + + ; A vector of normal gaussian generators + (define gaussg-vec + (make-vector (vector-length axes) (make-normal-generator 0 1))) + + ; Banach l2-norm of a vector + (define (l2-norm VEC) + (sqrt (vector-fold + (lambda (sum x) (+ sum (* x x))) + 0 + VEC))) + + ; Generate one point on a sphere + (define (sph) + ; Sample a point + (define point + (vector-map (lambda (gaussg) (gaussg)) gaussg-vec)) + ; Project it to the unit sphere (make it unit length) + (define norm (/ 1.0 (l2-norm point))) + (vector-map (lambda (x) (* x norm)) point)) + + ; Distance from origin to the surface of the + ; ellipsoid along direction RAY. + (define (ellipsoid-dist RAY) + (sqrt (vector-fold + (lambda (sum x a) (+ sum (/ (* x x) (* a a)))) + 0 RAY axes))) + + ; Find the shortest axis. + (define minor + (vector-fold + (lambda (mino l) (if (< l mino) l mino)) + 1e308 axes)) + + ; Uniform generator [0,1) + (define uni (make-random-real-generator 0.0 1.0)) + + ; Return #t if the POINT can be kept; else must resample. + (define (keep POINT) + (< (uni) (* minor (ellipsoid-dist POINT)))) + + ; Sample until a good point is found. The returned sample is a + ; vector of unit length (we already normed up above). + (define (sample) + (define vect (sph)) + (if (keep vect) vect (sample))) + + (lambda () + ; Find a good point, and rescale to ellipsoid. + (vector-map + (lambda (x a) (* x a)) (sample) axes)) +) + +; ----------------------------------------------- +; make-ball-generator N - return a generator of points uniformly +; distributed inside an N-dimensional ball. +; This implements the Harman-Lacko-Voelker Dropped Coordinate method. +(define (make-ball-generator arg) + (define dim-sizes + (cond + ((integer? arg) (make-vector (+ 2 arg) 1.0)) + ((vector? arg) (vector-append arg (vector 1.0 1.0))) + (else (error "expected argument to either be a number (dimension), or vector (axis length for the dimensions)")))) + (define N (- (vector-length dim-sizes) 2)) + (define sphereg (make-sphere-generator (+ N 2))) + ; Create a vector of N+2 values, and drop the last two. + ; (The sphere already added one, so we only add one more) + (lambda () + (vector-map + (lambda (el dim-size _) (* el dim-size)) + (sphereg) + dim-sizes + (make-vector N #f)))) diff --git a/lib/srfi/194/zipf-zri.scm b/lib/srfi/194/zipf-zri.scm new file mode 100644 index 000000000..e2c11694e --- /dev/null +++ b/lib/srfi/194/zipf-zri.scm @@ -0,0 +1,202 @@ +;;; +;;; Grafted from srfi-194 reference implementation +;;; Original code by Linas Vepstas +;;; + +(define-module srfi.194.zipf-zri + (use srfi.194) + (export make-zipf-generator) + ) +(select-module srfi.194.zipf-zri) + +; +; zipf-zri.scm +; Create a Zipf random distribution. +; +; Created by Linas Vepstas 10 July 2020 +; Nominated for inclusion in srfi-194 +; +; Not optimized for speed! +; +; Implementation from ZRI algorithm presented in the paper: +; "Rejection-inversion to generate variates from monotone discrete +; distributions", Wolfgang Hörmann and Gerhard Derflinger +; ACM TOMACS 6.3 (1996): 169-184 +; +; Hörmann and Derflinger use "q" everywhere, when they really mean "s". +; Thier "q" is not the standard q-series deformation. Its just "s". +; The notation in the code below differs from the article to reflect +; conventional usage. +; +;------------------------------------------------------------------ +; +; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer +; The Zipf distribution is recovered by setting q=0. +; +; The exponent `s` must be a real number not equal to 1. +; Accuracy is diminished for |1-s|< 1e-6. The accuracy is roughly +; equal to 1e-15 / |1-s| where 1e-15 == 64-bit double-precision ULP. +; +(define (make-zipf-generator/zri n s q) + + ; The hat function h(x) = 1 / (x+q)^s + (define (hat x) + (expt (+ x q) (- s))) + + (define _1-s (- 1 s)) + (define oms (/ 1 _1-s)) + + ; The integral of hat(x) + ; H(x) = (x+q)^{1-s} / (1-s) + ; Note that H(x) is always negative. + (define (big-h x) + (/ (expt (+ q x) _1-s) _1-s)) + + ; The inverse function of H(x) + ; H^{-1}(y) = -q + (y(1-s))^{1/(1-s)} + (define (big-h-inv y) + (- (expt (* y _1-s) oms) q)) + + ; Lower and upper bounds for the uniform random generator. + (define big-h-half (- (big-h 1.5) (hat 1))) + (define big-h-n (big-h (+ n 0.5))) + + ; Rejection cut + (define cut (- 1 (big-h-inv (- (big-h 1.5) (hat 1))))) + + ; Uniform distribution + (define dist (make-random-real-generator big-h-half big-h-n)) + + ; Attempt to hit the dartboard. Return #f if we fail, + ; otherwise return an integer between 1 and n. + (define (try) + (define u (dist)) + (define x (big-h-inv u)) + (define kflt (floor (+ x 0.5))) + (define k (exact kflt)) + (if (and (< 0 k) + (or + (<= (- k x) cut) + (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f)) + + ; Did we hit the dartboard? If not, try again. + (define (loop-until) + (define k (try)) + (if k k (loop-until))) + + ; Return the generator. + loop-until) + +;------------------------------------------------------------------ +; +; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer +; The Zipf distribution is recovered by setting q=0. +; +; The exponent `s` must be a real number close to 1. +; Accuracy is diminished for |1-s|> 2e-4. The accuracy is roughly +; equal to 0.05 * |1-s|^4 due to exp(1-s) being expanded to 4 terms. +; +; This handles the special case of s==1 perfectly. +(define (make-zipf-generator/one n s q) + + (define _1-s (- 1 s)) + + ; The hat function h(x) = 1 / (x+q)^s + ; Written for s->1 i.e. 1/(x+q)(x+q)^{s-1} + (define (hat x) + (define xpq (+ x q)) + (/ (expt xpq _1-s) xpq)) + + ; Expansion of exn(y) = [exp(y(1-s))-1]/(1-s) for s->1 + ; Expanded to 4th order. + ; Should equal this: + ;;; (define (exn lg) (/ (- (exp (* _1-s lg)) 1) _1-s)) + ; but more accurate for s near 1.0 + (define (exn lg) + (define (trm n u lg) (* lg (+ 1 (/ (* _1-s u) n)))) + (trm 2 (trm 3 (trm 4 1 lg) lg) lg)) + + ; Expansion of lg(y) = [log(1 + y(1-s))] / (1-s) for s->1 + ; Expanded to 4th order. + ; Should equal this: + ;;; (define (lg y) (/ (log (+ 1 (* y _1-s))) _1-s)) + ; but more accurate for s near 1.0 + (define (lg y) + (define yms (* y _1-s)) + (define (trm n u r) (- (/ 1 n) (* u r))) + (* y (trm 1 yms (trm 2 yms (trm 3 yms (trm 4 yms 0)))))) + + ; The integral of hat(x) defined at s==1 + ; H(x) = [exp{(1-s) log(x+q)} - 1]/(1-s) + ; Should equal this: + ;;; (define (big-h x) (/ (- (exp (* _1-s (log (+ q x)))) 1) _1-s)) + ; but expanded so that it's more accurate for s near 1.0 + (define (big-h x) + (exn (log (+ q x)))) + + ; The inverse function of H(x) + ; H^{-1}(y) = -q + (1 + y(1-s))^{1/(1-s)} + ; Should equal this: + ;;; (define (big-h-inv y) (- (expt (+ 1 (* y _1-s)) (/ 1 _1-s)) q )) + ; but expanded so that it's more accurate for s near 1.0 + (define (big-h-inv y) + (- (exp (lg y)) q)) + + ; Lower and upper bounds for the uniform random generator. + (define big-h-half (- (big-h 1.5) (hat 1))) + + (define big-h-n (big-h (+ n 0.5))) + + ; Rejection cut + (define cut (- 1 (big-h-inv (- (big-h 1.5) (/ 1 (+ 1 q)))))) + + ; Uniform distribution + (define dist (make-random-real-generator big-h-half big-h-n)) + + ; Attempt to hit the dartboard. Return #f if we fail, + ; otherwise return an integer between 1 and n. + (define (try) + (define u (dist)) + (define x (big-h-inv u)) + (define kflt (floor (+ x 0.5))) + (define k (exact kflt)) + (if (and (< 0 k) + (or + (<= (- k x) cut) + (>= u (- (big-h (+ k 0.5)) (hat k))))) k #f)) + + ; Did we hit the dartboard? If not, try again. + (define (loop-until) + (define k (try)) + (if k k (loop-until))) + + ; Return the generator. + loop-until) + +;------------------------------------------------------------------ +; +; (make-zipf-generator n [s [q]]) +; +; The Hurwicz zeta distribution 1 / (k+q)^s for 1 <= k <= n integer +; The Zipf distribution is recovered by setting q=0. +; If `q` is not specified, 0 is assumed. +; If `s` is not specified, 1 is assumed. +; +; Valid for real -10 < s < 100 (otherwise overflows likely) +; Valid for real -0.5 < q < 2e8 (otherwise overflows likely) +; Valid for integer 1 <= k < int-max +; +; Example usage: +; (define zgen (make-zipf-generator 50 1.01 0)) +; (generator->list zgen 10) +; +(define make-zipf-generator + (case-lambda + ((n) + (make-zipf-generator n 1.0 0.0)) + ((n s) + (make-zipf-generator n s 0.0)) + ((n s q) + (if (< 1e-5 (abs (- 1 s))) + (make-zipf-generator/zri n s q) + (make-zipf-generator/one n s q))))) diff --git a/test/srfi.scm b/test/srfi.scm index 52ed89c28..c8e34de04 100644 --- a/test/srfi.scm +++ b/test/srfi.scm @@ -2856,7 +2856,9 @@ (define-module srfi-194-tests (use gauche.test) (use srfi.194) - (test-module 'srfi.194)) + (test-module 'srfi.194) + (test-module 'srfi.194.zipf-zri) + (test-module 'srfi.194.sphere)) ;;----------------------------------------------------------------------- ;; NB: SRFI-196 is tested with data.range