# HG changeset patch
# User Leonard Ritter
# Date 1685005676 -7200
#      Thu May 25 11:07:56 2023 +0200
# Node ID 914704f37ddca54f670e66ebb76acf21e5e2a9e9
# Parent  36dfc0711d9972570e9653412b5e71cb45da1cf2
* specified sources

diff --git a/lib/scopes/compiler/atom.sc b/lib/scopes/compiler/atom.sc
--- a/lib/scopes/compiler/atom.sc
+++ b/lib/scopes/compiler/atom.sc
@@ -8,7 +8,8 @@
 
 enum Atom
 
-# use a flattened merkle tree rather than sequential hash integration
+# use a flattened merkle tree of rolling hashes rather than a sequential
+    rolling hash.
 USE_MERKLE_TREE := false
 
 ###############################
@@ -95,41 +96,44 @@
 ###############################
 
-let HASH-CONSTANT = 0x66d6cf4cc5ddd26d:u64
-let HASH-CONSTANT^-1 = 0x24ffe0c7fcc70765:u64
+# implementation of polynomial rolling hash
+    https://en.wikipedia.org/wiki/Rolling_hash#Polynomial_rolling_hash
+
+let RHASH-CONSTANT = 0x66d6cf4cc5ddd26d:u64
+let RHASH-CONSTANT^-1 = 0x24ffe0c7fcc70765:u64
 
 # precomputed log2 exponents
-global hash-constant-exp =
+global rhash-constant-exp =
     arrayof u64
         va-map
             inline (i)
                 stride := 1:u64 << i as u64
-                HASH-CONSTANT ** stride
+                RHASH-CONSTANT ** stride
             va-range 64
-global hash-constant-exp-rcp =
+global rhash-constant-exp-rcp =
     arrayof u64
         va-map
             inline (i)
                 stride := 1:u64 << i as u64
-                HASH-CONSTANT^-1 ** stride
+                RHASH-CONSTANT^-1 ** stride
             va-range 64
 
-inline hash-integrate (a b)
-    HASH-CONSTANT * a + b
+inline rhash-integrate (a b)
+    RHASH-CONSTANT * a + b
 
-fn hash-constant-pow (k)
+fn rhash-constant-pow (k)
     upper_h := alignsizeu k
     kinv := upper_h - k
     sugar-if false
         # 38% fewer iterations than powi
-        k h map := k, 1:u64, hash-constant-exp
+        k h map := k, 1:u64, rhash-constant-exp
     else
         # 50% fewer iterations than powi
         k h map :=
             if ((bitcount kinv) < (bitcount k))
                 d := findmsb upper_h
-                kinv as u64, copy (hash-constant-exp @ d), hash-constant-exp-rcp
+                kinv as u64, copy (rhash-constant-exp @ d), rhash-constant-exp-rcp
             else
-                k, 1:u64, hash-constant-exp
+                k, 1:u64, rhash-constant-exp
     loop (k h d = k h 0:u64)
         m := k & -k
         if (m == 0:u64)
@@ -140,14 +144,14 @@
         h := h * (map @ d)
         repeat k h d
 
-inline hash-integrate-join (a b bstride)
-    (HASH-CONSTANT ** bstride) * a + b
+inline rhash-integrate-join (a b bstride)
+    (RHASH-CONSTANT ** bstride) * a + b
 
-inline hash-integrate-join-log2 (a b bstridelog2)
-    (hash-constant-exp @ bstridelog2) * a + b
+inline rhash-integrate-join-log2 (a b bstridelog2)
+    (rhash-constant-exp @ bstridelog2) * a + b
 
-inline hash-integral-diff (a b ha hb)
-    hb - ha * (HASH-CONSTANT ** ((b - a) as u64))
+inline rhash-integral-diff (a b ha hb)
+    hb - ha * (RHASH-CONSTANT ** ((b - a) as u64))
 
 ###############################
 
@@ -177,7 +181,7 @@
     HashArrayType := Array u64
 
     keys : ArrayType
-    hashmap : HashArrayType
+    rhash : HashArrayType
     static-if invertible?
         field 'map MapType
         field 'past IndexArrayType
@@ -203,7 +207,7 @@
         cls := typeof self
         x0 := offset // cls.Stride
        x1 := (offset + count + cls.Stride - 1) // cls.Stride
-        hsize := countof self.hashmap
+        hsize := countof self.rhash
         offset := offset // cls.Stride
         stride := alignsizeu (countof self.keys)
         if (not stride)
@@ -216,7 +220,7 @@
                 offset := (index // stride)
                 i := hasharray-offset (stride // cls.Stride) offset
                 if (i < hsize)
-                    self.hashmap @ i = 0:u64
+                    self.rhash @ i = 0:u64
                 repeat (stride // 2)
 
     fn... hashrange (self, offset : usize, size : usize)
@@ -252,11 +256,11 @@
             'hashrange-slow self x0 count
         else
             i := hasharray-offset (stride // cls.Stride) offset
-            if (i >= (countof self.hashmap))
-                'resize self.hashmap (i + 1) 0:u64
-            elseif (self.hashmap @ i != 0:u64)
+            if (i >= (countof self.rhash))
+                'resize self.rhash (i + 1) 0:u64
+            elseif (self.rhash @ i != 0:u64)
                 return
-                    copy (self.hashmap @ i)
+                    copy (self.rhash @ i)
             H := if (stride == cls.Stride)
                 count := min stride ((countof self.keys) - x0)
                 'hashrange-slow self x0 count
@@ -268,13 +272,13 @@
                 L := this-function self level offset
                 R := this-function self level (offset + 1)
-                hash-integrate-join-log2 L R level
-            self.hashmap @ i = H
-            copy (self.hashmap @ i)
+                rhash-integrate-join-log2 L R level
+            self.rhash @ i = H
+            copy (self.rhash @ i)
         else 0:u64
 
 else # not USE_MERKLE_TREE
     inline hashlrange (self x)
-        if (x > 0) (copy (self.hashmap @ (x - 1)))
+        if (x > 0) (copy (self.rhash @ (x - 1)))
         else 0:u64
 
     fn... hashrange (self, offset : usize, size : usize)
@@ -285,7 +289,7 @@
         a b := offset, offset + size
         ha := hashlrange self a
         hb := hashlrange self b
-        h := hash-integral-diff a b ha hb
+        h := rhash-integral-diff a b ha hb
         cls.HashType h
 
     inline __countof (self)
@@ -308,20 +312,20 @@
             the new hash range.
         viewing self
         cls := typeof self
-        inline invalidate_hashmap (pastcount keyscount)
+        inline invalidate_rhash (pastcount keyscount)
             sugar-if USE_MERKLE_TREE
                 'invalidate self pastcount (keyscount - pastcount)
             else
                 h := 'hashlrange self pastcount
                 fold (h) for i in (range pastcount keyscount)
-                    h := hash-integrate h ((cls.HashType (self.keys @ i)) as u64)
-                    'append self.hashmap h
+                    h := rhash-integrate h ((cls.HashType (self.keys @ i)) as u64)
+                    'append self.rhash h
                     h
                 ;
         static-if cls.Invertible?
             pastcount := countof self.past
             keyscount := countof self.keys
-            invalidate_hashmap pastcount keyscount
+            invalidate_rhash pastcount keyscount
             for i in (range pastcount keyscount)
                 key := self.keys @ i
                 j := 'setdefault self.map key NoIndex
@@ -330,7 +334,7 @@
         else
             pastcount := copy self.pastcount
             keyscount := countof self.keys
-            invalidate_hashmap pastcount keyscount
+            invalidate_rhash pastcount keyscount
             self.pastcount = keyscount
             ;
 
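
Note on the renamed primitives: the identities they implement are easy to check in isolation. Below is a minimal sketch in Python (not Scopes); the helper names merely mirror rhash-integrate, rhash-integrate-join and rhash-integral-diff, the toy key sequence is made up, and only the constant 0x66d6cf4cc5ddd26d comes from the patch itself. All arithmetic is taken modulo 2^64 to emulate u64 wraparound, and the prefix list plays the role of self.rhash in the non-merkle-tree path.

    # illustrative sketch, not part of atom.sc
    MASK = (1 << 64) - 1                # emulate u64 wraparound
    C = 0x66d6cf4cc5ddd26d              # RHASH-CONSTANT
    C_INV = pow(C, -1, 1 << 64)         # RHASH-CONSTANT^-1 (0x24ffe0c7fcc70765)

    def rhash_integrate(h, k):
        # append one key to a running hash: h' = C*h + k
        return (C * h + k) & MASK

    def rhash_integrate_join(hl, hr, rlen):
        # hash of a concatenation: hash(L ++ R) = C^len(R) * hash(L) + hash(R)
        return (pow(C, rlen, 1 << 64) * hl + hr) & MASK

    def rhash_integral_diff(a, b, ha, hb):
        # hash of keys[a:b] from the prefix hashes ha = H(a), hb = H(b)
        return (hb - ha * pow(C, b - a, 1 << 64)) & MASK

    keys = [7, 1, 9, 4, 2, 8]           # toy data
    prefix = [0]                        # prefix[i] = hash of keys[:i]
    for k in keys:
        prefix.append(rhash_integrate(prefix[-1], k))

    def direct(lo, hi):
        # hash a range of keys from scratch
        h = 0
        for k in keys[lo:hi]:
            h = rhash_integrate(h, k)
        return h

    assert (C * C_INV) & MASK == 1      # C is odd, hence invertible mod 2^64
    assert rhash_integral_diff(2, 5, prefix[2], prefix[5]) == direct(2, 5)
    assert rhash_integrate_join(direct(0, 3), direct(3, 6), 3) == direct(0, 6)

The second assertion is what the non-merkle hashrange relies on: two lookups into the prefix array plus one constant power replace a rescan of the range. The third is the join rule the flattened merkle tree uses, with rhash-constant-exp caching C^(2^i) so that a power-of-two join costs a single multiply and add.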