fad8758662b9 — Chris Cannam 6 years ago
Pull out pattern-match trie fn from trie fn generally; docs; small rearrangements
8 files changed, 119 insertions(+), 76 deletions(-)

M .hgignore
M README
A => pattern-match-trie-fn.sml
M string-trie-map-fn.sml
A => string-trie-map.sml
M string-trie.sml
M trie-fn.sml
M trie.mlb
M .hgignore +2 -0
@@ 3,3 3,5 @@ test
 *~
 *.deps
 *.orig
+mlmon.out
+mlb-coverage.log

          
M README +63 -39
@@ 2,64 2,88 @@ 
 A persistent trie in Standard ML
 ================================
 
-This is a fairly naive immutable implementation of a trie.
+This is a straightforward immutable implementation of a trie.
 
-A trie is an ordered set container. It stores a set of entries of a
-common type, which is some sort of sequence (vector, list, string
-etc), filing them into a tree structure internally according to their
-common prefixes.
+A trie is an ordered container. It stores a set of entries of a common
+type, which is some sort of sequence (vector, list, string etc),
+filing them into a tree structure internally according to their common
+prefixes.
 
 As with a hash set or tree-backed set structure, membership testing is
-generally fast. Unlike hash and tree sets, a with a trie you can also
+generally fast. Unlike hash and tree sets, with a trie you can also
 quickly test whether any prefix of a given entry appears in the trie,
 obtain the longest such prefix, and find all entries with a given
 prefix. Like a tree-backed set but not a hash set, a trie supports
 enumerating the entries in order.
 
-This is a persistent or immutable implementation in the sense that add
-and remove operations return a separate trie without modifying the one
-passed in.
+A trie can also support a map structure, in which each entry (key) has
+an associated value rather than simply being present or absent. A
+set-type trie can (and is, in this library) be implemented as a map
+from entry to unit type.
 
-There are two fairly naive functor implementations included: the
-ListMapTrieFn simply uses a red-black tree map (from the SML/NJ
-library) at each node, and the ListArrayTrieFn uses an array at each
-node.
+This library provides a persistent or immutable trie, in the sense
+that add and remove operations return a separate trie without
+modifying the one passed in.
+
+The main signatures are:
+
+ * TRIE (trie.sig) - signature of a trie set container with arbitrary
+   entry type
 
-ListMapTrieFn isn't all that memory efficient and is better suited to
-wide, shallow tries than narrow, deep ones. It was written to provide
-indexes for an in-memory RDF triple store, and its performance there
-seems to be acceptable.
+ * TRIE_MAP (trie-map.sig) - signature of a polymorphic-value trie map
+   container with arbitrary key type
 
-ListArrayTrieFn is better suited to narrow, deep tries where elements
-can be compactly mapped onto integer indices for array lookup
-(classically, an 8-bit char element, for a string trie).
+ * PATTERN_MATCH_TRIE (pattern-match-trie.sig) - signature extending
+   TRIE with the capability of matching sequences with wildcards in
+   them. Unlike TRIE, this exposes the types of the individual
+   elements in each entry (e.g. char for a trie with string entries)
 
-This library contains
+ * PATTERN_MATCH_TRIE_MAP (pattern-match-trie-map.sig) - signature
+   extending TRIE_MAP with the capability of matching sequences with
+   wildcards in them, like PATTERN_MATCH_TRIE
 
- * A signature TRIE for tries with arbitrary entry types.
+There are two core trie implementations provided, both of which
+implement trie-maps:
 
- * A signature PATTERN_MATCH_TRIE extending TRIE with the capability
-   of matching sequences with wildcards in them. Unlike TRIE, this
-   exposes the types of the individual elements in each entry
-   (e.g. char for a trie of strings).
+ * ListMTrieMapFn (list-mtrie-map-fn.sml) - a functor that takes a
+   comparable element type and implements a PATTERN_MATCH_TRIE_MAP in
+   which the entry type is a list of that element. The implementation
+   simply uses a red-black tree map (from the SML/NJ library) at each
+   node. This is not very memory-efficient and is better suited to
+   wide, shallow trie structures than narrow, deep ones. Insertion and
+   enumeration are relatively cheap.
 
- * A functor ListMapTrieFn which turns a list type into a structure
-   implementing PATTERN_MATCH_TRIE (and so also implementing TRIE),
-   provided that the list element type is comparable.
+ * ListATrieMapFn (list-atrie-map-fn.sml) - a functor that takes an
+   element type that can be mapped compactly onto a small integer
+   range, and implements a PATTERN_MATCH_TRIE_MAP in which the entry
+   type is a list of that element. The implementation uses an array
+   (actually a vector) at each node. This is better suited to narrow,
+   deep structures with more lookups than insertions, but note that it
+   does not use any kind of compression on the arrays, so memory usage
+   can still be high.
+
+These functors provide set-type trie structures:
 
- * A functor ListArrayTrieFn which turns a list type into a structure
-   implementing PATTERN_MATCH_TRIE (and so also implementing TRIE),
-   provided that the list element type can be mapped compactly onto a
-   small range of integer values for indexing into an array.
+ * TrieFn (trie-fn.sml) - a functor that turns a TRIE_MAP into a TRIE
+
+ * PatternMatchTrieFn (pattern-match-trie-fn.sml) - a functor that
+   turns a PATTERN_MATCH_TRIE_MAP into a PATTERN_MATCH_TRIE
+
+ * ListMTrieFn, ListATrieFn (pattern-match-trie-fn.sml) - shorthands
+   that turn an element type directly into a trie set (the set
+   equivalents of ListMTrieMapFn and ListATrieMapFn)
 
- * Structures StringMapTrie and StringArrayTrie which use
-   ListMapTrieFn and ListArrayTrieFn respectively to make concrete
-   string tries with different performance characteristics. The name
-   StringTrie is also provided as an alias for StringMapTrie.
+And there are specialisations for string entries:
+
+ * StringMTrieMap, StringATrieMap (string-trie-map.sml) - trie-maps
+   with string key type
 
- * Basic unit tests.
+ * StringMTrie, StringATrie (string-trie.sml) - trie sets with string
+   key type
 
-With the MLton compiler, just run
+ * StringTrie - alias for StringATrie
+
+Basic unit tests are provided. With the MLton compiler, run
 
  $ mlton test.mlb && ./test
 

          
A => pattern-match-trie-fn.sml +32 -0
@@ 0,0 1,32 @@ 
+
+functor PatternMatchTrieFn (M : PATTERN_MATCH_TRIE_MAP)
+        :> PATTERN_MATCH_TRIE
+               where type element = M.element where type entry = M.key where type trie = unit M.trie = struct
+
+    structure T = TrieFn(M)
+    open T
+
+    type element = M.element
+    type entry = M.key
+    type pattern = element option list
+
+    fun keysOf kvl =
+        map (fn (k, v) => k) kvl
+
+    fun patternMatch (t, p) =
+        keysOf (M.patternMatch (t, p))
+
+    fun foldlPatternMatch f acc (t, p) =
+        M.foldliPatternMatch (fn (k, v, acc) => f (k, acc)) acc (t, p)
+                             
+end
+                                                                        
+functor ListMTrieFn (E : MTRIE_ELEMENT)
+        :> PATTERN_MATCH_TRIE
+               where type element = E.t where type entry = E.t list =
+    PatternMatchTrieFn(ListMTrieMapFn(E))
+                                                                        
+functor ListATrieFn (E : ATRIE_ELEMENT)
+        :> PATTERN_MATCH_TRIE
+               where type element = E.t where type entry = E.t list =
+    PatternMatchTrieFn(ListATrieMapFn(E))

          
M string-trie-map-fn.sml +0 -14
@@ 72,17 72,3 @@ functor StringTrieMapFn (T : CHAR_LIST_T
                  
 end
     
-structure StringMTrieMap = StringTrieMapFn
-                               (ListMTrieMapFn(struct
-				                type t = char
-				                val compare = Char.compare
-				                end))
-
-structure StringATrieMap = StringTrieMapFn
-                               (ListATrieMapFn(struct
-				                type t = char
-                                                val ord = Char.ord
-                                                val invOrd = Char.chr
-                                                val maxOrd = Char.maxOrd
-                                                end))
-

          
A => string-trie-map.sml +15 -0
@@ 0,0 1,15 @@ 
+
+structure StringMTrieMap = StringTrieMapFn
+                               (ListMTrieMapFn(struct
+				                type t = char
+				                val compare = Char.compare
+				                end))
+
+structure StringATrieMap = StringTrieMapFn
+                               (ListATrieMapFn(struct
+				                type t = char
+                                                val ord = Char.ord
+                                                val invOrd = Char.chr
+                                                val maxOrd = Char.maxOrd
+                                                end))
+

          
M string-trie.sml +2 -2
@@ 5,7 5,7 @@ signature STRING_TRIE = sig
     where type element = char
 end
 
-structure StringMTrie :> STRING_TRIE = TrieFn(StringMTrieMap)
-structure StringATrie :> STRING_TRIE = TrieFn(StringATrieMap)
+structure StringMTrie :> STRING_TRIE = PatternMatchTrieFn(StringMTrieMap)
+structure StringATrie :> STRING_TRIE = PatternMatchTrieFn(StringATrieMap)
 
 structure StringTrie = StringATrie

          
M trie-fn.sml +3 -21
@@ 1,13 1,11 @@ 
 
-functor TrieFn (M : PATTERN_MATCH_TRIE_MAP)
-	:> PATTERN_MATCH_TRIE
-	       where type element = M.element where type entry = M.key = struct
+functor TrieFn (M : TRIE_MAP)
+	:> TRIE
+	       where type entry = M.key where type trie = unit M.trie = struct
 
     open M
 
-    type element = M.element
     type entry = M.key
-    type pattern = element option list
 
     type trie = unit M.trie
     type t = trie

          
@@ 29,21 27,5 @@ functor TrieFn (M : PATTERN_MATCH_TRIE_M
 
     fun foldlPrefixMatch f acc (t, e) =
         M.foldliPrefixMatch (fn (k, v, acc) => f (k, acc)) acc (t, e)
-
-    fun patternMatch (t, p) =
-        keysOf (M.patternMatch (t, p))
-
-    fun foldlPatternMatch f acc (t, p) =
-        M.foldliPatternMatch (fn (k, v, acc) => f (k, acc)) acc (t, p)
                           
 end
-                                                                        
-functor ListMTrieFn (E : MTRIE_ELEMENT)
-        :> PATTERN_MATCH_TRIE
-               where type element = E.t where type entry = E.t list =
-    TrieFn(ListMTrieMapFn(E))
-                                                                        
-functor ListATrieFn (E : ATRIE_ELEMENT)
-        :> PATTERN_MATCH_TRIE
-               where type element = E.t where type entry = E.t list =
-    TrieFn(ListATrieMapFn(E))

          
M trie.mlb +2 -0
@@ 7,5 7,7 @@ pattern-match-trie.sig
 list-mtrie-map-fn.sml
 list-atrie-map-fn.sml
 trie-fn.sml
+pattern-match-trie-fn.sml
 string-trie-map-fn.sml
+string-trie-map.sml
 string-trie.sml