ADD Examples of things that can be done with GADTs
6 files changed, 405 insertions(+), 0 deletions(-)

A => .hgignore
A => src/a_intro.ml
A => src/b_performance_no_workie.ml
A => src/b_performance_workie.ml
A => src/c_existentials.ml
A => src/d_string_extraction.ml
A => .hgignore +6 -0
@@ 0,0 1,6 @@ 
+syntax: glob
+
+*~
+*.cmo
+*.cmi
+a.out

          
A => src/a_intro.ml +55 -0
@@ 0,0 1,55 @@ 
+(* A standard ADT *)
+module Adt1 = struct
+  type t =
+    | A of int
+    | B of string
+end
+
+(* Now with polymorphism.  Note that the type will match whatever typed is
+   passed in, for example: A 2 will have the type int t *)
+module Adt2 = struct
+  type 'a t =
+    | A of 'a
+    | B of 'a
+end
+
+(* Same as Adt1 *)
+module Gadt1 = struct
+  type t =
+    | A : int -> t
+    | B : string -> t
+end
+
+(* Same as Adt2 *)
+module Gadt2 = struct
+  type _ t =
+    | A : 'a -> 'a t
+    | B : 'a -> 'a t
+end
+
+(* Whoaaaaa, now things are different.  The G in GADT is what allows this.
+   Rather than the type having to match what the polymorphic type is, we can
+   specify it to be whatever we want.  This is a simple change with HUGE
+   consequences. *)
+module Gadt3 = struct
+  type _ t =
+    | A : int -> string t
+    | B : string -> float t
+end
+
+(* We can also hide information inside a GADT, this is called an
+   "existential". *)
+module Gadt4 = struct
+  type t = T : 'a -> t
+end
+
+(* What is a GADT?  From the manual:
+
+   Generalized algebraic datatypes, or GADTs, extend usual sum types in two
+   ways: constraints on type parameters may change depending on the value
+   constructor, and some type variables may be existentially quantified. Adding
+   constraints is done by giving an explicit return type (the rightmost typexpr
+   in the above syntax), where type parameters are instantiated. This return
+   type must use the same type constructor as the type being defined, and have
+   the same number of parameters. Variables are made existential when they
+   appear inside a constructor’s argument, but not in its return type. *)

          
A => src/b_performance_no_workie.ml +49 -0
@@ 0,0 1,49 @@ 
+(* This example is taken from
+   https://blog.janestreet.com/why-gadts-matter-for-performance/ *)
+
+(* The objective here is to control the memory representation of a type for, in
+   this case, performance reasons.  Here we have a container and when it's
+   containing char's, we want to use bytes instead of an array *)
+module Arr = struct
+  type 'a t =
+    | Array of 'a array
+    | Bytes of bytes
+
+  let get t idx =
+    match t with
+      | Array arr -> Array.get arr idx
+      | Bytes b -> Bytes.get b idx
+
+  let set t idx v =
+    match t with
+      | Array arr -> Array.set arr idx v
+      | Bytes b -> Bytes.set b idx v
+end
+
+(* Byte this has a problem, the inferred type is:
+
+module Arr :
+  sig
+    type 'a t = Array of 'a array | Bytes of bytes
+    val get : char t -> int -> char
+    val set : char t -> int -> char -> unit
+  end
+
+*)
+
+(* Note that get and set are wrong.  And using an ADT we are unable to get more
+   generic.  We could try specifying a type, for example:
+
+   let get : type el. el t -> int -> el = fun t idx -> ...
+
+   However, this will not compile:
+
+   Error: This expression has type char but an expression was expected of type
+   el
+
+   The problem is that in the match, both paths are required to have the same
+   constraints in ADTs.  And because the Bytes path is restricted to the "char"
+   type, the Array path must also have the same restriction.
+
+   If only we had a tool for allow the constraints on types to differ depending
+   on the value constructor we match against... *)

          
A => src/b_performance_workie.ml +29 -0
@@ 0,0 1,29 @@ 
+(* This example is taken from
+   https://blog.janestreet.com/why-gadts-matter-for-performance/ *)
+
+(* By using GADTs, however, we can accomplish what we want.  This is because
+   GADTs let the constraints on types change depending on the value constructor.
+   Note, we have to use "locally abstract types" to reify the type from the
+   value constructor.  This says that there is a type, in this case "el" that
+   exists solely in this scope, and then the match will refine that type more.
+
+   So how does the below work?  Take "get": if the type is "char t" then we know
+   that no matter if Array or Bytes matches, then the type of the return will be
+   char.  No problem.  But the compiler also knows that if the type is *not*
+   "char t", then the "Bytes" path will never be followed, so what that branch
+   would return doesn't matter. *)
+module Arr = struct
+  type _ t =
+    | Array : 'a array -> 'a t
+    | Bytes : bytes -> char t
+
+  let get (type el) : el t -> int -> el = fun t idx ->
+    match t with
+      | Array arr -> Array.get arr idx
+      | Bytes b -> Bytes.get b idx
+
+  let set (type el) : el t -> int -> el -> unit = fun t idx v ->
+    match t with
+      | Array arr -> Array.set arr idx v
+      | Bytes b -> Bytes.set b idx v
+end

          
A => src/c_existentials.ml +131 -0
@@ 0,0 1,131 @@ 
+(* A type variable is existentially qualified if there is a type variable in the
+   GADT does not appear on the right-most type expression.  This is type
+   variable can only exist in the scope where the GADT is being evaluated can
+   never exist that scope.  This lets one do some interesting things, one of
+   them being: Objects!
+
+   What is an object?  At its core it's a tuple of: a dispatch table (often
+   called a vtable), and an opaque piece of data.
+
+   Note, first-class modules actually provide all of this functionality as well,
+   and it's a bit less messy.  But this demonstrates one can do it with
+   GADTs. *)
+module Person : sig
+
+  (* A Person object has the following four operations. *)
+  module Vtable : sig
+    type 'a t = { name : 'a -> string
+                ; age : 'a -> int
+                ; set_name : 'a -> string -> unit
+                ; set_age : 'a -> int -> unit
+                }
+  end
+
+  type t
+
+  (* We can make a person object with a name and age *)
+  val make : name:string -> age:int -> t
+
+  (* We can also "subclass" a person object by providing a new vtable
+     implementations that corresponds to some data. *)
+  val subclass : 'a Vtable.t -> 'a -> t
+
+  val name : t -> string
+  val age : t -> int
+  val set_name : t -> string -> unit
+  val set_age : t -> int -> unit
+end = struct
+  module Vtable = struct
+    type 'a t = { name : 'a -> string
+                ; age : 'a -> int
+                ; set_name : 'a -> string -> unit
+                ; set_age : 'a -> int -> unit
+                }
+  end
+
+  type t = Person : ('a Vtable.t * 'a) -> t
+
+  (* Our opaque data for this person object is a record with the name and
+     age. *)
+  type p = { mutable n : string
+           ; mutable a : int
+           }
+
+
+  let subclass vtable v = Person (vtable, v)
+
+  (* A Person object's vtable. *)
+  let vtable = Vtable.({ name = (fun p -> p.n)
+                       ; set_name = (fun p n -> p.n <- n)
+                       ; age = (fun p -> p.a)
+                       ; set_age = (fun p a -> p.a <- a)
+                       })
+
+  let make ~name ~age = subclass vtable {n = name; a = age}
+
+  let name (Person (vtable, v)) = vtable.Vtable.name v
+  let set_name (Person (vtable, v)) n = vtable.Vtable.set_name v n
+
+  let age (Person (vtable, v)) = vtable.Vtable.age v
+  let set_age (Person (vtable, v)) a = vtable.Vtable.set_age v a
+end
+
+(* Here we create a Person with a different vtable, but the underlying data type
+   is a person.  This person's age maxes out at 21. *)
+module Forever_21 = struct
+  let make ~name ~age =
+    let p = Person.make ~name ~age in
+    let always_young p =
+      match Person.age p with
+        | a when a <= 21 -> a
+        | _ -> 21
+    in
+    let f21 = Person.Vtable.({ name = Person.name
+                             ; set_name = Person.set_name
+                             ; age = always_young
+                             ; set_age = Person.set_age
+                             })
+    in
+    Person.subclass f21 p
+end
+
+(* Here we create something slightly more sophisticated than a Person, we have a
+   new value called a title.  Setting the name updates the name like a regular
+   Person, however getting the name includes the title.
+
+   Since [make] is returning a person, we provide no mechanism to modify the
+   title.  To do that we would have needed to create a new vtable that includes
+   a get/set title function, and then perhaps a way to "upcast" to a Person. *)
+module Royalty : sig
+  val make : name:string -> age:int -> title:string -> Person.t
+end = struct
+  type t = { mutable n : string
+           ; mutable a : int
+           ; mutable t : string
+           }
+
+  let make ~name ~age ~title =
+    let t = { n = name; a = age;  t = title } in
+    let vtable = Person.Vtable.({ name = (fun t -> Printf.sprintf "%s the %s" t.n t.t)
+                                ; set_name = (fun t n -> t.n <- n)
+                                ; age = (fun t -> t.a)
+                                ; set_age = (fun t a -> t.a <- a)
+                                })
+    in
+    Person.subclass vtable t
+end
+
+let () =
+  let p = Person.make ~name:"Ronald McDonald" ~age:30 in
+  Printf.printf "Hello %s\n" (Person.name p);
+  let forever_young = Forever_21.make ~name:"Larry Ellison" ~age:54 in
+  Printf.printf
+    "Hello %s, are you really %d years old???\n"
+    (Person.name forever_young)
+    (Person.age forever_young);
+  Person.set_age forever_young 18;
+  Printf.printf "Your new age is: %d\n" (Person.age forever_young);
+  let duke = Royalty.make ~name:"Fredinando" ~age:23 ~title:"Duke of Lisbon" in
+  Printf.printf "Hello %s\n" (Person.name duke)
+
+

          
A => src/d_string_extraction.ml +135 -0
@@ 0,0 1,135 @@ 
+(* The objective is to be able to define a pattern which extracts values from a
+   string, and that pattern corresponds to a function type.  If an input string
+   matches that pattern, we can call a function that matches that function type
+   with the values extracted.
+
+   For example, if we had a pattern that looks like (in psuedo code):
+
+   "hello <string>!"
+
+   This would correspond to a function that has the type (string -> '_weak)
+
+   If we then apply that pattern to a string: "hello Mr. Rogers!" we could apply
+   it to a function like print_endline.
+
+   In this library, this would be expressed like:
+
+   let pattern = s / "hello " /% Var.until '!' / "!" in
+   let str = "hello Mr. Rogers!" in
+   match extract str pattern print_endline with
+      | Some () -> (* great success *)
+      | None -> (* pattern match failed *) *)
+module Str_ext : sig
+  module Var : sig
+    type 'a t
+
+    val digit : int t
+    val int : int t
+    val until : char -> string t
+  end
+
+  type ('f, 'r) t
+
+  val s : ('r, 'r) t
+
+  val (/) : ('f, 'r) t -> string -> ('f, 'r) t
+
+  val (/%) : ('f, 'a -> 'r) t -> 'a Var.t -> ('f, 'r) t
+
+  val extract : string -> ('f, 'r) t -> 'f -> 'r option
+end = struct
+  module Var = struct
+    type 'a t = (int -> string -> (int * 'a) option)
+
+    let digit p s =
+      if p < String.length s then
+        match s.[p] with
+          | '0'..'9' as d -> Some (p + 1, int_of_string (String.make 1 d))
+          | _ -> None
+      else
+        None
+
+    let int p s = failwith "nyi"
+    let until chr p s = failwith "nyi"
+  end
+
+  type (_, _) t =
+    | S : ('r, 'r) t
+    | Const : (('f, 'r) t * string) -> ('f, 'r) t
+    | Var : (('f, 'a -> 'r) t * 'a Var.t) -> ('f, 'r) t
+
+  let s = S
+  let (/) t str = Const (t, str)
+  let (/%) t v = Var (t, v)
+
+  (* A little option bind operator for simplicity *)
+  let (>>=) t f =
+    match t with
+      | Some v -> f v
+      | None -> None
+
+  (* [bind] takes a string, a data structure describing what the string needs to
+     look like, a function that matches that data structure, and returns an
+     option.  On success, the value of the option will be the mount of string
+     consumed and the value which applying the function evaluated to.
+
+     Note: There is one bug in here based on how this is implemented: the
+     function is applied as the variables are picked off the string, which means
+     if the suffix of the string does not match a string constant, this function
+     will fail but the function will have been entirely applied.
+
+     So, how does this work?  Well let's start with the base case:
+
+     t is S : *)
+  let rec bind : type f r. string -> (f, r) t -> f -> (int * r) option = fun str t f ->
+    match t with
+      | S ->
+        Some (0, f)
+      | Const (t, s) ->
+        bind str t f
+        >>= fun (pos, f) ->
+        let l = String.length str - pos in
+        let sub = String.sub str pos (min l (String.length s)) in
+        if sub = s then
+          Some (pos + String.length sub, f)
+        else
+          None
+      | Var (t, v) ->
+        bind str t f
+        >>= fun (pos, f) ->
+        v pos str
+        >>= fun (pos', r) ->
+        Some (pos', f r)
+
+  let extract str t f =
+    bind str t f
+    >>= fun (_, r) ->
+    (* There is another bug here where we do not check if the whole string has
+       been consumed. *)
+    Some r
+end
+
+let () =
+  (*  (int -> '_weak1, '_weak1) Str_ext.t *)
+  let t = Str_ext.(s / "some number: " /% Var.digit) in
+  ignore (Str_ext.extract "some number: 9" t (fun n -> Printf.printf "What?  You're on cloud %d?\n" n));
+  (match Str_ext.extract "some number: " t (fun _ -> print_endline "I don't think this will work") with
+    | Some () -> failwith "SHOULD NOT REACH HERE"
+    | None -> print_endline "whomp whomp, string did not match");
+
+  (*(int -> int -> '_weak2, '_weak2) Str_ext.t *)
+  let t = Str_ext.(s / "some number: " /% Var.digit / " another number: " /% Var.digit) in
+  let str = "some number: 9 another number: 1" in
+  (match Str_ext.extract str t (Printf.sprintf "Your numbers combined are %d%d") with
+    | Some res -> print_endline res
+    | None -> assert false)
+
+(* And the following won't even compile:
+
+   ignore (Str_ext.extract "some number: 9" t (Printf.printf "%s"))
+
+
+   Error: This expression has type string -> unit
+       but an expression was expected of type int -> unit
+       Type string is not compatible with type int
+*)