# HG changeset patch # User Malcolm Matalka # Date 1574025891 -3600 # Sun Nov 17 22:24:51 2019 +0100 # Node ID ff6105669e4f6a6302695848038013fb85794752 # Parent 0000000000000000000000000000000000000000 ADD Examples of things that can be done with GADTs diff --git a/.hgignore b/.hgignore new file mode 100644 --- /dev/null +++ b/.hgignore @@ -0,0 +1,6 @@ +syntax: glob + +*~ +*.cmo +*.cmi +a.out diff --git a/src/a_intro.ml b/src/a_intro.ml new file mode 100644 --- /dev/null +++ b/src/a_intro.ml @@ -0,0 +1,55 @@ +(* A standard ADT *) +module Adt1 = struct + type t = + | A of int + | B of string +end + +(* Now with polymorphism. Note that the type will match whatever typed is + passed in, for example: A 2 will have the type int t *) +module Adt2 = struct + type 'a t = + | A of 'a + | B of 'a +end + +(* Same as Adt1 *) +module Gadt1 = struct + type t = + | A : int -> t + | B : string -> t +end + +(* Same as Adt2 *) +module Gadt2 = struct + type _ t = + | A : 'a -> 'a t + | B : 'a -> 'a t +end + +(* Whoaaaaa, now things are different. The G in GADT is what allows this. + Rather than the type having to match what the polymorphic type is, we can + specify it to be whatever we want. This is a simple change with HUGE + consequences. *) +module Gadt3 = struct + type _ t = + | A : int -> string t + | B : string -> float t +end + +(* We can also hide information inside a GADT, this is called an + "existential". *) +module Gadt4 = struct + type t = T : 'a -> t +end + +(* What is a GADT? From the manual: + + Generalized algebraic datatypes, or GADTs, extend usual sum types in two + ways: constraints on type parameters may change depending on the value + constructor, and some type variables may be existentially quantified. Adding + constraints is done by giving an explicit return type (the rightmost typexpr + in the above syntax), where type parameters are instantiated. This return + type must use the same type constructor as the type being defined, and have + the same number of parameters. Variables are made existential when they + appear inside a constructor’s argument, but not in its return type. *) diff --git a/src/b_performance_no_workie.ml b/src/b_performance_no_workie.ml new file mode 100644 --- /dev/null +++ b/src/b_performance_no_workie.ml @@ -0,0 +1,49 @@ +(* This example is taken from + https://blog.janestreet.com/why-gadts-matter-for-performance/ *) + +(* The objective here is to control the memory representation of a type for, in + this case, performance reasons. Here we have a container and when it's + containing char's, we want to use bytes instead of an array *) +module Arr = struct + type 'a t = + | Array of 'a array + | Bytes of bytes + + let get t idx = + match t with + | Array arr -> Array.get arr idx + | Bytes b -> Bytes.get b idx + + let set t idx v = + match t with + | Array arr -> Array.set arr idx v + | Bytes b -> Bytes.set b idx v +end + +(* Byte this has a problem, the inferred type is: + +module Arr : + sig + type 'a t = Array of 'a array | Bytes of bytes + val get : char t -> int -> char + val set : char t -> int -> char -> unit + end + +*) + +(* Note that get and set are wrong. And using an ADT we are unable to get more + generic. We could try specifying a type, for example: + + let get : type el. el t -> int -> el = fun t idx -> ... + + However, this will not compile: + + Error: This expression has type char but an expression was expected of type + el + + The problem is that in the match, both paths are required to have the same + constraints in ADTs. And because the Bytes path is restricted to the "char" + type, the Array path must also have the same restriction. + + If only we had a tool for allow the constraints on types to differ depending + on the value constructor we match against... *) diff --git a/src/b_performance_workie.ml b/src/b_performance_workie.ml new file mode 100644 --- /dev/null +++ b/src/b_performance_workie.ml @@ -0,0 +1,29 @@ +(* This example is taken from + https://blog.janestreet.com/why-gadts-matter-for-performance/ *) + +(* By using GADTs, however, we can accomplish what we want. This is because + GADTs let the constraints on types change depending on the value constructor. + Note, we have to use "locally abstract types" to reify the type from the + value constructor. This says that there is a type, in this case "el" that + exists solely in this scope, and then the match will refine that type more. + + So how does the below work? Take "get": if the type is "char t" then we know + that no matter if Array or Bytes matches, then the type of the return will be + char. No problem. But the compiler also knows that if the type is *not* + "char t", then the "Bytes" path will never be followed, so what that branch + would return doesn't matter. *) +module Arr = struct + type _ t = + | Array : 'a array -> 'a t + | Bytes : bytes -> char t + + let get (type el) : el t -> int -> el = fun t idx -> + match t with + | Array arr -> Array.get arr idx + | Bytes b -> Bytes.get b idx + + let set (type el) : el t -> int -> el -> unit = fun t idx v -> + match t with + | Array arr -> Array.set arr idx v + | Bytes b -> Bytes.set b idx v +end diff --git a/src/c_existentials.ml b/src/c_existentials.ml new file mode 100644 --- /dev/null +++ b/src/c_existentials.ml @@ -0,0 +1,131 @@ +(* A type variable is existentially qualified if there is a type variable in the + GADT does not appear on the right-most type expression. This is type + variable can only exist in the scope where the GADT is being evaluated can + never exist that scope. This lets one do some interesting things, one of + them being: Objects! + + What is an object? At its core it's a tuple of: a dispatch table (often + called a vtable), and an opaque piece of data. + + Note, first-class modules actually provide all of this functionality as well, + and it's a bit less messy. But this demonstrates one can do it with + GADTs. *) +module Person : sig + + (* A Person object has the following four operations. *) + module Vtable : sig + type 'a t = { name : 'a -> string + ; age : 'a -> int + ; set_name : 'a -> string -> unit + ; set_age : 'a -> int -> unit + } + end + + type t + + (* We can make a person object with a name and age *) + val make : name:string -> age:int -> t + + (* We can also "subclass" a person object by providing a new vtable + implementations that corresponds to some data. *) + val subclass : 'a Vtable.t -> 'a -> t + + val name : t -> string + val age : t -> int + val set_name : t -> string -> unit + val set_age : t -> int -> unit +end = struct + module Vtable = struct + type 'a t = { name : 'a -> string + ; age : 'a -> int + ; set_name : 'a -> string -> unit + ; set_age : 'a -> int -> unit + } + end + + type t = Person : ('a Vtable.t * 'a) -> t + + (* Our opaque data for this person object is a record with the name and + age. *) + type p = { mutable n : string + ; mutable a : int + } + + + let subclass vtable v = Person (vtable, v) + + (* A Person object's vtable. *) + let vtable = Vtable.({ name = (fun p -> p.n) + ; set_name = (fun p n -> p.n <- n) + ; age = (fun p -> p.a) + ; set_age = (fun p a -> p.a <- a) + }) + + let make ~name ~age = subclass vtable {n = name; a = age} + + let name (Person (vtable, v)) = vtable.Vtable.name v + let set_name (Person (vtable, v)) n = vtable.Vtable.set_name v n + + let age (Person (vtable, v)) = vtable.Vtable.age v + let set_age (Person (vtable, v)) a = vtable.Vtable.set_age v a +end + +(* Here we create a Person with a different vtable, but the underlying data type + is a person. This person's age maxes out at 21. *) +module Forever_21 = struct + let make ~name ~age = + let p = Person.make ~name ~age in + let always_young p = + match Person.age p with + | a when a <= 21 -> a + | _ -> 21 + in + let f21 = Person.Vtable.({ name = Person.name + ; set_name = Person.set_name + ; age = always_young + ; set_age = Person.set_age + }) + in + Person.subclass f21 p +end + +(* Here we create something slightly more sophisticated than a Person, we have a + new value called a title. Setting the name updates the name like a regular + Person, however getting the name includes the title. + + Since [make] is returning a person, we provide no mechanism to modify the + title. To do that we would have needed to create a new vtable that includes + a get/set title function, and then perhaps a way to "upcast" to a Person. *) +module Royalty : sig + val make : name:string -> age:int -> title:string -> Person.t +end = struct + type t = { mutable n : string + ; mutable a : int + ; mutable t : string + } + + let make ~name ~age ~title = + let t = { n = name; a = age; t = title } in + let vtable = Person.Vtable.({ name = (fun t -> Printf.sprintf "%s the %s" t.n t.t) + ; set_name = (fun t n -> t.n <- n) + ; age = (fun t -> t.a) + ; set_age = (fun t a -> t.a <- a) + }) + in + Person.subclass vtable t +end + +let () = + let p = Person.make ~name:"Ronald McDonald" ~age:30 in + Printf.printf "Hello %s\n" (Person.name p); + let forever_young = Forever_21.make ~name:"Larry Ellison" ~age:54 in + Printf.printf + "Hello %s, are you really %d years old???\n" + (Person.name forever_young) + (Person.age forever_young); + Person.set_age forever_young 18; + Printf.printf "Your new age is: %d\n" (Person.age forever_young); + let duke = Royalty.make ~name:"Fredinando" ~age:23 ~title:"Duke of Lisbon" in + Printf.printf "Hello %s\n" (Person.name duke) + + diff --git a/src/d_string_extraction.ml b/src/d_string_extraction.ml new file mode 100644 --- /dev/null +++ b/src/d_string_extraction.ml @@ -0,0 +1,135 @@ +(* The objective is to be able to define a pattern which extracts values from a + string, and that pattern corresponds to a function type. If an input string + matches that pattern, we can call a function that matches that function type + with the values extracted. + + For example, if we had a pattern that looks like (in psuedo code): + + "hello !" + + This would correspond to a function that has the type (string -> '_weak) + + If we then apply that pattern to a string: "hello Mr. Rogers!" we could apply + it to a function like print_endline. + + In this library, this would be expressed like: + + let pattern = s / "hello " /% Var.until '!' / "!" in + let str = "hello Mr. Rogers!" in + match extract str pattern print_endline with + | Some () -> (* great success *) + | None -> (* pattern match failed *) *) +module Str_ext : sig + module Var : sig + type 'a t + + val digit : int t + val int : int t + val until : char -> string t + end + + type ('f, 'r) t + + val s : ('r, 'r) t + + val (/) : ('f, 'r) t -> string -> ('f, 'r) t + + val (/%) : ('f, 'a -> 'r) t -> 'a Var.t -> ('f, 'r) t + + val extract : string -> ('f, 'r) t -> 'f -> 'r option +end = struct + module Var = struct + type 'a t = (int -> string -> (int * 'a) option) + + let digit p s = + if p < String.length s then + match s.[p] with + | '0'..'9' as d -> Some (p + 1, int_of_string (String.make 1 d)) + | _ -> None + else + None + + let int p s = failwith "nyi" + let until chr p s = failwith "nyi" + end + + type (_, _) t = + | S : ('r, 'r) t + | Const : (('f, 'r) t * string) -> ('f, 'r) t + | Var : (('f, 'a -> 'r) t * 'a Var.t) -> ('f, 'r) t + + let s = S + let (/) t str = Const (t, str) + let (/%) t v = Var (t, v) + + (* A little option bind operator for simplicity *) + let (>>=) t f = + match t with + | Some v -> f v + | None -> None + + (* [bind] takes a string, a data structure describing what the string needs to + look like, a function that matches that data structure, and returns an + option. On success, the value of the option will be the mount of string + consumed and the value which applying the function evaluated to. + + Note: There is one bug in here based on how this is implemented: the + function is applied as the variables are picked off the string, which means + if the suffix of the string does not match a string constant, this function + will fail but the function will have been entirely applied. + + So, how does this work? Well let's start with the base case: + + t is S : *) + let rec bind : type f r. string -> (f, r) t -> f -> (int * r) option = fun str t f -> + match t with + | S -> + Some (0, f) + | Const (t, s) -> + bind str t f + >>= fun (pos, f) -> + let l = String.length str - pos in + let sub = String.sub str pos (min l (String.length s)) in + if sub = s then + Some (pos + String.length sub, f) + else + None + | Var (t, v) -> + bind str t f + >>= fun (pos, f) -> + v pos str + >>= fun (pos', r) -> + Some (pos', f r) + + let extract str t f = + bind str t f + >>= fun (_, r) -> + (* There is another bug here where we do not check if the whole string has + been consumed. *) + Some r +end + +let () = + (* (int -> '_weak1, '_weak1) Str_ext.t *) + let t = Str_ext.(s / "some number: " /% Var.digit) in + ignore (Str_ext.extract "some number: 9" t (fun n -> Printf.printf "What? You're on cloud %d?\n" n)); + (match Str_ext.extract "some number: " t (fun _ -> print_endline "I don't think this will work") with + | Some () -> failwith "SHOULD NOT REACH HERE" + | None -> print_endline "whomp whomp, string did not match"); + + (*(int -> int -> '_weak2, '_weak2) Str_ext.t *) + let t = Str_ext.(s / "some number: " /% Var.digit / " another number: " /% Var.digit) in + let str = "some number: 9 another number: 1" in + (match Str_ext.extract str t (Printf.sprintf "Your numbers combined are %d%d") with + | Some res -> print_endline res + | None -> assert false) + +(* And the following won't even compile: + + ignore (Str_ext.extract "some number: 9" t (Printf.printf "%s")) + + + Error: This expression has type string -> unit + but an expression was expected of type int -> unit + Type string is not compatible with type int +*)