[MLton-commit] r4983
Wesley Terpstra
wesley at mlton.org
Mon Dec 18 18:56:07 PST 2006
my collection of SML libs, only half finished mostly
----------------------------------------------------------------------
A mltonlib/trunk/ca/terpstra/regexp/
A mltonlib/trunk/ca/terpstra/regexp/README
A mltonlib/trunk/ca/terpstra/regexp/automata.fun
A mltonlib/trunk/ca/terpstra/regexp/automata.mlb
A mltonlib/trunk/ca/terpstra/regexp/automata.sig
A mltonlib/trunk/ca/terpstra/regexp/btree.sml
A mltonlib/trunk/ca/terpstra/regexp/compare.dot
A mltonlib/trunk/ca/terpstra/regexp/compare.mlb
A mltonlib/trunk/ca/terpstra/regexp/compare.sml
A mltonlib/trunk/ca/terpstra/regexp/todot.mlb
A mltonlib/trunk/ca/terpstra/regexp/todot.sml
A mltonlib/trunk/ca/terpstra/regexp/ztree.sml
----------------------------------------------------------------------
Added: mltonlib/trunk/ca/terpstra/regexp/README
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/README 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/README 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,21 @@
+This is just a quick implementation of finite automata.
+
+It includes:
+ a binary tree implementation
+ a "z-tree" which stores intervals instead of point values
+ a regular expression parser
+ methods for converting regular expressions to NFAs to DFAs
+
+It's fairly self-documenting in the file automata.sig.
+
+Included examples are:
+
+1. a program which compiles a regular expression to a minimal DFA
+ represented as a file suitable for consumption by dot.
+
+2. a program comparing two regular expressions to each other.
+ it provides example strings matched by one and/or not the other.
+
+Compile with:
+ mlton compare.mlb
+ mlton todot.mlb
Added: mltonlib/trunk/ca/terpstra/regexp/automata.fun
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/automata.fun 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/automata.fun 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,665 @@
+signature ALPHABET =
+ sig
+ eqtype char
+ eqtype string
+
+ val ord: char -> int
+ val chr: int -> char
+
+ val < : (char * char) -> bool
+ val foldl: (char * 'a -> 'a) -> 'a -> string -> 'a
+ end
+
+structure Alphabet =
+ struct
+ type char = char
+ type string = string
+
+ val ord = Char.ord
+ val chr = Char.chr
+
+ val (op <) = Char.<
+ fun foldl f a s = Substring.foldl f a (Substring.full s)
+ end
+
+functor Automata(Alphabet : ALPHABET) : AUTOMATA
+ where type char = Alphabet.char
+ and type ZTree.key = Alphabet.char
+ and type string = Alphabet.string =
+ struct
+ structure AlphaOrder =
+ struct
+ type t = Alphabet.char
+ val (op <) = Alphabet.<
+ end
+ structure StateOrder =
+ struct
+ type t = int
+ val (op <) = Int.<
+ end
+ structure ZTree = ZTree(AlphaOrder)
+ structure BTree = BTree(StateOrder)
+ open Alphabet
+
+ fun printSML (f, i, ZTree.Leaf v, tail) = f v :: tail
+ | printSML (f, i, ZTree.Node (l, k, r), tail) =
+ "\n" :: i :: "if c < chr " :: Int.toString (ord k) :: " then " ::
+ printSML (f, i ^ " ", l,
+ "\n" :: i :: "else " ::
+ printSML (f, i ^ " ", r, tail))
+
+ fun toString c = "state" ^ Int.toString c
+ fun printC (i, ZTree.Leaf v, tail) = "goto " :: toString v :: ";" :: tail
+ | printC (i, ZTree.Node (l, k, r), tail) =
+ "\n" :: i ::
+ "if (*s < " :: (Int.toString o ord) k :: ") " ::
+ printC (i ^ "\t", l,
+ "\n" :: i :: "else " ::
+ printC (i ^ "\t", r, tail))
+
+ fun dotNode (i, (b, _), tail) =
+ "\t" :: Int.toString i :: " [label=\"\"" ::
+ (if i = 0 then ",shape=diamond" else "") ::
+ (if b then ",fillcolor=green" else "") ::
+ "]\n" :: tail
+
+ structure Deterministic =
+ struct
+ type state = int
+ type t = (bool * state ZTree.t) vector
+
+ fun size a = Vector.length a
+ fun start _ = 0
+ fun accepts a x = case Vector.sub (a, x) of (b, _) => b
+ fun step a (c, x) = ZTree.lookup (#2 (Vector.sub (a, x))) c
+ fun multistep a (s, x) = foldl (step a) x s
+ fun test a s = accepts a (multistep a (s, start a))
+
+ val empty = Vector.fromList [
+ (true, ZTree.uniform 1),
+ (false, ZTree.uniform 1) ]
+ val any = Vector.fromList [
+ (false, ZTree.uniform 1),
+ (true, ZTree.uniform 2),
+ (false, ZTree.uniform 2) ]
+ fun char t = Vector.fromList [
+ (false, ZTree.map (fn true => 1 | false => 2) t),
+ (true, ZTree.uniform 2),
+ (false, ZTree.uniform 2) ]
+
+ fun mapPair f (x, y) = (f x, f y)
+
+ (* f maps old state to new, n is the number of cells to keep *)
+ fun mapStates (f, n) a =
+ let
+ open ZTree
+ val v = Array.tabulate (Vector.length a, fn _ => (true, uniform 0))
+ val fixtree = fromFront o uniq (op =) o imap f o front
+ fun map (i, (b, t)) = Array.update (v, f i, (b, fixtree t))
+ val () = Vector.appi map a
+ in
+ Vector.tabulate (n, fn i => Array.sub (v, i))
+ end
+
+ (* eliminate unreachable states -- and put states in canonical order *)
+ fun unreachable a =
+ let
+ val l = Vector.length a
+ val v = Array.tabulate (l, fn _ => false)
+ val m = Array.tabulate (l, fn _ => l - 1)
+ val e = ref 0
+ fun dfs i =
+ if Array.sub (v, i) then () else (
+ Array.update (v, i, true);
+ Array.update (m, i, !e);
+ e := (!e + 1);
+ ZTree.app dfs (#2 (Vector.sub (a, i)))
+ )
+ val () = dfs 0
+ in
+ mapStates (fn i => Array.sub (m, i), !e) a
+ end
+
+ (* detect and merge duplicate states *)
+ fun finddups a =
+ let
+ val len = size a
+ fun toPair i = (i mod len, i div len)
+ fun ofPair (r, c) = len * c + r
+ fun agree (r, c) = accepts a r = accepts a c
+ val v = Array.tabulate (len*len, agree o toPair)
+
+ open ZTree
+ fun tree i = #2 (Vector.sub (a, i))
+ fun fold a (Iter (b, NONE, _)) = b andalso a
+ | fold a (Iter (b, SOME _, iter)) = fold (b andalso a) (iter ())
+ fun match (r, c) = Array.sub (v, ofPair (r, c))
+ fun distinct (i, b) = b andalso
+ (fold true o merge match o mapPair (front o tree) o toPair) i
+
+ val changed = ref true
+ fun update (i, b) = let val n = distinct (i, b) in
+ (changed := (!changed orelse (n <> b)); n) end
+ fun pass () = Array.modifyi update v
+ val () = while (!changed) do (changed := false; pass ())
+
+ (* m stores new state name *)
+ val m = Array.tabulate (len, fn _ => 0)
+ val e = ref 0
+ fun whoAmI (i, j) =
+ if Array.sub (v, ofPair (i, j)) then j else whoAmI (i, j+1)
+ fun setState (i, _) =
+ let val j = whoAmI (i, 0) in
+ if i = j then (!e before e := (!e + 1))
+ else Array.sub (m, j)
+ end
+ val () = Array.modifyi setState m
+ in
+ mapStates (fn i => Array.sub (m, i), !e) a
+ end
+
+ (* the second unreachable step puts the DFA in canonical order *)
+ val optimize = unreachable o finddups o unreachable
+
+ (* more interesting would be to output an example difference *)
+ fun equal (v1, v2) = Vector.foldli
+ (fn (i, (b1, t1), a) =>
+ case Vector.sub (v2, i) of (b2, t2) =>
+ a andalso b1 = b2 andalso ZTree.equal (op =) (t1, t2))
+ true v1
+
+ fun crossproduct (a, b, f) =
+ let
+ open ZTree
+ val (rows, cols) = (Vector.length a, Vector.length b)
+ fun toPair i = (i mod rows, i div rows)
+ fun ofPair (r, c) = rows * c + r
+ fun getState (r, c) = (Vector.sub (a, r), Vector.sub (b, c))
+ val tree = fromFront o uniq (op =) o merge ofPair o mapPair front
+ fun cross ((b1, t1), (b2, t2)) = (f (b1, b2), tree (t1, t2))
+ in
+ Vector.tabulate (rows*cols, cross o getState o toPair)
+ end
+
+ fun complement a = Vector.map (fn (b, t) => (not b, t)) a
+ fun union (a, b) = crossproduct (a, b, fn (a, b) => a orelse b)
+ fun intersect (a, b) = crossproduct (a, b, fn (a, b) => a andalso b)
+
+ (* Find the lowest weight string which matches the expression *)
+ fun shortestMatch edgeweight a =
+ let
+ val n = Vector.length a
+ val parent = Array.tabulate (n, fn _ => (0, chr 0))
+ val weight = Array.tabulate (n, fn _ => 1999999999)
+ val visited = Array.tabulate (n, fn _ => false)
+ val () = Array.update(weight, 0, 0) (* start at empty string *)
+
+ val nextNode = Array.foldli
+ (fn (i, w, (bi, bw)) =>
+ if not (Array.sub (visited, i)) andalso Int.< (w, bw)
+ then (i, w) else (bi, bw))
+ (~1, 1999999999)
+
+ fun relaxEdges (i, vw) = ZTree.fold
+ (fn (l, j, r, ()) => case edgeweight (l, r) of (ew, c) =>
+ if vw + ew >= Array.sub (weight, j) then () else (
+ Array.update (weight, j, vw + ew);
+ Array.update (parent, j, (i, c))))
+ ()
+ (case Vector.sub (a, i) of (_, t) => t)
+
+ val working = ref true
+ val () = while (!working) do
+ let
+ val (i, w) = nextNode weight
+ in
+ if i = ~1 then working := false else (
+ Array.update (visited, i, true);
+ relaxEdges (i, w))
+ end
+
+ val shortestAccept = Array.foldli
+ (fn (i, w, (bi, bw)) =>
+ if #1 (Vector.sub (a, i)) andalso Int.< (w, bw)
+ then (i, w) else (bi, bw))
+ (~1, 1999999999) weight
+
+ fun followTrail (0, tail) = tail
+ | followTrail (i, tail) =
+ case Array.sub (parent, i) of (p, c) =>
+ followTrail (p, c :: tail)
+ in
+ if #1 shortestAccept = ~1 then NONE else
+ SOME (followTrail (#1 shortestAccept, []))
+ end
+
+ fun dotEdge (i, (_, t), tail) =
+ let
+ val toString = String.toCString o Char.toString o Char.chr o ord
+ fun pred NONE = NONE | pred (SOME x) = SOME (chr (ord x - 1))
+ fun fmt NONE = "" | fmt (SOME x) = toString x
+ fun fmtp (SOME x, SOME y) =
+ if x = y then toString x else toString x ^ "-" ^ toString y
+ | fmtp (x, y) = fmt x ^ "-" ^ fmt y
+ fun append (l, v, r, tree) =
+ case BTree.get tree v of
+ NONE => BTree.insert tree (v, [fmtp (l, pred r)])
+ | SOME x => BTree.insert tree (v, fmtp (l, pred r) :: x)
+ val edges = BTree.map (String.concatWith ",")
+ (ZTree.foldr append BTree.empty t)
+ fun print (j, l, tail) =
+ "\t" ::Int.toString i :: "->" :: Int.toString j ::
+ " [label=\"" :: l :: "\"]\n" :: tail
+ in
+ BTree.foldr print tail edges
+ end
+
+ fun toDot (n, a) = String.concat (
+ "strict digraph " :: n :: " {\n" ::
+ "\tnode [style=filled,fillcolor=grey,shape=circle]\n" ::
+ Vector.foldri dotNode
+ (Vector.foldri dotEdge ["}\n"] a) a)
+
+ fun toSML (n, a) = String.concat (
+ "fun step s =\n" ::
+ " let\n" ::
+ " datatype x = F of (char -> x)\n" ::
+ " fun eval s = foldl (fn (c, F f) => f c) (F step0) s" ::
+ Vector.foldri
+ (fn (i, (b, t), tail) =>
+ "\n and step" :: Int.toString i :: " c = " ::
+ printSML (fn i => ("F step" ^ Int.toString i), " ", t, tail))
+ ("\n" ::
+ " in\n" ::
+ " case eval s of F f => f\n" ::
+ " end\n" ::
+ nil)
+ a)
+
+ fun bodyC (i, (b, t), tail) =
+ "\n" :: toString i :: ":\n" ::
+ "\tif (++s == e) return " ::
+ (if b then "1" else "0") :: ";\n\t" ::
+ printC ("\t", t, tail)
+ fun caseC (i, (b, ZTree.Leaf v), tail) =
+ if i = v then
+ "\n" :: toString i :: ": return " ::
+ (if b then "1" else "0") :: ";\n" :: tail
+ else bodyC (i, (b, ZTree.Leaf v), tail)
+ | caseC (i, (b, t), tail) = bodyC (i, (b, t), tail)
+ fun toC (n, a) = String.concat (
+ "int " :: n :: "(const unsigned char* s, const unsigned char* e) {\n" ::
+ "\t--s;" ::
+ Vector.foldri caseC
+ ["\n}\n"]
+ a)
+ end
+
+ structure NonDeterministic =
+ struct
+ type state = Deterministic.state
+ type t = state list vector * Deterministic.t
+
+ (* note: the output is sorted b/c it was in a btree *)
+ fun dfs e q =
+ let
+ open BTree
+ fun touch (t, []) = t
+ | touch (t, a :: r) =
+ if isSome (get t a) then touch (t, r) else
+ touch (insert t (a, ()), Vector.sub (e, a) @ r)
+ in
+ fold (fn (k, _, l) => k :: l) [] (touch (empty, q))
+ end
+
+ fun size (_, a) = Vector.length a
+ fun start _ = 0
+ fun accepts (_, a) x = Deterministic.accepts a x
+ fun step (e, a) (c, l) = dfs e
+ (List.map (fn x => Deterministic.step a (c, x)) l)
+ fun multistep a (s, x) = foldl (step a) x s
+ fun test a s = List.exists (accepts a) (multistep a (s, [start a]))
+
+ (* set all accept states to have epsilon transitions to s *)
+ fun mapAccept s (e, a) =
+ let
+ fun mapEpsilon (i, l) = if accepts (e, a) i then s :: l else l
+ fun noAccept a = Vector.map (fn (_, x) => (false, x)) a
+ in
+ (Vector.mapi mapEpsilon e, noAccept a)
+ end
+
+ fun mapRenumber x (e, a) =
+ let
+ val e = Vector.map (List.map (fn i => i + x)) e
+ fun stateRelabel (b, t) = (b, ZTree.map (fn i => i + x) t)
+ in
+ (e, Vector.map stateRelabel a)
+ end
+
+ (* Scheme: new start state s accepts and -> all old starts, accepts -> s*)
+ fun power (e, a) =
+ let
+ val (e, a) = (mapAccept 0 o mapRenumber 2) (e, a)
+ val e0 = Vector.fromList [[2], []]
+ val a0 = Vector.fromList [(true, ZTree.uniform 1),
+ (false, ZTree.uniform 1)]
+ in
+ (Vector.concat [e0, e], Vector.concat [a0, a])
+ end
+
+ (* Scheme: s1 = start states, v1 accept states -> s2 start states *)
+ fun concat ((e1, a1), (e2, a2)) =
+ let
+ val l1 = Vector.length a1
+ val (e1, a1) = mapAccept l1 (e1, a1)
+ val (e2, a2) = mapRenumber l1 (e2, a2)
+ in
+ (Vector.concat [e1, e2], Vector.concat [a1, a2])
+ end
+
+ fun fromDFA a = (Vector.tabulate (Vector.length a, fn _ => []), a)
+
+ (* The general NFA->DFA conversion algorithm works as follows:
+ * - we start by calling getName (dfs e [0])
+ * - getName checks for an existing integer mapping for the list
+ * if one exists, the integer is returned
+ * otherwise:
+ * - the next available integer is allocated to this list
+ * - we merge all trees for the named states in the list
+ * via a hierachical combination of ZTree.merge
+ * - the new int list ZTree.iterator is imap'd with dfs
+ * - then we uniq the operation, and imap mapName it
+ * (this recursively explores other reachable subset states)
+ * - the new iterator is fromFront'd to create the tree.
+ * - if any of the states in the list accept, this accepts too
+ *)
+ structure Names =
+ struct
+ type t = int vector
+ fun < (l, r) = Vector.collate Int.compare (l, r) = LESS
+ end
+ structure NTree = BTree(Names)
+ fun toDFA (e, a) =
+ let
+ val names = ref NTree.empty
+ val number = ref 0
+
+ fun buildTree v =
+ let
+ open ZTree
+ datatype tree = Leaf of int | Node of tree * tree
+ fun flatten tail (Leaf i) = i :: tail
+ | flatten tail (Node (l, r)) = flatten (flatten tail r) l
+
+ fun getIter i = front (#2 (Vector.sub (a, Vector.sub (v, i))))
+
+ fun grow (l, r) =
+ if l + 1 = r then imap Leaf (getIter l) else
+ let val m = (l+r) div 2 in
+ merge Node (grow (l, m), grow (m, r))
+ end
+ in
+ (fromFront o uniq (op =) o imap (mapName o dfs e o flatten []) o grow)
+ (0, Vector.length v)
+ end
+ and mapName l =
+ let
+ val v = Vector.fromList l
+ in
+ case NTree.get (!names) v of
+ SOME (i, _, _) => i
+ | NONE =>
+ let
+ val me = !number before (number := !number + 1)
+ val () = names := NTree.insert (!names)
+ (v, (me, false, ZTree.uniform 0)) (* store name *)
+ val value =
+ (me, List.exists (accepts (e, a)) l, buildTree v)
+ val () = names := NTree.insert (!names) (v, value)
+ in
+ me
+ end
+ end
+
+ val _ = mapName (dfs e [0])
+ val d = Array.tabulate (!number, fn _ => (false, ZTree.uniform 0))
+ val () = NTree.app
+ (fn (i, b, t) => Array.update (d, i, (b, t))) (!names)
+
+(*
+ fun fmt NONE = ()
+ | fmt (SOME c) = (print o Char.toString o Char.chr o ord) c
+ fun treedump (l, v, r, ()) = (
+ fmt l; print "-"; fmt r; print ":";
+ print (Int.toString v ^ " "))
+ fun debug (v, (i, b, t)) = (
+ print "States ";
+ Vector.map (print o Int.toString) v;
+ print (": (" ^ Int.toString i ^ ", " ^ Bool.toString b ^ ", ");
+ ZTree.fold treedump () t;
+ print ")\n")
+ val () = NTree.appk debug (!names)
+*)
+ in
+ Array.vector d
+ end
+
+ fun dotEpsilon (i, [], tail) = tail
+ | dotEpsilon (i, h :: r, tail) =
+ "\t" :: Int.toString i :: "->" :: Int.toString h :: "\n" :: tail
+ fun toDot (n, (e, a)) = String.concat (
+ "digraph " :: n :: " {\n" ::
+ "\tnode [style=filled,fillcolor=grey,shape=circle]\n" ::
+ Vector.foldri dotNode
+ (Vector.foldri Deterministic.dotEdge
+ ("\tedge [style=dashed]\n" ::
+ Vector.foldri dotEpsilon ["}\n"] e) a) a)
+ end
+
+ structure Expression =
+ struct
+ datatype t =
+ Empty | Any | Char of bool ZTree.t | Not of t | Star of t |
+ Concat of t * t | Union of t * t | Intersect of t * t
+
+ structure DFA = Deterministic
+ structure NFA = NonDeterministic
+
+ fun toDFA Empty = DFA.empty
+ | toDFA Any = DFA.any
+ | toDFA (Char t) = DFA.char t
+ | toDFA (Not e) = DFA.complement (toDFA e)
+ | toDFA (Star e) =
+ (DFA.optimize o NFA.toDFA o NFA.power o NFA.fromDFA o toDFA) e
+ | toDFA (Concat (e1, e2)) =
+ (DFA.optimize o NFA.toDFA o NFA.concat)
+ (NFA.fromDFA (toDFA e1), NFA.fromDFA (toDFA e2))
+ | toDFA (Union (e1, e2)) =
+ (DFA.optimize o DFA.union) (toDFA e1, toDFA e2)
+ | toDFA (Intersect (e1, e2)) =
+ (DFA.optimize o DFA.intersect) (toDFA e1, toDFA e2)
+
+(*
+ fun toString Empty = ""
+ | toString Any = "."
+ | toString (Char c) = Char.toString (Char.chr (ord c))
+ | toString (Not e) = "^(" ^ toString e ^ ")"
+ | toString (Star e) = "(" ^ toString e ^ ")*"
+ | toString (Concat (e1, e2)) = toString e1 ^ toString e2
+ | toString (Union (e1, e2)) = "(" ^ toString e1 ^ ")+(" ^ toString e2 ^ ")"
+ | toString (Intersect (e1, e2)) = "(" ^ toString e1 ^ ")-(" ^ toString e2 ^ ")"
+*)
+ end
+
+ structure RegularExpression =
+ struct
+ structure E = Expression
+ type char = Char.char
+ (* BNF:
+ exp = branch
+ branch '|' exp
+ branch = empty
+ piece
+ piece branch
+ piece = atom ('*' | '+' | '?' | bound)?
+ bound = '{' int (',' int?)? '}'
+ atom = '(' exp ')'
+ bracket
+ '^'
+ '$'
+ '\' char
+ char
+ '{' (* if not followed by integer... *)
+ bracket = '[' '^'? (']')? (col | equiv | class | range | char)* ']'
+ col = '[.' chars '.]'
+ equiv = '[=' chars '=]'
+ class = '[:' chars ':]'
+ range = char '-' char
+ *)
+
+ datatype bracket =
+ Elt of char | End | Not of bracket | Range of char * char |
+ Alt of bracket * bracket
+
+ datatype t =
+ Union of t * t | Star of t | Plus of t | Option of t | Paran of t |
+ Concat of t * t | Char of char | Any | Empty |
+ Bound of t * int * int option | Bracket of bracket
+
+ fun cvtBound (e, 0, NONE) = E.Star e
+ | cvtBound (e, i, NONE) = E.Concat (e, cvtBound (e, i-1, NONE))
+ | cvtBound (e, 0, SOME 0) = E.Empty
+ | cvtBound (e, 0, SOME j) = E.Union (E.Empty, cvtBound (e, 1, SOME j))
+ | cvtBound (e, i, SOME j) = E.Concat (e, cvtBound (e, i-1, SOME (j-1)))
+
+ fun cvtBracket (Elt c) = cvtBracket (Range (c, c))
+ | cvtBracket (Not b) = ZTree.map not (cvtBracket b)
+ | cvtBracket End = ZTree.uniform false
+ | cvtBracket (Range (l, h)) =
+ ZTree.range (false, chr (Char.ord l), chr (Char.ord h + 1), true)
+ | cvtBracket (Alt (b1, b2)) =
+ (ZTree.fromFront o ZTree.uniq (op =) o
+ ZTree.merge (fn (x,y) => x orelse y))
+ (ZTree.front (cvtBracket b1), ZTree.front (cvtBracket b2))
+
+ fun exp (Union (e1, e2)) = E.Union (exp e1, exp e2)
+ | exp (Concat (e1, e2)) = E.Concat (exp e1, exp e2)
+ | exp (Star e) = E.Star (exp e)
+ | exp (Plus e) = let val e = exp e in E.Concat (e, E.Star e) end
+ | exp (Option e) = E.Union (E.Empty, exp e)
+ | exp (Paran e) = exp e
+ | exp (Char c) = E.Char (cvtBracket (Elt c))
+ | exp (Bound (e, l, r)) = cvtBound (exp e, l, r)
+ | exp (Bracket b) = E.Char (cvtBracket b)
+ | exp Any = E.Any
+ | exp Empty = E.Empty
+ val toExpression = exp
+
+ fun fromString s =
+ case parse_exp (String.explode s) of
+ (e, []) => e
+ | (e, l) => (
+ print ("Failed to parse: " ^ String.implode l ^ "\n");
+ e)
+ and parse_exp ts =
+ case parse_branch ts of
+ (branch, #"|" :: ts') =>
+ let val (exp, ts'') = parse_exp ts'
+ in (Union (branch, exp), ts'') end
+ | (branch, ts'') => (branch, ts'')
+ and parse_branch ts =
+ case parse_piece ts of
+ (SOME p, ts') =>
+ let val (r, ts'') = parse_branch ts'
+ in (Concat (p, r), ts'') end
+ | (NONE, _) => (Empty, ts)
+ and parse_piece ts =
+ case parse_atom ts of
+ (SOME a, #"*" :: ts') => (SOME (Star a), ts')
+ | (SOME a, #"+" :: ts') => (SOME (Plus a), ts')
+ | (SOME a, #"?" :: ts') => (SOME (Option a), ts')
+ | (SOME a, #"{" :: ts') =>
+ (case parse_bound a ts' of
+ (SOME b, ts'') => (SOME b, ts'')
+ | (NONE, _) => (SOME a, #"{" :: ts'))
+ | (SOME a, ts') => (SOME a, ts')
+ | (NONE, _) => (NONE, ts)
+ and parse_bound a ts =
+ case parse_int ts of
+ (SOME i, _, #"," :: #"}" :: ts') =>
+ (SOME (Bound (a, i, NONE)), ts')
+ | (SOME i, _, #"," :: ts') =>
+ (case parse_int ts' of
+ (SOME j, _, #"}"::ts'') =>
+ if i <= j then
+ (SOME (Bound (a, i, SOME j)), ts'')
+ else (NONE, ts)
+ | (SOME j, _, _) => (NONE, ts)
+ | (NONE, _, _) => (NONE, ts))
+ | (SOME i, _, #"}" :: ts') => (SOME (Bound (a, i, SOME i)), ts')
+ | (SOME i, _, _) => (NONE, ts)
+ | (NONE, _, _) => (NONE, ts)
+ and parse_int ts =
+ case parse_digit ts of
+ (SOME i, ts') =>
+ (case parse_int ts' of
+ (SOME j, p, ts'') => (SOME (i*p+j), p*10, ts'')
+ | (NONE, _, _) => (SOME i, 10, ts'))
+ | (NONE, _) => (NONE, 1, ts)
+ and parse_digit ts =
+ case ts of
+ (#"0" :: ts') => (SOME 0, ts')
+ | (#"1" :: ts') => (SOME 1, ts')
+ | (#"2" :: ts') => (SOME 2, ts')
+ | (#"3" :: ts') => (SOME 3, ts')
+ | (#"4" :: ts') => (SOME 4, ts')
+ | (#"5" :: ts') => (SOME 5, ts')
+ | (#"6" :: ts') => (SOME 6, ts')
+ | (#"7" :: ts') => (SOME 7, ts')
+ | (#"8" :: ts') => (SOME 8, ts')
+ | (#"9" :: ts') => (SOME 9, ts')
+ | _ => (NONE, ts)
+ and parse_atom ts =
+ case ts of
+ (#"(" :: ts') =>
+ (case parse_exp ts' of
+ (exp, #")" :: ts'') => (SOME (Paran exp), ts'')
+ | (exp, _) => (NONE, #"(" :: ts')) (* warn!!! *)
+ | (#"\\" :: x :: ts'') => (SOME (Char x), ts'')
+ | (#"." :: ts'') => (SOME Any, ts'')
+ | (#"[" :: ts') =>
+ (case parse_bnot ts' of
+ (bracket, #"]" :: ts'') => (SOME (Bracket bracket), ts'')
+ | (_, _) => (NONE, #"[" :: ts')) (* warn!!! *)
+ | (#")" :: ts') => (NONE, #")" :: ts')
+ | (#"|" :: ts') => (NONE, #"|" :: ts')
+ | (x :: ts') => (SOME (Char x), ts')
+ | [] => (NONE, ts)
+ and parse_bnot ts =
+ case ts of
+ (#"^" :: ts') =>
+ let val (r, ts'') = parse_bclose ts'
+ in (Not r, ts'') end
+ | _ => parse_bclose ts
+ and parse_bclose ts =
+ case ts of
+ (#"]" :: ts') =>
+ let val (r, ts'') = parse_blist ts'
+ in (Alt (Elt #"]", r), ts'') end
+ | _ => parse_blist ts
+ and parse_blist ts =
+ case parse_batom ts of
+ (SOME a, ts') =>
+ let val (r, ts'') = parse_blist ts'
+ in (Alt (a, r), ts'') end
+ | (NONE, _) => (End, ts)
+ and parse_batom ts =
+ case ts of
+ (c :: #"-" :: #"]" :: ts') => (SOME (Elt c), tl ts)
+ | (#"]" :: ts') => (NONE, ts)
+ | (c :: #"-" :: d :: ts') => (SOME (Range (c, d)), ts')
+ | (c :: ts') => (SOME (Elt c), ts')
+ | _ => (NONE, ts) (* warn!!! *)
+ end
+ end
Added: mltonlib/trunk/ca/terpstra/regexp/automata.mlb
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/automata.mlb 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/automata.mlb 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,8 @@
+local
+ $(SML_LIB)/basis/basis.mlb
+in
+ ztree.sml
+ btree.sml
+ automata.sig
+ automata.fun
+end
Added: mltonlib/trunk/ca/terpstra/regexp/automata.sig
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/automata.sig 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/automata.sig 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,80 @@
+signature AUTOMATA =
+ sig
+ eqtype char
+ eqtype string
+
+ structure ZTree : ZTREE
+
+ structure Deterministic :
+ sig
+ eqtype state
+ type t
+
+ val size: t -> int
+ val start: t -> state
+ val accepts: t -> state -> bool
+ val step: t -> (char * state) -> state
+ val multistep: t -> (string * state) -> state
+ val test: t -> string -> bool
+
+ val any: t
+ val empty: t
+ val char: bool ZTree.t -> t
+
+ (* minimizes states and puts in canonical order *)
+ val optimize: t -> t
+ (* compares two minimal, canonical DFAs for equality *)
+ val equal: (t * t) -> bool
+
+ val complement: t -> t
+ val union: (t * t) -> t
+ val intersect: (t * t) -> t
+
+ (* The passed function is the 'cost' of a character in length *)
+ val shortestMatch: (char option * char option -> int * char) -> t
+ -> char list option
+
+ val toDot: (String.string * t) -> String.string
+ val toSML: (String.string * t) -> String.string
+ val toC: (String.string * t) -> String.string
+ end
+
+ structure NonDeterministic :
+ sig
+ eqtype state
+ type t
+
+ val size: t -> int
+ val start: t -> state
+ val accepts: t -> state -> bool
+ val step: t -> (char * state list) -> state list
+ val multistep: t -> (string * state list) -> state list
+ val test: t -> string -> bool
+
+ val power: t -> t
+ val concat: (t * t) -> t
+
+ val toDFA: t -> Deterministic.t
+ val fromDFA: Deterministic.t -> t
+
+ val toDot: (String.string * t) -> String.string
+ end
+
+ structure Expression :
+ sig
+ datatype t =
+ Empty | Any | Char of bool ZTree.t | Not of t | Star of t |
+ Concat of t * t | Union of t * t | Intersect of t * t
+
+ (* val toString: t -> String.string *)
+ val toDFA: t -> Deterministic.t
+ end
+
+ structure RegularExpression :
+ sig
+ type t
+
+ val fromString: String.string -> t
+ val toExpression: t -> Expression.t
+ end
+ end
Added: mltonlib/trunk/ca/terpstra/regexp/btree.sml
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/btree.sml 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/btree.sml 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,105 @@
+signature BTREE =
+ sig
+ type key
+ type 'val t
+
+ val empty: 'val t
+
+ val app: ('val -> unit) -> 'val t -> unit
+ val appk: ((key * 'val) -> unit) -> 'val t -> unit
+ val map: ('val -> 'new) -> 'val t -> 'new t
+ val mapk: ((key * 'val) -> 'new) -> 'val t -> 'new t
+
+ val fold: (key * 'val * 'a -> 'a) -> 'a -> 'val t -> 'a
+ val foldr: (key * 'val * 'a -> 'a) -> 'a -> 'val t -> 'a
+
+ val get: 'val t -> key -> 'val option
+ val insert: 'val t -> (key * 'val) -> 'val t
+
+ datatype 'val iterator =
+ Iter of key * 'val * (unit -> 'val iterator) option
+ val front: 'val t -> (unit -> 'val iterator) option
+ end
+
+functor BTree(Order : ORDER) : BTREE =
+ struct
+ open Order
+
+ type key = Order.t
+ datatype colour = Red | Black
+ datatype 'val t = Node of colour * 'val t * (key * 'val) * 'val t | Leaf
+
+ val empty = Leaf
+
+ fun app f Leaf = ()
+ | app f (Node (c, l, (y, v), r)) =
+ (app f l; f v; app f r)
+
+ fun appk f Leaf = ()
+ | appk f (Node (c, l, (y, v), r)) =
+ (appk f l; f (y, v); appk f r)
+
+ fun map f Leaf = Leaf
+ | map f (Node (c, l, (y, v), r)) =
+ Node (c, map f l, (y, f v), map f r)
+
+ fun mapk f Leaf = Leaf
+ | mapk f (Node (c, l, (y, v), r)) =
+ Node (c, mapk f l, (y, f (y, v)), mapk f r)
+
+ fun fold f a Leaf = a
+ | fold f a (Node (c, l, (y, v), r)) =
+ fold f (f (y, v, fold f a l)) r
+
+ fun foldr f a Leaf = a
+ | foldr f a (Node (c, l, (y, v), r)) =
+ foldr f (f (y, v, foldr f a r)) l
+
+ fun get Leaf x = NONE
+ | get (Node (_, l, (y, v), r)) x =
+ if x < y then get l x
+ else if y < x then get r x
+ else SOME v
+
+ fun balance x = case x of
+ (Black, Node (Red, Node (Red, a, x, b), y, c), z, d) =>
+ Node (Red, Node (Black, a, x, b), y, Node (Black, c, z, d))
+ | (Black, Node (Red, a, x, Node (Red, b, y, c)), z, d) =>
+ Node (Red, Node (Black, a, x, b), y, Node (Black, c, z, d))
+ | (Black, a, x, Node (Red, Node (Red, b, y, c), z, d)) =>
+ Node (Red, Node (Black, a, x, b), y, Node (Black, c, z, d))
+ | (Black, a, x, Node (Red, b, y, Node (Red, c, z, d))) =>
+ Node (Red, Node (Black, a, x, b), y, Node (Black, c, z, d))
+ | (a, b, c, d) =>
+ Node (a, b, c, d)
+
+ fun insert t (x, v) =
+ let
+ fun ins Leaf = Node (Red, Leaf, (x, v), Leaf)
+ | ins (Node (c, a, (y, v'), b)) =
+ if x < y then balance (c, ins a, (y, v'), b)
+ else if y < x then balance (c, a, (y, v'), ins b)
+ else balance (c, a, (x, v), b)
+ in
+ case ins t of
+ Node (_, a, y, b) => Node (Black, a, y, b)
+ | Leaf => Leaf
+ end
+
+ datatype 'val iterator =
+ Iter of key * 'val * (unit -> 'val iterator) option
+
+ fun front t =
+ let
+ datatype 'val stack = Parent of key * 'val * 'val t
+ fun goleft (Leaf, []) = NONE
+ | goleft (Leaf, stack) = SOME (spit stack)
+ | goleft (Node (_, l, (k, v), r), stack) =
+ goleft (l, Parent (k, v, r) :: stack)
+ and spit [] () = raise Overflow (* unreachable *)
+ | spit (Parent (k, v, r) :: stack) () =
+ Iter (k, v, goleft (r, stack))
+ in
+ goleft (t, [])
+ end
+ end
Added: mltonlib/trunk/ca/terpstra/regexp/compare.dot
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/compare.dot 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/compare.dot 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,427 @@
+digraph "compare call-stack graph" {
+label = "compare call-stack graph"
+n0 [shape = "box", label = "examine.max\n", color = "Black"]
+n1 [shape = "box", label = "examine.length\n", color = "Black"]
+n2 [shape = "box", label = "examine.biggest\n", color = "Black"]
+n2 -> n1 []
+n2 -> n0 []
+n3 [shape = "box", label = "Automata.RegularExpression.fromString\n", color = "Black"]
+n3 -> n4 []
+n3 -> n5 []
+n3 -> n6 []
+n3 -> n7 []
+n3 -> n8 []
+n3 -> n9 []
+n10 [shape = "box", label = "examine\n", color = "Black"]
+n10 -> n3 []
+n10 -> n11 []
+n10 -> n2 []
+n10 -> n0 []
+n10 -> n12 []
+n10 -> n13 []
+n10 -> n4 []
+n10 -> n14 []
+n10 -> n15 []
+n10 -> n16 []
+n10 -> n5 []
+n10 -> n6 []
+n10 -> n7 []
+n10 -> n8 []
+n10 -> n17 []
+n10 -> n18 []
+n10 -> n19 []
+n10 -> n20 []
+n10 -> n21 []
+n10 -> n22 []
+n10 -> n23 []
+n10 -> n24 []
+n10 -> n25 []
+n10 -> n26 []
+n12 [shape = "box", label = "examine.format\n", color = "Black"]
+n12 -> n4 []
+n12 -> n27 []
+n12 -> n24 []
+n26 [shape = "box", label = "examine.entry\n", color = "Black"]
+n26 -> n4 []
+n25 [shape = "box", label = "examine.dashes\n", color = "Black"]
+n25 -> n25 []
+n25 -> n4 []
+n24 [shape = "box", label = "examine.whitespace\n", color = "Black"]
+n24 -> n24 []
+n24 -> n4 []
+n28 [shape = "box", label = "Automata.Deterministic.shortestMatch.anon\n", color = "Black"]
+n29 [shape = "box", label = "Automata.Deterministic.shortestMatch.anon.anon\n", color = "Black"]
+n30 [shape = "box", label = "Automata.Deterministic.shortestMatch.anon\n", color = "Black"]
+n30 -> n29 []
+n23 [shape = "box", label = "Option.map.anon\n", color = "Black"]
+n31 [shape = "box", label = "Automata.Deterministic.shortestMatch.followTrail\n", color = "Black"]
+n32 [shape = "box", label = "ZTree.fold\n", color = "Black"]
+n32 -> n33 []
+n34 [shape = "box", label = "Automata.Deterministic.shortestMatch.relaxEdges\n", color = "Black"]
+n34 -> n32 []
+n35 [shape = "box", label = "Automata.Deterministic.shortestMatch\n", color = "Black"]
+n35 -> n34 []
+n35 -> n31 []
+n35 -> n30 []
+n35 -> n28 []
+n36 [shape = "box", label = "pick\n", color = "Black"]
+n37 [shape = "box", label = "overlap\n", color = "Black"]
+n38 [shape = "box", label = "edgeLength.match\n", color = "Black"]
+n38 -> n37 []
+n39 [shape = "box", label = "edgeLength\n", color = "Black"]
+n39 -> n38 []
+n39 -> n36 []
+n40 [shape = "box", label = "Automata.Deterministic.shortestMatch.relaxEdges.anon\n", color = "Black"]
+n40 -> n39 []
+n33 [shape = "box", label = "ZTree.fold.deep\n", color = "Black"]
+n33 -> n40 []
+n33 -> n33 []
+n41 [shape = "box", label = "C.toArrayOfLength.loop\n", color = "Black"]
+n42 [shape = "box", label = "Automata.RegularExpression.parse_bclose\n", color = "Black"]
+n42 -> n43 []
+n44 [shape = "box", label = "Automata.RegularExpression.parse_bnot\n", color = "Black"]
+n44 -> n42 []
+n44 -> n43 []
+n45 [shape = "box", label = "Automata.RegularExpression.parse_bound\n", color = "Black"]
+n45 -> n46 []
+n47 [shape = "box", label = "Automata.RegularExpression.parse_atom\n", color = "Black"]
+n47 -> n44 []
+n47 -> n9 []
+n48 [shape = "box", label = "Automata.RegularExpression.parse_piece\n", color = "Black"]
+n48 -> n47 []
+n48 -> n45 []
+n49 [shape = "box", label = "Automata.RegularExpression.parse_branch\n", color = "Black"]
+n49 -> n48 []
+n49 -> n49 []
+n9 [shape = "box", label = "Automata.RegularExpression.parse_exp\n", color = "Black"]
+n9 -> n9 []
+n9 -> n49 []
+n50 [shape = "box", label = "Automata.RegularExpression.parse_digit\n", color = "Black"]
+n46 [shape = "box", label = "Automata.RegularExpression.parse_int\n", color = "Black"]
+n46 -> n50 []
+n46 -> n46 []
+n51 [shape = "box", label = "Automata.RegularExpression.parse_batom\n", color = "Black"]
+n43 [shape = "box", label = "Automata.RegularExpression.parse_blist\n", color = "Black"]
+n43 -> n51 []
+n43 -> n43 []
+n21 [shape = "box", label = "Automata.RegularExpression.exp\n", color = "Black"]
+n21 -> n52 []
+n21 -> n53 []
+n21 -> n54 []
+n21 -> n21 []
+n21 -> n22 []
+n55 [shape = "box", label = "ZTree.range\n", color = "Black"]
+n52 [shape = "box", label = "Automata.RegularExpression.cvtBracket\n", color = "Black"]
+n52 -> n55 []
+n52 -> n56 []
+n52 -> n57 []
+n52 -> n58 []
+n52 -> n59 []
+n52 -> n52 []
+n52 -> n53 []
+n52 -> n54 []
+n60 [shape = "box", label = "Automata.RegularExpression.cvtBracket.anon\n", color = "Black"]
+n22 [shape = "box", label = "Automata.RegularExpression.cvtBound\n", color = "Black"]
+n22 -> n22 []
+n19 [shape = "box", label = "Sequence.fromList\n", color = "Black"]
+n61 [shape = "box", label = "Automata.Deterministic.char\n", color = "Black"]
+n61 -> n53 []
+n62 [shape = "box", label = "Automata.NonDeterministic.power\n", color = "Black"]
+n62 -> n63 []
+n62 -> n64 []
+n62 -> n13 []
+n65 [shape = "box", label = "Automata.NonDeterministic.mapAccept.mapEpsilon\n", color = "Black"]
+n65 -> n66 []
+n67 [shape = "box", label = "Automata.NonDeterministic.mapRenumber.anon\n", color = "Black"]
+n68 [shape = "box", label = "Automata.NonDeterministic.toDFA\n", color = "Black"]
+n68 -> n69 []
+n68 -> n70 []
+n68 -> n71 []
+n68 -> n72 []
+n63 [shape = "box", label = "Automata.NonDeterministic.mapRenumber\n", color = "Black"]
+n63 -> n67 []
+n63 -> n13 []
+n73 [shape = "box", label = "Automata.NonDeterministic.mapAccept.noAccept\n", color = "Black"]
+n73 -> n13 []
+n64 [shape = "box", label = "Automata.NonDeterministic.mapAccept\n", color = "Black"]
+n64 -> n73 []
+n64 -> n65 []
+n74 [shape = "box", label = "Automata.NonDeterministic.concat\n", color = "Black"]
+n74 -> n64 []
+n74 -> n63 []
+n74 -> n13 []
+n75 [shape = "box", label = "Automata.NonDeterministic.fromDFA\n", color = "Black"]
+n11 [shape = "box", label = "Automata.Deterministic.crossproduct\n", color = "Black"]
+n76 [shape = "box", label = "Automata.Deterministic.union\n", color = "Black"]
+n76 -> n11 []
+n76 -> n13 []
+n20 [shape = "box", label = "Automata.Expression.toDFA\n", color = "Black"]
+n20 -> n76 []
+n20 -> n75 []
+n20 -> n74 []
+n20 -> n68 []
+n20 -> n62 []
+n20 -> n61 []
+n20 -> n13 []
+n20 -> n19 []
+n20 -> n20 []
+n20 -> n17 []
+n20 -> n18 []
+n77 [shape = "box", label = "Automata.NonDeterministic.toDFA.buildTree\n", color = "Black"]
+n77 -> n78 []
+n77 -> n57 []
+n77 -> n58 []
+n77 -> n54 []
+n77 -> n79 []
+n77 -> n80 []
+n77 -> n81 []
+n82 [shape = "box", label = "BTree.insert\n", color = "Black"]
+n82 -> n83 []
+n82 -> n84 []
+n85 [shape = "box", label = "BTree.get\n", color = "Black"]
+n85 -> n86 []
+n72 [shape = "box", label = "Automata.NonDeterministic.toDFA.mapName\n0.1% (0.01s)\n", color = "Black"]
+n72 -> n85 []
+n72 -> n82 []
+n72 -> n66 []
+n72 -> n77 []
+n87 [shape = "box", label = "BTree.insert\n", color = "Black"]
+n87 -> n88 []
+n87 -> n89 []
+n90 [shape = "box", label = "BTree.get\n", color = "Black"]
+n91 [shape = "box", label = "Automata.NonDeterministic.dfs.pass\n", color = "Black"]
+n91 -> n90 []
+n91 -> n87 []
+n69 [shape = "box", label = "Automata.NonDeterministic.dfs\n", color = "Black"]
+n69 -> n91 []
+n92 [shape = "box", label = "Automata.NonDeterministic.toDFA.buildTree.getIter.anon\n", color = "Black"]
+n93 [shape = "box", label = "Automata.NonDeterministic.toDFA.buildTree.getIter\n", color = "Black"]
+n93 -> n92 []
+n93 -> n59 []
+n79 [shape = "box", label = "Automata.NonDeterministic.toDFA.buildTree.grow\n", color = "Black"]
+n79 -> n93 []
+n79 -> n27 []
+n79 -> n79 []
+n79 -> n80 []
+n79 -> n81 []
+n94 [shape = "box", label = "Automata.NonDeterministic.toDFA.buildTree.flatten\n", color = "Black"]
+n94 -> n94 []
+n83 [shape = "box", label = "BTree.balance\n", color = "Black"]
+n86 [shape = "box", label = "Automata.NonDeterministic.Names.<\n0.2% (0.02s)\n", color = "Black"]
+n84 [shape = "box", label = "BTree.insert.ins\n", color = "Black"]
+n84 -> n86 []
+n84 -> n83 []
+n84 -> n84 []
+n95 [shape = "box", label = "Automata.NonDeterministic.toDFA.anon\n", color = "Black"]
+n71 [shape = "box", label = "BTree.app\n", color = "Black"]
+n71 -> n95 []
+n71 -> n71 []
+n88 [shape = "box", label = "BTree.balance\n", color = "Black"]
+n89 [shape = "box", label = "BTree.insert.ins\n", color = "Black"]
+n89 -> n88 []
+n89 -> n89 []
+n96 [shape = "box", label = "Automata.Deterministic.crossproduct.ofPair\n0.1% (0.01s)\n", color = "Black"]
+n66 [shape = "box", label = "Automata.Deterministic.accepts\n0.1% (0.01s)\n", color = "Black"]
+n97 [shape = "box", label = "Automata.Deterministic.finddups.agree\n1.3% (0.11s)\n", color = "Black"]
+n97 -> n66 []
+n98 [shape = "box", label = "Automata.Deterministic.finddups.fold\n7.7% (0.63s)\n", color = "Black"]
+n98 -> n99 []
+n98 -> n59 []
+n98 -> n81 []
+n98 -> n100 []
+n101 [shape = "box", label = "Automata.Deterministic.finddups.tree.anon\n", color = "Black"]
+n102 [shape = "box", label = "Automata.Deterministic.finddups.tree\n2.1% (0.17s)\n", color = "Black"]
+n102 -> n101 []
+n103 [shape = "box", label = "Automata.Deterministic.finddups.toPair\n5.1% (0.42s)\n", color = "Black"]
+n103 -> n27 []
+n104 [shape = "box", label = "Automata.Deterministic.finddups.distinct\n0.4% (0.03s)\n", color = "Black"]
+n104 -> n103 []
+n104 -> n105 []
+n104 -> n56 []
+n104 -> n98 []
+n106 [shape = "box", label = "Automata.Deterministic.finddups.update\n1.2% (0.10s)\n", color = "Black"]
+n106 -> n104 []
+n107 [shape = "box", label = "Automata.Deterministic.finddups.whoAmI\n0.1% (0.01s)\n", color = "Black"]
+n107 -> n108 []
+n109 [shape = "box", label = "Automata.Deterministic.finddups.setState\n", color = "Black"]
+n109 -> n107 []
+n110 [shape = "box", label = "Automata.Deterministic.finddups.pass\n3.4% (0.28s)\n", color = "Black"]
+n110 -> n106 []
+n111 [shape = "box", label = "Automata.Deterministic.finddups\n", color = "Black"]
+n111 -> n110 []
+n111 -> n112 []
+n111 -> n109 []
+n111 -> n103 []
+n111 -> n97 []
+n108 [shape = "box", label = "Automata.Deterministic.finddups.ofPair\n0.6% (0.05s)\n", color = "Black"]
+n113 [shape = "box", label = "Automata.Deterministic.finddups.match\n0.1% (0.01s)\n", color = "Black"]
+n113 -> n108 []
+n81 [shape = "box", label = "ZTree.merge.wrap\n30.6% (2.50s)\n", color = "Black"]
+n81 -> n60 []
+n81 -> n96 []
+n81 -> n113 []
+n81 -> n99 []
+n81 -> n59 []
+n81 -> n80 []
+n81 -> n100 []
+n81 -> n81 []
+n114 [shape = "box", label = "Automata.Deterministic.unreachable.anon\n", color = "Black"]
+n78 [shape = "box", label = "ZTree.imap\n", color = "Black"]
+n78 -> n80 []
+n115 [shape = "box", label = "Automata.Deterministic.mapStates.map\n0.1% (0.01s)\n", color = "Black"]
+n115 -> n78 []
+n115 -> n57 []
+n115 -> n59 []
+n115 -> n58 []
+n115 -> n54 []
+n112 [shape = "box", label = "Automata.Deterministic.mapStates\n", color = "Black"]
+n112 -> n13 []
+n112 -> n115 []
+n18 [shape = "box", label = "Automata.Deterministic.unreachable\n", color = "Black"]
+n18 -> n116 []
+n18 -> n112 []
+n18 -> n114 []
+n18 -> n117 []
+n100 [shape = "box", label = "ZTree.uniq.wrap\n0.2% (0.02s)\n", color = "Black"]
+n100 -> n99 []
+n100 -> n59 []
+n100 -> n80 []
+n100 -> n100 []
+n100 -> n81 []
+n80 [shape = "box", label = "ZTree.imap.wrap\n0.2% (0.02s)\n", color = "Black"]
+n80 -> n17 []
+n80 -> n72 []
+n80 -> n99 []
+n80 -> n59 []
+n80 -> n80 []
+n80 -> n100 []
+n80 -> n81 []
+n118 [shape = "box", label = "Automata.NonDeterministic.dfs.anon\n0.1% (0.01s)\n", color = "Black"]
+n70 [shape = "box", label = "BTree.fold\n", color = "Black"]
+n70 -> n118 []
+n70 -> n70 []
+n99 [shape = "box", label = "ZTree.front.next\n1.1% (0.09s)\n", color = "Black"]
+n119 [shape = "box", label = "ZTree.fromFront.suck\n", color = "Black"]
+n119 -> n99 []
+n119 -> n59 []
+n119 -> n80 []
+n119 -> n100 []
+n119 -> n81 []
+n58 [shape = "box", label = "ZTree.fromFront\n0.7% (0.06s)\n", color = "Black"]
+n58 -> n119 []
+n54 [shape = "box", label = "ZTree.fromFront.grow\n", color = "Black"]
+n54 -> n27 []
+n54 -> n54 []
+n59 [shape = "box", label = "ZTree.front.goleft\n19.4% (1.58s)\n", color = "Black"]
+n120 [shape = "box", label = "Automata.Deterministic.char.anon\n", color = "Black"]
+n53 [shape = "box", label = "ZTree.map\n", color = "Black"]
+n53 -> n120 []
+n53 -> n53 []
+n121 [shape = "box", label = "Automata.Deterministic.unreachable.dfs.anon\n", color = "Black"]
+n116 [shape = "box", label = "Automata.Deterministic.unreachable.dfs\n", color = "Black"]
+n116 -> n121 []
+n117 [shape = "box", label = "ZTree.app\n", color = "Black"]
+n117 -> n117 []
+n117 -> n116 []
+n5 [shape = "box", label = "StreamIOExtra.flushOut\n", color = "Black"]
+n5 -> n7 []
+n5 -> n8 []
+n6 [shape = "box", label = "TextIO.print\n", color = "Black"]
+n6 -> n122 []
+n6 -> n123 []
+n6 -> n7 []
+n6 -> n8 []
+n7 [shape = "box", label = "StreamIOExtra.flushGen.loop\n", color = "Black"]
+n7 -> n122 []
+n7 -> n123 []
+n8 [shape = "box", label = "StreamIOExtra.flushBuf\n", color = "Black"]
+n124 [shape = "box", label = "Time.make.anon\n", color = "Black"]
+n17 [shape = "box", label = "General.o\n", color = "Black"]
+n17 -> n35 []
+n17 -> n69 []
+n17 -> n70 []
+n17 -> n94 []
+n17 -> n111 []
+n17 -> n18 []
+n17 -> n13 []
+n122 [shape = "box", label = "PosixError.SysCall.syscallErr.errUnblocked\n", color = "Black"]
+n123 [shape = "box", label = "PosixError.SysCall.simpleResult'\n", color = "Black"]
+n125 [shape = "box", label = "IntInf.dontInline.recur\n", color = "Black"]
+n125 -> n125 []
+n126 [shape = "box", label = "Sequence.unfoldi.loop\n", color = "Black"]
+n126 -> n127 []
+n126 -> n128 []
+n126 -> n129 []
+n126 -> n14 []
+n126 -> n15 []
+n126 -> n16 []
+n14 [shape = "box", label = "Array.ArraySlice.vector\n", color = "Black"]
+n15 [shape = "box", label = "Integer.fmt.loop\n", color = "Black"]
+n16 [shape = "box", label = "Integer.fmt\n", color = "Black"]
+n27 [shape = "box", label = "Integer.div\n4.3% (0.35s)\n", color = "Black"]
+n127 [shape = "box", label = "Sequence.Slice.sequence\n", color = "Black"]
+n128 [shape = "box", label = "Sequence.Slice.concat\n", color = "Black"]
+n129 [shape = "box", label = "Sequence.concat\n", color = "Black"]
+n4 [shape = "box", label = "Sequence.append\n", color = "Black"]
+n130 [shape = "box", label = "Automata.NonDeterministic.mapRenumber.stateRelabel\n", color = "Black"]
+n130 -> n53 []
+n131 [shape = "box", label = "Automata.Deterministic.complement.anon\n", color = "Black"]
+n132 [shape = "box", label = "Automata.Deterministic.mapStates.anon\n", color = "Black"]
+n133 [shape = "box", label = "Automata.Deterministic.intersect.anon\n", color = "Black"]
+n57 [shape = "box", label = "ZTree.uniq\n", color = "Black"]
+n57 -> n100 []
+n56 [shape = "box", label = "ZTree.merge\n1.6% (0.13s)\n", color = "Black"]
+n56 -> n81 []
+n105 [shape = "box", label = "Automata.Deterministic.mapPair\n3.4% (0.28s)\n", color = "Black"]
+n105 -> n102 []
+n105 -> n59 []
+n134 [shape = "box", label = "Automata.Deterministic.union.anon\n", color = "Black"]
+n135 [shape = "box", label = "Automata.Deterministic.crossproduct.cross\n0.2% (0.02s)\n", color = "Black"]
+n135 -> n134 []
+n135 -> n105 []
+n135 -> n56 []
+n135 -> n57 []
+n135 -> n133 []
+n135 -> n58 []
+n135 -> n54 []
+n136 [shape = "box", label = "Automata.Deterministic.crossproduct.getState\n", color = "Black"]
+n137 [shape = "box", label = "Automata.Deterministic.crossproduct.toPair\n", color = "Black"]
+n137 -> n27 []
+n138 [shape = "box", label = "Automata.NonDeterministic.mapAccept.noAccept.anon\n", color = "Black"]
+n13 [shape = "box", label = "Sequence.tabulate\n", color = "Black"]
+n13 -> n138 []
+n13 -> n137 []
+n13 -> n136 []
+n13 -> n135 []
+n13 -> n132 []
+n13 -> n131 []
+n13 -> n130 []
+n139 [shape = "box", label = "General.exnMessage.find\n", color = "Black"]
+n139 -> n139 []
+n139 -> n140 []
+n139 -> n127 []
+n139 -> n128 []
+n139 -> n129 []
+n140 [shape = "box", label = "General.exnMessage\n", color = "Black"]
+n141 [shape = "box", label = "<main>\n", color = "Black"]
+n141 -> n10 []
+n141 -> n139 []
+n141 -> n140 []
+n141 -> n127 []
+n141 -> n128 []
+n141 -> n129 []
+n141 -> n4 []
+n141 -> n126 []
+n141 -> n122 []
+n141 -> n123 []
+n141 -> n124 []
+n141 -> n17 []
+n141 -> n125 []
+n141 -> n7 []
+n141 -> n8 []
+n141 -> n5 []
+n141 -> n6 []
+n141 -> n41 []
+n142 [shape = "box", label = "<gc>\n15.1% (1.23s)\n", color = "Black"]
+n143 [shape = "box", label = "<unknown>\n", color = "Black"]
+}
\ No newline at end of file
Added: mltonlib/trunk/ca/terpstra/regexp/compare.mlb
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/compare.mlb 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/compare.mlb 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,6 @@
+local
+ $(SML_LIB)/basis/basis.mlb
+ automata.mlb
+in
+ compare.sml
+end
Added: mltonlib/trunk/ca/terpstra/regexp/compare.sml
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/compare.sml 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/compare.sml 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,90 @@
+fun overlap (NONE, NONE, x, y) = true
+ | overlap (SOME l, NONE, x, y) = l < y
+ | overlap (NONE, SOME r, x, y) = x < r
+ | overlap (SOME l, SOME r, x, y) = x < r andalso l < y
+
+(* assumes overlap *)
+fun pick (NONE, NONE, x, y) = x
+ | pick (SOME l, NONE, x, y) = y - 1 (* l < y *)
+ | pick (NONE, SOME r, x, y) = x (* x < r *)
+ | pick (SOME l, SOME r, x, y) = if l < x then x else l
+
+fun edgeLength (l, r) =
+ let
+ val asciiweights = [
+ ( 65, 91, 1), (* uppercase chars are perfect *)
+ ( 97, 123, 1), (* lowercase chars are perfect *)
+ ( 48, 58, 2), (* digits are nice *)
+ ( 32, 33, 3), (* space is better than punctuation *)
+ ( 58, 65, 4), (* :;<=>?@ not pretty, but ok *)
+ ( 91, 97, 4), (* [\]^_` not pretty, but ok *)
+ (123, 127, 4), (* {|}~ not pretty, but ok *)
+ ( 33, 48, 4), (* !"#$%&'()*+-,-./ are not pretty, but acceptable *)
+ (127, 256, 12), (* anything bigger is not nicely printable *)
+ ( 1, 32, 25), (* control chars are bad too *)
+ ( 0, 1, 200)] (* try really hard to avoid nulls *)
+
+ val (li, ri) = (Option.map Char.ord l, Option.map Char.ord r)
+ fun match (x, y, _) = overlap (li, ri, x, y)
+ in
+ case valOf (List.find match asciiweights) of (x, y, w) =>
+ (w, Char.chr (pick (li, ri, x, y)))
+ end
+
+structure A = Automata(Alphabet)
+structure RE = A.RegularExpression
+structure E = A.Expression
+structure DFA = A.Deterministic
+
+fun examine (a, b) =
+ let
+ val convert = E.toDFA o RE.toExpression o RE.fromString
+ val find = Option.map String.implode o DFA.shortestMatch edgeLength
+ val join = find o DFA.optimize o DFA.intersect
+ val (pa, pb) = (convert a, convert b)
+ val (na, nb) = (DFA.complement pa, DFA.complement pb)
+ val (pas, nas, pbs, nbs) = (find pa, find na, find pb, find nb)
+ val (papbs, panbs, napbs, nanbs) =
+ (join (pa, pb), join (pa, nb), join (na, pb), join (na, nb))
+
+ fun length (SOME x) = 4 + String.size x
+ | length NONE = 3
+ fun max (x, y) = if x < y then y else x
+ fun biggest (x, y, z) = max (length x, max (length y, length z))
+ val col1 = biggest(NONE, pas, nas)
+ val col2 = biggest(pbs, papbs, napbs)
+ val col3 = max(biggest(nbs, panbs, nanbs), 8)
+
+ fun whitespace 0 = ""
+ | whitespace i = " " ^ whitespace (i-1)
+ fun dashes 0 = ""
+ | dashes i = "-" ^ dashes (i-1)
+ fun format (s, w) =
+ let val pad = w - String.size s in
+ whitespace (pad div 2) ^ s ^ whitespace ((pad+1) div 2) end
+ fun entry (SOME x, w) = format ("\"" ^ x ^ "\"", w)
+ | entry (NONE, w) = format ("-", w)
+
+ val setrelation = case (papbs, panbs, napbs, nanbs) of
+ (_, NONE, NONE, _) => "A is identical to B"
+ | (NONE, _, _, NONE) => "A is the complement of B"
+ | (_, NONE, _, _) => "A is a subset of B"
+ | (_, _, NONE, _) => "A is a superset of B"
+ | (NONE, _, _, _) => "A is disjoint from B"
+ | _ => "A overlaps B"
+ in
+ print ("Expression A (" ^ Int.toString (DFA.size pa) ^ " states) = \"" ^ a ^ "\"\n");
+ print ("Expression B (" ^ Int.toString (DFA.size pb) ^ " states) = \"" ^ b ^ "\"\n");
+ print "\n";
+ print (" |" ^ whitespace col1 ^ "|" ^ format("B", col2) ^ "|" ^ format("not(B)", col3) ^ "\n");
+ print ("--------" ^ dashes col1 ^ "-" ^ dashes col2 ^ "-" ^ dashes col3 ^ "\n");
+ print (" |" ^ whitespace col1 ^ "|" ^ entry(pbs, col2) ^ "|" ^ entry(nbs, col3) ^ "\n");
+ print ("A |" ^ entry(pas, col1) ^ "|" ^ entry(papbs, col2) ^ "|" ^ entry(panbs, col3) ^ "\n");
+ print ("not(A) |" ^ entry(nas, col1) ^ "|" ^ entry(napbs, col2) ^ "|" ^ entry(nanbs, col3) ^ "\n");
+ print "\n";
+ print ("Set relationship: " ^ setrelation ^ ".\n")
+ end
+
+val () = case CommandLine.arguments () of
+ (a :: b :: []) => examine (a, b)
+ | _ => print "Expect two regular expressions for arguments\n"
Added: mltonlib/trunk/ca/terpstra/regexp/todot.mlb
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/todot.mlb 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/todot.mlb 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,6 @@
+local
+ $(SML_LIB)/basis/basis.mlb
+ automata.mlb
+in
+ todot.sml
+end
Added: mltonlib/trunk/ca/terpstra/regexp/todot.sml
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/todot.sml 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/todot.sml 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,10 @@
+structure T = Automata(Alphabet)
+structure DFA = T.Deterministic
+structure NFA = T.NonDeterministic
+structure E = T.Expression
+structure RE = T.RegularExpression
+open E
+
+val exp = (RE.toExpression o RE.fromString o hd o CommandLine.arguments) ()
+val s = toDFA exp
+val () = print (DFA.toDot ("dotfile", s))
Added: mltonlib/trunk/ca/terpstra/regexp/ztree.sml
===================================================================
--- mltonlib/trunk/ca/terpstra/regexp/ztree.sml 2006-12-19 02:52:52 UTC (rev 4982)
+++ mltonlib/trunk/ca/terpstra/regexp/ztree.sml 2006-12-19 02:56:05 UTC (rev 4983)
@@ -0,0 +1,171 @@
+signature ORDER =
+ sig
+ type t
+ val < : t * t -> bool
+ end
+
+signature ZTREE =
+ sig
+ type key
+ datatype 'val t =
+ Leaf of 'val |
+ Node of 'val t * key * 'val t
+
+ val uniform: 'val -> 'val t
+ val range: ('val * key * key * 'val) -> 'val t
+ val size: 'val t -> int
+
+ (* compare two ZTrees for structural equality (balance must match) *)
+ val equal: ('val * 'val -> bool) -> ('val t * 'val t) -> bool
+
+ val app: ('val -> unit) -> 'val t -> unit
+ val map: ('val -> 'new) -> 'val t -> 'new t
+ val fold: (key option * 'val * key option * 'a -> 'a) -> 'a -> 'val t -> 'a
+ val foldr: (key option * 'val * key option * 'a -> 'a) -> 'a -> 'val t -> 'a
+ val lookup: 'val t -> key -> 'val
+
+ datatype 'val iterator =
+ Iter of 'val * key option * (unit -> 'val iterator)
+ val front: 'val t -> 'val iterator
+ val back: 'val t -> 'val iterator
+ val fromFront: 'val iterator -> 'val t
+
+ val imap: ('val -> 'new) -> 'val iterator -> 'new iterator
+ val uniq: ('val * 'val -> bool) -> 'val iterator -> 'val iterator
+ val merge: ('v1 * 'v2 -> 'new) -> ('v1 iterator * 'v2 iterator) -> 'new iterator
+ end
+
+functor ZTree(Order : ORDER) : ZTREE
+ where type key = Order.t =
+ struct
+ open Order
+ type key = Order.t
+
+ datatype 'val t =
+ Leaf of 'val |
+ Node of 'val t * key * 'val t
+
+ fun uniform v = Leaf v
+ fun range (u, l, r, v) = Node (Node (Leaf u, l, Leaf v), r, Leaf u)
+
+ fun size (Leaf v) = 1
+ | size (Node (l, _, r)) = size l + size r
+
+ fun equal eq (Leaf v1, Leaf v2) = eq (v1, v2)
+ | equal eq (Node _, Leaf _) = false
+ | equal eq (Leaf _, Node _) = false
+ | equal eq (Node (l1, k1, r1), Node (l2, k2, r2)) =
+ not (k1 < k2) andalso not (k2 < k1) andalso
+ equal eq (l1, l2) andalso equal eq (r1, r2)
+
+ fun app f (Leaf v) = f v
+ | app f (Node (l, k, r)) = (app f l; app f r)
+
+ fun map f (Leaf v) = Leaf (f v)
+ | map f (Node (l, k, r)) = Node (map f l, k, map f r)
+
+ fun fold f a t =
+ let
+ fun deep (x, y, Leaf v, a) = f (x, v, y, a)
+ | deep (x, y, Node (l, k, r), a) =
+ deep (SOME k, y, r, deep (x, SOME k, l, a))
+ in
+ deep (NONE, NONE, t, a)
+ end
+
+ fun foldr f a t =
+ let
+ fun deep (x, y, Leaf v, a) = f (x, v, y, a)
+ | deep (x, y, Node (l, k, r), a) =
+ deep (x, SOME k, l, deep (SOME k, y, r, a))
+ in
+ deep (NONE, NONE, t, a)
+ end
+
+ fun lookup (Leaf v) _ = v
+ | lookup (Node (l, k, r)) x =
+ if x < k then lookup l x else lookup r x
+
+ datatype 'val iterator =
+ Iter of 'val * key option * (unit -> 'val iterator)
+
+ fun front t =
+ let
+ datatype 'val stack = Parent of key option * 'val t
+ fun goleft (Leaf v, c, stack) = Iter (v, c, next stack)
+ | goleft (Node (l, k, r), c, stack) =
+ goleft (l, SOME k, Parent (c, r) :: stack)
+ and next [] () = raise Subscript
+ | next (Parent (c, r) :: stack) () =
+ goleft (r, c, stack)
+ in
+ goleft (t, NONE, [])
+ end
+
+ fun back t =
+ let
+ datatype 'val stack = Parent of key option * 'val t
+ fun goright (Leaf v, c, stack) = Iter (v, c, next stack)
+ | goright (Node (l, k, r), c, stack) =
+ goright (r, SOME k, Parent (c, l) :: stack)
+ and next [] () = raise Subscript
+ | next (Parent (c, l) :: stack) () =
+ goright (l, c, stack)
+ in
+ goright (t, NONE, [])
+ end
+
+ fun fromFront f =
+ let
+ fun suck (Iter (v1, NONE, iter), r) = (v1, NONE) :: r
+ | suck (Iter (v1, k1, iter), r) = suck (iter (), (v1, k1) :: r)
+ val table = Vector.fromList (suck (f, []))
+ fun grow (l, r) =
+ if l + 1 = r then Leaf (#1 (Vector.sub (table, l))) else
+ let val m = (l+r) div 2 in
+ Node (grow (m, r),
+ valOf (#2 (Vector.sub (table, m))),
+ grow (l, m))
+ end
+ in
+ grow (0, Vector.length table)
+ end
+
+ fun imap f iter =
+ let
+ fun wrap (Iter (v, NONE, n)) () = Iter (f v, NONE, wrap (Iter (v, NONE, n)))
+ | wrap (Iter (v, k, n)) () = Iter (f v, k, wrap (n ()))
+ in
+ wrap iter ()
+ end
+
+ fun uniq eq iter =
+ let
+ fun wrap (Iter (v, NONE, n)) () = Iter (v, NONE, n)
+ | wrap (Iter (v1, k1, n1)) () =
+ case n1 () of Iter (v2, k2, n2) =>
+ if eq (v1, v2) then wrap (Iter (v2, k2, n2)) () else
+ Iter (v1, k1, wrap (Iter (v2, k2, n2)))
+ in
+ wrap iter ()
+ end
+
+ fun merge f (iter1, iter2) =
+ let
+ fun wrap (Iter (v1, NONE, n1), Iter (v2, NONE, n2)) () =
+ Iter (f (v1, v2), NONE, wrap (Iter (v1, NONE, n1), Iter (v2, NONE, n2)))
+ | wrap (Iter (v1, SOME k1, n1), Iter (v2, NONE, n2)) () =
+ Iter (f (v1, v2), SOME k1, wrap (n1 (), Iter (v2, NONE, n2)))
+ | wrap (Iter (v1, NONE, n1), Iter (v2, SOME k2, n2)) () =
+ Iter (f (v1, v2), SOME k2, wrap (Iter (v1, NONE, n1), n2 ()))
+ | wrap (Iter (v1, SOME k1, n1), Iter (v2, SOME k2, n2)) () =
+ if k1 < k2 then
+ Iter (f (v1, v2), SOME k1, wrap (n1 (), Iter (v2, SOME k2, n2)))
+ else if k2 < k1 then
+ Iter (f (v1, v2), SOME k2, wrap (Iter (v1, SOME k1, n1), n2 ()))
+ else
+ Iter (f (v1, v2), SOME k1, wrap (n1 (), n2 ()))
+ in
+ wrap (iter1, iter2) ()
+ end
+ end
More information about the MLton-commit
mailing list