[MLton-commit] r7064

Wed Apr 8 05:32:48 PDT 2009

Minimize diff with SML/NJ upstream version of ml-lex tool.

Since we now use the default int and word types provided by the host
compiler, there is no need for the extra 'type int = Int.int' and
':int' annotations in the output of ml-lex.  Minimizing the diff with
SML/NJ upstream makes it easier to apply upstream changes.
----------------------------------------------------------------------

D   mlton/trunk/mllex/INSTALL
U   mlton/trunk/mllex/Makefile
U   mlton/trunk/mllex/README.MLton
D   mlton/trunk/mllex/export-lex.sml
U   mlton/trunk/mllex/lexgen.doc
U   mlton/trunk/mllex/lexgen.sml
U   mlton/trunk/mllex/lexgen.tex
U   mlton/trunk/mllex/mlex_int.doc
D   mlton/trunk/mllex/mllex.cm
U   mlton/trunk/mllex/mllex.mlb
D   mlton/trunk/mllex/sources.cm
D   mlton/trunk/mllex/sources.mlb

----------------------------------------------------------------------

Deleted: mlton/trunk/mllex/INSTALL
===================================================================

--- mlton/trunk/mllex/INSTALL	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/INSTALL	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,21 +0,0 @@
-Installation instructions for ML-Lex
--------------------------------------
-
-ML-Lex will normally be automatically
-installed as part of the SML/NJ system
-by the SML/NJ installer.
-
-To install by hand (e.g., if you make
-your own modifications), run the
-
-    ./build
-
-script in this directory and then move
-the file
-
-    ml-lex.$ARCH-$OS
-
-to the heap-file directory.
-
-Running ./build requires a properly
-functioning installation of SML/NJ.

Modified: mlton/trunk/mllex/Makefile
===================================================================
--- mlton/trunk/mllex/Makefile	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/Makefile	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,4 +1,5 @@
-## Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
+## Copyright (C) 2009 Matthew Fluet.
+ # Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
  #    Jagannathan, and Stephen Weeks.
  # Copyright (C) 1997-2000 NEC Research Institute.
  #
@@ -22,9 +23,6 @@
 	@echo 'Compiling $(NAME)'
 	"$(MLTON)" $(FLAGS) $(NAME).mlb
 
-$(NAME).sml: $(NAME).cm $(shell "$(MLTON)" -stop f $(NAME).cm)
-	mlton -stop sml $(NAME).cm
-
 html/index.html: $(TEX_FILES)
 	mkdir -p html
 	hevea -fix -o html/mllex.html -exec xxdate.exe macros.hva lexgen.tex

Modified: mlton/trunk/mllex/README.MLton
===================================================================
--- mlton/trunk/mllex/README.MLton	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/README.MLton	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,11 +1,22 @@
-This is a modified version of the ml-lex directory that comes with SML/NJ
-110.55.  I made a few changes so that the sources are compilable with MLton.
+This is a modified version of the ml-lex directory that comes with SML/NJ.
 
-mfluet at acm.org  2005-7-21.
+Files from SML/NJ:
+  INSTALL -- deleted
+  README
+  build -- deleted
+  build.bat -- deleted
+  export-lex.sml -- deleted
+  lexgen.doc
+  lexgen.sml -- modified
+  lexgen.tex -- modified
+  ml-lex.cm -- deleted
+  mlex_int.doc
+  tool/* -- deleted
 
-*****
-
-This is a modified version of the ml-lex directory that comes with SML/NJ
-110.9.1.  I made a few changes so that the sources are compilable with MLton.
-
-sweeks at acm.org  2000-8-22.
+Files added:
+  Makefile
+  README.MLton
+  call-main.sml
+  macros.hva
+  main.sml
+  mllex.mlb

Deleted: mlton/trunk/mllex/export-lex.sml
===================================================================
--- mlton/trunk/mllex/export-lex.sml	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/export-lex.sml	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,45 +0,0 @@
-(* export-lex.sml
- *
- * Revision 1.2  2000/03/07 04:01:05  blume
- * - build script now use new ml-build mechanism
- *)
-structure ExportLexGen : sig
-    val lexGen : (string * string list) -> OS.Process.status
-end = struct
-
-    exception Interrupt
-
-  (* This function applies operation to ().  If it handles an interrupt
-   * signal (Control-C), it raises the exception Interrupt.  Example:
-   * (handleInterrupt foo) handle Interrupt => print "Bang!\n"
-   *)
-    fun handleInterrupt (operation : unit -> unit) =
-      let exception Done
-          val old'handler = Signals.inqHandler(Signals.sigINT)
-          fun reset'handler () =
-            Signals.setHandler(Signals.sigINT, old'handler)
-      in (SMLofNJ.Cont.callcc (fn k =>
-             (Signals.setHandler(Signals.sigINT, Signals.HANDLER(fn _ => k));
-               operation ();
-               raise Done));
-           raise Interrupt)
-          handle Done => (reset'handler ())
-               | exn  => (reset'handler (); raise exn)
-      end
-
-    fun err msg = TextIO.output(TextIO.stdErr, String.concat msg)
-
-    fun lexGen (name, args) = let
-        fun lex_gen () =
-            case args of
-                [] => (err [name, ": missing filename\n"];
-                       OS.Process.exit OS.Process.failure)
-              | files => List.app LexGen.lexGen files
-    in
-        (handleInterrupt lex_gen; OS.Process.success)
-        handle Interrupt => (err [name, ": Interrupt\n"]; OS.Process.failure)
-             | any => (err [name, ": uncaught exception ",
-                            General.exnMessage any, "\n"];
-                       OS.Process.failure)
-    end
-end

Modified: mlton/trunk/mllex/lexgen.doc
===================================================================
--- mlton/trunk/mllex/lexgen.doc	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.doc	2009-04-08 12:32:46 UTC (rev 7064)
@@ -63,7 +63,7 @@
 to recognize the words.  It saves programmer time and increases
 program maintainability.
 
-Unfortunately, Lex is targeted only C.  It also places artificial 
+Unfortunately, Lex is targeted only C.  It also places artificial
 limits on the size of strings that can be recognized.
 
 ML-Lex is a variant of Lex for the ML programming language.  ML-Lex
@@ -149,7 +149,7 @@
         but first; to include - literally in a set, put it first or last.
 
         The dot . character stands for any character except newline,
-        i.e. the same as [^\n]  
+        i.e. the same as [^\n]
 
         The following special escape sequences are available, inside
         or outside of square-brackets:
@@ -192,7 +192,7 @@
 
         The infix operator | stands for alternation.  The expression
         e1 | e2  stands for anything that either e1 or e2 stands for.
-    
+
         The infix operator / denotes lookahead.  Lookahead is not
         implemented and cannot be used, because there is a bug
         in the algorithm for generating lexers with lookahead.  If
@@ -210,7 +210,7 @@
         sign $ occurred at the end of an expression, that expression
         would only match strings that occur at the end of a line
         (right before a newline character).
-        
+
 Here are some examples of regular expressions, and descriptions of the
 set of strings they denote:
 
@@ -300,7 +300,7 @@
 V. Values available inside the code associated with a rule.
 
 Mlex places the value of the string matched by a regular expression
-in yytext, a string variable.  
+in yytext, a string variable.
 
 The user may recursively
 call the lexing function with lex().  (If %arg is used, the
@@ -325,13 +325,13 @@
                                         string, or that matches the longest
                                         possible prefix of this string,
                                         is used instead.
-                                
+
         yypos                           Current character position from
                                         beginning of file.
 
         yylineno        %count          Current line number
-        
 
+
 These values should be used only if necessary.  Adding REJECT to a
 lexer will slow it down by 20%; adding yylineno will slow it down by
 another 20%, or more.  (It is much more efficient to recognize \n and
@@ -368,9 +368,9 @@
 from the input stream.  It should return a null string to indicate
 that the end of the stream has been reached.  The integer is the
 number of characters that the lexer wishes to read; the function may
-return any non-zero number of characters.  For example, 
+return any non-zero number of characters.  For example,
 
-  val lexer = 
+  val lexer =
     let val input_line = fn f =>
           let fun loop result =
              let val c = input (f,1)
@@ -420,7 +420,7 @@
 Here is a sample lexer for a calculator program:
 
 datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN |
-                     NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES 
+                     NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
 
 val linenum = ref 1
 val error = fn x => output(std_out,x ^ "\n")
@@ -448,7 +448,7 @@
 Here is the parser for the calculator:
 
 (* Sample interactive calculator to demonstrate use of lexer produced by ML-Lex
- 
+
    The original grammar was
 
        stmt_list -> stmt_list stmt
@@ -457,14 +457,14 @@
        t -> t * f | t/f | f
        f -> (exp) | id | num
 
-  The function parse takes a stream and parses it for the calculator 
+  The function parse takes a stream and parses it for the calculator
   program.
 
   If a syntax error occurs, parse prints an error message and calls itself
   on the stream.  On this system that has the effect of ignoring all input
   to the end of a line.
 *)
-       
+
 structure Calc =
  struct
    open CalcLex
@@ -496,7 +496,7 @@
          case !nexttok of
             EOF => ()
           | _ => (STMT(); STMT_LIST())
-        
+
      and STMT() =
          (case !nexttok
            of EOS  => ()

Modified: mlton/trunk/mllex/lexgen.sml
===================================================================
--- mlton/trunk/mllex/lexgen.sml	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.sml	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,15 +1,20 @@
-(* Modified by mfluet at acm.org on 2005-8-01.
- * Update with SML/NJ 110.55+.
+(* Modified by Vesa Karvonen on 2007-12-19.
+ * Create line directives in output.
  *)
-(* Modified by sweeks at acm.org on 2000-8-24.
- * Ported to MLton.
+(* Modified by Matthew Fluet on 2007-11-07.
+ * Add %posint command.
  *)
-
+(* Modified by StephenWeeks on 2005-08-18.
+ * Fix file starting position
+ *)
+(* Modified by Stephen Weeks on 2004-10-19.
+ * Do not create references to Unsafe structure.
+ *)
 (*  Lexical analyzer generator for Standard ML.
         Version 1.7.0, June 1998
 
 Copyright (c) 1989-1992 by Andrew W. Appel,
-   David R. Tarditi, James S. Mattson 
+   David R. Tarditi, James S. Mattson
 
 This software comes with ABSOLUTELY NO WARRANTY.
 This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
@@ -52,7 +57,7 @@
                 and characters.
         02/08/95 (jhr) Modified to use new List module interface.
         05/18/95 (jhr) changed Vector.vector to Vector.fromList
- 
+
  * Revision 1.9  1998/01/06 19:23:53  appel
  *   added %posarg feature to permit position-within-file to be passed
  *   as a parameter to makeLexer
@@ -108,7 +113,7 @@
 
 The ASU proposal works as follows. Suppose that we are
 using NFA's to represent our regular expressions.  Then to
-build an NFA for e1 / e2, we build an NFA n1 for e1 
+build an NFA for e1 / e2, we build an NFA n1 for e1
 and an NFA n2 for e2, and add an epsilon transition
 from e1 to e2.
 
@@ -229,10 +234,10 @@
    datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR
           | LP | RP | CARAT | DOLLAR | SLASH | STATE of string list
           | REPS of int * int | ID of string | ACTION of pos * string
-          | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES 
+          | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
           | COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
           | POSINT
-        
+
    datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
                 | ALT of exp * exp | CAT of exp * exp | TRAIL of int
                 | END of int
@@ -240,10 +245,10 @@
    (* flags describing input Lex spec. - unnecessary code is omitted *)
    (* if possible *)
 
-   val CharFormat = ref false;  
+   val CharFormat = ref false;
    val UsesTrailingContext = ref false;
    val UsesPrevNewLine = ref false;
-   
+
    (* flags for various bells & whistles that Lex has.  These slow the
       lexer down and should be omitted from production lexers (if you
       really want speed) *)
@@ -254,10 +259,10 @@
 
    (* Can increase size of character set *)
 
-   val CharSetSize: int ref = ref 129;
+   val CharSetSize = ref 129;
 
    (* Can name structure or declare header code *)
- 
+
    val StrName = ref "Mlex"
    val HeaderCode = ref ""
    val HeaderPos = ref {line = 0, col = 0}
@@ -274,7 +279,7 @@
                               UsesTrailingContext := false;
                                CharSetSize := 129; StrName := "Mlex";
                                 HeaderCode := ""; HeaderDecl:= false;
-                                ArgCode := NONE; 
+                                ArgCode := NONE;
                                 StrDecl := false;
                               PosIntName := "Int"; PosIntDecl := false)
 
@@ -384,11 +389,11 @@
       end
 end
 
-open dict; 
+open dict;
 
 (* INPUT.ML : Input w/ one character push back capability *)
 
-val LineNum: int ref = ref 1;
+val LineNum = ref 1;
 
 abstype ibuf =
         BUF of TextIO.instream * {b : string ref, p : int ref}
@@ -402,16 +407,16 @@
         fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0})
         fun close_ibuf (BUF (s,_)) = TextIO.closeIn(s)
         exception eof
-        fun getch (a as (BUF(s,{b,p}))) = 
+        fun getch (a as (BUF(s,{b,p}))) =
                  if (!p = (size (!b)))
                    then (b := TextIO.inputN(s, 1024);
                          p := 0;
                          if (size (!b))=0
-                            then raise eof 
+                            then raise eof
                             else getch a)
                    else (let val ch = String.sub(!b,!p)
-                         in (pos := !pos + 1
-                           ; if ch = #"\n"
+                         in (pos := !pos + 1;
+                             if ch = #"\n"
                                  then (LineNum := !LineNum + 1;
                                        linePos := !pos)
                                  else ();
@@ -472,9 +477,9 @@
                 then skipws()
                 else ch
             end
-                
-      and nextch () = getch(!LexBuf) 
 
+      and nextch () = getch(!LexBuf)
+
       and escaped () = (case nextch()
              of #"b" => #"\008"
               | #"n" => #"\n"
@@ -484,7 +489,7 @@
               | x => let
                   fun err t = prErr("illegal ascii escape '"^(implode(rev t))^"'")
                   fun cvt c = (Char.ord c - Char.ord #"0")
-                  fun f (n: int, c, t) = if c=3
+                  fun f (n, c, t) = if c=3
                         then if n >= (!CharSetSize)
                           then err t
                           else Char.chr n
@@ -498,16 +503,16 @@
                     if isDigit x then f(cvt x, 1, [x]) else x
                   end
             (* end case *))
-        
+
       and onechar x = let val c = array(!CharSetSize, false)
               in
                 update(c, Char.ord(x), true); CHARS(c)
               end
-                
+
       in case !LexState of 0 => let val makeTok = fn () =>
                 case skipws()
                         (* Lex % operators *)
-                 of #"%" => (case nextch() of 
+                 of #"%" => (case nextch() of
                           #"%" => LEXMARK
                         | a => let fun f s =
                                     let val a = nextch()
@@ -688,7 +693,7 @@
 end
 handle eof => NextTok := EOF ;
 
-fun GetTok (_:unit) : token = 
+fun GetTok (_:unit) : token =
         let val t = !NextTok in AdvanceTok(); t
         end;
 val SymTab = ref (create String.<=) : (string,exp) dictionary ref
@@ -698,20 +703,20 @@
         let val rec optional = fn e => ALT(EPS,e)
 
             and lookup' = fn name =>
-                lookup(!SymTab) name 
+                lookup(!SymTab) name
                 handle LOOKUP => prErr ("bad regular expression name: "^
                                             name)
 
         and newline = fn () => let val c = array(!CharSetSize,false) in
                 update(c,10,true); c
                 end
-        
+
         and endline = fn e => trail(e,CLASS(newline(),0))
-        
+
         and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2)
-        
+
         and closure1 = fn e => CAT(e,CLOSURE(e))
-        
+
         and repeat = fn (min,max,e) => let val rec rep = fn
                   (0,0) => EPS
                 | (0,1) => ALT(e,EPS)
@@ -719,16 +724,16 @@
                 | (i,j) => CAT(e,rep(i-1,j-1))
         in rep(min,max)
         end
-        
+
         and exp0 = fn () => case GetTok() of
                   CHARS(c) => exp1(CLASS(c,0))
                 | LP => let val e = exp0() in
                  if !NextTok = RP then
                   (AdvanceTok(); exp1(e))
-                 else (prSynErr "missing '('") end
+                 else (prSynErr "missing ')'") end
                 | ID(name) => exp1(lookup' name)
                 | _ => raise SyntaxError
-                
+
         and exp1 = fn (e) => case !NextTok of
                   SEMI => e
                 | ARROW => e
@@ -747,7 +752,7 @@
                         | REPS(i,j) => exp1(repeat(i,j,e))
                         | ID(name) => exp2(e,lookup' name)
                         | _ => raise SyntaxError)
-                        
+
         and exp2 = fn (e1,e2) => case !NextTok of
                   SEMI => CAT(e1,e2)
                 | ARROW => CAT(e1,e2)
@@ -769,9 +774,9 @@
                         | _ => raise SyntaxError)
 in exp0()
 end;
-val StateTab = ref(create(String.<=)) : (string,int) dictionary ref 
+val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
 
-val StateNum: int ref = ref 0;
+val StateNum = ref 0;
 
 fun GetStates () : int list =
 
@@ -781,7 +786,7 @@
                                               prErr ("bad state name: "^x)
                                           ],sl))
 
-        fun addall i sl = 
+        fun addall i sl =
             if i <= !StateNum then addall (i+2) (union ([i],sl))
             else sl
 
@@ -792,17 +797,17 @@
           | addincs (x::y) = x::(x+1)::addincs y
 
         val state_list =
-           case !NextTok of 
+           case !NextTok of
              STATE s => (AdvanceTok(); LexState := 1; add s nil)
              | _ => addall 1 nil
-                
+
       in case !NextTok
            of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true;
                         incall state_list)
             | _ => addincs state_list
       end
 
-val LeafNum: int ref = ref ~1;
+val LeafNum = ref ~1;
 
 fun renum(e : exp) : exp =
         let val rec label = fn
@@ -835,7 +840,7 @@
                                      ++StateNum; AdvanceTok(); f())
                                         | _ => ())
                    in AdvanceTok(); f ();
-                      if !NextTok=SEMI then ParseDefs() else 
+                      if !NextTok=SEMI then ParseDefs() else
                         (prSynErr "expected ';'")
                    end
                 | ID x => (LexState:=1; AdvanceTok(); if GetTok() = ASSIGN
@@ -848,13 +853,13 @@
                 | FULLCHARSET => (CharSetSize := 256; ParseDefs())
                 | HEADER => (LexState := 2; AdvanceTok();
                              case GetTok()
-                             of ACTION (p, s) => 
+                             of ACTION (p, s) =>
                                 if (!StrDecl) then
                                    (prErr "cannot have both %structure and %header \
                                     \declarations")
                                 else if (!HeaderDecl) then
                                    (prErr "duplicate %header declarations")
-                                else 
+                                else
                                     (HeaderCode := s; LexState := 0;
                                      HeaderPos := p;
                                      HeaderDecl := true; ParseDefs())
@@ -870,7 +875,7 @@
                                 ParseDefs())
                 | ARG => (LexState := 2; AdvanceTok();
                              case GetTok()
-                             of ACTION s => 
+                             of ACTION s =>
                                 (case !ArgCode
                                    of SOME _ => prErr "duplicate %arg declarations"
                                     | NONE => ArgCode := SOME s;
@@ -896,7 +901,7 @@
                  let val s = GetStates()
                      val e = renum(CAT(GetExp(),END(0)))
                  in
-                 if !NextTok = ARROW then 
+                 if !NextTok = ARROW then
                    (LexState:=2; AdvanceTok();
                     case GetTok() of ACTION(act) =>
                       if !NextTok=SEMI then
@@ -918,8 +923,8 @@
                                 say (Int.toString n); say ";\n"; make y)
    in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab))
    end
-                       
-structure L = 
+
+structure L =
         struct
           nonfix >
           type key = int list * string
@@ -1033,24 +1038,24 @@
                   let val name = (Int.toString l)
                   in let val (r,n) = lookup ((x,name),t)
                       in makeEntry(y,(n::rs),t)
-                      end handle notfound _ => 
+                      end handle notfound _ =>
                         (count := !count+1;
                           say " ("; say name; say ",";
                           makeItems x; say "),\n";
                          makeEntry(y,(name::rs),(insert ((x,name),t))))
                   end
 
-            val _ = say "val s = [ \n" 
+            val _ = say "val s = [ \n"
             val res =  makeEntry(trans,nil,empty)
-            val _ = 
-              case !CharFormat 
+            val _ =
+              case !CharFormat
                of true => (say "(0, \"\")]\n"; say "fun f x = x \n")
                 | false => (say "(0, 0, \"\")]\n";
                     say "fun f(n, i, x) = (n, Vector.tabulate(i, decode x)) \n")
 
             val _ = say "val s = map f (rev (tl (rev s))) \n"
             val _ = say "exception LexHackingError \n"
-            val _ = say "fun look ((j,x)::r, i) = if i = j then x else look(r, i) \n"
+            val _ = say "fun look ((j,x)::r, i: int) = if i = j then x else look(r, i) \n"
             val _ = say "  | look ([], i) = raise LexHackingError\n"
 
         val _ = say "fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} \n"
@@ -1084,7 +1089,7 @@
         in
             mt args
         end
-                        
+
 (*
         fun makeTable(nil,nil) = ()
           | makeTable(a::a',b::b') =
@@ -1110,7 +1115,7 @@
 
         fun msg x = TextIO.output(TextIO.stdOut, x)
 
-  in (say "in Vector.fromList(map g \n["; makeTable(rs,newfins); 
+  in (say "in Vector.fromList(map g \n["; makeTable(rs,newfins);
       say "])\nend\n";
     msg ("\nNumber of states = " ^ (Int.toString (length trans)));
     msg ("\nNumber of distinct rows = " ^ (Int.toString (!count)));
@@ -1135,7 +1140,7 @@
                                 say "\n"; make(y,false))
     in make (listofdict(ends),true)
     end
-                        
+
 fun leafdata(e:(int list * exp) list) =
         let val fp = array(!LeafNum + 1,nil)
         and leaf = array(!LeafNum + 1,EPS)
@@ -1163,7 +1168,7 @@
                 | (_,x)::tl => (moredata(x);makedata(tl))
         in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs)
         end;
-        
+
 fun makedfa(rules) =
 let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref
     val fintab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
@@ -1177,20 +1182,20 @@
            tctab := enter(!tctab)(statenum,gettc(state));
            transtab := enter(!transtab)(statenum,transitions)
         end
-        
+
 and visitstarts (states) =
         let fun vs nil i = ()
               | vs (hd::tl) i = (visit (hd,i); vs tl (i+1))
         in vs states 0
         end
-        
+
 and hashstate(s: int list) =
         let val rec hs =
                 fn (nil,z) => z
                  | ((x:int)::y,z) => hs(y,z ^ " " ^ (Int.toString x))
         in hs(s,"")
         end
-        
+
 and find(s) = lookup(!StateTab)(hashstate(s))
 
 and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n)
@@ -1200,11 +1205,11 @@
         handle LOOKUP => let val n = ++StateNum in
                 add(state,n); visit(state,n); n
                 end
-                
+
 and getfin state =
         let fun f nil fins = fins
               | f (hd::tl) fins =
-                 case (leaf sub hd) 
+                 case (leaf sub hd)
                     of END _ => f tl (hd::fins)
                      | _ => f tl fins
         in f state nil
@@ -1213,7 +1218,7 @@
 and gettc state =
         let fun f nil fins = fins
               | f (hd::tl) fins =
-                 case (leaf sub hd) 
+                 case (leaf sub hd)
                     of TRAIL _ => f tl (hd::fins)
                      | _ => f tl fins
         in f state nil
@@ -1226,7 +1231,7 @@
                   case (leaf sub hd) of
                    CLASS(i,_)=>
                         (if (i sub c) then cktrans tl (union(r,fp sub hd))
-                         else cktrans tl r handle Subscript => 
+                         else cktrans tl r handle Subscript =>
                                                 cktrans tl r
                         )
                    | _ => cktrans tl r
@@ -1238,7 +1243,7 @@
          end
      in loop ((!CharSetSize) - 1) nil
      end
-        
+
 and startstates() =
         let val startarray = array(!StateNum + 1, nil);
             fun listofarray(a,n) =
@@ -1254,12 +1259,12 @@
                         fix(tl,firsts))
         in makess(rules);listofarray(startarray, !StateNum + 1)
         end
-        
+
 in visitstarts(startstates());
 (listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs)
 end
 
-val skel_hd = 
+val skel_hd =
 "   struct\n\
 \    structure UserDeclarations =\n\
 \      struct\n\
@@ -1282,18 +1287,18 @@
         val () = (InFile := infile; OutFile := outfile)
       fun PrintLexer (ends) =
     let val sayln = fn x => (say x; say "\n")
-     in case !ArgCode 
+     in case !ArgCode
          of NONE => (sayln "fun lex () : Internal.result =";
                      sayln "let fun continue() = lex() in")
           | SOME (p,s) =>
                     (say "fun lex "; say "(yyarg as (";
                      sayPos (SOME p); say s; sayPos NONE; sayln ")) =";
-                     sayln "let fun continue() : Internal.result = ");
+                       sayln "let fun continue() : Internal.result = ");
          say "  let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
-         sayln " list list,l,i0: int) =";
+         sayln " list list,l,i0) =";
          if !UsesTrailingContext
-             then say "\tlet fun action (i: int,nil,rs)"
-             else say "\tlet fun action (i: int,nil)";
+             then say "\tlet fun action (i,nil,rs)"
+             else say "\tlet fun action (i,nil)";
          sayln " = raise LexError";
          if !UsesTrailingContext
              then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)"
@@ -1303,10 +1308,9 @@
              else sayln "\t| action (i,(node::acts)::l) =";
          sayln "\t\tcase node of";
          sayln "\t\t    Internal.N yyk => ";
-         sayln "\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\
-               \\t\t\t     val yypos: YYPosInt.int = YYPosInt.+(YYPosInt.fromInt i0, !yygone)\n";
-        
-         if !CountNewLines 
+         sayln "\t\t\t(let fun yymktext() = substring(!yyb,i0,i-i0)\n\
+               \\t\t\t     val yypos = YYPosInt.+(YYPosInt.fromInt i0, !yygone)";
+         if !CountNewLines
             then (sayln "\t\t\tval _ = yylineno := CharVectorSlice.foldli";
                   sayln "\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice (!yyb,i0,SOME(i-i0)))")
             else ();
@@ -1314,7 +1318,7 @@
              then (say "\t\t\tfun REJECT() = action(i,acts::l";
                    if !UsesTrailingContext
                        then sayln ",rs)" else sayln ")")
-             else ();    
+             else ();
          sayln "\t\t\topen UserDeclarations Internal.StartStates";
          sayln " in (yybufpos := i; case yyk of ";
          sayln "";
@@ -1323,7 +1327,7 @@
          say "\n\t\t) end ";
          say ")\n\n";
          if (!UsesTrailingContext) then say skel_mid2 else ();
-         sayln "\tval {fin,trans} = Vector.sub (Internal.tab, s)";
+         sayln "\tval {fin,trans} = Vector.sub(Internal.tab, s)";
          sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves";
          sayln "\tin if l = !yybl then";
          sayln "\t     if trans = #trans(Vector.sub(Internal.tab,0))";
@@ -1331,7 +1335,7 @@
          if !UsesTrailingContext then say ",nil" else ();
          say ") else";
          sayln "\t    let val newchars= if !yydone then \"\" else yyinput 1024";
-         sayln "\t    in if (String.size newchars)=0";
+         sayln "\t    in if (size newchars)=0";
          sayln "\t\t  then (yydone := true;";
          say "\t\t        if (l=i0) then UserDeclarations.eof ";
          sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg");
@@ -1339,30 +1343,30 @@
          if !UsesTrailingContext then
             sayln ",nil))" else sayln "))";
          sayln "\t\t  else (if i0=l then yyb := newchars";
-         sayln "\t\t     else yyb := String.substring(!yyb,i0,l-i0)^newchars;";
-         sayln "\t\t     yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);\n";
-         sayln "\t\t     yybl := String.size (!yyb);";
+         sayln "\t\t     else yyb := substring(!yyb,i0,l-i0)^newchars;";
+         sayln "\t\t     yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);";
+         sayln "\t\t     yybl := size (!yyb);";
          sayln "\t\t     scan (s,AcceptingLeaves,l-i0,0))";
          sayln "\t    end";
-         sayln "\t  else let val NewChar = Char.ord (CharVector.sub (!yyb,l))";
+         sayln "\t  else let val NewChar = Char.ord(CharVector.sub(!yyb,l))";
          if !CharSetSize=129
-           then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128" 
+           then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128"
            else ();
          say "\t\tval NewState = ";
-         sayln (if !CharFormat 
-                then "Char.ord (CharVector.sub (trans,NewChar))"
-                else "Vector.sub (trans, NewChar)");
+         sayln (if !CharFormat
+                then "Char.ord(CharVector.sub(trans,NewChar))"
+                else "Vector.sub(trans, NewChar)");
          say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
          if !UsesTrailingContext then sayln ",nil)" else sayln ")";
          sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
          sayln "\tend";
          sayln "\tend";
          if !UsesPrevNewLine then () else sayln "(*";
-         sayln "\tval start= if String.substring(!yyb,!yybufpos-1,1)=\"\\n\"";
+         sayln "\tval start= if substring(!yyb,!yybufpos-1,1)=\"\\n\"";
          sayln "then !yybegin+1 else !yybegin";
          if !UsesPrevNewLine then () else sayln "*)";
          say "\tin scan(";
-         if !UsesPrevNewLine then say "start" 
+         if !UsesPrevNewLine then say "start"
          else say "!yybegin (* start *)";
          sayln ",nil,!yybufpos,!yybufpos)";
          sayln "    end";
@@ -1397,11 +1401,10 @@
                        prErr "lookahead is unimplemented")
                    else ()
         in
-           say "type int = Int.int\n";
           if (!HeaderDecl)
               then (sayPos (SOME (!HeaderPos))
-                  ; say (!HeaderCode)
-                  ; sayPos NONE)
+                    ; say (!HeaderCode)
+                    ; sayPos NONE)
               else say ("structure " ^ (!StrName));
           say "=\n";
           say skel_hd;
@@ -1422,16 +1425,15 @@
           say "action tried *)\n";
           say "end\n\n";
           say ("structure YYPosInt : INTEGER = " ^ (!PosIntName) ^ "\n");
-          say "type int = Int.int\n";
-          say (if (!PosArg) then "fun makeLexer (yyinput: int -> string,yygone0:YYPosInt.int) =\nlet\n"
-                else "fun makeLexer (yyinput: int -> string) =\nlet\tval yygone0:YYPosInt.int = YYPosInt.fromInt ~1\n");
-          if !CountNewLines then say "\tval yylineno: int ref = ref 0\n\n" else ();
+          say (if (!PosArg) then "fun makeLexer (yyinput,yygone0:YYPosInt.int) =\nlet\n"
+                else "fun makeLexer yyinput =\nlet\tval yygone0= YYPosInt.fromInt ~1\n");
+          if !CountNewLines then say "\tval yylineno = ref 0\n\n" else ();
           say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
-          \\tval yybl: int ref = ref 1\t\t(*buffer length *)\n\
-          \\tval yybufpos: int ref = ref 1\t\t(* location of next character to use *)\n\
-          \\tval yygone: YYPosInt.int ref = ref yygone0\t(* position in file of beginning of buffer *)\n\
+          \\tval yybl = ref 1\t\t(*buffer length *)\n\
+          \\tval yybufpos = ref 1\t\t(* location of next character to use *)\n\
+          \\tval yygone = ref yygone0\t(* position in file of beginning of buffer *)\n\
           \\tval yydone = ref false\t\t(* eof found yet? *)\n\
-          \\tval yybegin: int ref = ref 1\t\t(*Current 'start state' for lexer *)\n\
+          \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\
           \\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
           \\t\t yybegin := x\n\n";
           PrintLexer(ends);

Modified: mlton/trunk/mllex/lexgen.tex
===================================================================
--- mlton/trunk/mllex/lexgen.tex	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.tex	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,3 +1,10 @@
+% Modified by Matthew Fluet on 2007-11-07.
+% Add %posint command.
+%
+% Modified by Matthew Fluet on 2007-10-31.
+% Add \r escape sequence (from Florian Weimer).
+% Fix TeX formatting bug (from Florian Weimer).
+%
 \documentstyle{article}
 \title{        A lexical analyzer generator for Standard ML.\\
                                Version 1.6.0, October 1994
@@ -5,7 +12,7 @@
 \author{                    Andrew W. Appel$^1$\\
                             James S. Mattson\\
                             David R. Tarditi$^2$\\
-\\              
+\\
 \small
 $^1$Department of Computer Science, Princeton University \\
 \small
@@ -26,7 +33,7 @@
 
 \vspace{1in}
 
-New in this version:  
+New in this version:
 \begin{itemize}
 \item REJECT is much less costly than before.
 \item Lexical analyzers with more than 255 states can now compile in your
@@ -80,7 +87,7 @@
 to recognize the words.  It saves programmer time and increases
 program maintainability.
 
-Unfortunately, Lex is targeted only C.  It also places artificial 
+Unfortunately, Lex is targeted only C.  It also places artificial
 limits on the size of strings that can be recognized.
 
 ML-Lex is a variant of Lex for the ML programming language.  ML-Lex
@@ -145,7 +152,7 @@
 Regular expressions are a simple language for denoting classes of
 strings.  A regular expression is defined inductively over an
 alphabet with a set of basic operations.  The alphabet for ML-Lex is
-the Ascii character set (character codes 0--127; or if 
+the Ascii character set (character codes 0--127; or if
 \verb|%full| is used, 0--255).
 
 The syntax and semantics of regular expressions will be described in
@@ -161,7 +168,7 @@
 
 \item   A set of characters enclosed in square brackets [ ] stands
         for any one of those characters.  Inside the brackets, only
-        the symbols  \verb|\ - ^| are reserved.  An initial up-arrow 
+        the symbols  \verb|\ - ^| are reserved.  An initial up-arrow
         \verb|^| stands
         for the complement of the characters listed, e.g. \verb|[^abc]|
         stands for any character except a, b, or c.  The hyphen - denotes
@@ -219,7 +226,7 @@
 
 \item\verb-|-   The infix operator \verb-|- stands for alternation.  The expression
         $e_1$~\verb"|"~$e_2$  stands for anything that either $e_1$ or $e_2$ stands for.
-    
+
 \item[\verb|/|] The infix operator \verb|/| denotes lookahead.  Lookahead is not
         implemented and cannot be used, because there is a bug
         in the algorithm for generating lexers with lookahead.  If
@@ -235,7 +242,7 @@
         for lookahead involving the newline character (that is, it
         is an abbreviation for \verb|/\n|).
 \end{itemize}
-        
+
 Here are some examples of regular expressions, and descriptions of the
 set of strings they denote:
 
@@ -256,7 +263,7 @@
 \subsection{User declarations}
 
 Anything up to the first \verb|%%| is in the user declarations section.  The
-user should note that no symbolic identifier containing 
+user should note that no symbolic identifier containing
 \verb|%%| can be
 used in this section.
 
@@ -296,7 +303,7 @@
 \item[\tt \%arg]       extra (curried) formal parameter argument to be
                           passed to the lex functions, and to be passed
                           to the eof function in place of ()
-\item[\tt \%posint \{identifier\}]  use the {\tt INTEGER} structure for the 
+\item[\tt \%posint \{identifier\}]  use the {\tt INTEGER} structure for the
                           type of {\tt yypos}; use {\tt Int64} or {\tt Position}
                           to allow lexing of multi-gigabyte input files
 \end{description}
@@ -337,7 +344,7 @@
 \label{avail}
 
 ML-Lex places the value of the string matched by a regular expression
-in \verb|yytext|, a string variable.  
+in \verb|yytext|, a string variable.
 
 The user may recursively
 call the lexing function with \verb|lex()|.  (If \verb|%arg| is used, the
@@ -371,11 +378,11 @@
 {\tt yylineno } & {\tt \%count} &         Current line number\\
 \\
 \end{tabular}
-        
 
+
 These values should be used only if necessary.  Adding {\tt REJECT} to a
 lexer will slow it down by 20\%; adding {\tt yylineno} will slow it down by
-another 20\%, or more.  (It is much more efficient to 
+another 20\%, or more.  (It is much more efficient to
 recognize \verb|\n| and
 have an action that increments the line-number variable.)  The use of
 the lookahead operator {\tt /} will also slow down the entire lexer.
@@ -419,7 +426,7 @@
 creates a lexer that operates on the file whose name is f.
 
 When the {\tt \%posarg} directive is used, the type of
-{\tt makeLexer} is 
+{\tt makeLexer} is
 \begin{verbatim}
   val makeLexer : ((int->string)*int) -> yyarg -> lexresult
 \end{verbatim}
@@ -434,10 +441,10 @@
 from the input stream.  It should return a null string to indicate
 that the end of the stream has been reached.  The integer is the
 number of characters that the lexer wishes to read; the function may
-return any non-zero number of characters.  For example, 
+return any non-zero number of characters.  For example,
 
 \begin{verbatim}
-  val lexer = 
+  val lexer =
     let val input_line = fn f =>
           let fun loop result =
              let val c = input (f,1)
@@ -460,9 +467,7 @@
 function at once, and it is desirable that the input function return
 as many as possible.  Reading many characters at once makes the lexer
 more efficient.  Fewer input calls and buffering operations are
-needed, and input is more efficient in large block reads.
-Furthermore, performance is very poor (quadratic in the token length)
-when a token requires lots of calls to the input function. For 
+needed, and input is more efficient in large block reads.  For
 interactive streams this is less of a concern, as the limiting factor
 is the speed at which the user can type.
 
@@ -495,7 +500,7 @@
 \small
 \begin{verbatim}
 datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN |
-                     NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES 
+                     NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
 
 val linenum = ref 1
 val error = fn x => output(std_out,x ^ "\n")
@@ -524,8 +529,8 @@
 Here is the parser for the calculator:
 \begin{verbatim}
 
-(* Sample interactive calculator to demonstrate use of lexer 
- 
+(* Sample interactive calculator to demonstrate use of lexer
+
    The original grammar was
 
        stmt_list -> stmt_list stmt
@@ -534,14 +539,14 @@
        t -> t * f | t/f | f
        f -> (exp) | id | num
 
-  The function parse takes a stream and parses it for the calculator 
+  The function parse takes a stream and parses it for the calculator
   program.
 
   If a syntax error occurs, parse prints an error message and calls
   itself on the stream.  On this system that has the effect of ignoring
-  all input to the end of a line.  
+  all input to the end of a line.
 *)
-       
+
 structure Calc =
  struct
    open CalcLex
@@ -573,7 +578,7 @@
          case !nexttok of
             EOF => ()
           | _ => (STMT(); STMT_LIST())
-        
+
      and STMT() =
          (case !nexttok
            of EOS  => ()

Modified: mlton/trunk/mllex/mlex_int.doc
===================================================================
--- mlton/trunk/mllex/mlex_int.doc	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mlex_int.doc	2009-04-08 12:32:46 UTC (rev 7064)
@@ -24,10 +24,10 @@
           removed from the backward propagating list after this node is
           encountered.
 
-    
+
      The function scan inside the function lex operates as a transition
 function, scanning the input until it is no longer possible to take any
-more transitions.  It accumulates a list of the accepting leaf list 
+more transitions.  It accumulates a list of the accepting leaf list
 associated with each accepting state passed through.
 
        Scan operates as follows:
@@ -39,16 +39,16 @@
                 * l - position of the next character in the buffer b to read
                 * i0 - starting position in the buffer.
 
-        Output: If no match is found, it raises the exception LexError. 
+        Output: If no match is found, it raises the exception LexError.
                 Otherwise, it returns a value of type lexresult.
 
         It operates as a transition function:
              It (1) adds the list of accepting leaves for the current state to
                     the list of accepting leave lists
                 (2) tries to make a transition on the current input character
-                    to the next state.  If it can't make a transition, it 
+                    to the next state.  If it can't make a transition, it
                     executes the action function.
-                        (a) - if it is past the end of the buffer, it 
+                        (a) - if it is past the end of the buffer, it
                                 (1) checks if it is at eof.  If it is then:
                                         It checks to see if it has made any
                                         transitions since it was first called -
@@ -71,7 +71,7 @@
 
                                     This buffer update operation requires
                                     O(n^2/1024) char. copies for lexemes > 1024
-                                    characters in length, and O(n) char. copies 
+                                    characters in length, and O(n) char. copies
                                     for lexemes <= 1024 characters in length.
                                     It can be made O(n) using linked list
                                     buffers & a Byte.array of size n (not the

Deleted: mlton/trunk/mllex/mllex.cm
===================================================================
--- mlton/trunk/mllex/mllex.cm	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mllex.cm	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,12 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-Group is
-
-sources.cm
-call-main.sml

Modified: mlton/trunk/mllex/mllex.mlb
===================================================================
--- mlton/trunk/mllex/mllex.mlb	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mllex.mlb	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,4 +1,5 @@
-(* Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
+(* Copyright (C) 2009 Matthew Fluet.
+ * Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
  *    Jagannathan, and Stephen Weeks.
  *
  * MLton is released under a BSD-style license.
@@ -6,7 +7,22 @@
  *)
 
 local
-   sources.mlb
+   local
+      local
+         $(SML_LIB)/basis/basis.mlb
+         lexgen.sml
+      in
+         structure LexGen
+      end
+      local
+         ../lib/mlton/sources.mlb
+         main.sml
+      in
+         structure Main
+      end
+   in
+      structure Main
+   end
 in
    call-main.sml
 end

Deleted: mlton/trunk/mllex/sources.cm
===================================================================
--- mlton/trunk/mllex/sources.cm	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/sources.cm	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,13 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-Group is
-
-../lib/mlton/sources.cm
-lexgen.sml
-main.sml

Deleted: mlton/trunk/mllex/sources.mlb
===================================================================
--- mlton/trunk/mllex/sources.mlb	2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/sources.mlb	2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,22 +0,0 @@
-(* Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
- *    Jagannathan, and Stephen Weeks.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-local
-   local 
-      $(SML_LIB)/basis/basis.mlb
-      lexgen.sml
-   in
-      structure LexGen
-   end
-   local
-      ../lib/mlton/sources.mlb
-   in
-      main.sml
-   end
-in
-   structure Main
-end