[MLton-commit] r7064
Matthew Fluet
fluet at mlton.org
Wed Apr 8 05:32:48 PDT 2009
Minimize diff with SML/NJ upstream version of ml-lex tool.
Since we now use the default int and word types provided by the host
compiler, there is no need for the extra 'type int = Int.int' declaration
and ':int' annotations in the output of ml-lex. Minimizing the diff with
SML/NJ upstream makes it easier to apply upstream changes.
----------------------------------------------------------------------
D mlton/trunk/mllex/INSTALL
U mlton/trunk/mllex/Makefile
U mlton/trunk/mllex/README.MLton
D mlton/trunk/mllex/export-lex.sml
U mlton/trunk/mllex/lexgen.doc
U mlton/trunk/mllex/lexgen.sml
U mlton/trunk/mllex/lexgen.tex
U mlton/trunk/mllex/mlex_int.doc
D mlton/trunk/mllex/mllex.cm
U mlton/trunk/mllex/mllex.mlb
D mlton/trunk/mllex/sources.cm
D mlton/trunk/mllex/sources.mlb
----------------------------------------------------------------------
Deleted: mlton/trunk/mllex/INSTALL
===================================================================
--- mlton/trunk/mllex/INSTALL 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/INSTALL 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,21 +0,0 @@
-Installation instructions for ML-Lex
--------------------------------------
-
-ML-Lex will normally be automatically
-installed as part of the SML/NJ system
-by the SML/NJ installer.
-
-To install by hand (e.g., if you make
-your own modifications), run the
-
- ./build
-
-script in this directory and then move
-the file
-
- ml-lex.$ARCH-$OS
-
-to the heap-file directory.
-
-Running ./build requires a properly
-functioning installation of SML/NJ.
Modified: mlton/trunk/mllex/Makefile
===================================================================
--- mlton/trunk/mllex/Makefile 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/Makefile 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,4 +1,5 @@
-## Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
+## Copyright (C) 2009 Matthew Fluet.
+ # Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
# Jagannathan, and Stephen Weeks.
# Copyright (C) 1997-2000 NEC Research Institute.
#
@@ -22,9 +23,6 @@
@echo 'Compiling $(NAME)'
"$(MLTON)" $(FLAGS) $(NAME).mlb
-$(NAME).sml: $(NAME).cm $(shell "$(MLTON)" -stop f $(NAME).cm)
- mlton -stop sml $(NAME).cm
-
html/index.html: $(TEX_FILES)
mkdir -p html
hevea -fix -o html/mllex.html -exec xxdate.exe macros.hva lexgen.tex
Modified: mlton/trunk/mllex/README.MLton
===================================================================
--- mlton/trunk/mllex/README.MLton 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/README.MLton 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,11 +1,22 @@
-This is a modified version of the ml-lex directory that comes with SML/NJ
-110.55. I made a few changes so that the sources are compilable with MLton.
+This is a modified version of the ml-lex directory that comes with SML/NJ.
-mfluet at acm.org 2005-7-21.
+Files from SML/NJ:
+ INSTALL -- deleted
+ README
+ build -- deleted
+ build.bat -- deleted
+ export-lex.sml -- deleted
+ lexgen.doc
+ lexgen.sml -- modified
+ lexgen.tex -- modified
+ ml-lex.cm -- deleted
+ mlex_int.doc
+ tool/* -- deleted
-*****
-
-This is a modified version of the ml-lex directory that comes with SML/NJ
-110.9.1. I made a few changes so that the sources are compilable with MLton.
-
-sweeks at acm.org 2000-8-22.
+Files added:
+ Makefile
+ README.MLton
+ call-main.sml
+ macros.hva
+ main.sml
+ mllex.mlb
Deleted: mlton/trunk/mllex/export-lex.sml
===================================================================
--- mlton/trunk/mllex/export-lex.sml 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/export-lex.sml 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,45 +0,0 @@
-(* export-lex.sml
- *
- * Revision 1.2 2000/03/07 04:01:05 blume
- * - build script now use new ml-build mechanism
- *)
-structure ExportLexGen : sig
- val lexGen : (string * string list) -> OS.Process.status
-end = struct
-
- exception Interrupt
-
- (* This function applies operation to (). If it handles an interrupt
- * signal (Control-C), it raises the exception Interrupt. Example:
- * (handleInterrupt foo) handle Interrupt => print "Bang!\n"
- *)
- fun handleInterrupt (operation : unit -> unit) =
- let exception Done
- val old'handler = Signals.inqHandler(Signals.sigINT)
- fun reset'handler () =
- Signals.setHandler(Signals.sigINT, old'handler)
- in (SMLofNJ.Cont.callcc (fn k =>
- (Signals.setHandler(Signals.sigINT, Signals.HANDLER(fn _ => k));
- operation ();
- raise Done));
- raise Interrupt)
- handle Done => (reset'handler ())
- | exn => (reset'handler (); raise exn)
- end
-
- fun err msg = TextIO.output(TextIO.stdErr, String.concat msg)
-
- fun lexGen (name, args) = let
- fun lex_gen () =
- case args of
- [] => (err [name, ": missing filename\n"];
- OS.Process.exit OS.Process.failure)
- | files => List.app LexGen.lexGen files
- in
- (handleInterrupt lex_gen; OS.Process.success)
- handle Interrupt => (err [name, ": Interrupt\n"]; OS.Process.failure)
- | any => (err [name, ": uncaught exception ",
- General.exnMessage any, "\n"];
- OS.Process.failure)
- end
-end
Modified: mlton/trunk/mllex/lexgen.doc
===================================================================
--- mlton/trunk/mllex/lexgen.doc 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.doc 2009-04-08 12:32:46 UTC (rev 7064)
@@ -63,7 +63,7 @@
to recognize the words. It saves programmer time and increases
program maintainability.
-Unfortunately, Lex is targeted only C. It also places artificial
+Unfortunately, Lex is targeted only C. It also places artificial
limits on the size of strings that can be recognized.
ML-Lex is a variant of Lex for the ML programming language. ML-Lex
@@ -149,7 +149,7 @@
but first; to include - literally in a set, put it first or last.
The dot . character stands for any character except newline,
- i.e. the same as [^\n]
+ i.e. the same as [^\n]
The following special escape sequences are available, inside
or outside of square-brackets:
@@ -192,7 +192,7 @@
The infix operator | stands for alternation. The expression
e1 | e2 stands for anything that either e1 or e2 stands for.
-
+
The infix operator / denotes lookahead. Lookahead is not
implemented and cannot be used, because there is a bug
in the algorithm for generating lexers with lookahead. If
@@ -210,7 +210,7 @@
sign $ occurred at the end of an expression, that expression
would only match strings that occur at the end of a line
(right before a newline character).
-
+
Here are some examples of regular expressions, and descriptions of the
set of strings they denote:
@@ -300,7 +300,7 @@
V. Values available inside the code associated with a rule.
Mlex places the value of the string matched by a regular expression
-in yytext, a string variable.
+in yytext, a string variable.
The user may recursively
call the lexing function with lex(). (If %arg is used, the
@@ -325,13 +325,13 @@
string, or that matches the longest
possible prefix of this string,
is used instead.
-
+
yypos Current character position from
beginning of file.
yylineno %count Current line number
-
+
These values should be used only if necessary. Adding REJECT to a
lexer will slow it down by 20%; adding yylineno will slow it down by
another 20%, or more. (It is much more efficient to recognize \n and
@@ -368,9 +368,9 @@
from the input stream. It should return a null string to indicate
that the end of the stream has been reached. The integer is the
number of characters that the lexer wishes to read; the function may
-return any non-zero number of characters. For example,
+return any non-zero number of characters. For example,
- val lexer =
+ val lexer =
let val input_line = fn f =>
let fun loop result =
let val c = input (f,1)
@@ -420,7 +420,7 @@
Here is a sample lexer for a calculator program:
datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN |
- NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
+ NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
val linenum = ref 1
val error = fn x => output(std_out,x ^ "\n")
@@ -448,7 +448,7 @@
Here is the parser for the calculator:
(* Sample interactive calculator to demonstrate use of lexer produced by ML-Lex
-
+
The original grammar was
stmt_list -> stmt_list stmt
@@ -457,14 +457,14 @@
t -> t * f | t/f | f
f -> (exp) | id | num
- The function parse takes a stream and parses it for the calculator
+ The function parse takes a stream and parses it for the calculator
program.
If a syntax error occurs, parse prints an error message and calls itself
on the stream. On this system that has the effect of ignoring all input
to the end of a line.
*)
-
+
structure Calc =
struct
open CalcLex
@@ -496,7 +496,7 @@
case !nexttok of
EOF => ()
| _ => (STMT(); STMT_LIST())
-
+
and STMT() =
(case !nexttok
of EOS => ()
Modified: mlton/trunk/mllex/lexgen.sml
===================================================================
--- mlton/trunk/mllex/lexgen.sml 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.sml 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,15 +1,20 @@
-(* Modified by mfluet at acm.org on 2005-8-01.
- * Update with SML/NJ 110.55+.
+(* Modified by Vesa Karvonen on 2007-12-19.
+ * Create line directives in output.
*)
-(* Modified by sweeks at acm.org on 2000-8-24.
- * Ported to MLton.
+(* Modified by Matthew Fluet on 2007-11-07.
+ * Add %posint command.
*)
-
+(* Modified by StephenWeeks on 2005-08-18.
+ * Fix file starting position
+ *)
+(* Modified by Stephen Weeks on 2004-10-19.
+ * Do not create references to Unsafe structure.
+ *)
(* Lexical analyzer generator for Standard ML.
Version 1.7.0, June 1998
Copyright (c) 1989-1992 by Andrew W. Appel,
- David R. Tarditi, James S. Mattson
+ David R. Tarditi, James S. Mattson
This software comes with ABSOLUTELY NO WARRANTY.
This software is subject only to the PRINCETON STANDARD ML SOFTWARE LIBRARY
@@ -52,7 +57,7 @@
and characters.
02/08/95 (jhr) Modified to use new List module interface.
05/18/95 (jhr) changed Vector.vector to Vector.fromList
-
+
* Revision 1.9 1998/01/06 19:23:53 appel
* added %posarg feature to permit position-within-file to be passed
* as a parameter to makeLexer
@@ -108,7 +113,7 @@
The ASU proposal works as follows. Suppose that we are
using NFA's to represent our regular expressions. Then to
-build an NFA for e1 / e2, we build an NFA n1 for e1
+build an NFA for e1 / e2, we build an NFA n1 for e1
and an NFA n2 for e2, and add an epsilon transition
from e1 to e2.
@@ -229,10 +234,10 @@
datatype token = CHARS of bool array | QMARK | STAR | PLUS | BAR
| LP | RP | CARAT | DOLLAR | SLASH | STATE of string list
| REPS of int * int | ID of string | ACTION of pos * string
- | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
+ | BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
| COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
| POSINT
-
+
datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
| ALT of exp * exp | CAT of exp * exp | TRAIL of int
| END of int
@@ -240,10 +245,10 @@
(* flags describing input Lex spec. - unnecessary code is omitted *)
(* if possible *)
- val CharFormat = ref false;
+ val CharFormat = ref false;
val UsesTrailingContext = ref false;
val UsesPrevNewLine = ref false;
-
+
(* flags for various bells & whistles that Lex has. These slow the
lexer down and should be omitted from production lexers (if you
really want speed) *)
@@ -254,10 +259,10 @@
(* Can increase size of character set *)
- val CharSetSize: int ref = ref 129;
+ val CharSetSize = ref 129;
(* Can name structure or declare header code *)
-
+
val StrName = ref "Mlex"
val HeaderCode = ref ""
val HeaderPos = ref {line = 0, col = 0}
@@ -274,7 +279,7 @@
UsesTrailingContext := false;
CharSetSize := 129; StrName := "Mlex";
HeaderCode := ""; HeaderDecl:= false;
- ArgCode := NONE;
+ ArgCode := NONE;
StrDecl := false;
PosIntName := "Int"; PosIntDecl := false)
@@ -384,11 +389,11 @@
end
end
-open dict;
+open dict;
(* INPUT.ML : Input w/ one character push back capability *)
-val LineNum: int ref = ref 1;
+val LineNum = ref 1;
abstype ibuf =
BUF of TextIO.instream * {b : string ref, p : int ref}
@@ -402,16 +407,16 @@
fun make_ibuf(s) = BUF (s, {b=ref"", p = ref 0})
fun close_ibuf (BUF (s,_)) = TextIO.closeIn(s)
exception eof
- fun getch (a as (BUF(s,{b,p}))) =
+ fun getch (a as (BUF(s,{b,p}))) =
if (!p = (size (!b)))
then (b := TextIO.inputN(s, 1024);
p := 0;
if (size (!b))=0
- then raise eof
+ then raise eof
else getch a)
else (let val ch = String.sub(!b,!p)
- in (pos := !pos + 1
- ; if ch = #"\n"
+ in (pos := !pos + 1;
+ if ch = #"\n"
then (LineNum := !LineNum + 1;
linePos := !pos)
else ();
@@ -472,9 +477,9 @@
then skipws()
else ch
end
-
- and nextch () = getch(!LexBuf)
+ and nextch () = getch(!LexBuf)
+
and escaped () = (case nextch()
of #"b" => #"\008"
| #"n" => #"\n"
@@ -484,7 +489,7 @@
| x => let
fun err t = prErr("illegal ascii escape '"^(implode(rev t))^"'")
fun cvt c = (Char.ord c - Char.ord #"0")
- fun f (n: int, c, t) = if c=3
+ fun f (n, c, t) = if c=3
then if n >= (!CharSetSize)
then err t
else Char.chr n
@@ -498,16 +503,16 @@
if isDigit x then f(cvt x, 1, [x]) else x
end
(* end case *))
-
+
and onechar x = let val c = array(!CharSetSize, false)
in
update(c, Char.ord(x), true); CHARS(c)
end
-
+
in case !LexState of 0 => let val makeTok = fn () =>
case skipws()
(* Lex % operators *)
- of #"%" => (case nextch() of
+ of #"%" => (case nextch() of
#"%" => LEXMARK
| a => let fun f s =
let val a = nextch()
@@ -688,7 +693,7 @@
end
handle eof => NextTok := EOF ;
-fun GetTok (_:unit) : token =
+fun GetTok (_:unit) : token =
let val t = !NextTok in AdvanceTok(); t
end;
val SymTab = ref (create String.<=) : (string,exp) dictionary ref
@@ -698,20 +703,20 @@
let val rec optional = fn e => ALT(EPS,e)
and lookup' = fn name =>
- lookup(!SymTab) name
+ lookup(!SymTab) name
handle LOOKUP => prErr ("bad regular expression name: "^
name)
and newline = fn () => let val c = array(!CharSetSize,false) in
update(c,10,true); c
end
-
+
and endline = fn e => trail(e,CLASS(newline(),0))
-
+
and trail = fn (e1,e2) => CAT(CAT(e1,TRAIL(0)),e2)
-
+
and closure1 = fn e => CAT(e,CLOSURE(e))
-
+
and repeat = fn (min,max,e) => let val rec rep = fn
(0,0) => EPS
| (0,1) => ALT(e,EPS)
@@ -719,16 +724,16 @@
| (i,j) => CAT(e,rep(i-1,j-1))
in rep(min,max)
end
-
+
and exp0 = fn () => case GetTok() of
CHARS(c) => exp1(CLASS(c,0))
| LP => let val e = exp0() in
if !NextTok = RP then
(AdvanceTok(); exp1(e))
- else (prSynErr "missing '('") end
+ else (prSynErr "missing ')'") end
| ID(name) => exp1(lookup' name)
| _ => raise SyntaxError
-
+
and exp1 = fn (e) => case !NextTok of
SEMI => e
| ARROW => e
@@ -747,7 +752,7 @@
| REPS(i,j) => exp1(repeat(i,j,e))
| ID(name) => exp2(e,lookup' name)
| _ => raise SyntaxError)
-
+
and exp2 = fn (e1,e2) => case !NextTok of
SEMI => CAT(e1,e2)
| ARROW => CAT(e1,e2)
@@ -769,9 +774,9 @@
| _ => raise SyntaxError)
in exp0()
end;
-val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
+val StateTab = ref(create(String.<=)) : (string,int) dictionary ref
-val StateNum: int ref = ref 0;
+val StateNum = ref 0;
fun GetStates () : int list =
@@ -781,7 +786,7 @@
prErr ("bad state name: "^x)
],sl))
- fun addall i sl =
+ fun addall i sl =
if i <= !StateNum then addall (i+2) (union ([i],sl))
else sl
@@ -792,17 +797,17 @@
| addincs (x::y) = x::(x+1)::addincs y
val state_list =
- case !NextTok of
+ case !NextTok of
STATE s => (AdvanceTok(); LexState := 1; add s nil)
| _ => addall 1 nil
-
+
in case !NextTok
of CARAT => (LexState := 1; AdvanceTok(); UsesPrevNewLine := true;
incall state_list)
| _ => addincs state_list
end
-val LeafNum: int ref = ref ~1;
+val LeafNum = ref ~1;
fun renum(e : exp) : exp =
let val rec label = fn
@@ -835,7 +840,7 @@
++StateNum; AdvanceTok(); f())
| _ => ())
in AdvanceTok(); f ();
- if !NextTok=SEMI then ParseDefs() else
+ if !NextTok=SEMI then ParseDefs() else
(prSynErr "expected ';'")
end
| ID x => (LexState:=1; AdvanceTok(); if GetTok() = ASSIGN
@@ -848,13 +853,13 @@
| FULLCHARSET => (CharSetSize := 256; ParseDefs())
| HEADER => (LexState := 2; AdvanceTok();
case GetTok()
- of ACTION (p, s) =>
+ of ACTION (p, s) =>
if (!StrDecl) then
(prErr "cannot have both %structure and %header \
\declarations")
else if (!HeaderDecl) then
(prErr "duplicate %header declarations")
- else
+ else
(HeaderCode := s; LexState := 0;
HeaderPos := p;
HeaderDecl := true; ParseDefs())
@@ -870,7 +875,7 @@
ParseDefs())
| ARG => (LexState := 2; AdvanceTok();
case GetTok()
- of ACTION s =>
+ of ACTION s =>
(case !ArgCode
of SOME _ => prErr "duplicate %arg declarations"
| NONE => ArgCode := SOME s;
@@ -896,7 +901,7 @@
let val s = GetStates()
val e = renum(CAT(GetExp(),END(0)))
in
- if !NextTok = ARROW then
+ if !NextTok = ARROW then
(LexState:=2; AdvanceTok();
case GetTok() of ACTION(act) =>
if !NextTok=SEMI then
@@ -918,8 +923,8 @@
say (Int.toString n); say ";\n"; make y)
in say "\n(* start state definitions *)\n\n"; make(listofdict(!StateTab))
end
-
-structure L =
+
+structure L =
struct
nonfix >
type key = int list * string
@@ -1033,24 +1038,24 @@
let val name = (Int.toString l)
in let val (r,n) = lookup ((x,name),t)
in makeEntry(y,(n::rs),t)
- end handle notfound _ =>
+ end handle notfound _ =>
(count := !count+1;
say " ("; say name; say ",";
makeItems x; say "),\n";
makeEntry(y,(name::rs),(insert ((x,name),t))))
end
- val _ = say "val s = [ \n"
+ val _ = say "val s = [ \n"
val res = makeEntry(trans,nil,empty)
- val _ =
- case !CharFormat
+ val _ =
+ case !CharFormat
of true => (say "(0, \"\")]\n"; say "fun f x = x \n")
| false => (say "(0, 0, \"\")]\n";
say "fun f(n, i, x) = (n, Vector.tabulate(i, decode x)) \n")
val _ = say "val s = map f (rev (tl (rev s))) \n"
val _ = say "exception LexHackingError \n"
- val _ = say "fun look ((j,x)::r, i) = if i = j then x else look(r, i) \n"
+ val _ = say "fun look ((j,x)::r, i: int) = if i = j then x else look(r, i) \n"
val _ = say " | look ([], i) = raise LexHackingError\n"
val _ = say "fun g {fin=x, trans=i} = {fin=x, trans=look(s,i)} \n"
@@ -1084,7 +1089,7 @@
in
mt args
end
-
+
(*
fun makeTable(nil,nil) = ()
| makeTable(a::a',b::b') =
@@ -1110,7 +1115,7 @@
fun msg x = TextIO.output(TextIO.stdOut, x)
- in (say "in Vector.fromList(map g \n["; makeTable(rs,newfins);
+ in (say "in Vector.fromList(map g \n["; makeTable(rs,newfins);
say "])\nend\n";
msg ("\nNumber of states = " ^ (Int.toString (length trans)));
msg ("\nNumber of distinct rows = " ^ (Int.toString (!count)));
@@ -1135,7 +1140,7 @@
say "\n"; make(y,false))
in make (listofdict(ends),true)
end
-
+
fun leafdata(e:(int list * exp) list) =
let val fp = array(!LeafNum + 1,nil)
and leaf = array(!LeafNum + 1,EPS)
@@ -1163,7 +1168,7 @@
| (_,x)::tl => (moredata(x);makedata(tl))
in trailmark := ~1; makedata(e); (fp,leaf,!tcpairs)
end;
-
+
fun makedfa(rules) =
let val StateTab = ref (create(String.<=)) : (string,int) dictionary ref
val fintab = ref (create(Int.<=)) : (int,(int list)) dictionary ref
@@ -1177,20 +1182,20 @@
tctab := enter(!tctab)(statenum,gettc(state));
transtab := enter(!transtab)(statenum,transitions)
end
-
+
and visitstarts (states) =
let fun vs nil i = ()
| vs (hd::tl) i = (visit (hd,i); vs tl (i+1))
in vs states 0
end
-
+
and hashstate(s: int list) =
let val rec hs =
fn (nil,z) => z
| ((x:int)::y,z) => hs(y,z ^ " " ^ (Int.toString x))
in hs(s,"")
end
-
+
and find(s) = lookup(!StateTab)(hashstate(s))
and add(s,n) = StateTab := enter(!StateTab)(hashstate(s),n)
@@ -1200,11 +1205,11 @@
handle LOOKUP => let val n = ++StateNum in
add(state,n); visit(state,n); n
end
-
+
and getfin state =
let fun f nil fins = fins
| f (hd::tl) fins =
- case (leaf sub hd)
+ case (leaf sub hd)
of END _ => f tl (hd::fins)
| _ => f tl fins
in f state nil
@@ -1213,7 +1218,7 @@
and gettc state =
let fun f nil fins = fins
| f (hd::tl) fins =
- case (leaf sub hd)
+ case (leaf sub hd)
of TRAIL _ => f tl (hd::fins)
| _ => f tl fins
in f state nil
@@ -1226,7 +1231,7 @@
case (leaf sub hd) of
CLASS(i,_)=>
(if (i sub c) then cktrans tl (union(r,fp sub hd))
- else cktrans tl r handle Subscript =>
+ else cktrans tl r handle Subscript =>
cktrans tl r
)
| _ => cktrans tl r
@@ -1238,7 +1243,7 @@
end
in loop ((!CharSetSize) - 1) nil
end
-
+
and startstates() =
let val startarray = array(!StateNum + 1, nil);
fun listofarray(a,n) =
@@ -1254,12 +1259,12 @@
fix(tl,firsts))
in makess(rules);listofarray(startarray, !StateNum + 1)
end
-
+
in visitstarts(startstates());
(listofdict(!fintab),listofdict(!transtab),listofdict(!tctab),tcpairs)
end
-val skel_hd =
+val skel_hd =
" struct\n\
\ structure UserDeclarations =\n\
\ struct\n\
@@ -1282,18 +1287,18 @@
val () = (InFile := infile; OutFile := outfile)
fun PrintLexer (ends) =
let val sayln = fn x => (say x; say "\n")
- in case !ArgCode
+ in case !ArgCode
of NONE => (sayln "fun lex () : Internal.result =";
sayln "let fun continue() = lex() in")
| SOME (p,s) =>
(say "fun lex "; say "(yyarg as (";
sayPos (SOME p); say s; sayPos NONE; sayln ")) =";
- sayln "let fun continue() : Internal.result = ");
+ sayln "let fun continue() : Internal.result = ");
say " let fun scan (s,AcceptingLeaves : Internal.yyfinstate";
- sayln " list list,l,i0: int) =";
+ sayln " list list,l,i0) =";
if !UsesTrailingContext
- then say "\tlet fun action (i: int,nil,rs)"
- else say "\tlet fun action (i: int,nil)";
+ then say "\tlet fun action (i,nil,rs)"
+ else say "\tlet fun action (i,nil)";
sayln " = raise LexError";
if !UsesTrailingContext
then sayln "\t| action (i,nil::l,rs) = action(i-1,l,rs)"
@@ -1303,10 +1308,9 @@
else sayln "\t| action (i,(node::acts)::l) =";
sayln "\t\tcase node of";
sayln "\t\t Internal.N yyk => ";
- sayln "\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\
- \\t\t\t val yypos: YYPosInt.int = YYPosInt.+(YYPosInt.fromInt i0, !yygone)\n";
-
- if !CountNewLines
+ sayln "\t\t\t(let fun yymktext() = substring(!yyb,i0,i-i0)\n\
+ \\t\t\t val yypos = YYPosInt.+(YYPosInt.fromInt i0, !yygone)";
+ if !CountNewLines
then (sayln "\t\t\tval _ = yylineno := CharVectorSlice.foldli";
sayln "\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice (!yyb,i0,SOME(i-i0)))")
else ();
@@ -1314,7 +1318,7 @@
then (say "\t\t\tfun REJECT() = action(i,acts::l";
if !UsesTrailingContext
then sayln ",rs)" else sayln ")")
- else ();
+ else ();
sayln "\t\t\topen UserDeclarations Internal.StartStates";
sayln " in (yybufpos := i; case yyk of ";
sayln "";
@@ -1323,7 +1327,7 @@
say "\n\t\t) end ";
say ")\n\n";
if (!UsesTrailingContext) then say skel_mid2 else ();
- sayln "\tval {fin,trans} = Vector.sub (Internal.tab, s)";
+ sayln "\tval {fin,trans} = Vector.sub(Internal.tab, s)";
sayln "\tval NewAcceptingLeaves = fin::AcceptingLeaves";
sayln "\tin if l = !yybl then";
sayln "\t if trans = #trans(Vector.sub(Internal.tab,0))";
@@ -1331,7 +1335,7 @@
if !UsesTrailingContext then say ",nil" else ();
say ") else";
sayln "\t let val newchars= if !yydone then \"\" else yyinput 1024";
- sayln "\t in if (String.size newchars)=0";
+ sayln "\t in if (size newchars)=0";
sayln "\t\t then (yydone := true;";
say "\t\t if (l=i0) then UserDeclarations.eof ";
sayln (case !ArgCode of NONE => "()" | SOME _ => "yyarg");
@@ -1339,30 +1343,30 @@
if !UsesTrailingContext then
sayln ",nil))" else sayln "))";
sayln "\t\t else (if i0=l then yyb := newchars";
- sayln "\t\t else yyb := String.substring(!yyb,i0,l-i0)^newchars;";
- sayln "\t\t yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);\n";
- sayln "\t\t yybl := String.size (!yyb);";
+ sayln "\t\t else yyb := substring(!yyb,i0,l-i0)^newchars;";
+ sayln "\t\t yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);";
+ sayln "\t\t yybl := size (!yyb);";
sayln "\t\t scan (s,AcceptingLeaves,l-i0,0))";
sayln "\t end";
- sayln "\t else let val NewChar = Char.ord (CharVector.sub (!yyb,l))";
+ sayln "\t else let val NewChar = Char.ord(CharVector.sub(!yyb,l))";
if !CharSetSize=129
- then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128"
+ then sayln "\t\tval NewChar = if NewChar<128 then NewChar else 128"
else ();
say "\t\tval NewState = ";
- sayln (if !CharFormat
- then "Char.ord (CharVector.sub (trans,NewChar))"
- else "Vector.sub (trans, NewChar)");
+ sayln (if !CharFormat
+ then "Char.ord(CharVector.sub(trans,NewChar))"
+ else "Vector.sub(trans, NewChar)");
say "\t\tin if NewState=0 then action(l,NewAcceptingLeaves";
if !UsesTrailingContext then sayln ",nil)" else sayln ")";
sayln "\t\telse scan(NewState,NewAcceptingLeaves,l+1,i0)";
sayln "\tend";
sayln "\tend";
if !UsesPrevNewLine then () else sayln "(*";
- sayln "\tval start= if String.substring(!yyb,!yybufpos-1,1)=\"\\n\"";
+ sayln "\tval start= if substring(!yyb,!yybufpos-1,1)=\"\\n\"";
sayln "then !yybegin+1 else !yybegin";
if !UsesPrevNewLine then () else sayln "*)";
say "\tin scan(";
- if !UsesPrevNewLine then say "start"
+ if !UsesPrevNewLine then say "start"
else say "!yybegin (* start *)";
sayln ",nil,!yybufpos,!yybufpos)";
sayln " end";
@@ -1397,11 +1401,10 @@
prErr "lookahead is unimplemented")
else ()
in
- say "type int = Int.int\n";
if (!HeaderDecl)
then (sayPos (SOME (!HeaderPos))
- ; say (!HeaderCode)
- ; sayPos NONE)
+ ; say (!HeaderCode)
+ ; sayPos NONE)
else say ("structure " ^ (!StrName));
say "=\n";
say skel_hd;
@@ -1422,16 +1425,15 @@
say "action tried *)\n";
say "end\n\n";
say ("structure YYPosInt : INTEGER = " ^ (!PosIntName) ^ "\n");
- say "type int = Int.int\n";
- say (if (!PosArg) then "fun makeLexer (yyinput: int -> string,yygone0:YYPosInt.int) =\nlet\n"
- else "fun makeLexer (yyinput: int -> string) =\nlet\tval yygone0:YYPosInt.int = YYPosInt.fromInt ~1\n");
- if !CountNewLines then say "\tval yylineno: int ref = ref 0\n\n" else ();
+ say (if (!PosArg) then "fun makeLexer (yyinput,yygone0:YYPosInt.int) =\nlet\n"
+ else "fun makeLexer yyinput =\nlet\tval yygone0= YYPosInt.fromInt ~1\n");
+ if !CountNewLines then say "\tval yylineno = ref 0\n\n" else ();
say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
- \\tval yybl: int ref = ref 1\t\t(*buffer length *)\n\
- \\tval yybufpos: int ref = ref 1\t\t(* location of next character to use *)\n\
- \\tval yygone: YYPosInt.int ref = ref yygone0\t(* position in file of beginning of buffer *)\n\
+ \\tval yybl = ref 1\t\t(*buffer length *)\n\
+ \\tval yybufpos = ref 1\t\t(* location of next character to use *)\n\
+ \\tval yygone = ref yygone0\t(* position in file of beginning of buffer *)\n\
\\tval yydone = ref false\t\t(* eof found yet? *)\n\
- \\tval yybegin: int ref = ref 1\t\t(*Current 'start state' for lexer *)\n\
+ \\tval yybegin = ref 1\t\t(*Current 'start state' for lexer *)\n\
\\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
\\t\t yybegin := x\n\n";
PrintLexer(ends);
Modified: mlton/trunk/mllex/lexgen.tex
===================================================================
--- mlton/trunk/mllex/lexgen.tex 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/lexgen.tex 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,3 +1,10 @@
+% Modified by Matthew Fluet on 2007-11-07.
+% Add %posint command.
+%
+% Modified by Matthew Fluet on 2007-10-31.
+% Add \r escape sequence (from Florian Weimer).
+% Fix TeX formatting bug (from Florian Weimer).
+%
\documentstyle{article}
\title{ A lexical analyzer generator for Standard ML.\\
Version 1.6.0, October 1994
@@ -5,7 +12,7 @@
\author{ Andrew W. Appel$^1$\\
James S. Mattson\\
David R. Tarditi$^2$\\
-\\
+\\
\small
$^1$Department of Computer Science, Princeton University \\
\small
@@ -26,7 +33,7 @@
\vspace{1in}
-New in this version:
+New in this version:
\begin{itemize}
\item REJECT is much less costly than before.
\item Lexical analyzers with more than 255 states can now compile in your
@@ -80,7 +87,7 @@
to recognize the words. It saves programmer time and increases
program maintainability.
-Unfortunately, Lex is targeted only C. It also places artificial
+Unfortunately, Lex is targeted only C. It also places artificial
limits on the size of strings that can be recognized.
ML-Lex is a variant of Lex for the ML programming language. ML-Lex
@@ -145,7 +152,7 @@
Regular expressions are a simple language for denoting classes of
strings. A regular expression is defined inductively over an
alphabet with a set of basic operations. The alphabet for ML-Lex is
-the Ascii character set (character codes 0--127; or if
+the Ascii character set (character codes 0--127; or if
\verb|%full| is used, 0--255).
The syntax and semantics of regular expressions will be described in
@@ -161,7 +168,7 @@
\item A set of characters enclosed in square brackets [ ] stands
for any one of those characters. Inside the brackets, only
- the symbols \verb|\ - ^| are reserved. An initial up-arrow
+ the symbols \verb|\ - ^| are reserved. An initial up-arrow
\verb|^| stands
for the complement of the characters listed, e.g. \verb|[^abc]|
stands any character except a, b, or c. The hyphen - denotes
@@ -219,7 +226,7 @@
\item\verb-|- The infix operator \verb-|- stands for alternation. The expression
$e_1$~\verb"|"~$e_2$ stands for anything that either $e_1$ or $e_2$ stands for.
-
+
\item[\verb|/|] The infix operator \verb|/| denotes lookahead. Lookahead is not
implemented and cannot be used, because there is a bug
in the algorithm for generating lexers with lookahead. If
@@ -235,7 +242,7 @@
for lookahead involving the newline character (that is, it
is an abbreviation for \verb|/\n|).
\end{itemize}
-
+
Here are some examples of regular expressions, and descriptions of the
set of strings they denote:
@@ -256,7 +263,7 @@
\subsection{User declarations}
Anything up to the first \verb|%%| is in the user declarations section. The
-user should note that no symbolic identifier containing
+user should note that no symbolic identifier containing
\verb|%%| can be
used in this section.
@@ -296,7 +303,7 @@
\item[\tt \%arg] extra (curried) formal parameter argument to be
passed to the lex functions, and to be passed
to the eof function in place of ()
-\item[\tt \%posint \{identifier\}] use the {\tt INTEGER} structure for the
+\item[\tt \%posint \{identifier\}] use the {\tt INTEGER} structure for the
type of {\tt yypos}; use {\tt Int64} or {\tt Position}
to allow lexing of multi-gigabyte input files
\end{description}
@@ -337,7 +344,7 @@
\label{avail}
ML-Lex places the value of the string matched by a regular expression
-in \verb|yytext|, a string variable.
+in \verb|yytext|, a string variable.
The user may recursively
call the lexing function with \verb|lex()|. (If \verb|%arg| is used, the
@@ -371,11 +378,11 @@
{\tt yylineno } & {\tt \%count} & Current line number\\
\\
\end{tabular}
-
+
These values should be used only if necessary. Adding {\tt REJECT} to a
lexer will slow it down by 20\%; adding {\tt yylineno} will slow it down by
-another 20\%, or more. (It is much more efficient to
+another 20\%, or more. (It is much more efficient to
recognize \verb|\n| and
have an action that increments the line-number variable.) The use of
the lookahead operator {\tt /} will also slow down the entire lexer.
@@ -419,7 +426,7 @@
creates a lexer that operates on the file whose name is f.
When the {\tt \%posarg} directive is used, the type of
-{\tt makeLexer} is
+{\tt makeLexer} is
\begin{verbatim}
val makeLexer : ((int->string)*int) -> yyarg -> lexresult
\end{verbatim}
@@ -434,10 +441,10 @@
from the input stream. It should return a null string to indicate
that the end of the stream has been reached. The integer is the
number of characters that the lexer wishes to read; the function may
-return any non-zero number of characters. For example,
+return any non-zero number of characters. For example,
\begin{verbatim}
- val lexer =
+ val lexer =
let val input_line = fn f =>
let fun loop result =
let val c = input (f,1)
@@ -460,9 +467,7 @@
function at once, and it is desirable that the input function return
as many as possible. Reading many characters at once makes the lexer
more efficient. Fewer input calls and buffering operations are
-needed, and input is more efficient in large block reads.
-Furthermore, performance is very poor (quadratic in the token length)
-when a token requires lots of calls to the input function. For
+needed, and input is more efficient in large block reads. For
interactive streams this is less of a concern, as the limiting factor
is the speed at which the user can type.
@@ -495,7 +500,7 @@
\small
\begin{verbatim}
datatype lexresult= DIV | EOF | EOS | ID of string | LPAREN |
- NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
+ NUM of int | PLUS | PRINT | RPAREN | SUB | TIMES
val linenum = ref 1
val error = fn x => output(std_out,x ^ "\n")
@@ -524,8 +529,8 @@
Here is the parser for the calculator:
\begin{verbatim}
-(* Sample interactive calculator to demonstrate use of lexer
-
+(* Sample interactive calculator to demonstrate use of lexer
+
The original grammar was
stmt_list -> stmt_list stmt
@@ -534,14 +539,14 @@
t -> t * f | t/f | f
f -> (exp) | id | num
- The function parse takes a stream and parses it for the calculator
+ The function parse takes a stream and parses it for the calculator
program.
If a syntax error occurs, parse prints an error message and calls
itself on the stream. On this system that has the effect of ignoring
- all input to the end of a line.
+ all input to the end of a line.
*)
-
+
structure Calc =
struct
open CalcLex
@@ -573,7 +578,7 @@
case !nexttok of
EOF => ()
| _ => (STMT(); STMT_LIST())
-
+
and STMT() =
(case !nexttok
of EOS => ()
Modified: mlton/trunk/mllex/mlex_int.doc
===================================================================
--- mlton/trunk/mllex/mlex_int.doc 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mlex_int.doc 2009-04-08 12:32:46 UTC (rev 7064)
@@ -24,10 +24,10 @@
removed from the backward propagating list after this node is
encountered.
-
+
The function scan inside the function lex operates as a transition
function, scanning the input until it is no longer possible to take any
-more transitions. It accumulates a list of the accepting leaf list
+more transitions. It accumulates a list of the accepting leaf list
associated with each accepting state passed through.
Scan operates as follows:
@@ -39,16 +39,16 @@
* l - position of the next character in the buffer b to read
* i0 - starting position in the buffer.
- Output: If no match is found, it raises the exception LexError.
+ Output: If no match is found, it raises the exception LexError.
Otherwise, it returns a value of type lexresult.
It operates as a transition function:
It (1) adds the list of accepting leaves for the current state to
the list of accepting leaf lists
(2) tries to make a transition on the current input character
- to the next state. If it can't make a transition, it
+ to the next state. If it can't make a transition, it
executes the action function.
- (a) - if it is past the end of the buffer, it
+ (a) - if it is past the end of the buffer, it
(1) checks if it is at eof. If it is then:
It checks to see if it has made any
transitions since it was first called -
@@ -71,7 +71,7 @@
This buffer update operation requires
O(n^2/1024) char. copies for lexemes > 1024
- characters in length, and O(n) char. copies
+ characters in length, and O(n) char. copies
for lexemes <= 1024 characters in length.
It can be made O(n) using linked list
buffers & a Byte.array of size n (not the
Deleted: mlton/trunk/mllex/mllex.cm
===================================================================
--- mlton/trunk/mllex/mllex.cm 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mllex.cm 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,12 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-Group is
-
-sources.cm
-call-main.sml
Modified: mlton/trunk/mllex/mllex.mlb
===================================================================
--- mlton/trunk/mllex/mllex.mlb 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/mllex.mlb 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,4 +1,5 @@
-(* Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
+(* Copyright (C) 2009 Matthew Fluet.
+ * Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
* Jagannathan, and Stephen Weeks.
*
* MLton is released under a BSD-style license.
@@ -6,7 +7,22 @@
*)
local
- sources.mlb
+ local
+ local
+ $(SML_LIB)/basis/basis.mlb
+ lexgen.sml
+ in
+ structure LexGen
+ end
+ local
+ ../lib/mlton/sources.mlb
+ main.sml
+ in
+ structure Main
+ end
+ in
+ structure Main
+ end
in
call-main.sml
end
Deleted: mlton/trunk/mllex/sources.cm
===================================================================
--- mlton/trunk/mllex/sources.cm 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/sources.cm 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,13 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-Group is
-
-../lib/mlton/sources.cm
-lexgen.sml
-main.sml
Deleted: mlton/trunk/mllex/sources.mlb
===================================================================
--- mlton/trunk/mllex/sources.mlb 2009-04-08 12:32:43 UTC (rev 7063)
+++ mlton/trunk/mllex/sources.mlb 2009-04-08 12:32:46 UTC (rev 7064)
@@ -1,22 +0,0 @@
-(* Copyright (C) 2004-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-local
- local
- $(SML_LIB)/basis/basis.mlb
- lexgen.sml
- in
- structure LexGen
- end
- local
- ../lib/mlton/sources.mlb
- in
- main.sml
- end
-in
- structure Main
-end
More information about the MLton-commit
mailing list