[MLton-commit] r6138
Matthew Fluet
fluet at mlton.org
Wed Nov 7 18:28:41 PST 2007
Added %posint command to ml-lex
----------------------------------------------------------------------
U mlton/trunk/doc/changelog
U mlton/trunk/mllex/lexgen.sml
U mlton/trunk/mllex/lexgen.tex
----------------------------------------------------------------------
Modified: mlton/trunk/doc/changelog
===================================================================
--- mlton/trunk/doc/changelog 2007-11-08 02:11:05 UTC (rev 6137)
+++ mlton/trunk/doc/changelog 2007-11-08 02:28:40 UTC (rev 6138)
@@ -1,6 +1,12 @@
Here are the changes from version 20070826 to version YYYYMMDD.
* 2007-11-07
+ - Added %posint command to ml-lex, to set the yypos type and allow
+ the lexing of multi-gigabyte input files. Thanks to Florian
+ Weimer for the feature concept and original patch.
+
+
+* 2007-11-07
- Added command-line switch -mlb-path-var '<name> <value>' for
specifying MLB path variables.
Modified: mlton/trunk/mllex/lexgen.sml
===================================================================
--- mlton/trunk/mllex/lexgen.sml 2007-11-08 02:11:05 UTC (rev 6137)
+++ mlton/trunk/mllex/lexgen.sml 2007-11-08 02:28:40 UTC (rev 6138)
@@ -229,6 +229,7 @@
| REPS of int * int | ID of string | ACTION of string
| BOF | EOF | ASSIGN | SEMI | ARROW | LEXMARK | LEXSTATES
| COUNT | REJECT | FULLCHARSET | STRUCT | HEADER | ARG | POSARG
+ | POSINT
datatype exp = EPS | CLASS of bool array * int | CLOSURE of exp
| ALT of exp * exp | CAT of exp * exp | TRAIL of int
@@ -261,13 +262,18 @@
val ArgCode = ref (NONE: string option)
val StrDecl = ref false
+ (* Can define INTEGER structure for yypos variable. *)
+ val PosIntName = ref "Int"
+ val PosIntDecl = ref false
+
val ResetFlags = fn () => (CountNewLines := false; HaveReject := false;
PosArg := false;
UsesTrailingContext := false;
CharSetSize := 129; StrName := "Mlex";
HeaderCode := ""; HeaderDecl:= false;
ArgCode := NONE;
- StrDecl := false)
+ StrDecl := false;
+ PosIntName := "Int"; PosIntDecl := false)
val LexOut = ref(TextIO.stdOut)
fun say x = TextIO.output(!LexOut, x)
@@ -491,6 +497,7 @@
| "header" => HEADER
| "arg" => ARG
| "posarg" => POSARG
+ | "posint" => POSINT
| _ => prErr "unknown % operator "
end
)
@@ -824,6 +831,14 @@
HeaderDecl := true; ParseDefs())
| _ => raise SyntaxError)
| POSARG => (PosArg := true; ParseDefs())
+ | POSINT => (AdvanceTok();
+ case !NextTok of
+ (ID i) =>
+ if (!PosIntDecl) then
+ (prErr "duplicate %posint declarations")
+ else (PosIntName := i; PosIntDecl := true)
+ | _ => (prErr "expected ID");
+ ParseDefs())
| ARG => (LexState := 2; AdvanceTok();
case GetTok()
of ACTION s =>
@@ -1257,7 +1272,8 @@
sayln "\t\tcase node of";
sayln "\t\t Internal.N yyk => ";
sayln "\t\t\t(let fun yymktext() = String.substring(!yyb,i0,i-i0)\n\
- \\t\t\t val yypos: int = i0+ !yygone";
+ \\t\t\t val yypos: YYPosInt.int = YYPosInt.+(YYPosInt.fromInt i0, !yygone)\n";
+
if !CountNewLines
then (sayln "\t\t\tval _ = yylineno := CharVectorSlice.foldli";
sayln "\t\t\t\t(fn (_,#\"\\n\", n) => n+1 | (_,_, n) => n) (!yylineno) (CharVectorSlice.slice (!yyb,i0,SOME(i-i0)))")
@@ -1292,7 +1308,7 @@
sayln ",nil))" else sayln "))";
sayln "\t\t else (if i0=l then yyb := newchars";
sayln "\t\t else yyb := String.substring(!yyb,i0,l-i0)^newchars;";
- sayln "\t\t yygone := !yygone+i0;";
+ sayln "\t\t yygone := YYPosInt.+(!yygone, YYPosInt.fromInt i0);\n";
sayln "\t\t yybl := String.size (!yyb);";
sayln "\t\t scan (s,AcceptingLeaves,l-i0,0))";
sayln "\t end";
@@ -1369,14 +1385,15 @@
say "\texception LexerError (* raised if illegal leaf ";
say "action tried *)\n";
say "end\n\n";
+ say ("structure YYPosInt : INTEGER = " ^ (!PosIntName) ^ "\n");
say "type int = Int.int\n";
- say (if (!PosArg) then "fun makeLexer (yyinput: int -> string,yygone0:int) =\nlet\n"
- else "fun makeLexer (yyinput: int -> string) =\nlet\tval yygone0:int= ~1\n");
+ say (if (!PosArg) then "fun makeLexer (yyinput: int -> string,yygone0:YYPosInt.int) =\nlet\n"
+ else "fun makeLexer (yyinput: int -> string) =\nlet\tval yygone0:YYPosInt.int = YYPosInt.fromInt ~1\n");
if !CountNewLines then say "\tval yylineno: int ref = ref 0\n\n" else ();
say "\tval yyb = ref \"\\n\" \t\t(* buffer *)\n\
\\tval yybl: int ref = ref 1\t\t(*buffer length *)\n\
\\tval yybufpos: int ref = ref 1\t\t(* location of next character to use *)\n\
- \\tval yygone: int ref = ref yygone0\t(* position in file of beginning of buffer *)\n\
+ \\tval yygone: YYPosInt.int ref = ref yygone0\t(* position in file of beginning of buffer *)\n\
\\tval yydone = ref false\t\t(* eof found yet? *)\n\
\\tval yybegin: int ref = ref 1\t\t(*Current 'start state' for lexer *)\n\
\\n\tval YYBEGIN = fn (Internal.StartStates.STARTSTATE x) =>\n\
Modified: mlton/trunk/mllex/lexgen.tex
===================================================================
--- mlton/trunk/mllex/lexgen.tex 2007-11-08 02:11:05 UTC (rev 6137)
+++ mlton/trunk/mllex/lexgen.tex 2007-11-08 02:28:40 UTC (rev 6138)
@@ -296,6 +296,9 @@
\item[\tt \%arg] extra (curried) formal parameter argument to be
passed to the lex functions, and to be passed
to the eof function in place of ()
+\item[\tt \%posint \{identifier\}] use the structure named by the identifier
+ (which must match {\tt INTEGER}) for the type of {\tt yypos}; use {\tt Int64}
+ or {\tt Position} to allow lexing of multi-gigabyte input files
\end{description}
These functions are discussed in section~\ref{avail}.
More information about the MLton-commit
mailing list