[MLton] cvs commit: spaces in mlb imported file names
Matthew Fluet
fluet@mlton.org
Sat, 25 Sep 2004 14:39:22 -0700
fluet 04/09/25 14:39:21
Modified: mlton/front-end ml.lex mlb.lex
Log:
MAIL spaces in mlb imported file names
Extended the MLB lexer to parse SML style strings, which are passed as
FILE tokens to the MLB parser. This allows arbitrary characters in
file names. Note, however, that mlton -stop f will print file names
without quoting, so spaces in file names break using mlton -stop f
in Makefiles.
Note that the MLB lexer has a fairly limited regexp for filenames.
Filenames must be composed of alpha-num characters or "-" or "_". So,
you need to "quote" for anything more advanced.
I'm not 100% happy with this situation, but I wasn't very successful
in determining the Makefile or bash filename regexp. Also,
considering that this is SML, I'm tempted to use SML style strings,
which makes it easy for a user to understand what they are writing
down, although it makes writing Win32 filenames a bit verbose.
Revision Changes Path
1.17 +1 -1 mlton/mlton/front-end/ml.lex
Index: ml.lex
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/front-end/ml.lex,v
retrieving revision 1.16
retrieving revision 1.17
diff -u -r1.16 -r1.17
--- ml.lex 15 Sep 2004 18:16:28 -0000 1.16
+++ ml.lex 25 Sep 2004 21:39:21 -0000 1.17
@@ -309,7 +309,7 @@
Char.ord(String.sub(yytext, 1)) * 100
+ Char.ord(String.sub(yytext, 2)) * 10
+ Char.ord(String.sub(yytext, 3))
- - (Char.ord #"0") *111
+ - (Char.ord #"0") * 111
in (if x > 255
then stringError (source, yypos,
"illegal ascii escape")
1.2 +113 -20 mlton/mlton/front-end/mlb.lex
Index: mlb.lex
===================================================================
RCS file: /cvsroot/mlton/mlton/mlton/front-end/mlb.lex,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- mlb.lex 28 Jul 2004 21:05:15 -0000 1.1
+++ mlb.lex 25 Sep 2004 21:39:21 -0000 1.2
@@ -13,11 +13,14 @@
val commentStart = ref SourcePos.bogus
val lineFile: File.t ref = ref ""
val lineNum: int ref = ref 0
+val stringStart = ref SourcePos.bogus
fun lineDirective (source, file, yypos) =
Source.lineDirective (source, file,
{lineNum = !lineNum,
lineStart = yypos - !colNum})
+fun addString (s: string) = charlist := s :: (!charlist)
+fun addChar (c: char) = addString (String.fromChar c)
fun inc (ri as ref (i: int)) = (ri := i + 1)
fun dec (ri as ref (i: int)) = (ri := i-1)
@@ -27,6 +30,11 @@
right = Source.getPos (source, right)},
msg)
+fun stringError (source, right, msg) =
+ Control.errorStr (Region.make {left = !stringStart,
+ right = Source.getPos (source, right)},
+ msg)
+
val eof: lexarg -> lexresult =
fn {source, ...} =>
let
@@ -64,50 +72,63 @@
%%
%reject
-%s A L LL LLC LLCQ;
+%s A S F L LL LLC LLCQ;
%header (functor MLBLexFun (structure Tokens : MLB_TOKENS));
%arg ({source});
alphanum=[A-Za-z'_0-9]*;
alphanumId=[A-Za-z]{alphanum};
id={alphanumId};
-envvar="$("([A-Z_]+)")";
+
+pathvar="$("([A-Z_]+)")";
filebase=[-A-Za-z_0-9]+;
fileext=[-A-Za-z_0-9]+;
filename={filebase}("."{fileext})*;
-arc=({envvar}|{filename}|"."|"..");
+arc=({pathvar}|{filename}|"."|"..");
relpath=({arc}"/")*;
abspath="/"{relpath};
path={relpath}|{abspath};
file={path}{filename};
+
ws=("\012"|[\t\ ])*;
nrws=("\012"|[\t\ ])+;
cr="\013";
nl="\010";
eol=({cr}{nl}|{nl}|{cr});
+hexDigit=[0-9a-fA-F];
+
%%
<INITIAL>{ws} => (continue ());
<INITIAL>{eol} => (Source.newline (source, yypos); continue ());
-
+<INITIAL>"_prim"
+ => (tok (Tokens.PRIM, source, yypos, yypos + 4));
<INITIAL>"," => (tok (Tokens.COMMA, source, yypos, yypos + 1));
<INITIAL>";" => (tok (Tokens.SEMICOLON, source, yypos, yypos + 1));
-<INITIAL>"=" => (tok (Tokens.EQUALOP, source, yypos, yypos + 1));
-<INITIAL>"ann" => (tok (Tokens.ANN, source, yypos, yypos + 3));
-<INITIAL>"and" => (tok (Tokens.AND, source, yypos, yypos + 3));
-<INITIAL>"bas" => (tok (Tokens.BAS, source, yypos, yypos + 3));
-<INITIAL>"basis" => (tok (Tokens.BASIS, source, yypos, yypos + 5));
-<INITIAL>"end" => (tok (Tokens.END, source, yypos, yypos + 3));
-<INITIAL>"functor" => (tok (Tokens.FUNCTOR, source, yypos, yypos + 7));
-<INITIAL>"in" => (tok (Tokens.IN, source, yypos, yypos + 2));
-<INITIAL>"let" => (tok (Tokens.LET, source, yypos, yypos + 3));
-<INITIAL>"local" => (tok (Tokens.LOCAL, source, yypos, yypos + 5));
+<INITIAL>"=" => (tok (Tokens.EQUALOP, source, yypos, yypos + 1));
+<INITIAL>"ann" => (tok (Tokens.ANN, source, yypos, yypos + 3));
+<INITIAL>"and" => (tok (Tokens.AND, source, yypos, yypos + 3));
+<INITIAL>"bas" => (tok (Tokens.BAS, source, yypos, yypos + 3));
+<INITIAL>"basis"
+ => (tok (Tokens.BASIS, source, yypos, yypos + 5));
+<INITIAL>"end" => (tok (Tokens.END, source, yypos, yypos + 3));
+<INITIAL>"functor"
+ => (tok (Tokens.FUNCTOR, source, yypos, yypos + 7));
+<INITIAL>"in" => (tok (Tokens.IN, source, yypos, yypos + 2));
+<INITIAL>"let" => (tok (Tokens.LET, source, yypos, yypos + 3));
+<INITIAL>"local"
+ => (tok (Tokens.LOCAL, source, yypos, yypos + 5));
<INITIAL>"open" => (tok (Tokens.OPEN, source, yypos, yypos + 4));
-<INITIAL>"_prim" => (tok (Tokens.PRIM, source, yypos, yypos + 4));
-<INITIAL>"signature" => (tok (Tokens.SIGNATURE, source, yypos, yypos + 9));
-<INITIAL>"structure" => (tok (Tokens.STRUCTURE, source, yypos, yypos + 9));
-<INITIAL>{id} => (tok' (Tokens.ID, yytext, source, yypos));
+<INITIAL>"signature"
+ => (tok (Tokens.SIGNATURE, source, yypos, yypos + 9));
+<INITIAL>"structure"
+ => (tok (Tokens.STRUCTURE, source, yypos, yypos + 9));
+<INITIAL>{id} => (tok' (Tokens.ID, yytext, source, yypos));
<INITIAL>{file} => (tok' (Tokens.FILE, yytext, source, yypos));
+<INITIAL>\" => (charlist := [""]
+ ; stringStart := Source.getPos (source, yypos)
+ ; YYBEGIN S
+ ; continue ());
<INITIAL>"(*#line"{nrws}
=> (YYBEGIN L
; commentStart := Source.getPos (source, yypos)
@@ -130,7 +151,7 @@
; (colNum := valOf (Int.fromString yytext))
handle Overflow => YYBEGIN A
; continue ());
-<LL>. => (YYBEGIN LLC; continue ()
+<LL>. => (YYBEGIN LLC; continue ()
(* note hack, since ml-lex chokes on the empty string for 0* *));
<LLC>"*)" => (YYBEGIN INITIAL
; lineDirective (source, NONE, yypos + 2)
@@ -140,7 +161,8 @@
<LLCQ>\""*)" => (YYBEGIN INITIAL
; lineDirective (source, SOME (!lineFile), yypos + 3)
; commentLevel := 0; charlist := []; continue ());
-<L,LLC,LLCQ>"*)" => (YYBEGIN INITIAL; commentLevel := 0; charlist := []; continue ());
+<L,LLC,LLCQ>"*)"
+ => (YYBEGIN INITIAL; commentLevel := 0; charlist := []; continue ());
<L,LLC,LLCQ>. => (YYBEGIN A; continue ());
<A>"(*" => (inc commentLevel; continue ());
@@ -149,3 +171,74 @@
; if 0 = !commentLevel then YYBEGIN INITIAL else ()
; continue ());
<A>. => (continue ());
+
+<S>\" => (let
+ val s = concat (rev (!charlist))
+ val _ = charlist := nil
+ fun make (t, v) =
+ t (v, !stringStart, Source.getPos (source, yypos + 1))
+ in YYBEGIN INITIAL
+ ; make (Tokens.FILE, s)
+ end);
+<S>\\a => (addChar #"\a"; continue ());
+<S>\\b => (addChar #"\b"; continue ());
+<S>\\f => (addChar #"\f"; continue ());
+<S>\\n => (addChar #"\n"; continue ());
+<S>\\r => (addChar #"\r"; continue ());
+<S>\\t => (addChar #"\t"; continue ());
+<S>\\v => (addChar #"\v"; continue ());
+<S>\\\^[@-_] => (addChar (Char.chr(Char.ord(String.sub(yytext, 2))
+ -Char.ord #"@"))
+ ; continue ());
+<S>\\\^. => (error (source, yypos, yypos + 2,
+ "illegal control escape; must be one of @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_")
+ ; continue ());
+<S>\\[0-9]{3} => (let
+ val x =
+ Char.ord(String.sub(yytext, 1)) * 100
+ + Char.ord(String.sub(yytext, 2)) * 10
+ + Char.ord(String.sub(yytext, 3))
+ - (Char.ord #"0") * 111
+ in (if x > 255
+ then stringError (source, yypos,
+ "illegal ascii escape")
+ else addChar(Char.chr x);
+ continue ())
+ end);
+<S>\\u{hexDigit}{4}
+ => (let
+ val x =
+ StringCvt.scanString
+ (Pervasive.Int.scan StringCvt.HEX)
+ (String.substring (yytext, 2, 4))
+ fun err () =
+ stringError (source, yypos,
+ "illegal unicode escape")
+ in (case x of
+ SOME x => if x > 255
+ then err()
+ else addChar(Char.chr x)
+ | _ => err())
+ ; continue ()
+ end);
+<S>\\\" => (addString "\""; continue ());
+<S>\\\\ => (addString "\\"; continue ());
+<S>\\{nrws} => (YYBEGIN F; continue ());
+<S>\\{eol} => (Source.newline (source, yypos) ; YYBEGIN F ; continue ());
+<S>\\ => (stringError (source, yypos, "illegal string escape")
+ ; continue ());
+<S>{eol} => (Source.newline (source, yypos)
+ ; stringError (source, yypos, "unclosed string")
+ ; continue ());
+<S>" "|[\033-\126]
+ => (addString yytext; continue ());
+<S>. => (stringError (source, yypos + 1, "illegal character in string")
+ ; continue ());
+
+<F>{eol} => (Source.newline (source, yypos) ; continue ());
+<F>{ws} => (continue ());
+<F>\\ => (YYBEGIN S
+ ; stringStart := Source.getPos (source, yypos)
+ ; continue ());
+<F>. => (stringError (source, yypos, "unclosed string")
+ ; continue ());