[MLton] cvs commit: Updating Char.{fromString,scan}, String.{fromString,scan}

Stephen Weeks sweeks@mlton.org
Mon, 29 Dec 2003 15:31:25 -0800


sweeks      03/12/29 15:31:25

  Modified:    basis-library/text char.sig char.sml string.sig string.sml
               mlyacc/src look.sml
               regression string.sml
  Added:       regression char.scan.ok char.scan.sml string.fromString.ok
                        string.fromString.sml
  Log:
  MAIL Updating Char.{fromString,scan}, String.{fromString,scan}
  
  Updating these basis library functions according to my latest
  understanding of the spec.  This is based on a discussion on the basis
  library list in September 2003, which I concluded with the following
  observations.
  
  1. Char.scan returns NONE upon double quote.
  
  2. String.scan stops upon double quote, and returns the output to that
     point.
  
  3. String.scan consumes format sequences.
  
  4. Char.scan consumes format sequences adjacent to a valid character.
     I mention this because of pathological character constants like
     #"a\ \", #"\ \a", #"\ \a\ \", and #"\ \\ \a".
  
  5. Char.scan does not consume format sequences if it is unable to
     produce a character.  Consider the invalid character constant
     #"\ \".
  
  6. String.scan never returns NONE.
  
  I added some regressions to check these and updated our current
  regressions for the changes in semantics (mostly due to String.scan
  never returning NONE).

Revision  Changes    Path
1.3       +1 -0      mlton/basis-library/text/char.sig

Index: char.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/text/char.sig,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- char.sig	24 Nov 2002 01:19:40 -0000	1.2
+++ char.sig	29 Dec 2003 23:31:24 -0000	1.3
@@ -49,5 +49,6 @@
    sig
       include CHAR
 
+      val formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a
       val scanC: (char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
    end



1.4       +80 -39    mlton/basis-library/text/char.sml

Index: char.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/text/char.sml,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- char.sml	24 Nov 2002 01:19:40 -0000	1.3
+++ char.sml	29 Dec 2003 23:31:24 -0000	1.4
@@ -17,54 +17,95 @@
 		  then SOME (chr (ord c -? ord #"@"), state)
 	       else NONE
 
+      fun formatChar reader state =
+	 case reader state of
+	    NONE => NONE
+	  | SOME (c, state) =>
+	       if isSpace c
+		  then SOME ((), state)
+	       else NONE
+
+      fun formatChars reader =
+	 let
+	    fun loop state =
+	       case formatChar reader state of
+		  NONE => state
+		| SOME ((), state) => loop state
+	 in
+	    loop
+	 end
+		  
+      val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
+	 fn reader =>
+	 let
+	    fun loop state =
+	       case reader state of
+		  SOME (#"\\", state1) =>
+		     (case formatChar reader state1 of
+			 NONE => state
+		       | SOME ((), state2) =>
+			    let
+			       val state3 = formatChars reader state2
+			    in
+			       case reader state3 of
+				  SOME (#"\\", state4) => loop state4
+				| _ => state
+			    end)
+		| _ => state
+	 in
+	    loop
+	 end
+
       fun 'a scan (reader: (char, 'a) StringCvt.reader)
 	: (char, 'a) StringCvt.reader =
 	 let
-	    fun main state =
-	       case reader state of
-		  NONE => NONE
-		| SOME (c, state) =>
-		     if isPrint c
-			then
-			   case c of
-			      #"\\" => escape state
-			    | _ => SOME (c, state)
-		     else NONE
-	    and escape state =
+	    val escape: (char, 'a) StringCvt.reader =
+	       fn state =>
 	       case reader state of
 		  NONE => NONE
 		| SOME (c, state') =>
-		     let fun yes c = SOME (c, state')
-		     in case c of
-			#"a" => yes #"\a"
-		      | #"b" => yes #"\b"
-		      | #"t" => yes #"\t"
-		      | #"n" => yes #"\n"
-		      | #"v" => yes #"\v"
-		      | #"f" => yes #"\f"
-		      | #"r" => yes #"\r"
-		      | #"\\" => yes #"\\"
-		      | #"\"" => yes #"\""
-		      | #"^" => control reader state'
-		      | #"u" =>
-			   Reader.mapOpt chrOpt
-			   (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
-			   state'
-		      | _ => (* either formatting chars or 3 decimal digits *)
-			   if isSpace c
-			      then
-				 case Reader.ignore isSpace reader state' of
-				    NONE => NONE
-				  | SOME (c, state) =>
-				       case c of
-					  #"\\" => main state
-					| _ => NONE
-			   else
+		     let
+			fun yes c = SOME (c, state')
+		     in
+			case c of
+			   #"a" => yes #"\a"
+			 | #"b" => yes #"\b"
+			 | #"t" => yes #"\t"
+			 | #"n" => yes #"\n"
+			 | #"v" => yes #"\v"
+			 | #"f" => yes #"\f"
+			 | #"r" => yes #"\r"
+			 | #"\\" => yes #"\\"
+			 | #"\"" => yes #"\""
+			 | #"^" => control reader state'
+			 | #"u" =>
+			      Reader.mapOpt chrOpt
+			      (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
+			      state'
+			 | _ => (* 3 decimal digits *)
 			      Reader.mapOpt chrOpt
-			      (StringCvt.digitsExact (StringCvt.DEC, 3) reader)
+			      (StringCvt.digitsExact (StringCvt.DEC, 3)
+			       reader)
 			      state
 		     end
-	 in main
+	    val main: (char, 'a) StringCvt.reader =
+	       fn state =>
+	       let
+		  val state = formatSequences reader state
+	       in
+		  case reader state of
+		     NONE => NONE
+		   | SOME (c, state) =>
+			if isPrint c
+			   then
+			      case c of
+				 #"\\" => escape state
+			       | #"\"" => NONE
+			       | _ => SOME (c, formatSequences reader state)
+			else NONE
+	       end
+	 in
+	    main
 	 end
 
       val fromString = StringCvt.scanString scan



1.4       +20 -19    mlton/basis-library/text/string.sig

Index: string.sig
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/text/string.sig,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- string.sig	27 Dec 2003 02:58:04 -0000	1.3
+++ string.sig	29 Dec 2003 23:31:24 -0000	1.4
@@ -2,13 +2,13 @@
    sig
       eqtype string
 	 
-      val size: string -> int 
-      val substring: string * int * int -> string 
       val ^ : string * string -> string 
       val concat: string list -> string 
-      val str: Char.char -> string 
-      val implode: Char.char list -> string 
       val explode: string -> Char.char list 
+      val implode: Char.char list -> string 
+      val size: string -> int 
+      val str: Char.char -> string 
+      val substring: string * int * int -> string 
    end
 
 signature STRING =
@@ -17,27 +17,28 @@
 
       eqtype char
 	 
-      val maxSize: int 
-      val sub: string * int -> char 
-      val extract: string * int * int option -> string
-      val concatWith: string -> string list -> string
-      val map: (Char.char -> Char.char) -> string -> string 
-      val translate: (Char.char -> string) -> string -> string 
-      val tokens: (Char.char -> bool) -> string -> string list 
-      val fields: (Char.char -> bool) -> string -> string list
-      val isPrefix: string -> string -> bool
-      val isSubstring: string -> string -> bool
-      val isSuffix: string -> string -> bool
-      val compare: string * string -> order
-      val collate: (char * char -> order) -> string * string -> order
       val < : string * string -> bool 
       val <= : string * string -> bool 
       val > : string * string -> bool 
       val >= : string * string -> bool 
-      val fromString: string -> string option 
-      val toString: string -> string 
+      val collate: (char * char -> order) -> string * string -> order
+      val compare: string * string -> order
+      val concatWith: string -> string list -> string
+      val extract: string * int * int option -> string
+      val fields: (Char.char -> bool) -> string -> string list
       val fromCString: string -> string option 
+      val fromString: string -> string option 
+      val isPrefix: string -> string -> bool
+      val isSubstring: string -> string -> bool
+      val isSuffix: string -> string -> bool
+      val map: (Char.char -> Char.char) -> string -> string 
+      val maxSize: int
+      val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader
+      val sub: string * int -> char 
       val toCString: string -> string
+      val toString: string -> string 
+      val tokens: (Char.char -> bool) -> string -> string list 
+      val translate: (Char.char -> string) -> string -> string 
    end
 
 signature STRING_EXTRA =



1.5       +14 -1     mlton/basis-library/text/string.sml

Index: string.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/text/string.sml,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- string.sml	16 Sep 2003 06:48:55 -0000	1.4
+++ string.sml	29 Dec 2003 23:31:24 -0000	1.5
@@ -22,13 +22,26 @@
       val toString = translate Char.toString
       val toCString = translate Char.toCString
 
+      val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
+	 fn reader =>
+	 let
+	    fun loop (state, cs) =
+	       case Char.scan reader state of
+		  NONE => SOME (implode (rev cs),
+				Char.formatSequences reader state)
+		| SOME (c, state) => loop (state, c :: cs)
+	 in
+	    fn state => loop (state, [])
+	 end
+	 
+      val fromString = StringCvt.scanString scan
+	 
       fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
 	: (string, 'a) StringCvt.reader =
 	 fn state =>
 	 Option.map (fn (cs, state) => (implode cs, state))
 	 (Reader.list (scanChar reader) state)
 
-      val fromString = StringCvt.scanString (scanString Char.scan)
       val fromCString = StringCvt.scanString (scanString Char.scanC)
 
       fun nullTerm s = s ^ "\000"



1.2       +1 -1      mlton/mlyacc/src/look.sml

Index: look.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/mlyacc/src/look.sml,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- look.sml	18 Jul 2001 05:51:03 -0000	1.1
+++ look.sml	29 Dec 2003 23:31:24 -0000	1.2
@@ -76,7 +76,7 @@
 		| ok_rhs ((TERM _)::_) = false
 		| ok_rhs ((NONTERM i)::r) = ok_rhs r
 	      fun add_rule (RULE {lhs,rhs,...},r) =
-		 if ok_rhs rhs then (lhs,map (fn (NONTERM (NT i)) => i) rhs)::r
+		 if ok_rhs rhs then (lhs,map (fn NONTERM (NT i) => i) rhs)::r
 		 else r
 	      val items = List.foldr add_rule [] rules
 	      val nullable = array(nonterms,false)



1.2       +2 -2      mlton/regression/string.sml

Index: string.sml
===================================================================
RCS file: /cvsroot/mlton/mlton/regression/string.sml,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- string.sml	18 Jul 2001 05:51:07 -0000	1.1
+++ string.sml	29 Dec 2003 23:31:25 -0000	1.2
@@ -242,7 +242,7 @@
     end;
 
 val test20 = 
-    tst' "test20" (fn _ => List.all (fn arg => fromString arg = NONE)
+    tst' "test20" (fn _ => List.all (fn arg => isSome (fromString arg))
 	   ["\\",
 	    "\\c",
 	    "\\F",
@@ -342,7 +342,7 @@
     end;
 
 val test24 = 
-    let fun checkFromCStringFail arg = fromCString arg = NONE
+    let fun checkFromCStringFail arg = isSome (fromCString arg)
     in
 	tst' "test24" (fn _ => List.all checkFromCStringFail 
 	       ["\\",



1.1                  mlton/regression/char.scan.ok

Index: char.scan.ok
===================================================================
false
a at 4 of 4
a at 4 of 4
a at 7 of 7
a at 7 of 7
NONE



1.1                  mlton/regression/char.scan.sml

Index: char.scan.sml
===================================================================
val dquote = "\""
   
val _ = print (concat [Bool.toString (isSome (Char.fromString dquote)), "\n"])

val scan: string -> unit =
   fn s =>
   let
      val n = String.size s
      fun reader i =
	 if i = n
	    then NONE
	 else SOME (String.sub (s, i), i + 1)
   in
      case Char.scan reader 0 of
	 NONE => print "NONE\n"
       | SOME (c, i) => print (concat [str c, " at ", Int.toString i,
				       " of ", Int.toString n, "\n"])
   end

val _ =
   List.app scan ["a\\ \\", "\\ \\a", "\\ \\a\\ \\", "\\ \\\\ \\a",
		  "\\ \\"]



1.1                  mlton/regression/string.fromString.ok

Index: string.fromString.ok
===================================================================
OK  [abc]
OK  []
OK  [a]
OK  [a]
OK  []
OK  []
OK  []
OK  []



1.1                  mlton/regression/string.fromString.sml

Index: string.fromString.sml
===================================================================
fun check (s, s') =
   case String.fromString s of
      NONE => print "WRONG  NONE\n"
    | SOME s'' =>
	 if s' = s''
	    then print (concat ["OK  [", s', "]\n"])
	 else print (concat ["WRONG  [", s', "] [", s'', "]\n"])

val _ =
   List.app check
   [("abc\"def", "abc"),
     ("\\q", ""),
     ("a\^D", "a"),
     ("a\\ \\\\q", "a"),
     ("\\ \\", ""),
     ("", ""),
     ("\\ \\\^D", ""),
     ("\\ a", "")]