[MLton-commit] r5175
Wesley Terpstra
wesley at mlton.org
Sun Feb 11 17:31:20 PST 2007
WideChar, WideString, etc. support added\!
----------------------------------------------------------------------
U mlton/trunk/basis-library/arrays-and-vectors/mono.sml
U mlton/trunk/basis-library/build/sources.mlb
U mlton/trunk/basis-library/integer/int-inf.sml
U mlton/trunk/basis-library/integer/word.sml
U mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig
U mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig
U mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml
U mlton/trunk/basis-library/text/char.sig
U mlton/trunk/basis-library/text/char.sml
A mlton/trunk/basis-library/text/char0.sig
U mlton/trunk/basis-library/text/char0.sml
U mlton/trunk/basis-library/text/string-cvt.sig
U mlton/trunk/basis-library/text/string-cvt.sml
U mlton/trunk/basis-library/text/string.sig
U mlton/trunk/basis-library/text/string.sml
U mlton/trunk/basis-library/text/string0.sml
U mlton/trunk/basis-library/text/substring.sml
U mlton/trunk/basis-library/text/text.sml
----------------------------------------------------------------------
Modified: mlton/trunk/basis-library/arrays-and-vectors/mono.sml
===================================================================
--- mlton/trunk/basis-library/arrays-and-vectors/mono.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/arrays-and-vectors/mono.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -176,6 +176,16 @@
structure CharVectorSlice = VectorSlice
end
local
+ structure S = EqMono (type elem = WideChar.char)
+ open S
+in
+ structure WideCharArray = Array
+ structure WideCharArray2 = Array2
+ structure WideCharArraySlice = ArraySlice
+ structure WideCharVector = Vector
+ structure WideCharVectorSlice = VectorSlice
+end
+local
structure S = EqMono (type elem = Int.int)
open S
in
Modified: mlton/trunk/basis-library/build/sources.mlb
===================================================================
--- mlton/trunk/basis-library/build/sources.mlb 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/build/sources.mlb 2007-02-12 01:31:14 UTC (rev 5175)
@@ -58,6 +58,7 @@
../config/bind/word-prim.sml
in ann "forceUsed" in
../config/default/$(DEFAULT_CHAR)
+ ../config/default/$(DEFAULT_WIDECHAR)
../config/default/$(DEFAULT_INT)
../config/default/$(DEFAULT_REAL)
../config/default/$(DEFAULT_WORD)
@@ -100,6 +101,7 @@
../arrays-and-vectors/mono-array2.sig
../arrays-and-vectors/mono-array2.fun
../arrays-and-vectors/mono.sml
+ ../text/char0.sig
../text/string0.sml
../text/char0.sml
../util/reader.sig
@@ -159,18 +161,20 @@
end end
../text/char.sig
+ ../text/string.sig
+ ../text/substring.sig
+ ../text/text.sig
+
+ ../util/heap.sml
../text/char.sml
- ../text/string.sig
../text/string.sml
- ../text/substring.sig
../text/substring.sml
+ ../text/text.sml
../text/char-global.sml
../text/string-global.sml
../text/substring-global.sml
../text/byte.sig
../text/byte.sml
- ../text/text.sig
- ../text/text.sml
../text/nullstring.sml
../util/CUtil.sig
Modified: mlton/trunk/basis-library/integer/int-inf.sml
===================================================================
--- mlton/trunk/basis-library/integer/int-inf.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/integer/int-inf.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -84,30 +84,30 @@
| _ => NONE
local
- val op <= = PreChar.<=
+ val op <= = Char.<=
in
fun octDig (ch: char): W.word option =
if #"0" <= ch andalso ch <= #"7"
- then SOME (W.fromInt (Int.- (PreChar.ord ch,
- PreChar.ord #"0")))
+ then SOME (W.fromInt (Int.- (Char.ord ch,
+ Char.ord #"0")))
else NONE
fun decDig (ch: char): W.word option =
if #"0" <= ch andalso ch <= #"9"
- then SOME (W.fromInt (Int.- (PreChar.ord ch,
- PreChar.ord #"0")))
+ then SOME (W.fromInt (Int.- (Char.ord ch,
+ Char.ord #"0")))
else NONE
fun hexDig (ch: char): W.word option =
if #"0" <= ch andalso ch <= #"9"
- then SOME (W.fromInt (Int.- (PreChar.ord ch,
- PreChar.ord #"0")))
+ then SOME (W.fromInt (Int.- (Char.ord ch,
+ Char.ord #"0")))
else if #"a" <= ch andalso ch <= #"f"
- then SOME (W.fromInt (Int.- (PreChar.ord ch,
- Int.- (PreChar.ord #"a", 0xa))))
+ then SOME (W.fromInt (Int.- (Char.ord ch,
+ Int.- (Char.ord #"a", 0xa))))
else if #"A" <= ch andalso ch <= #"F"
- then SOME (W.fromInt (Int.- (PreChar.ord ch,
- Int.- (PreChar.ord #"A", 0xA))))
+ then SOME (W.fromInt (Int.- (Char.ord ch,
+ Int.- (Char.ord #"A", 0xA))))
else NONE
end
@@ -231,24 +231,23 @@
: (int, 'a) reader =
let
fun reader (s: 'a): (int * 'a) option =
- case cread s of
+ case cread (StringCvt.skipWS cread s) of
NONE => NONE
| SOME (ch, s') =>
- if PreChar.isSpace ch then reader s'
- else let
- val (isNeg, s'') =
- case ch of
- #"+" => (false, s')
- | #"-" => (true, s')
- | #"~" => (true, s')
- | _ => (false, s)
- in
- if isNeg
- then case uread s'' of
- NONE => NONE
- | SOME (abs, s''') => SOME (~ abs, s''')
- else uread s''
- end
+ let
+ val (isNeg, s'') =
+ case ch of
+ #"+" => (false, s')
+ | #"-" => (true, s')
+ | #"~" => (true, s')
+ | _ => (false, s)
+ in
+ if isNeg
+ then case uread s'' of
+ NONE => NONE
+ | SOME (abs, s''') => SOME (~ abs, s''')
+ else uread s''
+ end
in
reader
end
Modified: mlton/trunk/basis-library/integer/word.sml
===================================================================
--- mlton/trunk/basis-library/integer/word.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/integer/word.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -107,7 +107,7 @@
let val chars = StringCvt.digitToChar (toInt (q mod radix)) :: chars
val q = q div radix
in if q = zero
- then PreString.implode chars
+ then String.implode chars
else loop (q, chars)
end
in loop (w, [])
Modified: mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -246,7 +246,6 @@
structure SysWord : WORD
structure Unix : UNIX
structure UnixSock : UNIX_SOCK
-(*
structure WideChar : CHAR
structure WideCharArray : MONO_ARRAY
structure WideCharArray2 : MONO_ARRAY2
@@ -256,6 +255,8 @@
structure WideString : STRING
structure WideSubstring : SUBSTRING
structure WideText : TEXT
+(*
+ structure WideTextIO : TEXT_IO
structure WideTextPrimIO : PRIM_IO
*)
(*
@@ -560,6 +561,33 @@
sharing type Real64Array2.elem = Real64.real
sharing type Real64Array2.vector = Real64Vector.vector
sharing type Unix.exit_status = Posix.Process.exit_status
+ sharing type WideChar.string = WideString.string
+ sharing type WideCharArray.elem = WideChar.char
+ sharing type WideCharArray.vector = WideCharVector.vector
+ sharing type WideCharArray2.elem = WideChar.char
+ sharing type WideCharArray2.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.elem = WideChar.char
+ sharing type WideCharArraySlice.array = WideCharArray.array
+ sharing type WideCharArraySlice.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+ sharing type WideCharVector.elem = WideChar.char
+ sharing type WideCharVector.vector = WideString.string
+ sharing type WideCharVectorSlice.elem = WideChar.char
+ sharing type WideCharVectorSlice.slice = WideSubstring.substring
+ sharing type WideCharVectorSlice.vector = WideString.string
+ sharing type WideString.char = WideChar.char
+ (* next two are redundant? basis & char both do it... *)
+ sharing type WideString.string = WideCharVector.vector
+ sharing type WideSubstring.substring = WideCharVectorSlice.slice
+ sharing type WideSubstring.string = WideString.string
+ sharing type WideSubstring.char = WideChar.char
+ sharing type WideText.Char.char = WideChar.char
+ sharing type WideText.String.string = WideString.string
+ sharing type WideText.Substring.substring = WideSubstring.substring
+ sharing type WideText.CharVector.vector = WideCharVector.vector
+ sharing type WideText.CharArray.array = WideCharArray.array
+ sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+ sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
sharing type WordArray.elem = word
sharing type WordArray.vector = WordVector.vector
sharing type WordArraySlice.elem = word
Modified: mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -246,7 +246,6 @@
structure SysWord : WORD
structure Unix : UNIX
structure UnixSock : UNIX_SOCK
-(*
structure WideChar : CHAR
structure WideCharArray : MONO_ARRAY
structure WideCharArray2 : MONO_ARRAY2
@@ -256,6 +255,8 @@
structure WideString : STRING
structure WideSubstring : SUBSTRING
structure WideText : TEXT
+(*
+ structure WideTextIO : TEXT_IO
structure WideTextPrimIO : PRIM_IO
*)
(*
@@ -578,6 +579,39 @@
sharing type Real64Array2.elem = Real64.real
sharing type Real64Array2.vector = Real64Vector.vector
sharing type Unix.exit_status = Posix.Process.exit_status
+ sharing type WideChar.string = WideString.string
+ sharing type WideCharArray.elem = WideChar.char
+ sharing type WideCharArray.vector = WideCharVector.vector
+ sharing type WideCharArray2.elem = WideChar.char
+ sharing type WideCharArray2.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.elem = WideChar.char
+ sharing type WideCharArraySlice.array = WideCharArray.array
+ sharing type WideCharArraySlice.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+ sharing type WideCharVector.elem = WideChar.char
+ sharing type WideCharVector.vector = WideString.string
+ sharing type WideCharVectorSlice.elem = WideChar.char
+ sharing type WideCharVectorSlice.slice = WideSubstring.substring
+ sharing type WideCharVectorSlice.vector = WideString.string
+ sharing type WideString.char = WideChar.char
+ (* next two are redundant? basis & char both do it... *)
+ sharing type WideString.string = WideCharVector.vector
+ sharing type WideSubstring.substring = WideCharVectorSlice.slice
+ sharing type WideSubstring.string = WideString.string
+ sharing type WideSubstring.char = WideChar.char
+ sharing type WideText.Char.char = WideChar.char
+ sharing type WideText.String.string = WideString.string
+ sharing type WideText.Substring.substring = WideSubstring.substring
+ sharing type WideText.CharVector.vector = WideCharVector.vector
+ sharing type WideText.CharArray.array = WideCharArray.array
+ sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+ sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
+(*
+ sharing type WideTextIO.
+ sharing type WideTextPrimIO.array = WideCharArray.array
+ sharing type WideTextPrimIO.vector = WideCharVector.vector
+ sharing type WideTextPrimIO.elem = WideChar.char
+*)
sharing type WordArray.elem = word
sharing type WordArray.vector = WordVector.vector
sharing type WordArraySlice.elem = word
@@ -699,11 +733,17 @@
where type 'a Vector.vector = 'a Vector.vector
*)
where type 'a VectorSlice.slice = 'a VectorSlice.slice
+(*
+ where type WideTextIO.instream = WideTextIO.instream
+ where type WideTextIO.outstream = WideTextIO.outstream
+ where type WideTextPrimIO.reader = WideTextPrimIO.reader
+ where type WideTextPrimIO.writer = WideTextPrimIO.writer
+*)
where type Word8Array.array = Word8Array.array
where type Word8ArraySlice.slice = Word8ArraySlice.slice
where type Word8ArraySlice.vector_slice = Word8ArraySlice.vector_slice
where type Word8Vector.vector = Word8Vector.vector
-
+
where type 'a MLton.Thread.t = 'a MLton.Thread.t
where type MLton.Thread.Runnable.t = MLton.Thread.Runnable.t
@@ -744,6 +784,8 @@
where type IntInf.int = IntInf.int
where type Real32.real = Real32.real
where type Real64.real = Real64.real
+ where type WideChar.char = WideChar.char
+ where type WideString.string = WideString.string
where type Word1.word = Word1.word
where type Word2.word = Word2.word
where type Word3.word = Word3.word
Modified: mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -175,7 +175,6 @@
structure SysWord = SysWord
structure Unix = Unix
structure UnixSock = UnixSock
-(*
structure WideChar = WideChar
structure WideCharArray = WideCharArray
structure WideCharArray2 = WideCharArray2
@@ -185,6 +184,8 @@
structure WideString = WideString
structure WideSubstring = WideSubstring
structure WideText = WideText
+(*
+ structure WideTextIO = WideTextIO
structure WideTextPrimIO = WideTextPrimIO
*)
(*
Modified: mlton/trunk/basis-library/text/char.sig
===================================================================
--- mlton/trunk/basis-library/text/char.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/char.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -38,17 +38,18 @@
val isPrint: char -> bool
val isPunct: char -> bool
val isSpace: char -> bool
- val fromString: string -> char option
- val scan: (char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
- val toString: char -> string
- val fromCString: string -> char option
- val toCString: char -> string
+
+ val toString: char -> String.string
+ val scan: (Char.char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
+ val fromString: String.string -> char option
+ val toCString: char -> String.string
+ val fromCString: String.string -> char option
end
signature CHAR_EXTRA =
sig
include CHAR
- val formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a
- val scanC: (char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
+ val formatSequences: (Char.char, 'a) StringCvt.reader -> 'a -> 'a
+ val scanC: (Char.char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
end
Modified: mlton/trunk/basis-library/text/char.sml
===================================================================
--- mlton/trunk/basis-library/text/char.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/char.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -6,23 +6,136 @@
* See the file MLton-LICENSE for details.
*)
-structure Char: CHAR_EXTRA =
+signature CHAR_ARG =
+ sig
+ structure PreChar : PRE_CHAR
+ structure CharVector: EQTYPE_MONO_VECTOR_EXTRA
+ structure CharArray: MONO_ARRAY_EXTRA
+ sharing type PreChar.char = CharVector.elem = CharArray.elem
+ sharing type PreChar.string = CharVector.vector = CharArray.vector
+ end
+
+functor CharFn(Arg : CHAR_ARG)
+ :> CHAR_EXTRA
+ where type char = Arg.PreChar.char
+ where type string = Arg.PreChar.string =
struct
- open PreChar
+ open Arg.PreChar
+
+ type string = Arg.CharVector.vector
+ val maxOrd: int = numChars - 1
+
+ val fromString = Arg.CharVector.fromPoly o
+ Vector.map (fn x => fromChar x) o
+ String.toPoly
+ fun succ c =
+ if Primitive.Controls.safe
+ andalso c = maxChar
+ then raise Chr
+ else chrUnsafe (Int.+ (ord c, 1))
+
+ fun pred c =
+ if Primitive.Controls.safe
+ andalso c = minChar
+ then raise Chr
+ else chrUnsafe (Int.- (ord c, 1))
+
+ fun chrOpt c =
+ if Primitive.Controls.safe
+ andalso Int.gtu (c, maxOrd)
+ then NONE
+ else SOME (chrUnsafe c)
+
+ fun chr c =
+ case chrOpt c of
+ NONE => raise Chr
+ | SOME c => c
+
+ (* To implement character classes, we cannot use lookup tables on the
+ * order of the number of characters. We don't want to scan the string
+ * each time, so instead we'll sort it and use binary search.
+ *)
+ fun contains s =
+ let
+ val a = Array.tabulate (Arg.CharVector.length s,
+ fn i => Arg.CharVector.sub (s, i))
+ val () = Heap.heapSort (a, op <)
+ in
+ fn c =>
+ let
+ val x = Heap.binarySearch (a, fn d => d < c)
+ in
+ if x = Array.length a then false else
+ Array.sub (a, x) = c
+ end
+ end
+
+ fun notContains s = not o contains s
+
+ val c = fromChar
+ val ( la, lA, lf, lF, lz, lZ, l0, l9, lSPACE,lBANG, lTIL, lDEL) =
+ (c#"a", c#"A", c#"f", c#"F", c#"z", c#"Z", c#"0", c#"9", c#" ", c#"!", c#"~", c#"\127")
+
+ (* Range comparisons don't need tables! It's faster to just compare. *)
+ fun isLower c = c >= la andalso c <= lz
+ fun isUpper c = c >= lA andalso c <= lZ
+ fun isDigit c = c >= l0 andalso c <= l9
+ fun isGraph c = c >= lBANG andalso c <= lTIL
+ fun isPrint c = c >= lSPACE andalso c <= lTIL
+ fun isCntrl c = c < lSPACE orelse c = lDEL
+ fun isAscii c = c <= lDEL
+
+ local
+ (* We can use a table for small ranges *)
+ val limit = 128
+ fun memoize (f: char -> 'a, g: char -> 'a): char -> 'a =
+ let
+ val v = Vector.tabulate (limit, f o chrUnsafe)
+ val limit = chr limit
+ in
+ fn c => if c >= limit then g c else
+ Vector.sub (v, ord c)
+ end
+
+ fun make (test, diff) =
+ memoize (fn c => if test c then chrUnsafe (Int.+? (ord c, diff))
+ else c,
+ fn c => c)
+ val diff = Int.- (ord lA, ord la)
+
+ infix || &&
+ fun f || g = memoize (fn c => f c orelse g c, fn _ => false)
+ fun f && g = memoize (fn c => f c andalso g c, fn _ => false)
+
+ val WS = fromString " \t\r\n\v\f"
+
+ fun laf c = (c >= la andalso c <= lf) orelse
+ (c >= lA andalso c <= lF)
+ in
+ val isAlpha = isUpper || isLower
+ val isHexDigit = isDigit || laf
+ val isAlphaNum = isAlpha || isDigit
+ val isSpace = memoize (contains WS, fn _ => false)
+ val isPunct = isGraph && (not o isAlphaNum)
+
+ val toLower = make (isUpper, Int.~ diff)
+ val toUpper = make (isLower, diff)
+ end
+
fun control reader state =
case reader state of
NONE => NONE
| SOME (c, state) =>
- if #"@" <= c andalso c <= #"_"
- then SOME (chr (Int.-? (ord c, ord #"@")), state)
+ if Char.<= (#"@", c) andalso Char.<= (c, #"_")
+ then SOME (chr (Int.-? (Char.ord c, Char.ord #"@")), state)
else NONE
fun formatChar reader state =
case reader state of
NONE => NONE
| SOME (c, state) =>
- if isSpace c
+ if StringCvt.isSpace c
then SOME ((), state)
else NONE
@@ -36,7 +149,7 @@
loop
end
- val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
+ val 'a formatSequences: (Char.char, 'a) StringCvt.reader -> 'a -> 'a =
fn reader =>
let
fun loop state =
@@ -57,16 +170,16 @@
loop
end
- fun 'a scan (reader: (char, 'a) StringCvt.reader)
+ fun 'a scan (reader: (Char.char, 'a) StringCvt.reader)
: (char, 'a) StringCvt.reader =
let
- val escape: (char, 'a) StringCvt.reader =
+ val escape : (char, 'a) StringCvt.reader =
fn state =>
case reader state of
NONE => NONE
| SOME (c, state') =>
let
- fun yes c = SOME (c, state')
+ fun yes c = SOME (fromChar c, state')
in
case c of
#"a" => yes #"\a"
@@ -83,6 +196,10 @@
Reader.mapOpt chrOpt
(StringCvt.digitsExact (StringCvt.HEX, 4) reader)
state'
+ | #"U" =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsExact (StringCvt.HEX, 8) reader)
+ state'
| _ => (* 3 decimal digits *)
Reader.mapOpt chrOpt
(StringCvt.digitsExact (StringCvt.DEC, 3)
@@ -97,21 +214,22 @@
case reader state of
NONE => NONE
| SOME (c, state) =>
- if isPrint c
+ (* isPrint doesn't exist. yuck: *)
+ if Char.>= (c, #" ") andalso Char.<= (c, #"~")
then
case c of
#"\\" => escape state
| #"\"" => NONE
- | _ => SOME (c, formatSequences reader state)
+ | _ => SOME (fromChar c, formatSequences reader state)
else NONE
end
in
main
end
-
+
val fromString = StringCvt.scanString scan
-
- fun 'a scanC (reader: (char, 'a) StringCvt.reader)
+
+ fun 'a scanC (reader: (Char.char, 'a) StringCvt.reader)
: (char, 'a) StringCvt.reader =
let
val rec escape =
@@ -119,7 +237,7 @@
case reader state of
NONE => NONE
| SOME (c, state') =>
- let fun yes c = SOME (c, state')
+ let fun yes c = SOME (fromChar c, state')
in case c of
#"a" => yes #"\a"
| #"b" => yes #"\b"
@@ -137,6 +255,14 @@
Reader.mapOpt chrOpt
(StringCvt.digits StringCvt.HEX reader)
state'
+ | #"u" =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
+ state'
+ | #"U" =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsExact (StringCvt.HEX, 8) reader)
+ state'
| _ =>
Reader.mapOpt chrOpt
(StringCvt.digitsPlus (StringCvt.OCT, 3) reader)
@@ -145,11 +271,12 @@
and main =
fn NONE => NONE
| SOME (c, state) =>
- if isPrint c
+ (* yuck. isPrint is not defined yet: *)
+ if Char.>= (c, #" ") andalso Char.<= (c, #"~")
then
case c of
#"\\" => escape state
- | _ => SOME (c, state)
+ | _ => SOME (fromChar c, state)
else NONE
in
main o reader
@@ -157,63 +284,98 @@
val fromCString = StringCvt.scanString scanC
- fun padLeft (s: string, n: int): string =
+ fun padLeft (s: String.string, n: int): String.string =
let
- val m = PreString.size s
+ val m = String.size s
val diff = Int.-? (n, m)
in if Int.> (diff, 0)
- then PreString.concat [PreString.new (diff, #"0"), s]
+ then String.concat [String.new (diff, #"0"), s]
else if diff = 0
then s
else raise Fail "padLeft"
end
+
+ fun unicodeEscape ord =
+ if Int.< (ord, 65536)
+ then String.concat
+ ["\\u", padLeft (Int.fmt StringCvt.HEX ord, 4)]
+ else String.concat
+ ["\\U", padLeft (Int.fmt StringCvt.HEX ord, 8)]
+
+ fun toString c =
+ let
+ val ord = ord c
+ in
+ if isPrint c
+ then
+ case ord of
+ 92 (* #"\\" *) => "\\\\"
+ | 34 (* #"\"" *) => "\\\""
+ | _ => String.new (1, Char.chrUnsafe ord)
+ (* ^^^^ safe b/c isPrint < 128 *)
+ else
+ case ord of
+ 7 (* #"\a" *) => "\\a"
+ | 8 (* #"\b" *) => "\\b"
+ | 9 (* #"\t" *) => "\\t"
+ | 10 (* #"\n" *) => "\\n"
+ | 11 (* #"\v" *) => "\\v"
+ | 12 (* #"\f" *) => "\\f"
+ | 13 (* #"\r" *) => "\\r"
+ | _ =>
+ if Int.< (ord, 32)
+ then String.concat
+ ["\\^", String.new
+ (1, Char.chrUnsafe
+ (Int.+? (ord, 64 (* #"@" *) )))]
+ else if Int.< (ord, 256)
+ then String.concat
+ ["\\", padLeft (Int.fmt StringCvt.DEC ord, 3)]
+ else unicodeEscape ord
+ end
+
+ fun toCString c =
+ let
+ val ord = ord c
+ in
+ if isPrint c
+ then
+ case ord of
+ 92 (* #"\\" *) => "\\\\"
+ | 34 (* #"\"" *) => "\\\""
+ | 63 (* #"?" *) => "\\?"
+ | 39 (* #"'" *) => "\\'"
+ | _ => String.new (1, Char.chrUnsafe ord)
+ else
+ case ord of
+ 7 (* #"\a" *) => "\\a"
+ | 8 (* #"\b" *) => "\\b"
+ | 9 (* #"\t" *) => "\\t"
+ | 10 (* #"\n" *) => "\\n"
+ | 11 (* #"\v" *) => "\\v"
+ | 12 (* #"\f" *) => "\\f"
+ | 13 (* #"\r" *) => "\\r"
+ | _ =>
+ if Int.< (ord, 256)
+ then String.concat
+ ["\\", padLeft (Int.fmt StringCvt.OCT ord, 3)]
+ else unicodeEscape ord
+ end
+ end
- val toString =
- memoize
- (fn c =>
- if isPrint c
- then
- (case c of
- #"\\" => "\\\\"
- | #"\"" => "\\\""
- | _ => PreString.str c)
- else
- case c of
- #"\a" => "\\a"
- | #"\b" => "\\b"
- | #"\t" => "\\t"
- | #"\n" => "\\n"
- | #"\v" => "\\v"
- | #"\f" => "\\f"
- | #"\r" => "\\r"
- | _ =>
- if c < #" "
- then (PreString.concat
- ["\\^", PreString.str (chr (Int.+? (ord c, ord #"@")))])
- else PreString.concat
- ["\\", padLeft (Int.fmt StringCvt.DEC (ord c), 3)])
+structure CharArg : CHAR_ARG =
+ struct
+ structure PreChar = Char
+ structure CharVector = CharVector
+ structure CharArray = CharArray
+ end
- val toCString =
- memoize
- (fn c =>
- if isPrint c
- then
- (case c of
- #"\\" => "\\\\"
- | #"\"" => "\\\""
- | #"?" => "\\?"
- | #"'" => "\\'"
- | _ => PreString.str c)
- else
- case c of
- #"\a" => "\\a"
- | #"\b" => "\\b"
- | #"\t" => "\\t"
- | #"\n" => "\\n"
- | #"\v" => "\\v"
- | #"\f" => "\\f"
- | #"\r" => "\\r"
- | _ =>
- PreString.concat
- ["\\", padLeft (Int.fmt StringCvt.OCT (ord c), 3)])
+structure WideCharArg : CHAR_ARG =
+ struct
+ structure PreChar = WideChar
+ structure CharVector = WideCharVector
+ structure CharArray = WideCharArray
end
+
+structure Char : CHAR_EXTRA = CharFn(CharArg)
+structure WideChar : CHAR_EXTRA = CharFn(WideCharArg)
Added: mlton/trunk/basis-library/text/char0.sig
===================================================================
--- mlton/trunk/basis-library/text/char0.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/char0.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -0,0 +1,20 @@
+signature PRE_CHAR =
+ sig
+ eqtype char
+ eqtype string
+
+ val chrUnsafe: int -> char
+ val ord: char -> int
+
+ val fromChar: Char.char -> char
+
+ val minChar : char
+ val maxChar : char
+ val numChars : int
+
+ val compare: char * char -> order
+ val < : char * char -> bool
+ val <= : char * char -> bool
+ val > : char * char -> bool
+ val >= : char * char -> bool
+ end
Modified: mlton/trunk/basis-library/text/char0.sml
===================================================================
--- mlton/trunk/basis-library/text/char0.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/char0.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -6,95 +6,72 @@
* See the file MLton-LICENSE for details.
*)
-structure PreChar8 =
- struct
- structure Prim = Primitive.Char8
- open Primitive.Char8
-
- type char = Primitive.Char8.char
- type string = Primitive.String8.string
-
- val chrUnsafe = Prim.idFromWord8 o Int.sextdToWord8
- val ord = Int.zextdFromWord8 o Prim.idToWord8
-
- val minChar: char = #"\000"
- val numChars: int = 256
- val maxOrd: int = 255
- val maxChar:char = #"\255"
-
- fun succ c =
- if Primitive.Controls.safe
- andalso c = maxChar
- then raise Chr
- else chrUnsafe (Int.+ (ord c, 1))
-
- fun pred c =
- if Primitive.Controls.safe
- andalso c = minChar
- then raise Chr
- else chrUnsafe (Int.- (ord c, 1))
-
- fun chrOpt c =
- if Primitive.Controls.safe
- andalso Int.gtu (c, maxOrd)
- then NONE
- else SOME (chrUnsafe c)
-
- fun chr c =
- case chrOpt c of
- NONE => raise Chr
- | SOME c => c
-
- fun oneOf s =
- let
- val a = Array.array (numChars, false)
- val n = PreString8.size s
- fun loop i =
- if Int.>= (i, n) then ()
- else (Array.update (a, ord (PreString8.sub (s, i)), true)
- ; loop (Int.+ (i, 1)))
- in loop 0
- ; fn c => Array.sub (a, ord c)
+local
+ structure PreCharX =
+ struct
+ structure Prim8 = Primitive.Char8
+ structure Prim16 = Primitive.Char16
+ structure Prim32 = Primitive.Char32
+
+ type 'a t = {
+ chrUnsafe: int -> 'a,
+ ord: 'a -> int,
+ minChar: 'a,
+ maxChar: 'a,
+ numChars: int
+ }
+
+ val fChar8 : Prim8.char t = {
+ chrUnsafe = Prim8.idFromWord8 o Int.sextdToWord8,
+ ord = Int.zextdFromWord8 o Prim8.idToWord8,
+ minChar = #"\000",
+ maxChar = #"\255",
+ numChars = 256
+ }
+ val fChar16 : Prim16.char t = {
+ chrUnsafe = Prim16.idFromWord16 o Int.sextdToWord16,
+ ord = Int.zextdFromWord16 o Prim16.idToWord16,
+ minChar = #"\000",
+ maxChar = #"\uFFFF",
+ numChars = 65536
+ }
+ val fChar32 : Prim32.char t = {
+ chrUnsafe = Prim32.idFromWord32 o Int.sextdToWord32,
+ ord = Int.zextdFromWord32 o Prim32.idToWord32,
+ minChar = #"\000",
+ maxChar = #"\U0010FFFF",
+ numChars = 1114112 (* 0x110000 *)
+ }
+ end
+in
+ structure Char : PRE_CHAR =
+ struct
+ (* set by config/default/default-charX.sml *)
+ open Char
+ type string = String.string
+
+ local
+ structure PCX = Char_ChooseChar(PreCharX)
+ in
+ val { chrUnsafe, ord, minChar, maxChar, numChars } = PCX.f
end
- val contains = oneOf
-
- fun notOneOf s = not o (oneOf s)
- val notContains = notOneOf
-
- fun memoize (f: char -> 'a): char -> 'a =
- let val a = Array.tabulate (numChars, f o chr)
- in fn c => Array.sub (a, ord c)
+
+ fun fromChar x = x
+ end
+
+ structure WideChar : PRE_CHAR =
+ struct
+ (* set by config/default/default-widecharX.sml *)
+ open WideChar
+ type string = WideString.string
+
+ local
+ structure PCX = WideChar_ChooseChar(PreCharX)
+ in
+ val { chrUnsafe, ord, minChar, maxChar, numChars } = PCX.f
end
-
- local
- val not = fn f => memoize (not o f)
- infix || &&
- fun f || g = memoize (fn c => f c orelse g c)
- fun f && g = memoize (fn c => f c andalso g c)
- in
- val isLower = oneOf "abcdefghijklmnopqrstuvwxyz"
- val isUpper = oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- val isDigit = oneOf "0123456789"
- val isAlpha = isUpper || isLower
- val isHexDigit = isDigit || (oneOf "abcdefABCDEF")
- val isAlphaNum = isAlpha || isDigit
- val isPrint = fn c => #" " <= c andalso c <= #"~"
- val isSpace = oneOf " \t\r\n\v\f"
- val isGraph = (not isSpace) && isPrint
- val isPunct = isGraph && (not isAlphaNum)
- val isCntrl = not isPrint
- val isAscii = fn c => c < #"\128"
+
+ (* safe b/c WideChar >= Char *)
+ val fromChar = chrUnsafe o Char.ord
end
-
- local
- fun make (lower, upper, diff) =
- memoize (fn c => if lower <= c andalso c <= upper
- then chr (Int.+? (ord c, diff))
- else c)
- val diff = Int.- (ord #"A", ord #"a")
- in
- val toLower = make (#"A", #"Z", Int.~ diff)
- val toUpper = make (#"a", #"z", diff)
- end
- end
-structure PreChar = PreChar8
+end
Modified: mlton/trunk/basis-library/text/string-cvt.sig
===================================================================
--- mlton/trunk/basis-library/text/string-cvt.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/string-cvt.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -32,6 +32,9 @@
val radixToWord: radix -> word
val charToDigit: radix -> char -> int option
val charToWDigit: radix -> char -> word option
+
+ (* this exists before Char.isSpace *)
+ val isSpace: char -> bool
(* maps 0...15 to #"0", #"1", ..., #"F" *)
val digitToChar: int -> char
Modified: mlton/trunk/basis-library/text/string-cvt.sml
===================================================================
--- mlton/trunk/basis-library/text/string-cvt.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/string-cvt.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -34,19 +34,19 @@
local
fun pad f (c: char) i s =
let
- val n = PreString.size s
+ val n = String.size s
in
if n >= i
then s
- else f (s, PreString.vector (i -? n, c))
+ else f (s, String.vector (i -? n, c))
end
in
- val padLeft = pad (fn (s, pad) => PreString.^ (pad, s))
- val padRight = pad PreString.^
+ val padLeft = pad (fn (s, pad) => String.^ (pad, s))
+ val padRight = pad String.^
end
fun splitl p f src =
- let fun done chars = PreString.implode (rev chars)
+ let fun done chars = String.implode (rev chars)
fun loop (src, chars) =
case f src of
NONE => (done chars, src)
@@ -60,14 +60,12 @@
fun takel p f s = #1 (splitl p f s)
fun dropl p f s = #2 (splitl p f s)
- fun skipWS x = dropl PreChar.isSpace x
-
type cs = int
fun stringReader (s: string): (char, cs) reader =
- fn i => if i >= PreString.size s
+ fn i => if i >= String.size s
then NONE
- else SOME (PreString.sub (s, i), i + 1)
+ else SOME (String.sub (s, i), i + 1)
fun 'a scanString (f: ((char, cs) reader -> ('a, cs) reader)) (s: string)
: 'a option =
@@ -76,15 +74,20 @@
| SOME (a, _) => SOME a
local
+ fun memoize (f: char -> 'a): char -> 'a =
+ let val a = Array.tabulate (Char.numChars, f o Char.chrUnsafe)
+ in fn c => Array.sub (a, Char.ord c)
+ end
+
fun range (add: int, cmin: char, cmax: char): char -> int option =
- let val min = PreChar.ord cmin
- in fn c => if PreChar.<= (cmin, c) andalso PreChar.<= (c, cmax)
- then SOME (add +? PreChar.ord c -? min)
+ let val min = Char.ord cmin
+ in fn c => if Char.<= (cmin, c) andalso Char.<= (c, cmax)
+ then SOME (add +? Char.ord c -? min)
else NONE
end
fun 'a combine (ds: (char -> 'a option) list): char -> 'a option =
- PreChar.memoize
+ memoize
(fn c =>
let
val rec loop =
@@ -96,13 +99,19 @@
in loop ds
end)
- val bin = PreChar.memoize (range (0, #"0", #"1"))
- val oct = PreChar.memoize (range (0, #"0", #"7"))
- val dec = PreChar.memoize (range (0, #"0", #"9"))
+ val bin = memoize (range (0, #"0", #"1"))
+ val oct = memoize (range (0, #"0", #"7"))
+ val dec = memoize (range (0, #"0", #"9"))
val hex = combine [range (0, #"0", #"9"),
range (10, #"a", #"f"),
range (10, #"A", #"F")]
+
+ fun isSpace c = (c = #" " orelse c = #"\t" orelse c = #"\r" orelse
+ c = #"\n" orelse c = #"\v" orelse c = #"\f")
in
+ val isSpace = memoize isSpace
+ fun skipWS x = dropl isSpace x
+
fun charToDigit (radix: radix): char -> int option =
case radix of
BIN => bin
@@ -192,5 +201,5 @@
| SOME n => loop (n, state)
end
- fun digitToChar (n: int): char = PreString.sub ("0123456789ABCDEF", n)
+ fun digitToChar (n: int): char = String.sub ("0123456789ABCDEF", n)
end
Modified: mlton/trunk/basis-library/text/string.sig
===================================================================
--- mlton/trunk/basis-library/text/string.sig 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/string.sig 2007-02-12 01:31:14 UTC (rev 5175)
@@ -33,7 +33,7 @@
val isSuffix: string -> string -> bool
val map: (char -> char) -> string -> string
val maxSize: int
- val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader
+ val scan: (Char.char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader
val sub: string * int -> char
val toCString: string -> String.string
val toString: string -> String.string
@@ -44,8 +44,9 @@
signature STRING_EXTRA =
sig
include STRING
-
- val fromArray: CharArray.array -> string
+ type array
+
+ val fromArray: array -> string
val new: int * char -> string
val nullTerm: string -> string
val tabulate: int * (int -> char) -> string
Modified: mlton/trunk/basis-library/text/string.sml
===================================================================
--- mlton/trunk/basis-library/text/string.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/string.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -6,9 +6,39 @@
* See the file MLton-LICENSE for details.
*)
-structure String: STRING_EXTRA =
+signature STRING_ARG =
+ sig
+ structure Char: CHAR_EXTRA
+ structure CharVector: EQTYPE_MONO_VECTOR_EXTRA
+ sharing type Char.char = CharVector.elem
+ sharing type Char.string = CharVector.vector
+ end
+
+functor StringFn(Arg : STRING_ARG)
+ :> STRING_EXTRA
+ where type char = Arg.CharVector.elem
+ where type string = Arg.CharVector.vector
+ where type array = Arg.CharVector.array =
struct
- open PreString
+ open Arg
+ open CharVector
+ structure CharVectorSlice = MonoVectorSlice
+
+ type char = elem
+ type string = vector
+
+ val new = vector
+ fun str c = new (1, c)
+
+ val maxSize = maxLen
+ val size = length
+ val op ^ = append
+ val implode = fromList
+ val explode = toList
+
+ fun extract (s, start, len) =
+ CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
+ fun substring (s, start, len) = extract (s, start, SOME len)
val toLower = translate (str o Char.toLower)
@@ -26,11 +56,13 @@
in
open S
end
+
+ fun Stranslate f = String.fromPoly o Vector.translate f o toPoly
- val toString = translate Char.toString
- val toCString = translate Char.toCString
+ val toString = Stranslate Char.toString
+ val toCString = Stranslate Char.toCString
- val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
+ val scan =
fn reader =>
let
fun loop (state, cs) =
@@ -44,13 +76,30 @@
val fromString = StringCvt.scanString scan
- fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
- : (string, 'a) StringCvt.reader =
+ fun scanString scanChar reader =
fn state =>
Option.map (fn (cs, state) => (implode cs, state))
(Reader.list (scanChar reader) state)
val fromCString = StringCvt.scanString (scanString Char.scanC)
- fun nullTerm s = s ^ "\000"
+ val null = str (Char.chr 0)
+ fun nullTerm s = s ^ null
end
+
+structure StringArg : STRING_ARG =
+ struct
+ structure Char = Char
+ structure CharVector = CharVector
+ structure CharArray = CharArray
+ end
+
+structure WideStringArg : STRING_ARG =
+ struct
+ structure Char = WideChar
+ structure CharVector = WideCharVector
+ structure CharArray = WideCharArray
+ end
+
+structure String : STRING_EXTRA = StringFn(StringArg)
+structure WideString : STRING_EXTRA = StringFn(WideStringArg)
Modified: mlton/trunk/basis-library/text/string0.sml
===================================================================
--- mlton/trunk/basis-library/text/string0.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/string0.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -6,27 +6,32 @@
* See the file MLton-LICENSE for details.
*)
-structure PreString8 =
+(* This is the minimum needed to bootstrap StringCvt *)
+structure String =
struct
+ (* CharVector comes from mono.sml and default-charX.sml *)
open CharVector
type char = elem
type string = vector
- structure PreSubstring =
- struct
- open CharVectorSlice
- type char = elem
- type string = vector
- type substring = slice
- end
- val maxSize = maxLen
+
val size = length
- fun extract (s, start, len) =
- CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
- fun substring (s, start, len) = extract (s, start, SOME len)
val op ^ = append
+ val implode = fromList
+ val explode = toList
val new = vector
- fun str c = new (1, c)
+ end
+
+(*
+structure WideString =
+ struct
+ open WideCharVector
+ type char = elem
+ type string = vector
+
+ val size = length
+ val op ^ = append
val implode = fromList
val explode = toList
+ val new = vector
end
-structure PreString = PreString8
+*)
Modified: mlton/trunk/basis-library/text/substring.sml
===================================================================
--- mlton/trunk/basis-library/text/substring.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/substring.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -9,12 +9,18 @@
(* The :> is to hide the type substring. We must add the where's to make char
* and string the same as the toplevel types.
*)
-structure Substring :> SUBSTRING_EXTRA
- where type char = char
- where type string = string
- where type substring = CharVectorSlice.slice =
+functor SubstringFn(Arg : STRING_ARG)
+ :> SUBSTRING_EXTRA
+ where type char = Arg.CharVector.MonoVectorSlice.elem
+ where type string = Arg.CharVector.MonoVectorSlice.vector
+ where type substring = Arg.CharVector.MonoVectorSlice.slice =
struct
- open PreString.PreSubstring
+ open Arg
+ open CharVector.MonoVectorSlice
+
+ type char = elem
+ type string = vector
+ type substring = slice
val size = length
val extract = slice
@@ -51,5 +57,5 @@
*)
end
-structure SubstringGlobal: SUBSTRING_GLOBAL = Substring
-open SubstringGlobal
+structure Substring = SubstringFn(StringArg)
+structure WideSubstring = SubstringFn(WideStringArg)
Modified: mlton/trunk/basis-library/text/text.sml
===================================================================
--- mlton/trunk/basis-library/text/text.sml 2007-02-12 01:13:24 UTC (rev 5174)
+++ mlton/trunk/basis-library/text/text.sml 2007-02-12 01:31:14 UTC (rev 5175)
@@ -15,3 +15,14 @@
structure String = String
structure Substring = Substring
end
+
+structure WideText: TEXT =
+ struct
+ structure Char = WideChar
+ structure CharArray = WideCharArray
+ structure CharArraySlice = WideCharArraySlice
+ structure CharVector = WideCharVector
+ structure CharVectorSlice = WideCharVectorSlice
+ structure String = WideString
+ structure Substring = WideSubstring
+ end
More information about the MLton-commit
mailing list