[MLton-commit] r4250
Stephen Weeks
MLton@mlton.org
Wed, 23 Nov 2005 21:43:34 -0800
Import unicode working copy from carrot
----------------------------------------------------------------------
A mlton/branches/unicode/
U mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml
U mlton/branches/unicode/basis-library/integer/word.sml
U mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile
U mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig
U mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml
U mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml
U mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml
U mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb
U mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig
U mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml
U mlton/branches/unicode/basis-library/misc/primitive.sml
A mlton/branches/unicode/basis-library/text/char.fun
U mlton/branches/unicode/basis-library/text/char.sig
U mlton/branches/unicode/basis-library/text/char.sml
D mlton/branches/unicode/basis-library/text/char0.sml
A mlton/branches/unicode/basis-library/text/charset.sml
U mlton/branches/unicode/basis-library/text/string-cvt.sml
A mlton/branches/unicode/basis-library/text/string.fun
U mlton/branches/unicode/basis-library/text/string.sml
D mlton/branches/unicode/basis-library/text/string0.sml
A mlton/branches/unicode/basis-library/text/substring.fun
U mlton/branches/unicode/basis-library/text/substring.sml
U mlton/branches/unicode/basis-library/text/text.sml
A mlton/branches/unicode/basis-library/text/unicode-4.1.0/
A mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
A mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz
----------------------------------------------------------------------
Copied: mlton/branches/unicode (from rev 4246, mlton/trunk)
Modified: mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml
===================================================================
--- mlton/trunk/basis-library/arrays-and-vectors/mono.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/arrays-and-vectors/mono.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -61,14 +61,44 @@
= EqMono (type elem = char)
open S
in
- structure CharArray = Array
- structure CharArray2 = Array2
- structure CharArraySlice = ArraySlice
- structure CharVector = Vector
- structure CharVectorSlice = VectorSlice
- val _ = CharVector.fromArray: CharArray.array -> CharVector.vector
+ structure Char1Array = Array
+ structure Char1Array2 = Array2
+ structure Char1ArraySlice = ArraySlice
+ structure Char1Vector = Vector
+ structure Char1VectorSlice = VectorSlice
+ val _ = Char1Vector.fromArray: Char1Array.array -> Char1Vector.vector
end
local
+ structure S:>
+ EQ_MONO
+ where type Array.elem = Char2.char
+ where type Vector.vector = String2.string
+ = EqMono (type elem = Char2.char)
+ open S
+in
+ structure Char2Array = Array
+ structure Char2Array2 = Array2
+ structure Char2ArraySlice = ArraySlice
+ structure Char2Vector = Vector
+ structure Char2VectorSlice = VectorSlice
+ val _ = Char2Vector.fromArray: Char2Array.array -> Char2Vector.vector
+end
+local
+ structure S:>
+ EQ_MONO
+ where type Array.elem = Char4.char
+ where type Vector.vector = String4.string
+ = EqMono (type elem = Char4.char)
+ open S
+in
+ structure Char4Array = Array
+ structure Char4Array2 = Array2
+ structure Char4ArraySlice = ArraySlice
+ structure Char4Vector = Vector
+ structure Char4VectorSlice = VectorSlice
+ val _ = Char4Vector.fromArray: Char4Array.array -> Char4Vector.vector
+end
+local
structure S = EqMono (type elem = Int8.int)
open S
in
@@ -182,6 +212,18 @@
structure Word64Array2 = Array2
end
+structure CharVector = Char1Vector
+structure CharVectorSlice = Char1VectorSlice
+structure CharArray = Char1Array
+structure CharArraySlice = Char1ArraySlice
+structure CharArray2 = Char1Array2
+
+structure WideCharVector = Char4Vector
+structure WideCharVectorSlice = Char4VectorSlice
+structure WideCharArray = Char4Array
+structure WideCharArraySlice = Char4ArraySlice
+structure WideCharArray2 = Char4Array2
+
structure IntVector = Int32Vector
structure IntVectorSlice = Int32VectorSlice
structure IntArray = Int32Array
Modified: mlton/branches/unicode/basis-library/integer/word.sml
===================================================================
--- mlton/trunk/basis-library/integer/word.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/integer/word.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -75,7 +75,7 @@
let val chars = StringCvt.digitToChar (toInt (q mod radix)) :: chars
val q = q div radix
in if q = zero
- then String0.implode chars
+ then CharVector.fromList chars
else loop (q, chars)
end
in loop (w, [])
Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/Makefile 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/Makefile 2005-11-24 00:51:12 UTC (rev 4250)
@@ -9,7 +9,7 @@
overloads.sml: $(GEN).sml
mlton $(GEN).sml
- $(GEN) >overloads.sml
+ ./$(GEN) >overloads.sml
.PHONY: clean
clean:
Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/basis.sig 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/basis.sig 2005-11-24 00:51:12 UTC (rev 4250)
@@ -242,7 +242,6 @@
structure SysWord : WORD
structure Unix : UNIX
structure UnixSock : UNIX_SOCK
-(*
structure WideChar : CHAR
structure WideCharArray : MONO_ARRAY
structure WideCharArray2 : MONO_ARRAY2
@@ -252,9 +251,8 @@
structure WideString : STRING
structure WideSubstring : SUBSTRING
structure WideText : TEXT
+(*
structure WideTextPrimIO : PRIM_IO
-*)
-(*
structure Windows : WINDOWS
*)
structure Word1: WORD
@@ -552,6 +550,38 @@
sharing type Real64Array2.elem = Real64.real
sharing type Real64Array2.vector = Real64Vector.vector
sharing type Unix.exit_status = Posix.Process.exit_status
+ sharing type WideChar.string = WideString.string
+ sharing type WideCharArray.elem = WideChar.char
+ sharing type WideCharArray.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.elem = WideChar.char
+ sharing type WideCharArraySlice.array = WideCharArray.array
+ sharing type WideCharArraySlice.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+ sharing type WideCharVector.elem = WideChar.char
+ sharing type WideCharVector.vector = WideString.string
+ sharing type WideCharVectorSlice.elem = WideChar.char
+ sharing type WideCharVectorSlice.vector = WideString.string
+ sharing type WideCharVectorSlice.slice = WideSubstring.substring
+ sharing type WideString.string = WideCharVector.vector
+ sharing type WideString.char = WideChar.char
+ sharing type WideSubstring.substring = WideCharVectorSlice.slice
+ sharing type WideSubstring.string = WideString.string
+ sharing type WideSubstring.char = WideChar.char
+ sharing type WideText.Char.char = WideChar.char
+ sharing type WideText.String.string = WideString.string
+ sharing type WideText.Substring.substring = WideSubstring.substring
+ sharing type WideText.CharVector.vector = WideCharVector.vector
+ sharing type WideText.CharArray.array = WideCharArray.array
+ sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+ sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
+(*
+ sharing type WideTextPrimIO.array = WideCharArray.array
+ sharing type WideTextPrimIO.array_slice = WideCharArraySlice.slice
+ sharing type WideTextPrimIO.elem = WideChar.char
+ sharing type WideTextPrimIO.pos = Position.int
+ sharing type WideTextPrimIO.vector = WideCharVector.vector
+ sharing type WideTextPrimIO.vector_slice = WideCharVectorSlice.slice
+*)
sharing type WordArray.elem = word
sharing type WordArray.vector = WordVector.vector
sharing type WordArraySlice.elem = word
Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/generate-overloads.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/generate-overloads.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -24,7 +24,7 @@
@ List.map (List.tabulate (32, fn i => i + 1) @ [64],
fn i => concat ["Word", Int.toString i])
-val text = ["Char", "String"]
+val text = ["Char", "String", "WideChar", "WideString", "Char2", "String2"]
(* Order matters here in the appends, since the first element will be the
* default.
Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/overloads.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/overloads.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,10 +1,3 @@
-(* Copyright (C) 2002-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
(* This file is automatically generated. Do not edit. *)
_overload 2 ~ : 'a -> 'a
@@ -602,6 +595,10 @@
and LargeReal.<
and Char.<
and String.<
+and WideChar.<
+and WideString.<
+and Char2.<
+and String2.<
_overload 1 <= : 'a * 'a -> bool
as Int.<=
@@ -683,6 +680,10 @@
and LargeReal.<=
and Char.<=
and String.<=
+and WideChar.<=
+and WideString.<=
+and Char2.<=
+and String2.<=
_overload 1 > : 'a * 'a -> bool
as Int.>
@@ -764,6 +765,10 @@
and LargeReal.>
and Char.>
and String.>
+and WideChar.>
+and WideString.>
+and Char2.>
+and String2.>
_overload 1 >= : 'a * 'a -> bool
as Int.>=
@@ -845,3 +850,7 @@
and LargeReal.>=
and Char.>=
and String.>=
+and WideChar.>=
+and WideString.>=
+and Char2.>=
+and String2.>=
Modified: mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-2002/top-level/top-level.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-2002/top-level/top-level.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -40,6 +40,8 @@
structure Real64Vector = Real64Vector
structure RealArray = RealArray
structure RealVector = RealVector
+structure WideCharArraySlice = WideCharArraySlice
+structure WideCharArray = WideCharArray
structure Word8Array = Word8Array
structure Word8Vector = Word8Vector
structure Word16Array = Word16Array
Modified: mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/basis-extra.mlb 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/basis-extra.mlb 2005-11-24 00:51:12 UTC (rev 4250)
@@ -47,10 +47,9 @@
../../arrays-and-vectors/mono-array2.sig
../../arrays-and-vectors/mono-array2.fun
../../arrays-and-vectors/mono.sml
- ../../text/string0.sml
- ../../text/char0.sml
../../misc/reader.sig
../../misc/reader.sml
+ ../../text/charset.sml
../../text/string-cvt.sig
../../text/string-cvt.sml
../../general/bool.sig
@@ -58,11 +57,14 @@
../../integer/integer.sig
../../integer/int.sml
../../text/char.sig
+ ../../text/char.fun
../../text/char.sml
+ ../../text/string.sig
+ ../../text/string.fun
+ ../../text/string.sml
../../text/substring.sig
+ ../../text/substring.fun
../../text/substring.sml
- ../../text/string.sig
- ../../text/string.sml
../../misc/C.sig
../../misc/C.sml
../../integer/word.sig
Modified: mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sig 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sig 2005-11-24 00:51:12 UTC (rev 4250)
@@ -242,7 +242,6 @@
structure SysWord : WORD
structure Unix : UNIX
structure UnixSock : UNIX_SOCK
-(*
structure WideChar : CHAR
structure WideCharArray : MONO_ARRAY
structure WideCharArray2 : MONO_ARRAY2
@@ -252,9 +251,8 @@
structure WideString : STRING
structure WideSubstring : SUBSTRING
structure WideText : TEXT
+(*
structure WideTextPrimIO : PRIM_IO
-*)
-(*
structure Windows : WINDOWS
*)
structure Word1: WORD
@@ -316,7 +314,37 @@
structure MLton: MLTON
structure SMLofNJ: SML_OF_NJ
structure Unsafe: UNSAFE
+
+ structure Char4 : CHAR
+ structure Char4Array : MONO_ARRAY
+ structure Char4Array2 : MONO_ARRAY2
+ structure Char4ArraySlice : MONO_ARRAY_SLICE
+ structure Char4Vector : MONO_VECTOR
+ structure Char4VectorSlice : MONO_VECTOR_SLICE
+ structure String4 : STRING
+ structure Substring4 : SUBSTRING
+ structure Text4 : TEXT
+ structure Char2 : CHAR
+ structure Char2Array : MONO_ARRAY
+ structure Char2Array2 : MONO_ARRAY2
+ structure Char2ArraySlice : MONO_ARRAY_SLICE
+ structure Char2Vector : MONO_VECTOR
+ structure Char2VectorSlice : MONO_VECTOR_SLICE
+ structure String2 : STRING
+ structure Substring2 : SUBSTRING
+ structure Text2 : TEXT
+
+ structure Char1 : CHAR
+ structure Char1Array : MONO_ARRAY
+ structure Char1Array2 : MONO_ARRAY2
+ structure Char1ArraySlice : MONO_ARRAY_SLICE
+ structure Char1Vector : MONO_VECTOR
+ structure Char1VectorSlice : MONO_VECTOR_SLICE
+ structure String1 : STRING
+ structure Substring1 : SUBSTRING
+ structure Text1 : TEXT
+
sharing type MLton.IntInf.t = IntInf.int
sharing type MLton.Process.pid = Posix.Process.pid
sharing type MLton.ProcEnv.gid = Posix.ProcEnv.gid
@@ -578,6 +606,38 @@
sharing type Real64Array2.elem = Real64.real
sharing type Real64Array2.vector = Real64Vector.vector
sharing type Unix.exit_status = Posix.Process.exit_status
+ sharing type WideChar.string = WideString.string
+ sharing type WideCharArray.elem = WideChar.char
+ sharing type WideCharArray.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.elem = WideChar.char
+ sharing type WideCharArraySlice.array = WideCharArray.array
+ sharing type WideCharArraySlice.vector = WideCharVector.vector
+ sharing type WideCharArraySlice.vector_slice = WideCharVectorSlice.slice
+ sharing type WideCharVector.elem = WideChar.char
+ sharing type WideCharVector.vector = WideString.string
+ sharing type WideCharVectorSlice.elem = WideChar.char
+ sharing type WideCharVectorSlice.vector = WideString.string
+ sharing type WideCharVectorSlice.slice = WideSubstring.substring
+ sharing type WideString.string = WideCharVector.vector
+ sharing type WideString.char = WideChar.char
+ sharing type WideSubstring.substring = WideCharVectorSlice.slice
+ sharing type WideSubstring.string = WideString.string
+ sharing type WideSubstring.char = WideChar.char
+ sharing type WideText.Char.char = WideChar.char
+ sharing type WideText.String.string = WideString.string
+ sharing type WideText.Substring.substring = WideSubstring.substring
+ sharing type WideText.CharVector.vector = WideCharVector.vector
+ sharing type WideText.CharArray.array = WideCharArray.array
+ sharing type WideText.CharArraySlice.slice = WideCharArraySlice.slice
+ sharing type WideText.CharVectorSlice.slice = WideCharVectorSlice.slice
+(*
+ sharing type WideTextPrimIO.array = WideCharArray.array
+ sharing type WideTextPrimIO.array_slice = WideCharArraySlice.slice
+ sharing type WideTextPrimIO.elem = WideChar.char
+ sharing type WideTextPrimIO.pos = Position.int
+ sharing type WideTextPrimIO.vector = WideCharVector.vector
+ sharing type WideTextPrimIO.vector_slice = WideCharVectorSlice.slice
+*)
sharing type WordArray.elem = word
sharing type WordArray.vector = WordVector.vector
sharing type WordArraySlice.elem = word
@@ -627,6 +687,91 @@
sharing type MLton.BinIO.outstream = BinIO.outstream
sharing type MLton.TextIO.instream = TextIO.instream
sharing type MLton.TextIO.outstream = TextIO.outstream
+
+ (* extensions *)
+ sharing type Char4.string = String4.string
+ sharing type Char4Array.elem = Char4.char
+ sharing type Char4Array.vector = Char4Vector.vector
+ sharing type Char4ArraySlice.elem = Char4.char
+ sharing type Char4ArraySlice.array = Char4Array.array
+ sharing type Char4ArraySlice.vector = Char4Vector.vector
+ sharing type Char4ArraySlice.vector_slice = Char4VectorSlice.slice
+ sharing type Char4Vector.elem = Char4.char
+ sharing type Char4Vector.vector = String4.string
+ sharing type Char4VectorSlice.elem = Char4.char
+ sharing type Char4VectorSlice.vector = String4.string
+ sharing type Char4VectorSlice.slice = Substring4.substring
+ sharing type String4.string = Char4Vector.vector
+ sharing type String4.char = Char4.char
+ sharing type Substring4.substring = Char4VectorSlice.slice
+ sharing type Substring4.string = String4.string
+ sharing type Substring4.char = Char4.char
+ sharing type Text4.Char.char = Char4.char
+ sharing type Text4.String.string = String4.string
+ sharing type Text4.Substring.substring = Substring4.substring
+ sharing type Text4.CharVector.vector = Char4Vector.vector
+ sharing type Text4.CharArray.array = Char4Array.array
+ sharing type Text4.CharArraySlice.slice = Char4ArraySlice.slice
+ sharing type Text4.CharVectorSlice.slice = Char4VectorSlice.slice
+ sharing type Char2.string = String2.string
+ sharing type Char2Array.elem = Char2.char
+ sharing type Char2Array.vector = Char2Vector.vector
+ sharing type Char2ArraySlice.elem = Char2.char
+ sharing type Char2ArraySlice.array = Char2Array.array
+ sharing type Char2ArraySlice.vector = Char2Vector.vector
+ sharing type Char2ArraySlice.vector_slice = Char2VectorSlice.slice
+ sharing type Char2Vector.elem = Char2.char
+ sharing type Char2Vector.vector = String2.string
+ sharing type Char2VectorSlice.elem = Char2.char
+ sharing type Char2VectorSlice.vector = String2.string
+ sharing type Char2VectorSlice.slice = Substring2.substring
+ sharing type String2.string = Char2Vector.vector
+ sharing type String2.char = Char2.char
+ sharing type Substring2.substring = Char2VectorSlice.slice
+ sharing type Substring2.string = String2.string
+ sharing type Substring2.char = Char2.char
+ sharing type Text2.Char.char = Char2.char
+ sharing type Text2.String.string = String2.string
+ sharing type Text2.Substring.substring = Substring2.substring
+ sharing type Text2.CharVector.vector = Char2Vector.vector
+ sharing type Text2.CharArray.array = Char2Array.array
+ sharing type Text2.CharArraySlice.slice = Char2ArraySlice.slice
+ sharing type Text2.CharVectorSlice.slice = Char2VectorSlice.slice
+ sharing type Char1.string = String1.string
+ sharing type Char1Array.elem = Char1.char
+ sharing type Char1Array.vector = Char1Vector.vector
+ sharing type Char1ArraySlice.elem = Char1.char
+ sharing type Char1ArraySlice.array = Char1Array.array
+ sharing type Char1ArraySlice.vector = Char1Vector.vector
+ sharing type Char1ArraySlice.vector_slice = Char1VectorSlice.slice
+ sharing type Char1Vector.elem = Char1.char
+ sharing type Char1Vector.vector = String1.string
+ sharing type Char1VectorSlice.elem = Char1.char
+ sharing type Char1VectorSlice.vector = String1.string
+ sharing type Char1VectorSlice.slice = Substring1.substring
+ sharing type String1.string = Char1Vector.vector
+ sharing type String1.char = Char1.char
+ sharing type Substring1.substring = Char1VectorSlice.slice
+ sharing type Substring1.string = String1.string
+ sharing type Substring1.char = Char1.char
+ sharing type Text1.Char.char = Char1.char
+ sharing type Text1.String.string = String1.string
+ sharing type Text1.Substring.substring = Substring1.substring
+ sharing type Text1.CharVector.vector = Char1Vector.vector
+ sharing type Text1.CharArray.array = Char1Array.array
+ sharing type Text1.CharArraySlice.slice = Char1ArraySlice.slice
+ sharing type Text1.CharVectorSlice.slice = Char1VectorSlice.slice
+ (* Bind the Char1=Char and Char4=WideChar *)
+ sharing type Char1Array.array = CharArray.array
+ sharing type Char1ArraySlice.array_slice = CharArraySlice.array_slice
+ sharing type String1.string = String.string
+ sharing type Substring1.substring = Substring.string
+ sharing type Char1.char = Char.char
+ sharing type Char4Array.array = WideCharArray.array
+ sharing type Char4ArraySlice.array_slice = WideCharArraySlice.array_slice
+ sharing type String4.string = WideString.string
+ sharing type Substring4.substring = WideSubstring.string
+ sharing type Char4.char = WideChar.char
end
(* bool is already defined as bool and so cannot be shared.
* So, we where these to get the needed sharing.
@@ -773,3 +918,12 @@
where type Word31.word = Word31.word
where type Word32.word = Word32.word
where type Word64.word = Word64.word
+ (* Top-level types for wide string and char constants *)
+ where type WideChar.char = WideChar.char
+ where type WideString.string = WideString.string
+ where type Char1.char = Char1.char
+ where type Char2.char = Char2.char
+ where type Char4.char = Char4.char
+ where type String1.string = String1.string
+ where type String2.string = String2.string
+ where type String4.string = String4.string
Modified: mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml
===================================================================
--- mlton/trunk/basis-library/libs/basis-extra/top-level/basis.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/libs/basis-extra/top-level/basis.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -171,7 +171,6 @@
structure SysWord = SysWord
structure Unix = Unix
structure UnixSock = UnixSock
-(*
structure WideChar = WideChar
structure WideCharArray = WideCharArray
structure WideCharArray2 = WideCharArray2
@@ -181,6 +180,7 @@
structure WideString = WideString
structure WideSubstring = WideSubstring
structure WideText = WideText
+(*
structure WideTextPrimIO = WideTextPrimIO
*)
(*
@@ -246,6 +246,34 @@
structure MLton = MLton
structure SMLofNJ = SMLofNJ
structure Unsafe = Unsafe
+
+ structure Char4 = Char4
+ structure Char4Array = Char4Array
+ structure Char4Array2 = Char4Array2
+ structure Char4ArraySlice = Char4ArraySlice
+ structure Char4Vector = Char4Vector
+ structure Char4VectorSlice = Char4VectorSlice
+ structure String4 = String4
+ structure WideSubstring4 = Substring4
+ structure Text4 = Text4
+ structure Char2 = Char2
+ structure Char2Array = Char2Array
+ structure Char2Array2 = Char2Array2
+ structure Char2ArraySlice = Char2ArraySlice
+ structure Char2Vector = Char2Vector
+ structure Char2VectorSlice = Char2VectorSlice
+ structure String2 = String2
+ structure WideSubstring2 = Substring2
+ structure Text2 = Text2
+ structure Char1 = Char1
+ structure Char1Array = Char1Array
+ structure Char1Array2 = Char1Array2
+ structure Char1ArraySlice = Char1ArraySlice
+ structure Char1Vector = Char1Vector
+ structure Char1VectorSlice = Char1VectorSlice
+ structure String1 = String1
+ structure WideSubstring1 = Substring1
+ structure Text1 = Text1
open ArrayGlobal
BoolGlobal
Modified: mlton/branches/unicode/basis-library/misc/primitive.sml
===================================================================
--- mlton/trunk/basis-library/misc/primitive.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/misc/primitive.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -275,6 +275,8 @@
val fromInt8 = _prim "WordS8_toWord8": Int8.int -> char;
val toWord8 = _prim "WordU8_toWord8": char -> Word8.word;
val fromWord8 = _prim "WordU8_toWord8": Word8.word -> char;
+ val toWord32 = _prim "WordU8_toWord32": char -> Word32.word;
+ val fromWord32 = _prim "WordU8_toWord32": Word32.word -> char;
end
structure Char =
@@ -296,8 +298,8 @@
val ord = _prim "WordU16_toWord32": char -> int;
val toInt16 = _prim "WordS16_toWord16": char -> Int16.int;
val fromInt16 = _prim "WordS16_toWord16": Int16.int -> char;
- (* val toWord16 = _prim "WordU16_toWord16": char -> Word16.word; *)
- (* val fromWord16 = _prim "WordU16_toWord16": Word16.word -> char; *)
+ val toWord32 = _prim "WordU16_toWord32": char -> Word32.word;
+ val fromWord32 = _prim "WordU16_toWord32": Word32.word -> char;
end
structure Char4 =
@@ -309,8 +311,6 @@
val ord = _prim "WordU32_toWord32": char -> int;
val toInt32 = _prim "WordS32_toWord32": char -> Int32.int;
val fromInt32 = _prim "WordS32_toWord32": Int32.int -> char;
- (* val toWord32 = _prim "WordU32_toWord32": char -> Word32.word; *)
- (* val fromWord32 = _prim "WordU32_toWord32": Word32.word -> char; *)
end
structure CommandLine =
Copied: mlton/branches/unicode/basis-library/text/char.fun (from rev 4246, mlton/trunk/basis-library/text/char.sml)
===================================================================
--- mlton/trunk/basis-library/text/char.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.fun 2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,303 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ * Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+signature CHAR0 =
+ sig
+ eqtype char
+ eqtype string
+
+ val minChar: char
+ val maxOrd: int
+ val maxChar: char
+
+ (* raw chr,ord methods *)
+ val chr: int -> char
+ val ord: char -> int
+ val < : char * char -> bool
+
+ val toUnicode: char -> Word32.word
+ val fromUnicode: Word32.word -> char
+
+ structure CharVector :
+ MONO_VECTOR
+ where type elem = char
+ where type vector = string
+ end
+
+functor CharFn(Char0 : CHAR0) : CHAR_EXTRA =
+ struct
+ open Char0
+
+ (* required operators *)
+ fun a >= b = not (a < b)
+ fun a > b = b < a
+ fun a <= b = not (b < a)
+
+ (* for convenience: *)
+ val op + = Int.+
+ val op - = Int.-
+
+ fun succ c =
+ if Primitive.safe andalso c = maxChar
+ then raise Chr
+ else chr (ord c + 1)
+
+ fun pred c =
+ if Primitive.safe andalso c = minChar
+ then raise Chr
+ else chr (ord c - 1)
+
+ fun chrOpt c =
+ if Primitive.safe andalso Primitive.Int.gtu (c, maxOrd)
+ then NONE
+ else SOME (chr c)
+
+ fun chr c =
+ case chrOpt c of
+ NONE => raise Chr
+ | SOME c => c
+
+ val {compare, ...} = Util.makeCompare (op <)
+
+ val isUpper = Charset.isUpper o toUnicode
+ val isLower = Charset.isLower o toUnicode
+ val isDigit = Charset.isDigit o toUnicode
+ val isAlpha = Charset.isAlpha o toUnicode
+ val isGraph = Charset.isGraph o toUnicode
+ val isPrint = Charset.isPrint o toUnicode
+ val isPunct = Charset.isPunct o toUnicode
+ val isCntrl = Charset.isCntrl o toUnicode
+ val isSpace = Charset.isSpace o toUnicode
+ val isAscii = Charset.isAscii o toUnicode
+ val isAlphaNum = Charset.isAlphaNum o toUnicode
+ val isHexDigit = Charset.isHexDigit o toUnicode
+
+ val toUpper = fromUnicode o Charset.toUpper o toUnicode
+ val toLower = fromUnicode o Charset.toLower o toUnicode
+
+ (* !!! fixme, use a table in SOME situations *)
+ fun contains s c = CharVector.exists (fn d => c = d) s
+ fun notContains s c = not (contains s c)
+
+(*
+ fun control reader state =
+ case reader state of
+ NONE => NONE
+ | SOME (c, state) =>
+ if #"@" <= c andalso c <= #"_"
+ then SOME (chr (ord c -? ord #"@"), state)
+ else NONE
+
+ fun formatChar reader state =
+ case reader state of
+ NONE => NONE
+ | SOME (c, state) =>
+ if Ascii.isSpace c
+ then SOME ((), state)
+ else NONE
+
+ fun formatChars reader =
+ let
+ fun loop state =
+ case formatChar reader state of
+ NONE => state
+ | SOME ((), state) => loop state
+ in
+ loop
+ end
+
+ val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
+ fn reader =>
+ let
+ fun loop state =
+ case reader state of
+ SOME (#"\\", state1) =>
+ (case formatChar reader state1 of
+ NONE => state
+ | SOME ((), state2) =>
+ let
+ val state3 = formatChars reader state2
+ in
+ case reader state3 of
+ SOME (#"\\", state4) => loop state4
+ | _ => state
+ end)
+ | _ => state
+ in
+ loop
+ end
+
+ fun 'a scan (reader: (Char.char, 'a) StringCvt.reader)
+ : (char, 'a) StringCvt.reader =
+ let
+ val escape: (char, 'a) StringCvt.reader =
+ fn state =>
+ case reader state of
+ NONE => NONE
+ | SOME (c, state') =>
+ let
+ fun yes c = SOME (c, state')
+ in
+ case c of
+ #"a" => yes #"\a"
+ | #"b" => yes #"\b"
+ | #"t" => yes #"\t"
+ | #"n" => yes #"\n"
+ | #"v" => yes #"\v"
+ | #"f" => yes #"\f"
+ | #"r" => yes #"\r"
+ | #"\\" => yes #"\\"
+ | #"\"" => yes #"\""
+ | #"^" => control reader state'
+ | #"u" =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
+ state'
+ | _ => (* 3 decimal digits *)
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsExact (StringCvt.DEC, 3)
+ reader)
+ state
+ end
+ val main: (char, 'a) StringCvt.reader =
+ fn state =>
+ let
+ val state = formatSequences reader state
+ in
+ case reader state of
+ NONE => NONE
+ | SOME (c, state) =>
+ if isPrint c
+ then
+ case c of
+ #"\\" => escape state
+ | #"\"" => NONE
+ | _ => SOME (c, formatSequences reader state)
+ else NONE
+ end
+ in
+ main
+ end
+
+ val fromString = StringCvt.scanString scan
+
+ fun 'a scanC (reader: (char, 'a) StringCvt.reader)
+ : (char, 'a) StringCvt.reader =
+ let
+ val rec escape =
+ fn state =>
+ case reader state of
+ NONE => NONE
+ | SOME (c, state') =>
+ let fun yes c = SOME (c, state')
+ in case c of
+ #"a" => yes #"\a"
+ | #"b" => yes #"\b"
+ | #"t" => yes #"\t"
+ | #"n" => yes #"\n"
+ | #"v" => yes #"\v"
+ | #"f" => yes #"\f"
+ | #"r" => yes #"\r"
+ | #"?" => yes #"?"
+ | #"\\" => yes #"\\"
+ | #"\"" => yes #"\""
+ | #"'" => yes #"'"
+ | #"^" => control reader state'
+ | #"x" =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digits StringCvt.HEX reader)
+ state'
+ | _ =>
+ Reader.mapOpt chrOpt
+ (StringCvt.digitsPlus (StringCvt.OCT, 3) reader)
+ state
+ end
+ and main =
+ fn NONE => NONE
+ | SOME (c, state) =>
+ if isPrint c
+ then
+ case c of
+ #"\\" => escape state
+ | _ => SOME (c, state)
+ else NONE
+ in
+ main o reader
+ end
+
+ val fromCString = StringCvt.scanString scanC
+
+*)
+ fun padLeft (s: String.string, n: int): String.string =
+ let
+ val m = Char1Vector.length s
+ val diff = n -? m
+ in if Int.> (diff, 0)
+ then Char1Vector.concat [Char1Vector.tabulate (diff, fn _ => #"0"), s]
+ else if diff = 0
+ then s
+ else raise Fail "padLeft"
+ end
+
+ fun memoize (f: char -> 'a): char -> 'a =
+ let val a = Array.tabulate (numChars, f o Char.chr)
+ in fn c => Array.sub (a, Char.ord c)
+ end
+
+ val toString =
+ memoize
+ (fn c =>
+ if isPrint c
+ then
+ (case c of
+ #"\\" => "\\\\"
+ | #"\"" => "\\\""
+ | _ => CharVector.new (1, c))
+ else
+ case c of
+ #"\a" => "\\a"
+ | #"\b" => "\\b"
+ | #"\t" => "\\t"
+ | #"\n" => "\\n"
+ | #"\v" => "\\v"
+ | #"\f" => "\\f"
+ | #"\r" => "\\r"
+ | _ =>
+ if c < #" "
+ then (String.concat
+ ["\\^", CharVector.new (1, chr (ord c +? ord #"@"))])
+ else String.concat
+ ["\\", padLeft (Int.fmt StringCvt.DEC (ord c), 3)])
+
+ val toCString =
+ memoize
+ (fn c =>
+ if isPrint c
+ then
+ (case c of
+ #"\\" => "\\\\"
+ | #"\"" => "\\\""
+ | #"?" => "\\?"
+ | #"'" => "\\'"
+ | _ => String0.str c)
+ else
+ case c of
+ #"\a" => "\\a"
+ | #"\b" => "\\b"
+ | #"\t" => "\\t"
+ | #"\n" => "\\n"
+ | #"\v" => "\\v"
+ | #"\f" => "\\f"
+ | #"\r" => "\\r"
+ | _ =>
+ String.concat
+ ["\\", padLeft (Int.fmt StringCvt.OCT (ord c), 3)])
+ end
+
+structure CharGlobal: CHAR_GLOBAL = Char
+open CharGlobal
Modified: mlton/branches/unicode/basis-library/text/char.sig
===================================================================
--- mlton/trunk/basis-library/text/char.sig 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.sig 2005-11-24 00:51:12 UTC (rev 4250)
@@ -38,11 +38,11 @@
val isPrint: char -> bool
val isPunct: char -> bool
val isSpace: char -> bool
- val fromString: string -> char option
- val scan: (char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
- val toString: char -> string
- val fromCString: string -> char option
- val toCString: char -> string
+ val fromString: String.string -> char option
+ val scan: (Char.char, 'a) StringCvt.reader -> (char, 'a) StringCvt.reader
+ val toString: char -> String.string
+ val fromCString: String.string -> char option
+ val toCString: char -> String.string
end
signature CHAR_EXTRA =
Modified: mlton/branches/unicode/basis-library/text/char.sml
===================================================================
--- mlton/trunk/basis-library/text/char.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,218 +6,63 @@
* See the file MLton-LICENSE for details.
*)
-structure Char: CHAR_EXTRA =
+structure Char1 =
struct
- open Char0
-
- fun control reader state =
- case reader state of
- NONE => NONE
- | SOME (c, state) =>
- if #"@" <= c andalso c <= #"_"
- then SOME (chr (ord c -? ord #"@"), state)
- else NONE
+ open Primitive.Char1
+
+ type char = char
+ type string = string
- fun formatChar reader state =
- case reader state of
- NONE => NONE
- | SOME (c, state) =>
- if isSpace c
- then SOME ((), state)
- else NONE
+ val minChar = #"\000"
+ val maxOrd: int = 255
+ val maxChar = #"\255"
+
+ val toUnicode = Primitive.Char1.toWord32
+ val fromUnicode = Primitive.Char1.toWord32
+
+ structure CharVector = Char1Vector
+end
- fun formatChars reader =
- let
- fun loop state =
- case formatChar reader state of
- NONE => state
- | SOME ((), state) => loop state
- in
- loop
- end
-
- val 'a formatSequences: (char, 'a) StringCvt.reader -> 'a -> 'a =
- fn reader =>
- let
- fun loop state =
- case reader state of
- SOME (#"\\", state1) =>
- (case formatChar reader state1 of
- NONE => state
- | SOME ((), state2) =>
- let
- val state3 = formatChars reader state2
- in
- case reader state3 of
- SOME (#"\\", state4) => loop state4
- | _ => state
- end)
- | _ => state
- in
- loop
- end
+structure Char2 =
+ struct
+ open Primitive.Char2
+
+ type char = char
+ type string = string
+
+ val minChar = #"\u0000"
+ val maxOrd: int = 65535
+ val maxChar = #"\uFFFF"
+
+ val toUnicode = Primitive.Char2.toWord32
+ val fromUnicode = Primitive.Char2.toWord32
+
+ structure CharVector = Char2Vector
+ end
- fun 'a scan (reader: (char, 'a) StringCvt.reader)
- : (char, 'a) StringCvt.reader =
- let
- val escape: (char, 'a) StringCvt.reader =
- fn state =>
- case reader state of
- NONE => NONE
- | SOME (c, state') =>
- let
- fun yes c = SOME (c, state')
- in
- case c of
- #"a" => yes #"\a"
- | #"b" => yes #"\b"
- | #"t" => yes #"\t"
- | #"n" => yes #"\n"
- | #"v" => yes #"\v"
- | #"f" => yes #"\f"
- | #"r" => yes #"\r"
- | #"\\" => yes #"\\"
- | #"\"" => yes #"\""
- | #"^" => control reader state'
- | #"u" =>
- Reader.mapOpt chrOpt
- (StringCvt.digitsExact (StringCvt.HEX, 4) reader)
- state'
- | _ => (* 3 decimal digits *)
- Reader.mapOpt chrOpt
- (StringCvt.digitsExact (StringCvt.DEC, 3)
- reader)
- state
- end
- val main: (char, 'a) StringCvt.reader =
- fn state =>
- let
- val state = formatSequences reader state
- in
- case reader state of
- NONE => NONE
- | SOME (c, state) =>
- if isPrint c
- then
- case c of
- #"\\" => escape state
- | #"\"" => NONE
- | _ => SOME (c, formatSequences reader state)
- else NONE
- end
- in
- main
- end
-
- val fromString = StringCvt.scanString scan
-
- fun 'a scanC (reader: (char, 'a) StringCvt.reader)
- : (char, 'a) StringCvt.reader =
- let
- val rec escape =
- fn state =>
- case reader state of
- NONE => NONE
- | SOME (c, state') =>
- let fun yes c = SOME (c, state')
- in case c of
- #"a" => yes #"\a"
- | #"b" => yes #"\b"
- | #"t" => yes #"\t"
- | #"n" => yes #"\n"
- | #"v" => yes #"\v"
- | #"f" => yes #"\f"
- | #"r" => yes #"\r"
- | #"?" => yes #"?"
- | #"\\" => yes #"\\"
- | #"\"" => yes #"\""
- | #"'" => yes #"'"
- | #"^" => control reader state'
- | #"x" =>
- Reader.mapOpt chrOpt
- (StringCvt.digits StringCvt.HEX reader)
- state'
- | _ =>
- Reader.mapOpt chrOpt
- (StringCvt.digitsPlus (StringCvt.OCT, 3) reader)
- state
- end
- and main =
- fn NONE => NONE
- | SOME (c, state) =>
- if isPrint c
- then
- case c of
- #"\\" => escape state
- | _ => SOME (c, state)
- else NONE
- in
- main o reader
- end
-
- val fromCString = StringCvt.scanString scanC
-
- fun padLeft (s: string, n: int): string =
- let
- val m = String.size s
- val diff = n -? m
- in if Int.> (diff, 0)
- then String.concat [String.new (diff, #"0"), s]
- else if diff = 0
- then s
- else raise Fail "padLeft"
- end
-
- val toString =
- memoize
- (fn c =>
- if isPrint c
- then
- (case c of
- #"\\" => "\\\\"
- | #"\"" => "\\\""
- | _ => String0.str c)
- else
- case c of
- #"\a" => "\\a"
- | #"\b" => "\\b"
- | #"\t" => "\\t"
- | #"\n" => "\\n"
- | #"\v" => "\\v"
- | #"\f" => "\\f"
- | #"\r" => "\\r"
- | _ =>
- if c < #" "
- then (String.concat
- ["\\^", String0.str (chr (ord c +? ord #"@"))])
- else String.concat
- ["\\", padLeft (Int.fmt StringCvt.DEC (ord c), 3)])
+structure Char4 =
+ struct
+ open Primitive.Char2
- val toCString =
- memoize
- (fn c =>
- if isPrint c
- then
- (case c of
- #"\\" => "\\\\"
- | #"\"" => "\\\""
- | #"?" => "\\?"
- | #"'" => "\\'"
- | _ => String0.str c)
- else
- case c of
- #"\a" => "\\a"
- | #"\b" => "\\b"
- | #"\t" => "\\t"
- | #"\n" => "\\n"
- | #"\v" => "\\v"
- | #"\f" => "\\f"
- | #"\r" => "\\r"
- | _ =>
- String.concat
- ["\\", padLeft (Int.fmt StringCvt.OCT (ord c), 3)])
+ type char = char
+ type string = string
+
+ val minChar = #"\U00000000"
+ val maxOrd: int = 4294967295
+ val maxChar = #"\UFFFFFFFF"
+
+ fun toUnicode x = x
+ fun fromUnicode x = x
+
+ structure CharVector = Char4Vector
end
+structure Char1 : CHAR_EXTRA = CharFn(Char1)
+structure Char2 : CHAR_EXTRA = CharFn(Char2)
+structure Char4 : CHAR_EXTRA = CharFn(Char4)
+
+structure Char = Char1
+structure WideChar = Char4
+
structure CharGlobal: CHAR_GLOBAL = Char
open CharGlobal
-
Deleted: mlton/branches/unicode/basis-library/text/char0.sml
===================================================================
--- mlton/trunk/basis-library/text/char0.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/char0.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,97 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-structure Char0 =
- struct
- open Primitive.Int Primitive.Char
-
- type char = char
- type string = string
-
- val minChar = #"\000"
- val numChars: int = 256
- val maxOrd: int = 255
- val maxChar = #"\255"
-
- fun succ c =
- if Primitive.safe andalso c = maxChar
- then raise Chr
- else Primitive.Char.chr (ord c + 1)
-
- fun pred c =
- if Primitive.safe andalso c = minChar
- then raise Chr
- else Primitive.Char.chr (ord c - 1)
-
- fun chrOpt c =
- if Primitive.safe andalso Primitive.Int.gtu (c, maxOrd)
- then NONE
- else SOME (Primitive.Char.chr c)
-
- fun chr c =
- case chrOpt c of
- NONE => raise Chr
- | SOME c => c
-
- val {compare, ...} = Util.makeCompare (op <)
-
- structure String = String0
-
- fun oneOf s =
- let
- val a = Array.array (numChars, false)
- val n = String.size s
- fun loop i =
- if Primitive.Int.>= (i, n) then ()
- else (Array.update (a, ord (String.sub (s, i)), true)
- ; loop (i + 1))
- in loop 0
- ; fn c => Array.sub (a, ord c)
- end
- val contains = oneOf
-
- fun notOneOf s = not o (oneOf s)
- val notContains = notOneOf
-
- fun memoize (f: char -> 'a): char -> 'a =
- let val a = Array.tabulate (numChars, f o chr)
- in fn c => Array.sub (a, ord c)
- end
-
- local
- val not = fn f => memoize (not o f)
- infix or andd
- fun f or g = memoize (fn c => f c orelse g c)
- fun f andd g = memoize (fn c => f c andalso g c)
- in
- val isLower = oneOf "abcdefghijklmnopqrstuvwxyz"
- val isUpper = oneOf "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- val isDigit = oneOf "0123456789"
- val isAlpha = isUpper or isLower
- val isHexDigit = isDigit or (oneOf "abcdefABCDEF")
- val isAlphaNum = isAlpha or isDigit
- val isPrint = fn c => #" " <= c andalso c <= #"~"
- val isSpace = oneOf " \t\r\n\v\f"
- val isGraph = (not isSpace) andd isPrint
- val isPunct = isGraph andd (not isAlphaNum)
- val isCntrl = not isPrint
- val isAscii = fn c => c < #"\128"
- end
-
- local
- fun make (lower, upper, diff) =
- memoize (fn c => if lower <= c andalso c <= upper
- then chr (ord c +? diff)
- else c)
- val diff = ord #"A" - ord #"a"
- in
- val toLower = make (#"A", #"Z", ~diff)
- val toUpper = make (#"a", #"z", diff)
- end
- end
-
Added: mlton/branches/unicode/basis-library/text/charset.sml
===================================================================
--- mlton/trunk/basis-library/text/charset.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/charset.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,138 @@
+signature CHARSET =
+ sig
+ type t = Word32.word
+
+ val isAscii: t -> bool
+ val isAlpha: t -> bool
+ val isAlphaNum: t -> bool
+ val isCntrl: t -> bool
+ val isDigit: t -> bool
+ val isGraph: t -> bool
+ val isHexDigit: t -> bool
+ val isLower: t -> bool
+ val isPrint: t -> bool
+ val isSpace: t -> bool
+ val isPunct: t -> bool
+ val isUpper: t -> bool
+
+ val toUpper: t -> t
+ val toLower: t -> t
+ end
+
+structure Charset :> CHARSET =
+ struct
+ local
+ open Primitive.Word32
+ in
+ infix 5 >> << andb orb xorb
+ type t = Word32.word
+
+ (* these are computed by running ./hash *)
+ val size = 32768
+ val mask = 0wx7FFF
+ val shift = 0w14
+ val factor = 0w5390
+
+ fun hash c = ((c >> shift) * factor + c) andb mask
+
+ (* this is computed by running ./parse *)
+ val unicodedb = "12345678"
+ val udelta = Vector.fromList [ 0w12, 0w23, 0w22 ]
+ val ldelta = Vector.fromList [ 0w2, 0w3, 0w5 ]
+
+ (* decode the compressed unicode database.
+ * Each entry in the resulting table has format:
+ * bits [ 0, 21) = the Unicode code point in this bucket
+ * bits [ 21, 28) = the uppercase delta
+ * bits [ 28, 30) = CLASS = LETTER | NUMERAL | CONTROL | PUNCTUATION
+ * bits [ 30, 32) = CASE = UPPER | LOWER | WHITESPACE | (other)
+ *)
+ val CODEPOINT = 0wx001FFFFF
+ val CLASS = 0wx30000000
+ val LETTER = 0wx00000000
+ val NUMERAL = 0wx10000000
+ val CONTROL = 0wx20000000
+ val PUNCTUATION = 0wx30000000
+ val CASE = 0wxC0000000
+ val UPPER = 0wx00000000
+ val LOWER = 0wx40000000
+ val WHITESPACE = 0wx80000000
+ val DELTASHIFT = 0w21
+ val DELTAMASK = 0wx7F
+ val table =
+ let
+ val a = Array.tabulate (size, fn _ => 0w0)
+ in
+ Array.vector a
+ end
+
+ fun fetch c =
+ let
+ val x = toInt (hash c)
+ val v = Vector.sub (table, x)
+ in
+ if c = v andb CODEPOINT then SOME v else
+ (* only 6 unicode chars fail that test, catch them here: *)
+ let
+ val v = Vector.sub (table, Primitive.Int.+ (x, 1))
+ in
+ if c = v andb CODEPOINT then SOME v else NONE
+ end
+ end
+
+ fun isClass class c =
+ case fetch c of
+ NONE => false
+ | SOME v => v andb CLASS = class
+
+ val isAlpha = isClass LETTER
+ val isDigit = isClass NUMERAL
+ val isCntrl = isClass CONTROL
+ val isPunct = isClass PUNCTUATION
+
+ fun isCase cs c =
+ case fetch c of
+ NONE => false
+ | SOME v => v andb CASE = cs
+
+ val isUpper = isCase UPPER
+ val isLower = isCase LOWER
+ val isSpace = isCase WHITESPACE
+
+ (* derived methods *)
+ fun isAscii c = c < 0w128
+
+ (* Both LETTER and NUMERAL have a 0 in bit 28 *)
+ fun isAlphaNum c =
+ case fetch c of
+ NONE => false
+ | SOME v => c andb CONTROL = 0w0
+
+ fun isPrint c =
+ case fetch c of
+ NONE => false (* complement of control, excludes non-unicode *)
+ | SOME v => v andb CLASS <> CONTROL
+
+ (* printable, but not whitespace *)
+ fun isGraph c =
+ case fetch c of
+ NONE => false
+ | SOME v => v andb CLASS <> CONTROL andalso v andb CASE <> WHITESPACE
+
+ fun isHexDigit c = false (* !!! damn !!! *)
+
+ (* Use the delta tables to convert case
+ * We exploit the fact that a character cannot have both
+ * uppercase and lowercase mappings simultaneously.
+ *)
+ fun delta v = toInt (v >> DELTASHIFT andb DELTAMASK)
+ fun toLower c =
+ case fetch c of
+ NONE => c
+ | SOME v => c + Vector.sub (ldelta, delta v)
+ fun toUpper c =
+ case fetch c of
+ NONE => c
+ | SOME v => c + Vector.sub (udelta, delta v)
+ end
+ end
Modified: mlton/branches/unicode/basis-library/text/string-cvt.sml
===================================================================
--- mlton/trunk/basis-library/text/string-cvt.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string-cvt.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,9 +6,14 @@
* See the file MLton-LICENSE for details.
*)
+(* The String and Char structures are not yet available at this point.
+ * They need the StringCvt structure for their signatures.
+ * Therefore, we use CharVector methods.
+ *)
structure StringCvt: STRING_CVT_EXTRA =
struct
open Reader
+ structure Char = Primitive.Char
val wordFromInt = Primitive.Word32.fromInt
@@ -31,25 +36,22 @@
open Primitive.Int
- structure Char = Char0
- structure String = String0
-
local
fun pad f (c: char) i s =
let
- val n = String.size s
+ val n = CharVector.length s
in
if n >= i
then s
- else f (s, String0.vector (i -? n, c))
+ else f (s, CharVector.vector (i -? n, c))
end
in
- val padLeft = pad (fn (s, pad) => String.^ (pad, s))
- val padRight = pad String.^
+ val padLeft = pad (fn (s, pad) => CharVector.append (pad, s))
+ val padRight = pad CharVector.append
end
fun splitl p f src =
- let fun done chars = String0.implode (rev chars)
+ let fun done chars = CharVector.fromList (rev chars)
fun loop (src, chars) =
case f src of
NONE => (done chars, src)
@@ -63,14 +65,12 @@
fun takel p f s = #1 (splitl p f s)
fun dropl p f s = #2 (splitl p f s)
- fun skipWS x = dropl Char.isSpace x
-
type cs = int
fun stringReader (s: string): (char, cs) reader =
- fn i => if i >= String.size s
+ fn i => if i >= CharVector.length s
then NONE
- else SOME (String.sub (s, i), i + 1)
+ else SOME (CharVector.sub (s, i), i + 1)
fun 'a scanString (f: ((char, cs) reader -> ('a, cs) reader)) (s: string)
: 'a option =
@@ -79,6 +79,24 @@
| SOME (a, _) => SOME a
local
+ val numChars = 256
+ fun memoize (f: char -> 'a): char -> 'a =
+ let val a = Array.tabulate (numChars, f o Char.chr)
+ in fn c => Array.sub (a, Char.ord c)
+ end
+ fun oneOf s =
+ let
+ val a = Array.array (numChars, false)
+ val n = CharVector.length s
+ fun loop i =
+ if Primitive.Int.>= (i, n) then ()
+ else (Array.update (a, Char.ord (CharVector.sub (s, i)), true)
+ ; loop (i + 1))
+ in loop 0
+ ; fn c => Array.sub (a, Char.ord c)
+ end
+ val isSpace = oneOf " \t\r\n\v\f\u0085\u00A0" (* 85, A0 are latin spaces *)
+
fun range (add: int, cmin: char, cmax: char): char -> int option =
let val min = Char.ord cmin
in fn c => if Char.<= (cmin, c) andalso Char.<= (c, cmax)
@@ -87,7 +105,7 @@
end
fun 'a combine (ds: (char -> 'a option) list): char -> 'a option =
- Char.memoize
+ memoize
(fn c =>
let
val rec loop =
@@ -99,13 +117,14 @@
in loop ds
end)
- val bin = Char.memoize (range (0, #"0", #"1"))
- val oct = Char.memoize (range (0, #"0", #"7"))
- val dec = Char.memoize (range (0, #"0", #"9"))
+ val bin = memoize (range (0, #"0", #"1"))
+ val oct = memoize (range (0, #"0", #"7"))
+ val dec = memoize (range (0, #"0", #"9"))
val hex = combine [range (0, #"0", #"9"),
range (10, #"a", #"f"),
range (10, #"A", #"F")]
in
+ fun skipWS x = dropl isSpace x
fun charToDigit (radix: radix): char -> int option =
case radix of
BIN => bin
@@ -195,5 +214,5 @@
| SOME n => loop (n, state)
end
- fun digitToChar (n: int): char = String.sub ("0123456789ABCDEF", n)
+ fun digitToChar (n: int): char = CharVector.sub ("0123456789ABCDEF", n)
end
Copied: mlton/branches/unicode/basis-library/text/string.fun (from rev 4246, mlton/trunk/basis-library/text/string.sml)
===================================================================
--- mlton/trunk/basis-library/text/string.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string.fun 2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,70 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ * Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+signature STRING0 =
+ sig
+ structure CharVector : MONO_VECTOR_EXTRA
+ structure Char : CHAR
+ end
+
+functor StringFn(String0 : STRING0) : STRING_EXTRA =
+ struct
+ open String0 CharVector
+
+ type char = elem
+ type string = vector
+ val maxSize = maxLen
+ val size = length
+ fun extract (s, start, len) =
+ CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
+ fun substring (s, start, len) = extract (s, start, SOME len)
+ val op ^ = append
+ val new = vector
+ fun str c = new (1, c)
+ val implode = fromList
+ val explode = toList
+
+ val toLower = translate (str o Char.toLower)
+
+ local
+ fun make f = f (op = : char * char -> bool)
+ in
+ val isPrefix = make isPrefix
+ val isSubstring = make isSubvector
+ val isSuffix = make isSuffix
+ end
+ val compare = collate Char.compare
+ val {<, <=, >, >=} = Util.makeOrder compare
+
+ val toString = translate Char.toString
+ val toCString = translate Char.toCString
+
+ val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
+ fn reader =>
+ let
+ fun loop (state, cs) =
+ case Char.scan reader state of
+ NONE => SOME (implode (rev cs),
+ Char.formatSequences reader state)
+ | SOME (c, state) => loop (state, c :: cs)
+ in
+ fn state => loop (state, [])
+ end
+
+ val fromString = StringCvt.scanString scan
+
+ fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
+ : (string, 'a) StringCvt.reader =
+ fn state =>
+ Option.map (fn (cs, state) => (implode cs, state))
+ (Reader.list (scanChar reader) state)
+
+ val fromCString = StringCvt.scanString (scanString Char.scanC)
+
+ fun nullTerm s = s ^ "\000"
+ end
Modified: mlton/branches/unicode/basis-library/text/string.sml
===================================================================
--- mlton/trunk/basis-library/text/string.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -6,50 +6,13 @@
* See the file MLton-LICENSE for details.
*)
-structure String: STRING_EXTRA =
- struct
- open String0
+structure String1 = StringFn(Char1Vector)
+structure String2 = StringFn(Char2Vector)
+structure String4 = StringFn(Char4Vector)
- val toLower = translate (str o Char.toLower)
+structure String = String1
+structure WideString = String4
- local
- fun make f = f (op = : char * char -> bool)
- in
- val isPrefix = make isPrefix
- val isSubstring = make isSubvector
- val isSuffix = make isSuffix
- end
- val compare = collate Char.compare
- val {<, <=, >, >=} = Util.makeOrder compare
-
- val toString = translate Char.toString
- val toCString = translate Char.toCString
-
- val scan: (char, 'a) StringCvt.reader -> (string, 'a) StringCvt.reader =
- fn reader =>
- let
- fun loop (state, cs) =
- case Char.scan reader state of
- NONE => SOME (implode (rev cs),
- Char.formatSequences reader state)
- | SOME (c, state) => loop (state, c :: cs)
- in
- fn state => loop (state, [])
- end
-
- val fromString = StringCvt.scanString scan
-
- fun scanString scanChar (reader: (char, 'a) StringCvt.reader)
- : (string, 'a) StringCvt.reader =
- fn state =>
- Option.map (fn (cs, state) => (implode cs, state))
- (Reader.list (scanChar reader) state)
-
- val fromCString = StringCvt.scanString (scanString Char.scanC)
-
- fun nullTerm s = s ^ "\000"
- end
-
structure StringGlobal: STRING_GLOBAL = String
open StringGlobal
Deleted: mlton/branches/unicode/basis-library/text/string0.sml
===================================================================
--- mlton/trunk/basis-library/text/string0.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/string0.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -1,32 +0,0 @@
-(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
- * Jagannathan, and Stephen Weeks.
- * Copyright (C) 1997-2000 NEC Research Institute.
- *
- * MLton is released under a BSD-style license.
- * See the file MLton-LICENSE for details.
- *)
-
-structure String0 =
- struct
- open CharVector
- type char = elem
- type string = vector
- structure Substring0 =
- struct
- open CharVectorSlice
- type char = elem
- type string = vector
- type substring = slice
- end
- val maxSize = maxLen
- val size = length
- fun extract (s, start, len) =
- CharVectorSlice.vector (CharVectorSlice.slice (s, start, len))
- fun substring (s, start, len) = extract (s, start, SOME len)
- val op ^ = append
- val new = vector
- fun str c = new (1, c)
- val implode = fromList
- val explode = toList
- end
-structure Substring0 = String0.Substring0
Copied: mlton/branches/unicode/basis-library/text/substring.fun (from rev 4246, mlton/trunk/basis-library/text/substring.sml)
===================================================================
--- mlton/trunk/basis-library/text/substring.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/substring.fun 2005-11-24 00:51:12 UTC (rev 4250)
@@ -0,0 +1,51 @@
+(* Copyright (C) 1999-2005 Henry Cejtin, Matthew Fluet, Suresh
+ * Jagannathan, and Stephen Weeks.
+ * Copyright (C) 1997-2000 NEC Research Institute.
+ *
+ * MLton is released under a BSD-style license.
+ * See the file MLton-LICENSE for details.
+ *)
+
+(* The :> is to hide the type substring. We must add the where's to make char
+ * and string the same as the toplevel types.
+ *)
+functor SubstringFn(CharVectorSlice : MONO_VECTOR_SLICE) =
+ struct
+ open CharVectorSlice
+ type char = elem
+ type string = vector
+ type substring = slice
+
+ val size = length
+ val extract = slice
+ fun substring (s, start, len) = extract (s, start, SOME len)
+ val string = vector
+ val getc = getItem
+ fun first ss = Option.map #1 (getItem ss)
+ val slice = subslice
+ val explode = toList
+ local
+ fun make f = f (op = : char * char -> bool)
+ in
+ val isPrefix = make isPrefix
+ val isSubstring = make isSubvector
+ val isSuffix = make isSuffix
+ val position = make position
+ end
+ val compare = collate Char.compare
+(*
+ type cs = int
+
+ fun reader (T {str, start, size}): (char, cs) Reader.reader =
+ fn i => if i >= size
+ then NONE
+ else SOME (String.sub (str, start +? i), i + 1)
+
+ fun 'a scanSubstring
+ (f: (char, cs) Reader.reader -> ('a, int) Reader.reader)
+ (ss: substring): 'a option =
+ case f (reader ss) 0 of
+ NONE => NONE
+ | SOME (a, _) => SOME a
+*)
+ end
Modified: mlton/branches/unicode/basis-library/text/substring.sml
===================================================================
--- mlton/trunk/basis-library/text/substring.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/substring.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -9,48 +9,30 @@
(* The :> is to hide the type substring. We must add the where's to make char
* and string the same as the toplevel types.
*)
-structure Substring
+
+structure Substring1
:> SUBSTRING_EXTRA
- where type char = char
- where type string = string
- where type substring = CharVectorSlice.slice
- =
- struct
- open Substring0
+ where type char = Char1.char
+ where type string = String1.string
+ where type substring = Char1VectorSlice.slice
+ = SubstringFn(Char1VectorSlice)
- val size = length
- val extract = slice
- fun substring (s, start, len) = extract (s, start, SOME len)
- val string = vector
- val getc = getItem
- fun first ss = Option.map #1 (getItem ss)
- val slice = subslice
- val explode = toList
- local
- fun make f = f (op = : char * char -> bool)
- in
- val isPrefix = make isPrefix
- val isSubstring = make isSubvector
- val isSuffix = make isSuffix
- val position = make position
- end
- val compare = collate Char.compare
-(*
- type cs = int
-
- fun reader (T {str, start, size}): (char, cs) Reader.reader =
- fn i => if i >= size
- then NONE
- else SOME (String.sub (str, start +? i), i + 1)
-
- fun 'a scanSubstring
- (f: (char, cs) Reader.reader -> ('a, int) Reader.reader)
- (ss: substring): 'a option =
- case f (reader ss) 0 of
- NONE => NONE
- | SOME (a, _) => SOME a
-*)
- end
+structure Substring2
+ :> SUBSTRING_EXTRA
+ where type char = Char2.char
+ where type string = String2.string
+ where type substring = Char2VectorSlice.slice
+ = SubstringFn(Char2VectorSlice)
+structure Substring4
+ :> SUBSTRING_EXTRA
+ where type char = Char4.char
+ where type string = String4.string
+ where type substring = Char4VectorSlice.slice
+ = SubstringFn(Char4VectorSlice)
+
+structure Substring = Substring1
+structure WideSubstring = Substring4
+
structure SubstringGlobal: SUBSTRING_GLOBAL = Substring
open SubstringGlobal
Modified: mlton/branches/unicode/basis-library/text/text.sml
===================================================================
--- mlton/trunk/basis-library/text/text.sml 2005-11-19 20:10:35 UTC (rev 4246)
+++ mlton/branches/unicode/basis-library/text/text.sml 2005-11-24 00:51:12 UTC (rev 4250)
@@ -5,13 +5,36 @@
* See the file MLton-LICENSE for details.
*)
-structure Text: TEXT =
+structure Text1: TEXT =
struct
- structure Char = Char
- structure CharArray = CharArray
- structure CharArraySlice = CharArraySlice
- structure CharVector = CharVector
- structure CharVectorSlice = CharVectorSlice
- structure String = String
- structure Substring = Substring
+ structure Char = Char1
+ structure CharArray = Char1Array
+ structure CharArraySlice = Char1ArraySlice
+ structure CharVector = Char1Vector
+ structure CharVectorSlice = Char1VectorSlice
+ structure String = String1
+ structure Substring = Substring1
end
+structure Text2: TEXT =
+ struct
+ structure Char = Char2
+ structure CharArray = Char2Array
+ structure CharArraySlice = Char2ArraySlice
+ structure CharVector = Char2Vector
+ structure CharVectorSlice = Char2VectorSlice
+ structure String = String2
+ structure Substring = Substring2
+ end
+structure Text4: TEXT =
+ struct
+ structure Char = Char4
+ structure CharArray = Char4Array
+ structure CharArraySlice = Char4ArraySlice
+ structure CharVector = Char4Vector
+ structure CharVectorSlice = Char4VectorSlice
+ structure String = String4
+ structure Substring = Substring4
+ end
+
+structure Text = Text1
+structure WideText = Text4
Added: mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
===================================================================
(Binary files differ)
Property changes on: mlton/branches/unicode/basis-library/text/unicode-4.1.0/PropList.txt.gz
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream
Added: mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz
===================================================================
(Binary files differ)
Property changes on: mlton/branches/unicode/basis-library/text/unicode-4.1.0/UnicodeData.txt.gz
___________________________________________________________________
Name: svn:mime-type
+ application/octet-stream