[MLton-commit] r5750
Vesa Karvonen
vesak at mlton.org
Mon Jul 9 19:56:57 PDT 2007
Somewhat simplified and improved generic hash function.
----------------------------------------------------------------------
U mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml
U mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig
----------------------------------------------------------------------
Modified: mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml
===================================================================
--- mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml 2007-07-10 02:50:44 UTC (rev 5749)
+++ mltonlib/trunk/com/ssh/generic/unstable/detail/value/hash.sml 2007-07-10 02:56:56 UTC (rev 5750)
@@ -4,8 +4,6 @@
* See the LICENSE file or http://mlton.org/License for details.
*)
-(* XXX Devise a better hash function. This is not pretty. *)
-
functor WithHash (Arg : WITH_HASH_DOM) : HASH_GENERIC = struct
(* <-- SML/NJ workaround *)
open TopLevel
@@ -20,28 +18,23 @@
infixr 0 -->
(* SML/NJ workaround --> *)
- structure W = Word
+ type 'a t = 'a -> {totWidth : Int.t, maxDepth : Int.t} -> Word.t
- type 'a t = 'a -> {maxWidth : Int.t, maxDepth : Int.t} -> Word.t UnOp.t
+ fun prim f : 'a t = const o f
- structure HC : sig
- val map : ('b -> 'a) -> 'a t -> 'b t
- val withConst : Word.t -> 'a t UnOp.t
- val lift : ('a -> Word.t) -> 'a t
- end = struct
- fun map b2a hA = hA o b2a
- fun withConst w hA a p r = hA a p (W.+ (w, r))
- fun lift toWord a _ r = r * 0w19 + toWord a
- end
-
structure Hash =
LayerGenericRep (structure Outer = Arg.Rep
structure Closed = MkClosedGenericRep (type 'a t = 'a t))
open Hash.This
- fun hash t v = getT t v {maxWidth = 200, maxDepth = 10} 0wx2CA4B13
+ fun hashParam t p =
+ if #totWidth p < 0 orelse #maxDepth p < 0
+ then raise Domain
+ else fn v => getT t v p
+ fun hash t = hashParam t {totWidth = 200, maxDepth = 10}
+
structure Layered = LayerDepGeneric
(structure Outer = Arg and Result = Hash
@@ -50,64 +43,67 @@
fun isoProduct ? = iso' (getP ?)
fun isoSum ? = iso' (getS ?)
- fun op *` (aT, bT) (a & b) {maxWidth, maxDepth} = let
+ fun op *` (aT, bT) (a & b) {totWidth, maxDepth} = let
val aN = Arg.numElems aT
val bN = Arg.numElems bT
- val aW = Int.quot (maxWidth * aN, aN + bN)
- val bW = maxWidth - aW
+ val aW = Int.quot (totWidth * aN, aN + bN)
+ val bW = totWidth - aW
in
- getP bT b {maxWidth = bW, maxDepth = maxDepth} o
- getP aT a {maxWidth = aW, maxDepth = maxDepth}
+ getP bT b {totWidth = bW, maxDepth = maxDepth} * 0w13 +
+ getP aT a {totWidth = aW, maxDepth = maxDepth}
end
val T = getT
fun R _ = getT
- fun product' aP a p = if #maxWidth p = 0 then id else (getP aP) a p
- val tuple = product'
- val record = product'
+ fun tuple aP a p = if #totWidth p = 0 then 0w0 else getP aP a p
+ val record = tuple
- fun op +` ? =
- Sum.sum (Pair.map (HC.withConst 0wx96BA232 o getS,
- HC.withConst 0wxCF24651 o getS) ?)
- val unit = HC.lift (Thunk.mk 0wx2F785)
+ fun op +` ? = let
+ fun withConst c f v p = Word.xorb (f v p, c)
+ in
+ Sum.sum o Pair.map (withConst 0wx96BA232 o getS,
+ withConst 0wxCF24651 o getS)
+ end ?
+ val unit = prim (Thunk.mk 0wx2F785)
fun C0 _ = unit
fun C1 _ = getT
- fun data aS a {maxDepth, maxWidth} =
- if maxDepth = 0 then id
+ fun data aS a {maxDepth, totWidth} =
+ if maxDepth = 0 then 0w0
else getS aS a {maxDepth = maxDepth - 1,
- maxWidth = Int.quot (maxWidth, 2)}
+ totWidth = totWidth}
val Y = Tie.function
fun op --> _ = failing "Hash.--> unsupported"
- fun exn _ = failing "Hash.exn unsupported"
- fun regExn _ _ = ()
+ fun refc aT = getT aT o !
- fun refc aT = HC.withConst 0wx178A2346 (HC.map ! (getT aT))
+ val int = prim Word.fromInt
- fun list xT xs {maxWidth, maxDepth} h = let
- val m = Int.quot (maxWidth, 2)
+ fun list xT xs {totWidth, maxDepth} = let
+ val m = Int.quot (totWidth, 2)
fun len n [] = n
| len n (_::xs) = if m <= n then n else len (n+1) xs
val n = len 0 xs
- val p = {maxWidth = Int.quot (maxWidth, n),
+ val p = {totWidth = Int.quot (totWidth, n),
maxDepth = maxDepth - 1}
fun lp h _ [] = h
- | lp h n (x::xs) = if n = 0 then h else lp (getT xT x p h) (n-1) xs
+ | lp h n (x::xs) =
+ if n = 0 then h else lp (h * 0w17 + getT xT x p) (n-1) xs
in
- lp h n xs
+ lp (Word.fromInt n) n xs
end
- fun hashSeq length sub hashElem s {maxWidth, maxDepth} h = let
+ fun hashSeq length sub hashElem s {totWidth, maxDepth} = let
val n = length s
+ val h = Word.fromInt n
in
- case Int.min (Int.quot (maxWidth+3, 4), Int.quot (n+1, 2)) of
+ case Int.min (Int.quot (totWidth+3, 4), Int.quot (n+1, 2)) of
0 => h
| numSamples => let
- val p = {maxWidth = Int.quot (maxWidth, numSamples),
+ val p = {totWidth = Int.quot (totWidth, numSamples),
maxDepth = maxDepth - 1}
fun lp h 0 = h
- | lp h n = lp (hashElem (sub (s, n-1)) p h) (n-1)
+ | lp h n = lp (h * 0w19 + hashElem (sub (s, n-1)) p) (n-1)
in
lp h (Int.max (numSamples, Int.min (10, n)))
end
@@ -116,27 +112,29 @@
fun array aT = hashSeq Array.length Array.sub (getT aT)
fun vector aT = hashSeq Vector.length Vector.sub (getT aT)
- val char = HC.lift (Word.fromInt o ord)
+ val char = prim (Word.fromInt o ord)
val string = hashSeq String.length String.sub char
- val bool = HC.lift (fn true => 0wx2DA745 | false => 0wx3C24A62)
- val int = HC.lift Word.fromInt
- val word = HC.lift id
+ val exn = string o Exn.message (* XXX Imprecise *)
+ fun regExn _ _ = ()
+ val bool = prim (fn true => 0wx2DA745 | false => 0wx3C24A62)
+ val word = const
+
fun mk x2V op mod (v2w, w2v) =
- HC.map (fn x => v2w (x2V x mod w2v Word.maxValue)) word
+ prim (fn x => v2w (x2V x mod w2v Word.largestPrime))
- val largeInt = mk id LargeInt.mod (Iso.swap Word.isoLargeInt)
- val largeWord = mk id LargeWord.mod LargeWord.isoWord
+ val largeInt = mk id op mod (Iso.swap Word.isoLargeInt)
+ val largeWord = mk id op mod LargeWord.isoWord
val largeReal =
let open CastLargeReal open Word in mk castToWord op mod isoWord end
val real =
let open CastReal open Word in mk castToWord op mod isoWord end
- val word8 = HC.lift Word8.toWord
- val word32 = HC.lift Word32.toWord
- val word64 = mk id Word64.mod Word64.isoWord)
+ val word8 = prim Word8.toWord
+ val word32 = prim Word32.toWord
+ val word64 = mk id op mod Word64.isoWord)
open Layered
end
Modified: mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig
===================================================================
--- mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig 2007-07-10 02:50:44 UTC (rev 5749)
+++ mltonlib/trunk/com/ssh/generic/unstable/public/value/hash.sig 2007-07-10 02:56:56 UTC (rev 5750)
@@ -10,8 +10,19 @@
signature HASH = sig
structure Hash : OPEN_GENERIC_REP
+ val hashParam :
+ ('a, 'x) Hash.t -> {totWidth : Int.t, maxDepth : Int.t} -> 'a -> Word.t
+ (**
+ * Returns a hash function. The {totWidth} and {maxDepth}
+ * parameters give some control over hashing. The {totWidth}
+ * parameter controls how many elements of sequences, like lists
+ * and vectors, will be examined. The {maxDepth} parameter
+ * controls how many times the hash function descends into a
+ * (possibly recursive) datatype.
+ *)
+
val hash : ('a, 'x) Hash.t -> 'a -> Word.t
- (** Extracts the hash function. *)
+ (** Returns the default hash function. *)
end
signature HASH_GENERIC = sig
More information about the MLton-commit
mailing list