[MLton-commit] r5751
Matthew Fluet
fluet at mlton.org
Mon Jul 9 20:03:59 PDT 2007
Native amd64 implementations of Real_{abs,neg}
----------------------------------------------------------------------
U mlton/trunk/basis-library/real/real.sml
U mlton/trunk/mlton/codegen/amd64-codegen/amd64-allocate-registers.fun
U mlton/trunk/mlton/codegen/amd64-codegen/amd64-mlton.fun
U mlton/trunk/mlton/codegen/amd64-codegen/amd64-pseudo.sig
U mlton/trunk/mlton/codegen/amd64-codegen/amd64-validate.fun
U mlton/trunk/mlton/codegen/amd64-codegen/amd64.fun
U mlton/trunk/mlton/codegen/amd64-codegen/amd64.sig
----------------------------------------------------------------------
Modified: mlton/trunk/basis-library/real/real.sml
===================================================================
--- mlton/trunk/basis-library/real/real.sml 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/basis-library/real/real.sml 2007-07-10 03:03:56 UTC (rev 5751)
@@ -112,7 +112,7 @@
val class = IEEEReal.mkClass R.class
val abs =
- if MLton.Codegen.isX86
+ if MLton.Codegen.isX86 orelse MLton.Codegen.isAmd64
then abs
else
fn x =>
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64-allocate-registers.fun
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64-allocate-registers.fun 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64-allocate-registers.fun 2007-07-10 03:03:56 UTC (rev 5751)
@@ -6710,14 +6710,15 @@
* xmm X
* src imm
* lab
- * add X
+ * add ?
*)
- fun allocateXmmSrcDst {src: Operand.t,
- dst: Operand.t,
- move_dst: bool,
- size: Size.t,
- info as {dead, remove, ...}: Liveness.t,
- registerAllocation: RegisterAllocation.t}
+ fun allocateXmmSrcDstAux {src: Operand.t,
+ address_src: bool,
+ dst: Operand.t,
+ move_dst: bool,
+ size: Size.t,
+ info as {dead, remove, ...}: Liveness.t,
+ registerAllocation: RegisterAllocation.t}
= if Operand.eq(src, dst)
then let
val {operand = final_src_dst,
@@ -6772,7 +6773,7 @@
= RA.allocateXmmOperand
{operand = src,
options = {xmmregister = true,
- address = true},
+ address = address_src},
info = info,
size = size,
move = true,
@@ -6797,7 +6798,7 @@
= RA.allocateXmmOperand
{operand = src,
options = {xmmregister = true,
- address = true},
+ address = address_src},
info = info,
size = size,
move = true,
@@ -6909,8 +6910,58 @@
assembly_dst],
registerAllocation = registerAllocation}
end
- | _ => Error.bug "amd64AllocateRegisters.Instruction.allocateXmmSrcDst"
+ | _ => Error.bug "amd64AllocateRegisters.Instruction.allocateXmmSrcDstAux"
+ (*
+ * Require src/dst operands as follows:
+ *
+ * dst
+ * reg xmm imm lab add
+ * reg
+ * xmm X
+ * src imm
+ * lab
+ * add X
+ *)
+ fun allocateXmmSrcDst {src: Operand.t,
+ dst: Operand.t,
+ move_dst: bool,
+ size: Size.t,
+ info: Liveness.t,
+ registerAllocation: RegisterAllocation.t}
+ = allocateXmmSrcDstAux {src = src,
+ address_src = true,
+ dst = dst,
+ move_dst = move_dst,
+ size = size,
+ info = info,
+ registerAllocation = registerAllocation}
+
+ (*
+ * Require src/dst operands as follows:
+ *
+ * dst
+ * reg xmm imm lab add
+ * reg
+ * xmm X
+ * src imm
+ * lab
+ * add
+ *)
+ fun allocateXmmSrcDstReg {src: Operand.t,
+ dst: Operand.t,
+ move_dst: bool,
+ size: Size.t,
+ info: Liveness.t,
+ registerAllocation: RegisterAllocation.t}
+ = allocateXmmSrcDstAux {src = src,
+ address_src = false,
+ dst = dst,
+ move_dst = move_dst,
+ size = size,
+ info = info,
+ registerAllocation = registerAllocation}
+
(*
* Require src1/src2 operands as follows:
*
@@ -9229,6 +9280,78 @@
in
default ()
end
+ | SSE_BinLP {oper, src, dst, size}
+ (* Packed SSE binary logical instructions (used as scalar).
+ * Require src/dst operands as follows:
+ *
+ * dst
+ * reg xmm imm lab add
+ * reg
+ * xmm X
+ * src imm
+ * lab
+ * add (x)
+ *
+ * Disallow address for src, since it would be a 128-bit load.
+ *)
+ => let
+ val {uses,defs,kills}
+ = Instruction.uses_defs_kills instruction
+ val {assembly = assembly_pre,
+ registerAllocation}
+ = RA.pre {uses = uses,
+ defs = defs,
+ kills = kills,
+ info = info,
+ registerAllocation = registerAllocation}
+
+ fun default ()
+ = let
+ val {final_src,
+ final_dst,
+ assembly_src_dst,
+ registerAllocation}
+ = allocateXmmSrcDstReg {src = src,
+ dst = dst,
+ move_dst = true,
+ size = size,
+ info = info,
+ registerAllocation = registerAllocation}
+
+ val instruction
+ = Instruction.SSE_BinLP
+ {oper = oper,
+ src = final_src,
+ dst = final_dst,
+ size = size}
+
+ val {uses = final_uses,
+ defs = final_defs,
+ ...}
+ = Instruction.uses_defs_kills instruction
+
+ val {assembly = assembly_post,
+ registerAllocation}
+ = RA.post {uses = uses,
+ final_uses = final_uses,
+ defs = defs,
+ final_defs = final_defs,
+ kills = kills,
+ info = info,
+ registerAllocation = registerAllocation}
+ in
+ {assembly
+ = AppendList.appends
+ [assembly_pre,
+ assembly_src_dst,
+ AppendList.single
+ (Assembly.instruction instruction),
+ assembly_post],
+ registerAllocation = registerAllocation}
+ end
+ in
+ default ()
+ end
| SSE_MOVS {src, dst, size}
(* Scalar SSE move instruction.
* Require src/dst operands as follows:
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64-mlton.fun
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64-mlton.fun 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64-mlton.fun 2007-07-10 03:03:56 UTC (rev 5751)
@@ -61,7 +61,7 @@
| Real_Math_sin _ => false
| Real_Math_sqrt _ => true
| Real_Math_tan _ => false
- | Real_abs _ => false (* !! *)
+ | Real_abs _ => true
| Real_add _ => true
| Real_castToWord _ => true
| Real_div _ => true
@@ -72,7 +72,7 @@
| Real_mul _ => true
| Real_muladd _ => true
| Real_mulsub _ => true
- | Real_neg _ => false (* !! *)
+ | Real_neg _ => true
| Real_qequal _ => true
| Real_rndToReal _ => true
| Real_rndToWord (_, _, {signed}) => signed
@@ -675,6 +675,42 @@
transfer = NONE}]
end
| Real_Math_sqrt _ => sse_unas Instruction.SSE_SQRTS
+ | Real_abs s =>
+ let
+ val (dst,dstsize) = getDst1 ()
+ val (src,srcsize) = getSrc1 ()
+ val _
+ = Assert.assert
+ ("amd64MLton.prim: Real_abs, dstsize/srcsize",
+ fn () => srcsize = dstsize)
+ fun mkConst wordSize
+ = WordX.rshift
+ (WordX.allOnes wordSize,
+ WordX.one wordSize,
+ {signed = false})
+
+ val (const,constsize)
+ = case s of
+ R32 => (mkConst WordSize.word32, Size.LONG)
+ | R64 => (mkConst WordSize.word64, Size.QUAD)
+ in
+ AppendList.fromList
+ [Block.mkBlock'
+ {entry = NONE,
+ statements
+ = [Assembly.instruction_sse_movd
+ {dst = dst,
+ dstsize = dstsize,
+ src = Operand.immediate_word const,
+ srcsize = constsize},
+ Assembly.instruction_sse_binlp
+ {oper = Instruction.SSE_ANDP,
+ src = src,
+ dst = dst,
+ size = dstsize}],
+ transfer = NONE}]
+
+ end
| Real_add _ => sse_binas Instruction.SSE_ADDS
| Real_castToWord _ => sse_movd ()
| Real_div _ => sse_binas Instruction.SSE_DIVS
@@ -789,6 +825,42 @@
| Real_mul _ => sse_binas Instruction.SSE_MULS
| Real_muladd _ => sse_binas_mul Instruction.SSE_ADDS
| Real_mulsub _ => sse_binas_mul Instruction.SSE_SUBS
+ | Real_neg s =>
+ let
+ val (dst,dstsize) = getDst1 ()
+ val (src,srcsize) = getSrc1 ()
+ val _
+ = Assert.assert
+ ("amd64MLton.prim: Real_neg, dstsize/srcsize",
+ fn () => srcsize = dstsize)
+ fun mkConst wordSize
+ = (WordX.notb o WordX.rshift)
+ (WordX.allOnes wordSize,
+ WordX.one wordSize,
+ {signed = false})
+
+ val (const,constsize)
+ = case s of
+ R32 => (mkConst WordSize.word32, Size.LONG)
+ | R64 => (mkConst WordSize.word64, Size.QUAD)
+ in
+ AppendList.fromList
+ [Block.mkBlock'
+ {entry = NONE,
+ statements
+ = [Assembly.instruction_sse_movd
+ {dst = dst,
+ dstsize = dstsize,
+ src = Operand.immediate_word const,
+ srcsize = constsize},
+ Assembly.instruction_sse_binlp
+ {oper = Instruction.SSE_XORP,
+ src = src,
+ dst = dst,
+ size = dstsize}],
+ transfer = NONE}]
+
+ end
| Real_qequal _ =>
let
val (dst,dstsize) = getDst1 ()
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64-pseudo.sig
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64-pseudo.sig 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64-pseudo.sig 2007-07-10 03:03:56 UTC (rev 5751)
@@ -217,6 +217,12 @@
(* Scalar SSE unary arithmetic instructions. *)
datatype sse_unas
= SSE_SQRTS (* square root; p. 360,362 *)
+ (* Packed SSE binary logical instructions (used as scalar). *)
+ datatype sse_binlp
+ = SSE_ANDNP (* and-not; p. 17,19 *)
+ | SSE_ANDP (* and; p. 21,23 *)
+ | SSE_ORP (* or; p. 206,208 *)
+ | SSE_XORP (* xor; p. 391,393 *)
type t
end
@@ -326,6 +332,10 @@
src: Operand.t,
dst: Operand.t,
size: Size.t} -> t
+ val instruction_sse_binlp : {oper: Instruction.sse_binlp,
+ src: Operand.t,
+ dst: Operand.t,
+ size: Size.t} -> t
val instruction_sse_movs : {src: Operand.t,
dst: Operand.t,
size: Size.t} -> t
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64-validate.fun
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64-validate.fun 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64-validate.fun 2007-07-10 03:03:56 UTC (rev 5751)
@@ -988,6 +988,62 @@
| _ => (Operand.validate {operand = src}) andalso
(Operand.validate {operand = dst})
end
+ | SSE_BinLP {src, dst, size, ...}
+ (* Packed SSE binary logical instructions (used as scalar).
+ * Require src/dst operands as follows:
+ *
+ * dst
+ * reg xmm imm lab add
+ * reg
+ * xmm X
+ * src imm
+ * lab
+ * add (x)
+ *
+ * Require size modifier class as follows: FLT
+ * Disallow address for src, since it would be a 128-bit load.
+ *)
+ => let
+ val _ = if Size.class size = Size.FLT
+ then ()
+ else Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, size"
+ val _ = case Operand.size src
+ of NONE => ()
+ | SOME srcsize
+ => if srcsize = size
+ then ()
+ else Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, srcsize"
+ val _ = case Operand.size dst
+ of NONE => ()
+ | SOME dstsize
+ => if dstsize = size
+ then ()
+ else Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dstsize"
+ in
+ case (src,dst)
+ of (Operand.MemLoc _, _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, src:MemLoc"
+ | (_, Operand.MemLoc _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dst:MemLoc"
+ | (Operand.Register _, _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, src:Register"
+ | (Operand.Immediate _, _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, src:Immediate"
+ | (Operand.Label _, _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, src:Label"
+ | (Operand.Address _, _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, src:Address"
+ | (_, Operand.Register _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dst:Register"
+ | (_, Operand.Immediate _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dst:Immediate"
+ | (_, Operand.Label _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dst:Label"
+ | (_, Operand.Address _)
+ => Error.bug "amd64Validate.Instruction.validate: SSE_BinLP, dst:Address"
+ | _ => (Operand.validate {operand = src}) andalso
+ (Operand.validate {operand = dst})
+ end
| SSE_MOVS {src, dst, size, ...}
(* Scalar SSE move instruction.
* Require src/dst operands as follows:
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64.fun
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64.fun 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64.fun 2007-07-10 03:03:56 UTC (rev 5751)
@@ -1601,8 +1601,22 @@
in
fn SSE_SQRTS => str "sqrts"
end
+ (* Packed SSE binary logical instructions (used as scalar). *)
+ datatype sse_binlp
+ = SSE_ANDNP (* and-not; p. 17,19 *)
+ | SSE_ANDP (* and; p. 21,23 *)
+ | SSE_ORP (* or; p. 206,208 *)
+ | SSE_XORP (* xor; p. 391,393 *)
+ val sse_binlp_layout
+ = let
+ open Layout
+ in
+ fn SSE_ANDNP => str "andnp"
+ | SSE_ANDP => str "andp"
+ | SSE_ORP => str "orp"
+ | SSE_XORP => str "xorp"
+ end
-
(* amd64 Instructions.
* src operands are not changed by the instruction.
* dst operands are changed by the instruction.
@@ -1742,6 +1756,12 @@
src: Operand.t,
dst: Operand.t,
size: Size.t}
+ (* Packed SSE binary logical instructions (used as scalar).
+ *)
+ | SSE_BinLP of {oper: sse_binlp,
+ src: Operand.t,
+ dst: Operand.t,
+ size: Size.t}
(* Scalar SSE move instruction.
*)
| SSE_MOVS of {src: Operand.t,
@@ -1961,6 +1981,11 @@
Size.layout size,
Operand.layout src,
Operand.layout dst)
+ | SSE_BinLP {oper, src, dst, size}
+ => bin (sse_binlp_layout oper,
+ Size.layout size,
+ Operand.layout src,
+ Operand.layout dst)
| SSE_MOVS {src, dst, size}
=> bin (str "movs",
Size.layout size,
@@ -2163,6 +2188,8 @@
=> {uses = [src, dst], defs = [dst], kills = []}
| SSE_UnAS {src, dst, ...}
=> {uses = [src], defs = [dst], kills = []}
+ | SSE_BinLP {src, dst, ...}
+ => {uses = [src, dst], defs = [dst], kills = []}
| SSE_MOVS {src, dst, ...}
=> {uses = [src], defs = [dst], kills = []}
| SSE_COMIS {src1, src2, ...}
@@ -2402,6 +2429,8 @@
=> {srcs = SOME [src, dst], dsts = SOME [dst]}
| SSE_UnAS {src, dst, ...}
=> {srcs = SOME [src], dsts = SOME [dst]}
+ | SSE_BinLP {src, dst, ...}
+ => {srcs = SOME [src, dst], dsts = SOME [dst]}
| SSE_MOVS {src, dst, ...}
=> {srcs = SOME [src], dsts = SOME [dst]}
| SSE_COMIS {src1, src2, ...}
@@ -2529,6 +2558,11 @@
src = replacer {use = true, def = false} src,
dst = replacer {use = false, def = true} dst,
size = size}
+ | SSE_BinLP {oper, src, dst, size}
+ => SSE_BinLP {oper = oper,
+ src = replacer {use = true, def = false} src,
+ dst = replacer {use = true, def = true} dst,
+ size = size}
| SSE_MOVS {src, dst, size}
=> SSE_MOVS {src = replacer {use = true, def = false} src,
dst = replacer {use = false, def = true} dst,
@@ -2590,6 +2624,7 @@
val lea = LEA
val sse_binas = SSE_BinAS
val sse_unas = SSE_UnAS
+ val sse_binlp = SSE_BinLP
val sse_movs = SSE_MOVS
val sse_comis = SSE_COMIS
val sse_ucomis = SSE_UCOMIS
@@ -3291,6 +3326,7 @@
val instruction_lea = Instruction o Instruction.lea
val instruction_sse_binas = Instruction o Instruction.sse_binas
val instruction_sse_unas = Instruction o Instruction.sse_unas
+ val instruction_sse_binlp = Instruction o Instruction.sse_binlp
val instruction_sse_movs = Instruction o Instruction.sse_movs
val instruction_sse_comis = Instruction o Instruction.sse_comis
val instruction_sse_ucomis = Instruction o Instruction.sse_ucomis
Modified: mlton/trunk/mlton/codegen/amd64-codegen/amd64.sig
===================================================================
--- mlton/trunk/mlton/codegen/amd64-codegen/amd64.sig 2007-07-10 02:56:56 UTC (rev 5750)
+++ mlton/trunk/mlton/codegen/amd64-codegen/amd64.sig 2007-07-10 03:03:56 UTC (rev 5751)
@@ -432,6 +432,12 @@
(* Scalar SSE unary arithmetic instructions. *)
datatype sse_unas
= SSE_SQRTS (* square root; p. 360,362 *)
+ (* Packed SSE binary logical instructions (used as scalar). *)
+ datatype sse_binlp
+ = SSE_ANDNP (* and-not; p. 17,19 *)
+ | SSE_ANDP (* and; p. 21,23 *)
+ | SSE_ORP (* or; p. 206,208 *)
+ | SSE_XORP (* xor; p. 391,393 *)
(* amd64 Instructions.
* src operands are not changed by the instruction.
@@ -572,6 +578,12 @@
src: Operand.t,
dst: Operand.t,
size: Size.t}
+ (* Packed SSE binary logic instructions (used as scalar).
+ *)
+ | SSE_BinLP of {oper: sse_binlp,
+ src: Operand.t,
+ dst: Operand.t,
+ size: Size.t}
(* Scalar SSE move instruction.
*)
| SSE_MOVS of {src: Operand.t,
@@ -938,6 +950,10 @@
src: Operand.t,
dst: Operand.t,
size: Size.t} -> t
+ val instruction_sse_binlp : {oper: Instruction.sse_binlp,
+ src: Operand.t,
+ dst: Operand.t,
+ size: Size.t} -> t
val instruction_sse_movs : {src: Operand.t,
dst: Operand.t,
size: Size.t} -> t
More information about the MLton-commit
mailing list