[MLton-user] Why is recursion slower than C?
Wesley W. Terpstra
wesley at terpstra.ca
Tue Apr 24 08:33:37 PDT 2007
According to
http://shootout.alioth.debian.org/debian/benchmark.php?
test=recursive&lang=all
MLton is much slower than C. Indeed, if one looks at the simplest
case where it's slower (Fibonacci) the difference is over 50%.
In looking over the assembler generated for the SML function
fun fib n = if n < 2 then 1 else fib (n - 2) + fib (n - 1)
as compared to the C function
int fib(int n) {
if (n < 2) {
return 1;
}
return fib(n - 2) + fib(n - 1);
}
The main differences appear to be that:
MLton checks for stack and integer overflow (the gcc code doesn't)
MLton uses jmp instead of call/return
gcc uses leal to do the subtraction, while MLton uses subl.
What I don't understand is why the MLton version is so much slower.
The overflow checks don't seem particularly costly. I'm assuming the
problem is that MLton uses a lot of MOVs+JMP instead of the CALL
instruction. Is this correct? My assembler is really rusty, so I'm
not sure what's slower here.
From a theoretical point of view, shouldn't MLton have a performance
advantage from not having to conform to the C calling convention?
Most of the function prelude in gcc's assembler seems superfluous.
Also, it seems MLton is using %ebp for the stack pointer, instead of %
esp. Why is this? I gather this is why call/return are out.
GCC:
fib:
pushl %ebp
movl $1, %eax
movl %esp, %ebp
subl $12, %esp
movl %esi, -4(%ebp)
movl 8(%ebp), %esi
movl %ebx, -8(%ebp)
cmpl $1, %esi
jle .L12
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -1(%esi), %eax
movl %eax, (%esp)
call fib
leal (%eax,%ebx), %eax
.L12:
movl -8(%ebp), %ebx
movl -4(%ebp), %esi
movl %ebp, %esp
popl %ebp
ret
MLton:
fib_0:
L_534:
cmpl %ebp,((gcState+12)+(0*4)) // guard against stack overflow
jnb L_533
L_525:
// This seems to all be about growing the stack.
movl (c_stackP+(0*4)),%esi
xchgl %esi,%esp
subl $12,%esp
pushl $(__LINE__+9)
pushl $fileName
pushl $0x0
pushl $0x0
pushl $gcState
addl $8,%ebp
movl $L_524,(0+(-1*4))(%ebp)
movl %ebp,((gcState+8)+(0*4))
movl %esi,((gcState+0)+(0*4))
call GC_collect
addl $32,%esp
movl ((gcState+0)+(0*4)),%esi
xchgl %esi,%esp
movl ((gcState+8)+(0*4)),%ebp
jmp L_524
.p2align 4
.long 21
L_524:
addl $-8,%ebp
L_533:
// Method starts here
movl (0+((0*1)+(0*1)))(%ebp),%esi
cmpl $0x2,%esi
jl L_1284
L_532:
subl $0x2,%esi
jo L_1283 // catch integer overflow
L_530:
movl %esi,(0+((8*1)+(0*1)))(%ebp)
addl $8,%ebp
movl $L_529,(0+(-1*4))(%ebp)
jmp fib_0 // resumes control at L_529
.p2align 4
L_1283:
L_531:
movl $0x1,(globalPointerNonRoot+((0*4)+(0*1)))
movl ((gcState+1028)+(0*4)),%ebp
addl ((gcState+16)+(0*4)),%ebp
jmp *(0+(-1*4))(%ebp)
.p2align 4
L_1284:
L_526:
movl $0x1,(0+((0*1)+(0*1)))(%ebp)
jmp *(0+(-1*4))(%ebp)
.p2align 4
.long 48
L_529:
addl $-8,%ebp
movl (0+((8*1)+(0*1)))(%ebp),%esi
movl %esi,(0+((4*1)+(0*1)))(%ebp)
movl (0+((0*1)+(0*1)))(%ebp),%esi
decl %esi
movl %esi,(0+((12*1)+(0*1)))(%ebp)
addl $12,%ebp
movl $L_528,(0+(-1*4))(%ebp)
jmp fib_0
.p2align 4
.long 4
L_528:
addl $-12,%ebp
movl (0+((12*1)+(0*1)))(%ebp),%esi
movl %esi,(0+((8*1)+(0*1)))(%ebp)
movl (0+((4*1)+(0*1)))(%ebp),%edi
addl %esi,%edi
jo L_531
L_527:
movl %edi,(0+((0*1)+(0*1)))(%ebp)
jmp *(0+(-1*4))(%ebp)
More information about the MLton-user
mailing list