[MLton-user] Why is recursion slower than C?
Scott Cruzen
sic at lerp.com
Wed Apr 25 16:32:31 PDT 2007
* Scott Cruzen <sic at lerp.com> [070425 16:13]:
> * Wesley W. Terpstra <wesley at terpstra.ca> [070425 09:40]:
> > On Apr 25, 2007, at 5:44 PM, Neal Glew wrote:
> > >Intel hardware (probably AMD as well) has a return address stack as
> > >part of the branch prediction logic. This stack stores the most
> > >recent n (for some n) addresses pushed by call instructions. At a ret
> > >instruction the top of this stack is used to predict where the return
> > >will return to (and the stack is popped). Not using these
> > >instructions means not using this part of the branch prediction logic
> > >and generally results in very poor branch prediction for returns, and
> > >thus much stalling. I suspect that this accounts for most of the
> > >performance difference you are experiencing.
> >
> > Yeah. It sounds like we're all in agreement about why C is faster
> > here. It's a shame because one would really like SML to be fast at
> > recursion. As long as
> > >>Also, it seems MLton is using %ebp for the stack pointer, instead
> > >>of %esp. Why is this? I gather this is why call/return are out
> > >In a MLton compiled program, there are effectively two stacks: the
> > >system C stack and the ML stack (allocated in the ML heap, which in
> > >turn is allocated in the system C heap).
> > this remains true, it seems unlikely that MLton will ever be able to
> > compete with C on recursion with two non-tail branches.
>
> I think that the reason for this is somewhat simpler. GCC is converting
> the recursion into a loop and then unrolling that loop.
>
Note that the assembly in the first email is not the same as the
assembly for the fib function as generated by GCC in the shootout.
The shootout uses "-pipe -Wall -O3 -fomit-frame-pointer -march=pentium4"
which generates a significantly longer (and faster) fib function.
fib:
pushl %ebp
pushl %edi
pushl %esi
pushl %ebx
subl $36, %esp
movl 56(%esp), %ebp
movl $1, %eax
cmpl $1, %ebp
jle .L4
leal -2(%ebp), %edi
movl $1, 4(%esp)
cmpl $1, %edi
jle .L7
leal -4(%ebp), %esi
movl $1, 8(%esp)
cmpl $1, %esi
jle .L10
leal -6(%ebp), %eax
movl $1, 12(%esp)
subl $1, %eax
jle .L13
leal -8(%ebp), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -7(%ebp), %eax
movl %eax, (%esp)
call fib
addl %eax, %ebx
movl %ebx, 12(%esp)
.L13:
movl $1, %eax
cmpl $2, %esi
je .L16
leal -3(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %eax
.L16:
addl 12(%esp), %eax
movl %eax, 8(%esp)
.L10:
leal -1(%edi), %esi
movl $1, %eax
cmpl $1, %esi
je .L19
leal -3(%edi), %eax
movl $1, 16(%esp)
subl $1, %eax
jle .L22
leal -5(%edi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -4(%edi), %eax
movl %eax, (%esp)
call fib
addl %eax, %ebx
movl %ebx, 16(%esp)
.L22:
movl $1, %eax
cmpl $2, %esi
je .L25
leal -3(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %eax
.L25:
addl 16(%esp), %eax
.L19:
addl 8(%esp), %eax
movl %eax, 4(%esp)
.L7:
leal -1(%ebp), %edi
movl $1, %eax
cmpl $1, %edi
je .L28
leal -3(%ebp), %esi
movl $1, 20(%esp)
cmpl $1, %esi
movl $1, 20(%esp)
cmpl $1, %esi
jle .L31
leal -5(%ebp), %eax
movl $1, 24(%esp)
subl $1, %eax
jle .L34
leal -7(%ebp), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -6(%ebp), %eax
movl %eax, (%esp)
call fib
addl %eax, %ebx
movl %ebx, 24(%esp)
.L34:
movl $1, %eax
cmpl $2, %esi
je .L37
leal -3(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %eax
.L37:
addl 24(%esp), %eax
movl %eax, 20(%esp)
.L31:
leal -1(%edi), %ebp
movl $1, %eax
cmpl $1, %ebp
je .L40
leal -3(%edi), %esi
movl $1, 28(%esp)
cmpl $1, %esi
jle .L43
leal -5(%edi), %eax
movl $1, 32(%esp)
subl $1, %eax
jle .L46
leal -7(%edi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -6(%edi), %eax
movl %eax, (%esp)
call fib
addl %eax, %ebx
movl %ebx, 32(%esp)
.L46:
movl $1, %eax
cmpl $2, %esi
je .L49
leal -3(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %eax
.L49:
addl 32(%esp), %eax
movl %eax, 28(%esp)
.L43:
leal -1(%ebp), %esi
movl $1, %eax
cmpl $1, %esi
je .L52
leal -3(%ebp), %eax
movl $1, %edi
subl $1, %eax
jle .L55
leal -5(%ebp), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -4(%ebp), %eax
movl %eax, %ebx
leal -4(%ebp), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %edi
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %edi
.L55:
movl $1, %eax
cmpl $2, %esi
je .L58
leal -3(%esi), %eax
movl %eax, (%esp)
call fib
movl %eax, %ebx
leal -2(%esi), %eax
movl %eax, (%esp)
call fib
leal (%ebx,%eax), %eax
.L58:
leal (%edi,%eax), %eax
.L52:
addl 28(%esp), %eax
.L40:
addl 20(%esp), %eax
.L28:
addl 4(%esp), %eax
.L4:
addl $36, %esp
popl %ebx
popl %esi
popl %edi
popl %ebp
ret
More information about the MLton-user
mailing list