[MLton-user] Why is recursion slower than C?

Wed Apr 25 16:32:31 PDT 2007

* Scott Cruzen <sic at lerp.com> [070425 16:13]:
> * Wesley W. Terpstra <wesley at terpstra.ca> [070425 09:40]:
> > On Apr 25, 2007, at 5:44 PM, Neal Glew wrote:
> > >Intel hardware (probably AMD as well) has a return address stack as
> > >part of the branch prediction logic.  This stack stores the most
> > >recent n (for some n) addresses pushed by call instructions.  At a ret
> > >instruction the top of this stack is used to predict where the return
> > >will return to (and the stack is popped).  Not using these
> > >instructions means not using this part of the branch prediction logic
> > >and generally results in very poor branch prediction for returns, and
> > >thus much stalling.  I suspect that this accounts for most of the
> > >performance difference you are experiencing.
> > 
> > Yeah. It sounds like we're all in agreement about why C is faster  
> > here. It's a shame because one would really like SML to be fast at  
> > recursion. As long as
> > >>Also, it seems MLton is using %ebp for the stack pointer, instead  
> > >>of %esp. Why is this? I gather this is why call/return are out
> > >In a MLton compiled program, there are effectively two stacks: the  
> > >system C stack and the ML stack (allocated in the ML heap, which in  
> > >turn is allocated in the system C heap).
> > this remains true, it seems unlikely that MLton will ever be able to  
> > compete with C on recursion with two non-tail branches.
> 
> I think that the reason for this is somewhat simpler. GCC is converting
> the recursion into a loop and then unrolling that loop.
> 

Note that the assembly in the first email is not the same as the
assembly for the fib function as generated by GCC in the shootout.

The shootout uses "-pipe -Wall -O3 -fomit-frame-pointer -march=pentium4"
which generates a significantly longer (and faster) fib function.

fib:
        pushl   %ebp
        pushl   %edi
        pushl   %esi
        pushl   %ebx
        subl    $36, %esp
        movl    56(%esp), %ebp
        movl    $1, %eax
        cmpl    $1, %ebp
        jle     .L4
        leal    -2(%ebp), %edi
        movl    $1, 4(%esp)
        cmpl    $1, %edi
        jle     .L7
        leal    -4(%ebp), %esi
        movl    $1, 8(%esp)
        cmpl    $1, %esi
        jle     .L10
        leal    -6(%ebp), %eax
        movl    $1, 12(%esp)
        subl    $1, %eax
        jle     .L13
        leal    -8(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -7(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        addl    %eax, %ebx
        movl    %ebx, 12(%esp)
.L13:
        movl    $1, %eax
        cmpl    $2, %esi
        je      .L16
        leal    -3(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -2(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %eax
.L16:
        addl    12(%esp), %eax
        movl    %eax, 8(%esp)
.L10:
        leal    -1(%edi), %esi
        movl    $1, %eax
        cmpl    $1, %esi
        je      .L19
        leal    -3(%edi), %eax
        movl    $1, 16(%esp)
        subl    $1, %eax
        jle     .L22
        leal    -5(%edi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -4(%edi), %eax
        movl    %eax, (%esp)
        call    fib
        addl    %eax, %ebx
        movl    %ebx, 16(%esp)
.L22:
        movl    $1, %eax
        cmpl    $2, %esi
        je      .L25
        leal    -3(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -2(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %eax
.L25:
        addl    16(%esp), %eax
.L19:
        addl    8(%esp), %eax
        movl    %eax, 4(%esp)
.L7:
        leal    -1(%ebp), %edi
        movl    $1, %eax
        cmpl    $1, %edi
        je      .L28
        leal    -3(%ebp), %esi
        movl    $1, 20(%esp)
        cmpl    $1, %esi
        movl    $1, 20(%esp)
        cmpl    $1, %esi
        jle     .L31
        leal    -5(%ebp), %eax
        movl    $1, 24(%esp)
        subl    $1, %eax
        jle     .L34
        leal    -7(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -6(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        addl    %eax, %ebx
        movl    %ebx, 24(%esp)
.L34:
        movl    $1, %eax
        cmpl    $2, %esi
        je      .L37
        leal    -3(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -2(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %eax
.L37:
        addl    24(%esp), %eax
        movl    %eax, 20(%esp)
.L31:
        leal    -1(%edi), %ebp
        movl    $1, %eax
        cmpl    $1, %ebp
        je      .L40
        leal    -3(%edi), %esi
        movl    $1, 28(%esp)
        cmpl    $1, %esi
        jle     .L43
        leal    -5(%edi), %eax
        movl    $1, 32(%esp)
        subl    $1, %eax
        jle     .L46
        leal    -7(%edi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -6(%edi), %eax
        movl    %eax, (%esp)
        call    fib
        addl    %eax, %ebx
        movl    %ebx, 32(%esp)
.L46:
        movl    $1, %eax
        cmpl    $2, %esi
        je      .L49
        leal    -3(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -2(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %eax
.L49:
        addl    32(%esp), %eax
        movl    %eax, 28(%esp)
.L43:
        leal    -1(%ebp), %esi
        movl    $1, %eax
        cmpl    $1, %esi
        je      .L52
        leal    -3(%ebp), %eax
        movl    $1, %edi
        subl    $1, %eax
        jle     .L55
        leal    -5(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -4(%ebp), %eax
        movl    %eax, %ebx
        leal    -4(%ebp), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %edi
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %edi
.L55:
        movl    $1, %eax
        cmpl    $2, %esi
        je      .L58
        leal    -3(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        movl    %eax, %ebx
        leal    -2(%esi), %eax
        movl    %eax, (%esp)
        call    fib
        leal    (%ebx,%eax), %eax
.L58:
        leal    (%edi,%eax), %eax
.L52:
        addl    28(%esp), %eax
.L40:
        addl    20(%esp), %eax
.L28:
        addl    4(%esp), %eax
.L4:
        addl    $36, %esp
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        ret