[MLton-devel] detecting overflow with the C codegen
Stephen Weeks
MLton@mlton.org
Thu, 21 Nov 2002 10:25:52 -0800
> Still, I would think that
> using the overflow flag would be the fastest when you get it.
> Testing for overflow in C could be done by something like
> long long tmp;
>
> tmp = x * y; /* or x + y, ... */
> if (tmp != (int)tmp)
> --- overflow ---;
> The generated code is probably bad but I would guess not too horrible and
> the best you can get out of C.
I am not convinced. I've attached a C program below that does the
add in 5 different ways
1. no check
2. jo
3. single compare
4. two compares and a sub
5. long long
I've listed them in that order because that corresponds to increasing
order of running time. For example, when I run the program compiled
with gcc 3.2 -O2, I get
999999999 1006
1000000999 1132
1000000999 1131
1000000999 1346
1000000999 1402
1000000999 1913
Here, the first line is a baseline, and the next five lines are the
five methods.
I've also included the assembly below that gcc generates for each
method. It seems consistent with the runtime.
Given this, I think we should use add4 in the C codegen, and maybe put
in some code to do add3 sometime (to at least make the code size
smaller, if nothing else).
--------------------------------------------------------------------------------
#include <sys/times.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
enum {
MAXINT = 0x7FFFFFFF,
MININT = 0x80000000,
ADD_CONST = 123456,
};
void die (char *s) {
fprintf (stderr, "%s\n", s);
exit (1);
}
static void overflow () {
die ("overflow");
}
int add0 (int n1, int n2) {
return n1;
}
int add1 (int n1, int n2) {
return n1 + n2;
}
int add2 (int n1, int n2) {
__asm__ __volatile__ ("addl %1, %0\n\tjo overflow"
: "+r" (n1) : "g" (n2) : "cc");
return n1;
}
int add3 (int n1, int n2) {
if (n1 > MAXINT - ADD_CONST)
overflow ();
return n1 + n2;
}
int add4 (int n1, int n2) {
if (n1 >= 0) {
if (n2 > MAXINT - n1)
overflow ();
} else
if (n2 > MININT - n1)
overflow ();
return n1 + n2;
}
int add5 (int n1, int n2) {
long long tmp;
tmp = (long long)n1 + n2;
if (tmp != (int)tmp)
overflow ();
return tmp;
}
void test (int (*f) (int, int), int y) {
struct tms tms1;
struct tms tms2;
int i;
int x;
times (&tms1);
for (i = 0; i < 1000 * 1000 * 1000; ++i)
x = f (i, y);
times (&tms2);
fprintf (stderr, "%d %d\n",
x,
(int) (tms2.tms_utime + tms2.tms_stime
- tms1.tms_utime + tms1.tms_stime));
}
int main (int argc, char **argv) {
int y;
if (argc != 2)
die ("need an arg");
y = atoi (argv[1]);
test (add0, y);
test (add1, y);
test (add2, y);
test (add3, y);
test (add4, y);
test (add5, y);
return 0;
}
--------------------------------------------------------------------------------
add0:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax
popl %ebp
ret
--------------------------------------------------------------------------------
add1:
pushl %ebp
movl %esp, %ebp
movl 12(%ebp), %eax
movl 8(%ebp), %edx
popl %ebp
addl %edx, %eax
ret
--------------------------------------------------------------------------------
add2:
pushl %ebp
movl %esp, %ebp
movl 8(%ebp), %eax
#APP
addl 12(%ebp), %eax
jo overflow
#NO_APP
popl %ebp
ret
--------------------------------------------------------------------------------
add3:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl %ebx, -4(%ebp)
movl 8(%ebp), %ebx
cmpl $2147360191, %ebx
jg .L8
.L7:
movl 12(%ebp), %ecx
movl %ebx, %eax
movl -4(%ebp), %ebx
movl %ebp, %esp
popl %ebp
addl %ecx, %eax
ret
.p2align 4,,7
.L8:
call overflow
jmp .L7
--------------------------------------------------------------------------------
add4:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl %ebx, (%esp)
movl 8(%ebp), %ebx
movl %esi, 4(%esp)
movl 12(%ebp), %esi
testl %ebx, %ebx
js .L10
movl $2147483647, %eax
subl %ebx, %eax
cmpl %eax, %esi
jg .L14
.L12:
leal (%esi,%ebx), %eax
movl 4(%esp), %esi
movl (%esp), %ebx
movl %ebp, %esp
popl %ebp
ret
.L10:
movl $-2147483648, %eax
subl %ebx, %eax
cmpl %eax, %esi
jbe .L12
jmp .L14
--------------------------------------------------------------------------------
add5:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl %ebx, (%esp)
movl 8(%ebp), %eax
movl %esi, 4(%esp)
movl %eax, %ebx
movl %eax, %esi
movl 12(%ebp), %eax
sarl $31, %esi
cltd
addl %eax, %ebx
movl %ebx, %eax
adcl %edx, %esi
cltd
movl %esi, %ecx
xorl %edx, %ecx
xorl %ebx, %eax
orl %eax, %ecx
jne .L18
.L17:
movl %ebx, %eax
movl 4(%esp), %esi
movl (%esp), %ebx
movl %ebp, %esp
popl %ebp
ret
.p2align 4,,7
.L18:
call overflow
jmp .L17
-------------------------------------------------------
This sf.net email is sponsored by:ThinkGeek
Welcome to geek heaven.
http://thinkgeek.com/sf
_______________________________________________
MLton-devel mailing list
MLton-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mlton-devel