new snapshot
Stephen Weeks
sweeks@intertrust.com
Fri, 7 May 1999 17:41:47 -0700 (PDT)
There is a new snapshot available at
http://www.intertrust.com/star/sweeks/mlton-1999-5-7.tgz
The changes are
* bug fix in cps/simplify-types.fun for Henry's bug
* callcc/throw added
* Profiling comments are output in C code. This was implemented by
changes to backend/{machine.{sig,fun},backend.fun}.
Each profiling comment is on a line by itself and looks like
/* PROFILE: foo */
where "foo" is the name of a Cps toplevel function. The comment
applies to all code up until the next such comment, or the end of the
C function. The profiling comments are output at the beginning of
each basic block (from the point of view of backend/machine). Thus
the same name may (and often does) appear many times. There are also
two special names, "return" and "gc", which always appear before the
return case in the chunk switch and the gc code for the chunk,
respectively. Also "initGlobals" labels the initialization code for
constants that runs once at the beginning of the program. Some code,
e.g. the trampoline, is unlabeled.
The closure converter goes to some effort (see lines 168-189 of
closure-convert/closure-convert.fun) to map functions names in the
input Sxml to the same name in the Cps. The passes prior to closure
conversion also try to preserve names, so, in the end many of the
names should look like the name in the source (with a suffix added).
For now, the profiling comments are always added to the C. The -p
switch only controls how gcc is called.
Henry, in case you lost it, here is your old python script from last
September that did the data collection.
Enjoy.
--------------------------------------------------------------------------------
#!/usr/bin/env python
import regex
import os # popen, path.basename
import string # atoi
import sys # argv, exit, stderr
def main(argv):
if len(argv) <> 4:
die('Usage: %s c-file a.out-file gmon-file'
% os.path.basename(argv[0]))
lineFunc = readC(argv[1])
(mainFwa, mainLimit, addrFunc, addrLine) = readAOUT(argv[2])
counts = readgmon(argv[3])
addrFol = merge1(lineFunc, addrLine)
total = {}
for (fwa, limit, count) in counts:
addr = (fwa + limit) / 2 # wimp
fol = find(addr, addrFol)
if total.has_key(fol):
total[fol] = total[fol] + count
else:
total[fol] = count
banana = 0
widest = 0
for (fol, count) in total.items():
banana = banana + count
width = len(fol)
if widest < width:
widest = width
print '%d ticks total' % banana
banana = float(banana)
result = []
for (fol, count) in total.items():
result.append((fol, count / banana))
result.sort(countcmp)
for (name, weight) in result:
print '%-*s %6.2f%%' % (widest, name, 100.0 * weight)
sys.exit(0)
def find(addr, addrFol):
(oldaddr, oldfol) = addrFol[0]
if oldaddr > addr:
return "~unknown~"
i = 1
while i < len(addrFol):
(newaddr, newfol) = addrFol[i]
if newaddr > addr:
return oldfol
oldfol = newfol
i = i + 1
return "UNKNOWN"
def countcmp((fol1, wgt1), (fol2, wgt2)):
return wgt1 < wgt2 or (wgt1 == wgt2 and fol1 > fol2)
# This isn't quite right for lots of reasons.
# It doesn't handle things that cross boundaries correctly.
# It doesn't handle addresses that appear more than once correctly.
# Anyway, it merges the (line, fol function name) list and the
# (address, line) list into a (addr, fol function name) list.
def merge1(lineFunc, addrLine):
lfi = 0
ali = 0
res = []
if lfi < len(lineFunc) and ali < len(addrLine):
(lfline, lfname) = lineFunc[lfi]
(aladdr, alline) = addrLine[ali]
while 1:
if lfline < alline:
res.append((aladdr, lfname))
lfi = lfi + 1
if lfi == len(lineFunc):
break
(lfline, lfname) = lineFunc[lfi]
elif lfline == alline:
res.append((aladdr, lfname))
lfi = lfi + 1
ali = ali + 1
if lfi == len(lineFunc) or ali == len(addrLine):
break
(lfline, lfname) = lineFunc[lfi]
(aladdr, alline) = addrLine[ali]
else:
ali = ali + 1
if ali == len(addrLine):
break
(aladdr, alline) = addrLine[ali]
if lfi <> len(lineFunc):
die('I ran out of addresses')
return res
# Given an open file with C code in it, return a dictionary which
# maps `procedure' names to line numbers.
# Given the name of a file with C code in it, return a list with entries
# (line number, label)
# in increasing line number order.
# The labels must all occur in the function main (the last function in the
# file) and bogus labels are eliminated.
def readC(name):
f = open(name)
lnum = 0
reg = regex.compile('[ ]*void[ ]+main(int[ ]+argc,[ ]+char[ ]+\*\*argv)[ ]*{[ ]*$')
line = f.readline()
while line:
lnum = lnum + 1
if reg.match(line) >= 0:
break
line = f.readline()
else:
raise 'no main'
label = regex.compile('[A-Za-z_][0-9A-Za-z_]*:')
bogus = regex.compile('\(\(l\|return\|handler\)_[0-9][0-9]*\)\|\(default\):')
lineFunc = []
line = f.readline()
while line:
lnum = lnum + 1
len = label.match(line)
if (len >= 0) and bogus.match(line) < 0:
lineFunc.append((lnum, line[:len-1]))
line = f.readline()
f.close()
return lineFunc
# Given the name of an object file, return the line number -> address
# dictionary.
# The check to make sure that the file ends in .c is to eliminate the
# references to smlc-gc.h.
def readAOUT(name):
reg = regex.compile('\([0-9a-fA-F]+\) T \(.*\)$')
f = os.popen('nm -n ' + name)
addrFunc = []
line = f.readline()
while line:
if reg.match(line) >= 0:
addr = hextolong(reg.group(1))
func = reg.group(2)
addrFunc.append((addr, func))
line = f.readline()
f.close()
mainFwa = None
for i in xrange(len(addrFunc)):
(addr, func) = addrFunc[i]
if func == 'main':
mainFwa = addr
(mainLimit, func) = addrFunc[i + 1]
if not mainFwa:
die('main is last function, how did this happen?')
reg = regex.compile('\(.*\.c\):\([0-9]+\)$')
apart = regex.compile('\([0-9a-f]+\) <')
f = os.popen('objdump -l --start-address=%s --stop-address=%s -d %s'
% (fromL(mainFwa), fromL(mainLimit), name))
line = f.readline()
addrLine = []
while line:
if reg.match(line) >= 0:
file = reg.group(1)
lnum = string.atoi(reg.group(2))
line = f.readline()
if apart.match(line) < 0:
die('Line number followed by non-address: |%s|' % line)
addr = hextolong(apart.group(1))
addrLine.append((addr, lnum))
line = f.readline()
f.close()
addrLine.sort(addrlcmp)
return (mainFwa, mainLimit, addrFunc, addrLine)
# Comparison function for (address, line number) pairs.
def addrlcmp ((lhsaddr, lhsline), (rhsaddr, rhsline)):
return lhsaddr > rhsaddr or (lhsaddr == rhsaddr and lhsline > rhsline)
# Convert hex number to long integer.
def hextolong(str):
res = 0L
for ch in str:
res = 0x10L * res
if ('0' <= ch and ch <= '9'):
res = res + (ord(ch) - ord('0'))
elif ('a' <= ch and ch <= 'f'):
res = res + (ord(ch) + 0xa - ord('a'))
elif ('A' <= ch and ch <= 'F'):
res = res + (ord(ch) + 0xA - ord('A'))
else:
raise "bad hex number"
return res
# Given the name of a gprof file, return a list of triples:
# (low pc, limit pc, count)
# with non-zero counts.
def readgmon(name):
f = open(name, 'r')
low = crackaddr(f.read(4))
high = crackaddr(f.read(4))
plen = high - low
size = crackuint(f.read(4)) - 3*4
len = size / 2
# pc -> (pc - low) * len / plen
# ps in [low + ceil(plen*index/len), low + floor((plen*index + plen-1)/len)]
res = []
for i in xrange(len):
cnt = crackushort(f.read(2))
if (cnt <> 0):
pclhs = low + (plen * i + len-1) / len
pcrhs = low + (plen * i + plen-1) / len
res.append((pclhs, pcrhs+1, cnt))
if f.read(1):
raise 'expected EOF'
return res
def crackaddr(str):
if len(str) <> 4:
raise 'unexpected eof'
res = 0L
for i in xrange(4):
res = res + (ord(str[i]) << 8L*i)
return (res)
def crackuint(str):
if len(str) <> 4:
raise 'unexpected eof'
res = 0
for i in xrange(4):
res = res + (ord(str[i]) << 8*i)
return (res)
def crackushort(str):
if len(str) <> 2:
raise 'unexpected eof'
return (ord(str[0]) + (ord(str[1])<<8))
# Convert a long integer into unsigned.
def fromL(num):
res = ''
while 1:
digit = num % 10
num = num / 10
res = chr(digit + ord('0')) + res
if num == 0:
break;
return res
def die(str):
sys.stderr.write(str + '\n')
sys.exit(1)
if __name__ == '__main__':
main(sys.argv)
sys.exit(0)