[MLton] cvs commit: sped up output1 a lot

Stephen Weeks sweeks@mlton.org
Thu, 1 Jan 2004 19:32:34 -0800


sweeks      04/01/01 19:32:34

  Modified:    basis-library/io stream-io.fun
  Log:
  MAIL sped up output1 a lot
  
  Made the common case fast, so that when there is space in the buffer,
  then there is a single test, store, and increment.  I also found it
  useful to use "-profile alloc -profile-il ssa" to find a spurious
  allocation of 3 words per call.  I worked around this by manually
  flattening a little.  With the rewrite, MLton is now on par with gcc
  -O2 for repeated output of a single character.  Here are the times in
  seconds on my 1.6 GHz machine to write out one billion characters.
  
  	new	old	SML/NJ
  fast C	MLton	MLton	110.44	slow C
  ------	-----	-----	------	------
  10.32	8.84	28.25	 98.48	234.93
  
  fast C is with the appropriate #defines to avoid the really slow
  thread safety:
  
  	#undef getc
  	#define getc	getc_unlocked
  	#undef putc
  	#define putc	putc_unlocked
  
  new MLton is after the change of this checkin.
  
  old MLton is before the change of this checkin.
  
  slow C is without the #defines.
  
  For completeness, here is the C and SML code.  If someone could do a
  sanity check on my timings that would be nice.
  
  --------------------------------------------------------------------------------
  
  #include <stdio.h>
  #include <stdlib.h>
  
  int main (int argc, char **argv) {
  	int count;
  	int i;
  	FILE *out;
  
  	if (2 != argc) {
  		fprintf (stderr, "usage\n");
  		return -1;
  	}
  	count =	atoi (argv[1]);
  	out = fopen ("/dev/null", "w");
  	for (i = count; i > 0; --i)
  		putc ('a', out);
  	fclose (out);
  	return 0;
  }
  
  --------------------------------------------------------------------------------
  
  val count =
     case CommandLine.arguments () of
        [count] => valOf (Int.fromString count)
      | _ => raise Fail "usage"
  
  open TextIO
  
  val out = openOut "/dev/null"
  
  fun loop n =
     if n = 0
        then ()
     else (output1 (out, #"a"); loop (n - 1))
  
  val _ = loop count
  
  val _ = closeOut out
  
  --------------------------------------------------------------------------------

Revision  Changes    Path
1.21      +95 -42    mlton/basis-library/io/stream-io.fun

Index: stream-io.fun
===================================================================
RCS file: /cvsroot/mlton/mlton/basis-library/io/stream-io.fun,v
retrieving revision 1.20
retrieving revision 1.21
diff -u -r1.20 -r1.21
--- stream-io.fun	21 Nov 2003 21:47:53 -0000	1.20
+++ stream-io.fun	2 Jan 2004 03:32:34 -0000	1.21
@@ -63,8 +63,8 @@
       (*   outstream   *)
       (*---------------*)
 
-      datatype buf = Buf of {size: int ref,
-			     array: A.array}
+      datatype buf = Buf of {array: A.array,
+			     size: int ref}
       datatype buffer_mode = NO_BUF
                            | LINE_BUF of buf
                            | BLOCK_BUF of buf
@@ -132,7 +132,7 @@
 	     | SOME writeArr => flushGen (writeArr, AS.base, AS.slice, x)
       end
 
-      fun flushBuf (writer, Buf {size, array}) =
+      fun flushBuf' (writer, size, array) =
 	 let
 	    val size' = !size 
 	 in 
@@ -140,6 +140,8 @@
 	    ; flushArr (writer, AS.slice (array, 0, SOME size'))
 	 end
 
+      fun flushBuf (writer, Buf {size, array}) = flushBuf' (writer, size, array)
+
       fun output (os as Out {augmented_writer,
 			     state, 
 			     buffer_mode, ...}, v) =
@@ -166,41 +168,79 @@
 		     | BLOCK_BUF buf => doit (buf, fn () => false)
 		 end
 	         handle exn => liftExn (outstreamName os) "output" exn
-				   
+
+      fun ensureActive (os as Out {state, ...}) =
+	 if active (!state)
+	    then ()
+	 else liftExn (outstreamName os) "output" IO.ClosedStream
+
       local
 	 val buf1 = A.array (1, someElem)
+	 fun flush (os, size, array) =
+	    let
+	       val Out {augmented_writer, ...} = os
+	    in
+	       flushBuf' (augmented_writer, size, array)
+	       handle exn => liftExn (outstreamName os) "output1" exn
+	    end
       in
-	 fun output1 (os as Out {augmented_writer, 
-				 state,
-				 buffer_mode, ...}, c) =
-	    if terminated (!state)
-	       then liftExn (outstreamName os) "output" IO.ClosedStream
-	       else let
-		       fun doit (buf as Buf {size, array}, maybe) =
-			  let
-			     val _ = if 1 + !size >= A.length array
-					then flushBuf (augmented_writer, buf)
-					else ()
-			     val _ = A.update (array, !size, c)
-			     val _ = size := !size + 1
-			     val _ = if maybe
-					then flushBuf (augmented_writer, buf)
-					else () 
-			  in
-			     ()
-			  end
-		    in
-		       case !buffer_mode of
-			  NO_BUF => (A.update (buf1, 0, c);
-				     flushArr (augmented_writer,
-					       AS.slice (buf1, 0, SOME 1)))
-			| LINE_BUF buf => doit (buf, 
-						case line of
-						   NONE => false
-						 | SOME {isLine, ...} => isLine c)
-			| BLOCK_BUF buf => doit (buf, false)
-		    end
-		    handle exn => liftExn (outstreamName os) "output1" exn
+	 (* output1 is implemented very carefully to make it fast.  Think hard
+	  * before modifying it, and test after you do, to make sure that it
+	  * hasn't been slowed down.
+	  *)
+	 fun output1 (os as Out {buffer_mode, ...}, c): unit =
+	    case !buffer_mode of
+	       BLOCK_BUF (Buf {array, size}) =>
+		  let
+		     val n = !size
+		  in
+		     (* Use the bounds check for the update to make sure there
+		      * is space to put the character in the array.
+		      *)
+		     (A.update (array, n, c)
+		      ; size := 1 + n)
+		     handle Subscript =>
+			let
+			   val _ = ensureActive os
+			   val _ = flush (os, size, array)
+			   val _ = A.update (array, 0, c)
+			   val _ = size := 1
+			in
+			   ()
+			end
+		  end
+	     | LINE_BUF (Buf {array, size}) =>
+		  let
+		     val n = !size
+		     val _ = 
+			(* Use the bounds check for the update to make sure there
+			 * is space to put the character in the array.
+			 *)
+			(A.update (array, n, c)
+			 ; size := 1 + n)
+			handle Subscript =>
+			   let
+			      val _ = ensureActive os
+			      val _ = flush (os, size, array)
+			      val _ = A.update (array, 0, c)
+			      val _ = size := 1
+			   in
+			      ()
+			   end
+		  in
+		     case line of
+			NONE => ()
+		      | SOME {isLine, ...} =>
+			   if isLine c then flush (os, size, array) else ()
+		  end
+	     | NO_BUF =>
+		  let
+		     val _ = ensureActive os
+		     val _ = A.update (buf1, 0, c)
+		     val Out {augmented_writer, ...} = os
+		  in
+		     flushArr (augmented_writer, AS.slice (buf1, 0, SOME 1))
+		  end
       end
 
       fun outputSlice (os as Out {augmented_writer,
@@ -241,6 +281,16 @@
 	       | BLOCK_BUF buf => flushBuf (augmented_writer, buf)
 	handle exn => liftExn (outstreamName os) "flushOut" exn
 
+      fun makeTerminated (Out {buffer_mode, ...}) =
+	 let
+	    fun doit (Buf {array, size}) = size := A.length array
+	 in
+	    case !buffer_mode of
+	       BLOCK_BUF b => doit b
+	     | LINE_BUF b => doit b
+	     | NO_BUF => ()
+	 end
+      
       fun closeOut (os as Out {state, ...}) =
 	if closed (!state)
 	  then ()
@@ -248,7 +298,8 @@
 		if terminated (!state)
 		  then ()
 		  else (writerSel (outstreamWriter os, #close)) ();
-		state := Closed)
+		state := Closed
+		; makeTerminated os)
 	handle exn => liftExn (outstreamName os) "closeOut" exn
 
       fun getBufferMode (os as Out {buffer_mode, ...}) =
@@ -300,12 +351,14 @@
       fun getWriter (os as Out {writer, state, buffer_mode, ...}) =
 	if closed (!state)
 	  then liftExn (outstreamName os) "getWriter" IO.ClosedStream
-	  else (flushOut os;
-		state := Terminated;
-		(writer, case !buffer_mode of
-		           NO_BUF => IO.NO_BUF
-			 | LINE_BUF _ => IO.LINE_BUF
-			 | BLOCK_BUF _ => IO.BLOCK_BUF))
+	  else (flushOut os
+		; state := Terminated
+		; makeTerminated os
+		; (writer,
+		   case !buffer_mode of
+		      NO_BUF => IO.NO_BUF
+		    | LINE_BUF _ => IO.LINE_BUF
+		    | BLOCK_BUF _ => IO.BLOCK_BUF))
 
       datatype out_pos = OutPos of {pos: pos,
 				    outstream: outstream}