[MLton-commit] r6441
spoons at mlton.org
Mon Mar 3 07:26:30 PST 2008
Support processor-local allocation.
The GC_heap structure now defines the global nursery, while each
processor's GC_state tracks the per-processor allocation area.
Allocation from a processor-local area proceeds just as in the single-
processor version of MLton. When processor-local space is exhausted,
the processor attempts to take a chunk from the global space using an
atomic compare-and-swap operation. If no global space is available,
the processor signals that a collection is necessary and waits for the
remaining processors to catch up.
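In outline, taking a chunk from the global space works as sketched below.
(This is a minimal sketch distilled from maybeSatisfyAllocationRequestLocally
in the garbage-collection.c diff that follows; the name tryClaimChunk is
illustrative, and the real code additionally fills the gap left at the old
local frontier and bails out early if another processor has requested a
collection.)

    static bool tryClaimChunk (GC_state s, size_t bytesRequested) {
      while (TRUE) {
        /* The only read of the global frontier before the swap. */
        pointer oldFrontier = s->heap->frontier;
        pointer newFrontier = oldFrontier + bytesRequested + GC_BONUS_SLOP;
        if (newFrontier > s->heap->start + s->heap->availableSize)
          return FALSE;  /* no global space left: a collection is needed */
        if (__sync_bool_compare_and_swap (&s->heap->frontier,
                                          oldFrontier, newFrontier)) {
          /* Won the race: [oldFrontier, newFrontier) is now ours. */
          s->start = s->frontier = oldFrontier;
          s->limitPlusSlop = newFrontier - GC_BONUS_SLOP;
          s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
          return TRUE;
        }
        /* CAS lost to another processor; retry against the new frontier. */
      }
    }
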
One complicating factor is that we must ensure that the unused portion
of a processor's allocation area can always be filled with a well-formed
heap object. To do so, we add a small amount of extra space past the
limit of each processor-local allocation area, which guarantees that the
leftover gap is always large enough to hold such a fill object. This
additional space is known as the "bonus slop" in the code.
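
Concretely, the bonus slop only needs to be large enough that any residual
gap can be described in place by a fill object. Here is a condensed sketch
of the small-gap case (cf. fillGap in garbage-collection.c and the
GC_BONUS_SLOP definition in object.h below; fillSmallGap is an illustrative
name, and the real fillGap also handles header-only gaps and fills larger
gaps with a dummy word8 vector):

    /* GC_BONUS_SLOP = GC_HEADER_SIZE + GC_SMALL_GAP_SIZE_SIZE, so any gap
       of at least GC_BONUS_SLOP bytes fits a fill header followed by a
       32-bit field recording the size of the rest of the gap. */
    static void fillSmallGap (pointer start, size_t gapSize) {
      assert (gapSize >= GC_BONUS_SLOP);
      *((GC_header *)start) = GC_FILL_HEADER;
      start = start + GC_HEADER_SIZE;
      *((GC_smallGapSize *)start) = gapSize - GC_BONUS_SLOP;
    }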
----------------------------------------------------------------------
U mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml
U mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c
U mlton/branches/shared-heap-multicore/runtime/gc/forward.c
U mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c
U mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h
U mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c
U mlton/branches/shared-heap-multicore/runtime/gc/heap.c
U mlton/branches/shared-heap-multicore/runtime/gc/heap.h
U mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c
U mlton/branches/shared-heap-multicore/runtime/gc/init-world.c
U mlton/branches/shared-heap-multicore/runtime/gc/new-object.c
U mlton/branches/shared-heap-multicore/runtime/gc/object.h
----------------------------------------------------------------------
Modified: mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml
===================================================================
--- mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/basis-library/primitive/prim-mlton.sml 2008-03-03 15:26:28 UTC (rev 6441)
@@ -112,8 +112,9 @@
structure FFI =
struct
+ (* XXX all these symbols need to be per-processor *)
val cpointerArray = #1 _symbol "MLton_FFI_CPointer": Pointer.t GetSet.t; ()
- val getOp = #1 _symbol "MLton_FFI_op": Int32.t GetSet.t;
+ val getOp = _prim "FFI_getOp": unit -> Int32.int;
val int8Array = #1 _symbol "MLton_FFI_Int8": Pointer.t GetSet.t; ()
val int16Array = #1 _symbol "MLton_FFI_Int16": Pointer.t GetSet.t; ()
val int32Array = #1 _symbol "MLton_FFI_Int32": Pointer.t GetSet.t; ()
Modified: mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/cheney-copy.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -96,24 +96,40 @@
void minorCheneyCopyGC (GC_state s) {
size_t bytesAllocated;
+ size_t bytesFilled = 0;
size_t bytesCopied;
struct rusage ru_start;
if (DEBUG_GENERATIONAL)
fprintf (stderr, "minorGC nursery = "FMTPTR" frontier = "FMTPTR"\n",
- (uintptr_t)s->heap.nursery, (uintptr_t)s->frontier);
+ (uintptr_t)s->heap->nursery, (uintptr_t)s->frontier);
assert (invariantForGC (s));
- bytesAllocated = s->frontier - s->heap.nursery;
+ /* XXX spoons not accurate if this doesn't account for gaps */
+ bytesAllocated = s->heap->frontier - s->heap->nursery;
if (bytesAllocated == 0)
return;
- s->cumulativeStatistics.bytesAllocated += bytesAllocated;
if (not s->canMinor) {
- s->heap.oldGenSize += bytesAllocated;
+ for (int proc = 0; proc < s->numberOfProcs; proc++) {
+ /* Add in the bonus slop now since we need to fill it */
+ s->procStates[proc].limitPlusSlop += GC_BONUS_SLOP;
+ if (s->procStates[proc].limitPlusSlop != s->heap->frontier) {
+ /* Fill to avoid an uninitialized gap in the middle of the heap */
+ bytesFilled += fillGap (s, s->procStates[proc].frontier,
+ s->procStates[proc].limitPlusSlop);
+ }
+ else {
+ /* If this is at the end of the heap there is no need to fill the gap
+ -- there will be no break in the initialized portion of the
+ heap. Also, this is the last chunk allocated in the nursery, so it is
+ safe to use the frontier from this processor as the global frontier. */
+ s->heap->oldGenSize = s->procStates[proc].frontier - s->heap->start;
+ }
+ }
bytesCopied = 0;
} else {
- if (DEBUG_GENERATIONAL or s->controls.messages)
+ if (DEBUG_GENERATIONAL or s->controls->messages)
fprintf (stderr, "[GC: Minor Cheney-copy; nursery at "FMTPTR" of size %s bytes,]\n",
- (uintptr_t)(s->heap.nursery),
+ (uintptr_t)(s->heap->nursery),
uintmaxToCommaString(bytesAllocated));
if (detailedGCTime (s))
startTiming (&ru_start);
@@ -145,4 +161,7 @@
fprintf (stderr, "[GC: Minor Cheney-copy done; %s bytes copied.]\n",
uintmaxToCommaString(bytesCopied));
}
+ bytesAllocated -= bytesFilled;
+ s->cumulativeStatistics->bytesAllocated += bytesAllocated;
+ s->cumulativeStatistics->bytesFilled += bytesFilled;
}
Modified: mlton/branches/shared-heap-multicore/runtime/gc/forward.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/forward.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/forward.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -155,7 +155,7 @@
fprintf (stderr,
"forwardObjptrIfInNursery opp = "FMTPTR" op = "FMTOBJPTR" p = "FMTPTR"\n",
(uintptr_t)opp, op, (uintptr_t)p);
- assert (s->heap.nursery <= p and p < s->limitPlusSlop);
+ assert (s->heap->nursery <= p and p < s->heap->frontier);
forwardObjptr (s, opp);
}
Modified: mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -111,13 +111,25 @@
: sizeofStackWithHeaderAligned (s, sizeofStackGrow (s, getStackCurrent (s)));
totalBytesRequested =
oldGenBytesRequested
- + nurseryBytesRequested
+ stackBytesRequested;
+ getThreadCurrent(s)->bytesNeeded = nurseryBytesRequested;
+ for (int proc = 0; proc < s->numberOfProcs; proc++) {
+ /* It could be that other threads have already worked to satisfy their own
+ requests. We need to make sure that we don't invalidate the work
+ they've done.
+ */
+ if (getThreadCurrent(&s->procStates[proc])->bytesNeeded == 0) {
+ getThreadCurrent(&s->procStates[proc])->bytesNeeded = GC_HEAP_LIMIT_SLOP;
+ }
+ totalBytesRequested += getThreadCurrent(&s->procStates[proc])->bytesNeeded;
+ totalBytesRequested += GC_BONUS_SLOP;
+ }
+
if (forceMajor
- or totalBytesRequested > s->heap.size - s->heap.oldGenSize)
+ or totalBytesRequested > s->heap->availableSize - s->heap->oldGenSize)
majorGC (s, totalBytesRequested, mayResize);
setGCStateCurrentHeap (s, oldGenBytesRequested + stackBytesRequested,
- nurseryBytesRequested);
+ nurseryBytesRequested, false);
assert (hasHeapBytesFree (s, oldGenBytesRequested + stackBytesRequested,
nurseryBytesRequested));
unless (stackTopOk)
@@ -152,29 +164,249 @@
leaveGC (s);
}
-void ensureInvariantForMutator (GC_state s, bool force) {
- if (force
- or not (invariantForMutatorFrontier(s))
- or not (invariantForMutatorStack(s))) {
- /* This GC will grow the stack, if necessary. */
- performGC (s, 0, getThreadCurrent(s)->bytesNeeded, force, TRUE);
+size_t fillGap (__attribute__ ((unused)) GC_state s, pointer start, pointer end) {
+ size_t diff = end - start;
+
+ if (diff == 0) {
+ return 0;
}
- assert (invariantForMutatorFrontier(s));
- assert (invariantForMutatorStack(s));
+
+ if (DEBUG)
+ fprintf (stderr, "[GC: Filling gap between "FMTPTR" and "FMTPTR" (size = "FMTARRLEN").]\n",
+ (uintptr_t)start, (uintptr_t)end, diff);
+
+ if (start) {
+ /* See note in the array case of foreach.c (line 103) */
+ if (diff >= GC_ARRAY_HEADER_SIZE + OBJPTR_SIZE) {
+ assert (diff >= GC_ARRAY_HEADER_SIZE);
+ /* Counter */
+ *((GC_arrayCounter *)start) = 0;
+ start = start + GC_ARRAY_COUNTER_SIZE;
+ /* Length */
+ *((GC_arrayLength *)start) = diff - GC_ARRAY_HEADER_SIZE;
+ start = start + GC_ARRAY_LENGTH_SIZE;
+ /* Header */
+ *((GC_header *)start) = GC_WORD8_VECTOR_HEADER;
+ start = start + GC_HEADER_SIZE;
+ }
+ else if (diff == GC_HEADER_SIZE) {
+ *((GC_header *)start) = GC_HEADER_ONLY_HEADER;
+ start = start + GC_HEADER_SIZE;
+ }
+ else if (diff >= GC_BONUS_SLOP) {
+ assert (diff < INT_MAX);
+ *((GC_header *)start) = GC_FILL_HEADER;
+ start = start + GC_HEADER_SIZE;
+ *((GC_smallGapSize *)start) = diff - (GC_HEADER_SIZE + GC_SMALL_GAP_SIZE_SIZE);
+ start = start + GC_SMALL_GAP_SIZE_SIZE;
+ }
+ else {
+ assert(0 == diff);
+ /* XXX */
+ fprintf (stderr, "FOUND A GAP OF "FMTARRLEN" BYTES!\n", diff);
+ exit (1);
+ }
+
+ /* XXX debug only */
+ /*
+ while (start < end) {
+ *(start++) = 0xDF;
+ }
+ */
+
+ return diff;
+ }
+ else {
+ return 0;
+ }
}
-/* ensureHasHeapBytesFree (s, oldGen, nursery)
- */
-void ensureHasHeapBytesFree (GC_state s,
- size_t oldGenBytesRequested,
- size_t nurseryBytesRequested) {
- assert (s->heap.nursery <= s->limitPlusSlop);
- assert (s->frontier <= s->limitPlusSlop);
- if (not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested))
- performGC (s, oldGenBytesRequested, nurseryBytesRequested, FALSE, TRUE);
- assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+static void maybeSatisfyAllocationRequestLocally (GC_state s,
+ size_t nurseryBytesRequested) {
+ /* First try and take another chunk from the shared nursery */
+ while (TRUE)
+ {
+ /* This is the only read of the global frontier -- never read it again
+ until after the swap. */
+ pointer oldFrontier = s->heap->frontier;
+ pointer newHeapFrontier, newProcFrontier;
+ pointer newStart;
+ /* heap->start and heap->size are read-only (unless you hold the global
+ lock) so it's ok to read them here */
+ size_t availableBytes = (size_t)((s->heap->start + s->heap->availableSize)
+ - oldFrontier);
+
+ /* If another thread is trying to get exclusive access, then join the
+ queue. */
+ if (Proc_threadInSection (s)) {
+ if (DEBUG)
+ fprintf (stderr, "[GC: aborting local alloc: mutex.]\n");
+ return;
+ }
+ /* See if the mutator frontier invariant is already true */
+ assert (s->limitPlusSlop >= s->frontier);
+ if (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier)) {
+ if (DEBUG)
+ fprintf (stderr, "[GC: aborting local alloc: satisfied.]\n");
+ return;
+ }
+ /* Perhaps there is not enough space in the nursery to satisfy this
+ request; if that's true then we need to do a full collection */
+ if (nurseryBytesRequested + GC_BONUS_SLOP > availableBytes) {
+ if (DEBUG)
+ fprintf (stderr, "[GC: aborting local alloc: no space.]\n");
+ return;
+ }
+
+ /* OK! We might possibly satisfy this request without the runtime lock!
+ Let's see what that will entail... */
+
+ /* Now see if we were the most recent thread to allocate */
+ if (oldFrontier == s->limitPlusSlop + GC_BONUS_SLOP) {
+ /* This is the next chunk so no need to fill */
+ newHeapFrontier = s->frontier + nurseryBytesRequested + GC_BONUS_SLOP;
+ /* Leave "start" and "frontier" where they are */
+ newStart = s->start;
+ newProcFrontier = s->frontier;
+ }
+ else {
+ /* Fill the old gap */
+ fillGap (s, s->frontier, s->limitPlusSlop + GC_BONUS_SLOP);
+ /* Don't update frontier or limitPlusSlop since we will either
+ overwrite them (if we succeed) or just fill the same gap again
+ (if we fail). (There is no obvious other pair of values that
+ we can set them to that is safe.) */
+ newHeapFrontier = oldFrontier + nurseryBytesRequested + GC_BONUS_SLOP;
+ newProcFrontier = oldFrontier;
+ /* Move "start" since the space between old-start and frontier is not
+ necessarily filled */
+ newStart = oldFrontier;
+ }
+
+ if (__sync_bool_compare_and_swap (&s->heap->frontier,
+ oldFrontier, newHeapFrontier)) {
+ if (DEBUG)
+ fprintf (stderr, "[GC: Local alloction of chunk @ "FMTPTR".]\n",
+ (uintptr_t)newProcFrontier);
+
+ s->start = newStart;
+ s->frontier = newProcFrontier;
+ assert (isFrontierAligned (s, s->frontier));
+ s->limitPlusSlop = newHeapFrontier - GC_BONUS_SLOP;
+ s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+
+ return;
+ }
+ else {
+ if (DEBUG)
+ fprintf (stderr, "[GC: Contention for alloction (frontier is "FMTPTR").]\n",
+ (uintptr_t)s->heap->frontier);
+ }
+ }
}
+// assumes that stack->used and thread->exnstack are up to date
+// assumes exclusive access to runtime if !mustEnter
+// forceGC = force major collection
+void ensureHasHeapBytesFreeAndOrInvariantForMutator (GC_state s, bool forceGC,
+ bool ensureFrontier,
+ bool ensureStack,
+ size_t oldGenBytesRequested,
+ size_t nurseryBytesRequested) {
+ bool stackTopOk;
+ size_t stackBytesRequested;
+
+ /* To ensure the mutator frontier invariant, set the requested bytes
+ to include those needed by the thread.
+ */
+ if (ensureFrontier) {
+ nurseryBytesRequested += getThreadCurrent(s)->bytesNeeded;
+ }
+
+ /* XXX (sort of) copied from performGC */
+ stackTopOk = (not ensureStack) or invariantForMutatorStack (s);
+ stackBytesRequested =
+ stackTopOk
+ ? 0
+ : sizeofStackWithHeaderAligned (s, sizeofStackGrow (s, getStackCurrent (s)));
+
+ /* try to satisfy (at least part of the) request locally */
+ maybeSatisfyAllocationRequestLocally (s, nurseryBytesRequested + stackBytesRequested);
+
+ if (not stackTopOk
+ and (hasHeapBytesFree (s, 0, stackBytesRequested))) {
+ if (DEBUG or s->controls->messages)
+ fprintf (stderr, "GC: growing stack locally... [%d]\n",
+ s->procStates ? Proc_processorNumber (s) : -1);
+ growStackCurrent (s, FALSE);
+ setGCStateCurrentThreadAndStack (s);
+ }
+
+ if (DEBUG or s->controls->messages) {
+ fprintf (stderr, "GC: stackInvariant: %d,%d hasHeapBytesFree: %d inSection: %d force: %d [%d]\n",
+ ensureStack, ensureStack and invariantForMutatorStack (s),
+ hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested),
+ Proc_threadInSection (s),
+ forceGC,
+ s->procStates ? Proc_processorNumber (s) : -1);
+ }
+
+ if (/* check the stack of the current thread */
+ ((ensureStack and not invariantForMutatorStack (s))
+ and (s->syncReason = SYNC_STACK))
+ /* this subsumes invariantForMutatorFrontier */
+ or (not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested)
+ and (s->syncReason = SYNC_HEAP))
+ /* another thread is waiting for exclusive access */
+ or Proc_threadInSection (s)
+ /* we are forcing a major collection */
+ or (forceGC
+ and (s->syncReason = SYNC_FORCE))) {
+ /* Copy the value here so other threads will see it (if we synchronize and
+ one of the other threads does the work). */
+ if (isObjptr (getThreadCurrentObjptr(s)))
+ getThreadCurrent(s)->bytesNeeded = nurseryBytesRequested;
+
+ ENTER0 (s);
+ /* XXX should this go here? */
+ switchToSignalHandlerThreadIfNonAtomicAndSignalPending (s);
+
+ /* Recheck invariants now that we hold the lock */
+ if ((ensureStack and not invariantForMutatorStack (s))
+ or not hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested)
+ or forceGC) {
+ performGC (s, oldGenBytesRequested, nurseryBytesRequested, forceGC, TRUE);
+ }
+ else
+ if (DEBUG or s->controls->messages)
+ fprintf (stderr, "GC: Skipping GC (inside of sync). [%d]\n", s->procStates ? Proc_processorNumber (s) : -1);
+
+ LEAVE0 (s);
+ }
+ else {
+ if (DEBUG or s->controls->messages)
+ fprintf (stderr, "GC: Skipping GC (invariants already hold / request satisfied locally). [%d]\n", s->procStates ? Proc_processorNumber (s) : -1);
+
+ /* These are safe even without ENTER/LEAVE */
+ assert (isAligned (s->heap->size, s->sysvals.pageSize));
+ assert (isAligned ((size_t)s->heap->start, CARD_SIZE));
+ assert (isFrontierAligned (s, s->heap->start + s->heap->oldGenSize));
+ assert (isFrontierAligned (s, s->heap->nursery));
+ assert (isFrontierAligned (s, s->frontier));
+ assert (s->heap->start + s->heap->oldGenSize <= s->heap->nursery);
+ assert (s->heap->nursery <= s->heap->start + s->heap->availableSize);
+ assert (s->heap->nursery <= s->frontier or 0 == s->frontier);
+ assert (s->start <= s->frontier);
+ unless (0 == s->heap->size or 0 == s->frontier) {
+ assert (s->frontier <= s->limitPlusSlop);
+ assert (s->limit == s->limitPlusSlop - GC_HEAP_LIMIT_SLOP);
+ assert (hasHeapBytesFree (s, 0, 0));
+ }
+ }
+ assert (not ensureFrontier or invariantForMutatorFrontier(s));
+ assert (not ensureStack or invariantForMutatorStack(s));
+}
+
void GC_collect (GC_state s, size_t bytesRequested, bool force,
char *file, int line) {
if (DEBUG)
@@ -184,11 +416,24 @@
* much as GC_HEAP_LIMIT_SLOP.
*/
if (0 == bytesRequested)
- bytesRequested = GC_HEAP_LIMIT_SLOP;
+ bytesRequested = s->controls->allocChunkSize;
+ else if (bytesRequested < s->controls->allocChunkSize)
+ bytesRequested = s->controls->allocChunkSize;
+ else
+ bytesRequested += GC_HEAP_LIMIT_SLOP;
+
+ /* XXX copied from enter() */
+ /* used needs to be set because the mutator has changed s->stackTop. */
+ getStackCurrent(s)->used = sizeofGCStateCurrentStackUsed (s);
+ getThreadCurrent(s)->exnStack = s->exnStack;
+
getThreadCurrent(s)->bytesNeeded = bytesRequested;
- switchToSignalHandlerThreadIfNonAtomicAndSignalPending (s);
- ensureInvariantForMutator (s, force);
- assert (invariantForMutatorFrontier(s));
- assert (invariantForMutatorStack(s));
- leave (s);
+
+ ensureHasHeapBytesFreeAndOrInvariantForMutator (s, force,
+ TRUE, TRUE,
+ 0, 0);
}
+
+uint32_t FFI_getOp (GC_state s) {
+ return s->ffiOp;
+}
Modified: mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/garbage-collection.h 2008-03-03 15:26:28 UTC (rev 6441)
@@ -10,7 +10,7 @@
static void minorGC (GC_state s);
static void majorGC (GC_state s, size_t bytesRequested, bool mayResize);
-static inline void growStackCurrent (GC_state s);
+static inline void growStackCurrent (GC_state s, bool allocInOldGen);
static inline void enterGC (GC_state s);
static inline void leaveGC (GC_state s);
static void performGC (GC_state s,
@@ -18,10 +18,13 @@
size_t nurseryBytesRequested,
bool forceMajor,
bool mayResize);
-static inline void ensureInvariantForMutator (GC_state s, bool force);
-static inline void ensureHasHeapBytesFree (GC_state s,
- size_t oldGenBytesRequested,
- size_t nurseryBytesRequested);
+static size_t fillGap (GC_state s, pointer start, pointer end);
+static void ensureHasHeapBytesFreeAndOrInvariantForMutator (GC_state s,
+ bool forceGC,
+ bool ensureFrontier,
+ bool ensureStack,
+ size_t oldGenBytesRequested,
+ size_t nurseryBytesRequested);
#endif /* (defined (MLTON_GC_INTERNAL_FUNCS)) */
@@ -29,5 +32,7 @@
void GC_collect (GC_state s, size_t bytesRequested, bool force,
char *file, int line);
+/* XXX spoons should probably go somewhere else... or just get removed */
+uint32_t FFI_getOp (GC_state s);
#endif /* (defined (MLTON_GC_INTERNAL_BASIS)) */
Modified: mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/gc_state.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -53,34 +53,46 @@
void setGCStateCurrentHeap (GC_state s,
size_t oldGenBytesRequested,
- size_t nurseryBytesRequested) {
+ size_t nurseryBytesRequested,
+ bool duringInit) {
GC_heap h;
pointer nursery;
size_t nurserySize;
pointer genNursery;
size_t genNurserySize;
+ pointer limit;
+ pointer frontier;
+ size_t bonus = GC_BONUS_SLOP * s->numberOfProcs;
+ if (not duringInit) {
+ nurseryBytesRequested = 0;
+ for (int proc = 0; proc < s->numberOfProcs; proc++) {
+ GC_thread thread = getThreadCurrent(&s->procStates[proc]);
+ if (thread)
+ nurseryBytesRequested += thread->bytesNeeded;
+ }
+ }
+
if (DEBUG_DETAILED)
fprintf (stderr, "setGCStateCurrentHeap(%s, %s)\n",
uintmaxToCommaString(oldGenBytesRequested),
uintmaxToCommaString(nurseryBytesRequested));
- h = &s->heap;
+ h = s->heap;
assert (isFrontierAligned (s, h->start + h->oldGenSize + oldGenBytesRequested));
- s->limitPlusSlop = h->start + h->size;
- s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
- nurserySize = h->size - (h->oldGenSize + oldGenBytesRequested);
- assert (isFrontierAligned (s, s->limitPlusSlop - nurserySize));
- nursery = s->limitPlusSlop - nurserySize;
- genNursery = alignFrontier (s, s->limitPlusSlop - (nurserySize / 2));
- genNurserySize = s->limitPlusSlop - genNursery;
+ limit = h->start + h->size - bonus;
+ nurserySize = h->size - (h->oldGenSize + oldGenBytesRequested) - bonus;
+ assert (isFrontierAligned (s, limit - nurserySize));
+ nursery = limit - nurserySize;
+ genNursery = alignFrontier (s, limit - (nurserySize / 2));
+ genNurserySize = limit - genNursery;
if (/* The mutator marks cards. */
s->mutatorMarksCards
/* There is enough space in the generational nursery. */
and (nurseryBytesRequested <= genNurserySize)
/* The nursery is large enough to be worth it. */
- and (((float)(h->size - s->lastMajorStatistics.bytesLive)
+ and (((float)(h->size - s->lastMajorStatistics->bytesLive)
/ (float)nurserySize)
- <= s->controls.ratios.nursery)
+ <= s->controls->ratios.nursery)
and /* There is a reason to use generational GC. */
(
/* We must use it for debugging purposes. */
@@ -97,23 +109,148 @@
nursery = genNursery;
nurserySize = genNurserySize;
clearCardMap (s);
+ /* XXX copy card map to other processors? */
} else {
unless (nurseryBytesRequested <= nurserySize)
die ("Out of memory. Insufficient space in nursery.");
s->canMinor = FALSE;
}
+
+ if (s->controls->restrictAvailableSize
+ and
+ (s->cumulativeStatistics->maxBytesLiveSinceReset > 0)) {
+ float actualRatio;
+ h->availableSize =
+ (size_t)(s->controls->ratios.available
+ * s->cumulativeStatistics->maxBytesLiveSinceReset);
+
+ if ((h->oldGenSize + oldGenBytesRequested + nurserySize + bonus)
+ > h->availableSize) {
+ /* Limit allocation in this round */
+ if ((h->oldGenSize + oldGenBytesRequested + nurseryBytesRequested + bonus)
+ > h->availableSize) {
+ /* We can't limit as much as we'd like, so offer enough space to
+ satisfy the current request. */
+ h->availableSize = h->oldGenSize + oldGenBytesRequested
+ + nurseryBytesRequested + bonus;
+ }
+ if (h->availableSize > h->size) {
+ /* Can't offer more than we have. */
+ h->availableSize = h->size;
+ }
+ limit = h->start + h->availableSize - bonus;
+ nurserySize = h->availableSize - (h->oldGenSize + oldGenBytesRequested) - bonus;
+ assert (isFrontierAligned (s, limit - nurserySize));
+ nursery = limit - nurserySize;
+
+ if (s->canMinor) {
+ /* If we are planning for a minor collection, we must also adjust the
+ start of the nursery */
+ nursery = alignFrontier (s, limit - (nurserySize / 2));
+ nurserySize = limit - nursery;
+ }
+ if (DEBUG) {
+ fprintf (stderr,
+ "[GC: Restricted nursery at "FMTPTR" of %s bytes (%.1f%%).]\n",
+ (uintptr_t)nursery, uintmaxToCommaString(limit - nursery),
+ 100.0 * ((double)(limit - nursery)
+ / (double)h->availableSize));
+ }
+ }
+ else {
+ /* No need to limit in this round... reset availableSize. */
+ h->availableSize = h->size;
+ }
+
+ actualRatio = (float)h->availableSize
+ / s->cumulativeStatistics->maxBytesLiveSinceReset;
+ if ((DEBUG or s->controls->messages)
+ and
+ (actualRatio > s->controls->ratios.available)) {
+ fprintf (stderr,
+ "[GC: Can't restrict available ratio to %f, using %f; worst-case max-live is %s bytes.]\n",
+ s->controls->ratios.available, actualRatio,
+ uintmaxToCommaString(h->oldGenSize + oldGenBytesRequested + nurserySize));
+ }
+ }
+ else {
+ /* Otherwise, make all unused space available */
+ h->availableSize = h->size;
+ }
+
assert (nurseryBytesRequested <= nurserySize);
- s->heap.nursery = nursery;
- s->frontier = nursery;
- assert (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier));
- assert (isFrontierAligned (s, s->heap.nursery));
- assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+ s->heap->nursery = nursery;
+ frontier = nursery;
+
+ if (not duringInit) {
+ for (int proc = 0; proc < s->numberOfProcs; proc++) {
+ s->procStates[proc].canMinor = s->canMinor;
+ assert (isFrontierAligned (s, frontier));
+ s->procStates[proc].start = s->procStates[proc].frontier = frontier;
+ s->procStates[proc].limitPlusSlop = s->procStates[proc].start +
+ getThreadCurrent(&s->procStates[proc])->bytesNeeded;
+ s->procStates[proc].limit = s->procStates[proc].limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+ assert (s->procStates[proc].frontier <= s->procStates[proc].limitPlusSlop);
+ /* XXX clearCardMap (?) */
+
+ if (DEBUG)
+ for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+ *(s->procStates[proc].limitPlusSlop + i) = 0xBF;
+
+ frontier = s->procStates[proc].limitPlusSlop + GC_BONUS_SLOP;
+ }
+ }
+ else {
+ assert (Proc_processorNumber (s) == 0);
+ /* XXX this is a lot of copy-paste */
+ for (int proc = 1; proc < s->numberOfProcs; proc++) {
+ s->procStates[proc].canMinor = s->canMinor;
+ assert (isFrontierAligned (s, frontier));
+ s->procStates[proc].start = s->procStates[proc].frontier = frontier;
+ s->procStates[proc].limitPlusSlop = s->procStates[proc].start +
+ GC_HEAP_LIMIT_SLOP;
+ s->procStates[proc].limit = s->procStates[proc].limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+ assert (s->procStates[proc].frontier <= s->procStates[proc].limitPlusSlop);
+ /* XXX clearCardMap (?) */
+
+ if (DEBUG)
+ for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+ *(s->procStates[proc].limitPlusSlop + i) = 0xBF;
+
+ frontier = s->procStates[proc].limitPlusSlop + GC_BONUS_SLOP;
+ }
+
+ s->start = s->frontier = frontier;
+ s->limitPlusSlop = limit;
+ s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
+ /* XXX clearCardMap (?) */
+
+ if (DEBUG)
+ for (size_t i = 0; i < GC_BONUS_SLOP; i++)
+ *(s->limitPlusSlop + i) = 0xBF;
+
+ frontier = s->limitPlusSlop + GC_BONUS_SLOP;
+ }
+ h->frontier = frontier;
+ assert (h->frontier <= h->start + h->availableSize);
+
+ if (not duringInit) {
+ assert (getThreadCurrent(s)->bytesNeeded <= (size_t)(s->limitPlusSlop - s->frontier));
+ assert (hasHeapBytesFree (s, oldGenBytesRequested, getThreadCurrent(s)->bytesNeeded));
+ }
+ else {
+ assert (nurseryBytesRequested <= (size_t)(s->limitPlusSlop - s->frontier));
+ assert (hasHeapBytesFree (s, oldGenBytesRequested, nurseryBytesRequested));
+ }
+ assert (isFrontierAligned (s, s->frontier));
}
-bool GC_getAmOriginal (GC_state s) {
+bool GC_getAmOriginal (__attribute__ ((unused)) GC_state *gs) {
+ GC_state s = pthread_getspecific (gcstate_key);
return s->amOriginal;
}
-void GC_setAmOriginal (GC_state s, bool b) {
+void GC_setAmOriginal (__attribute__ ((unused)) GC_state *gs, bool b) {
+ GC_state s = pthread_getspecific (gcstate_key);
s->amOriginal = b;
}
Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -12,11 +12,15 @@
"\t\tnursery = "FMTPTR"\n"
"\t\toldGenSize = %zu\n"
"\t\tsize = %zu\n"
- "\t\tstart = "FMTPTR"\n",
+ "\t\tavailableSize = %zu\n"
+ "\t\tstart = "FMTPTR"\n"
+ "\t\tfrontier = "FMTPTR"\n",
(uintptr_t)heap->nursery,
heap->oldGenSize,
heap->size,
- (uintptr_t)heap->start);
+ heap->availableSize,
+ (uintptr_t)heap->start,
+ (uintptr_t)heap->frontier);
}
@@ -25,7 +29,9 @@
h->nursery = NULL;
h->oldGenSize = 0;
h->size = 0;
+ h->availableSize = h->size;
h->start = NULL;
+ h->frontier = NULL;
}
/* sizeofHeapDesired (s, l, cs)
Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap.h 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap.h 2008-03-03 15:26:28 UTC (rev 6441)
@@ -22,6 +22,9 @@
*/
typedef struct GC_heap {
+ size_t availableSize; /* may be smaller than size if we are limiting
+ allocation for profiling purposes */
+ pointer frontier; /* next (globally) unallocated space */
size_t oldGenSize; /* size of old generation */
pointer nursery; /* start of nursery */
size_t size; /* size of heap */
Modified: mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/heap_predicates.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -56,15 +56,19 @@
or isObjptrInNursery (s, op));
}
+/* Is there space in the heap for "oldGen" additional bytes;
+ also, can "nursery" bytes be allocated by the current thread
+ without using/claiming any shared resources */
bool hasHeapBytesFree (GC_state s, size_t oldGen, size_t nursery) {
size_t total;
bool res;
total =
- s->heap.oldGenSize + oldGen
- + (s->canMinor ? 2 : 1) * (s->limitPlusSlop - s->heap.nursery);
+ s->heap->oldGenSize + oldGen
+ + (s->canMinor ? 2 : 1) * (s->heap->frontier - s->heap->nursery);
res =
- (total <= s->heap.size)
+ (total <= s->heap->availableSize)
+ and (s->heap->start + s->heap->oldGenSize + oldGen <= s->heap->nursery)
and (nursery <= (size_t)(s->limitPlusSlop - s->frontier));
if (DEBUG_DETAILED)
fprintf (stderr, "%s = hasBytesFree (%s, %s)\n",
Modified: mlton/branches/shared-heap-multicore/runtime/gc/init-world.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/init-world.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/init-world.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -141,14 +141,17 @@
for (i = 0; i < s->globalsLength; ++i)
s->globals[i] = BOGUS_OBJPTR;
- s->lastMajorStatistics.bytesLive = sizeofInitialBytesLive (s);
- createHeap (s, &s->heap,
- sizeofHeapDesired (s, s->lastMajorStatistics.bytesLive, 0),
- s->lastMajorStatistics.bytesLive);
+ s->lastMajorStatistics->bytesLive = sizeofInitialBytesLive (s);
+ minSize = s->lastMajorStatistics->bytesLive
+ + ((GC_HEAP_LIMIT_SLOP + GC_BONUS_SLOP) * s->numberOfProcs);
+ createHeap (s, s->heap,
+ sizeofHeapDesired (s, minSize, 0),
+ minSize);
+
createCardMapAndCrossMap (s);
- start = alignFrontier (s, s->heap.start);
- s->frontier = start;
- s->limitPlusSlop = s->heap.start + s->heap.size;
+ start = alignFrontier (s, s->heap->start);
+ s->start = s->frontier = start;
+ s->limitPlusSlop = s->heap->start + s->heap->size - GC_BONUS_SLOP;
s->limit = s->limitPlusSlop - GC_HEAP_LIMIT_SLOP;
initIntInfs (s);
initVectors (s);
Modified: mlton/branches/shared-heap-multicore/runtime/gc/new-object.c
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/new-object.c 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/new-object.c 2008-03-03 15:26:28 UTC (rev 6441)
@@ -75,7 +75,7 @@
GC_thread thread;
pointer res;
- ensureHasHeapBytesFree (s, 0, sizeofStackWithHeaderAligned (s, reserved) + sizeofThread (s));
+ ensureHasHeapBytesFreeAndOrInvariantForMutator (s, FALSE, FALSE, FALSE, 0, sizeofStackWithHeaderAligned (s, alignStackReserved (s, reserved)) + sizeofThread (s));
stack = newStack (s, reserved, FALSE);
res = newObject (s, GC_THREAD_HEADER,
sizeofThread (s),
Modified: mlton/branches/shared-heap-multicore/runtime/gc/object.h
===================================================================
--- mlton/branches/shared-heap-multicore/runtime/gc/object.h 2008-03-03 15:20:13 UTC (rev 6440)
+++ mlton/branches/shared-heap-multicore/runtime/gc/object.h 2008-03-03 15:26:28 UTC (rev 6441)
@@ -99,6 +99,10 @@
*/
#define GC_NORMAL_HEADER_SIZE GC_HEADER_SIZE
+typedef uint32_t GC_smallGapSize;
+#define GC_SMALL_GAP_SIZE_SIZE sizeof (GC_smallGapSize)
+#define GC_BONUS_SLOP (GC_HEADER_SIZE + GC_SMALL_GAP_SIZE_SIZE)
+
#endif /* (defined (MLTON_GC_INTERNAL_TYPES)) */