From 4be9799259a9da29dd893d76b8bb83cc0eb010be Mon Sep 17 00:00:00 2001 From: theraven Date: Sat, 15 May 2010 13:27:20 +0000 Subject: [PATCH] Unrolled the SparseArrayLookup loop. This means we only need one jump, instead of 2, which gives the same performance as the old GNU code and about half the memory usage. It's really, really, ugly code though - I need to teach the compiler to do this optimisation so I don't have to. --- sarray2.c | 38 ++++++++++++++++++++++++++++++++++---- sarray2.h | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 5 deletions(-) diff --git a/sarray2.c b/sarray2.c index 36a2bd4..219991e 100644 --- a/sarray2.c +++ b/sarray2.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "sarray2.h" @@ -27,17 +28,46 @@ static void init_pointers(SparseArray * sarray) } } } - -SparseArray * SparseArrayNew() +SparseArray * SparseArrayNewWithDepth(uint32_t depth) { SparseArray * sarray = calloc(1, sizeof(SparseArray)); sarray->refCount = 1; - sarray->shift = 32-base_shift; + sarray->shift = depth-base_shift; sarray->mask = base_mask << sarray->shift; init_pointers(sarray); return sarray; } +SparseArray *SparseArrayNew() +{ + return SparseArrayNewWithDepth(32); +} +SparseArray *SparseArrayExpandingArray(SparseArray *sarray) +{ + // Expanding a child sarray has undefined results. + assert(sarray->refCount == 1); + SparseArray *new = calloc(1, sizeof(SparseArray)); + new->refCount = 1; + new->shift = sarray->shift; + new->mask = sarray->mask; + void **newData = malloc(DATA_SIZE(sarray) * sizeof(void*)); + for(unsigned i=0 ; i<=MAX_INDEX(sarray) ; i++) + { + newData[i] = &EmptyArray; + } + new->data = sarray->data; + // new is now an exact copy of sarray. + newData[0] = new; + sarray->data = newData; + // Now, any lookup in sarray for any value less than its capacity will have + // all non-zero values shifted away, resulting in 0. All lookups will + // therefore go to the new sarray. + sarray->shift += base_shift; + // Finally, set the mask to the correct value. Now all lookups should work. + sarray->mask <<= base_shift; + return new; +} + void * SparseArrayNext(SparseArray * sarray, uint32_t * index) { @@ -106,7 +136,7 @@ void SparseArrayInsert(SparseArray * sarray, uint32_t index, void *value) sarray->data[i] = newsarray; child = newsarray; fprintf(stderr, "Created child: %p\n", child); - } + }// FIXME: Concurrency (don't CoW twice) else if (child->refCount > 1) { // Copy the copy-on-write part of the tree diff --git a/sarray2.h b/sarray2.h index 4b7e523..8667484 100644 --- a/sarray2.h +++ b/sarray2.h @@ -44,6 +44,32 @@ typedef struct */ static inline void* SparseArrayLookup(SparseArray * sarray, uint32_t index) { + // This unrolled version of the commented-out segment below only works with + // sarrays that use one-byte leaves. It's really ugly, but seems to be faster. + // With this version, we get the same performance as the old GNU code, but + // with about half the memory usage. + uint32_t i = index; + switch (sarray->shift) + { + default: assert(0 && "broken sarray"); + case 0: + return sarray->data[i & 0xff]; + case 8: + return + ((SparseArray*)sarray->data[(i & 0xff00)>>8])->data[(i & 0xff)]; + case 16: + return + ((SparseArray*)((SparseArray*) + sarray->data[(i & 0xff0000)>>16])-> + data[(i & 0xff00)>>8])->data[(i & 0xff)]; + case 24: + return + ((SparseArray*)((SparseArray*)((SparseArray*) + sarray->data[(i & 0xff000000)>>24])-> + data[(i & 0xff0000)>>16])-> + data[(i & 0xff00)>>8])->data[(i & 0xff)]; + } + /* while(sarray->shift > 0) { uint32_t i = MASK_INDEX(index); @@ -51,11 +77,23 @@ static inline void* SparseArrayLookup(SparseArray * sarray, uint32_t index) } uint32_t i = index & sarray->mask; return sarray->data[i]; + */ } /** * Create a new sparse array. */ -SparseArray * SparseArrayNew(); +SparseArray *SparseArrayNew(); +/** + * Creates a new sparse array with the specified capacity. The depth indicates + * the number of bits to use for the key. Must be a value between 8 and 32 and + * should ideally be a multiple of base_shift. + */ +SparseArray *SparseArrayNewWithDepth(uint32_t depth); +/** + * Returns a new sparse array created by adding this one as the first child + * node in an expanded one. + */ +SparseArray *SparseArrayExpandingArray(SparseArray *sarray); /** * Insert a value at the specified index. */