Unrolled the SparseArrayLookup loop. This means we only need one jump instead of two, which gives the same performance as the old GNU code with about half the memory usage. It's really, really ugly code, though - I need to teach the compiler to do this optimisation so that I don't have to write it by hand.

main
theraven 16 years ago
parent 0d78186238
commit 4be9799259

@ -1,6 +1,7 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include "sarray2.h"
@ -27,17 +28,46 @@ static void init_pointers(SparseArray * sarray)
}
}
}
SparseArray * SparseArrayNew()
/**
 * Allocates a sparse array node for keys that are `depth` bits wide.
 *
 * The node's shift is set to depth - base_shift and its mask to base_mask
 * moved up by that shift; init_pointers() is then run on the node.  The
 * returned array starts with a reference count of 1.
 *
 * NOTE(review): depth is not validated here.  A depth smaller than base_shift
 * presumably wraps around in the subtraction below - confirm against the type
 * of base_shift.
 */
SparseArray * SparseArrayNewWithDepth(uint32_t depth)
{
	SparseArray * sarray = calloc(1, sizeof(SparseArray));
	sarray->refCount = 1;
	// The shift depends only on the requested depth.
	sarray->shift = depth-base_shift;
	sarray->mask = base_mask << sarray->shift;
	init_pointers(sarray);
	return sarray;
}
/**
 * Creates a sparse array with the default key width of 32 bits by delegating
 * to SparseArrayNewWithDepth(32).
 */
SparseArray *SparseArrayNew(void)
{
	return SparseArrayNewWithDepth(32);
}
/**
 * Adds one level on top of `sarray`, in place, and returns the node that now
 * holds its previous contents.
 *
 * A freshly allocated node takes over sarray's old data table, shift and
 * mask.  sarray receives a new table whose slot 0 points at that node and
 * whose remaining slots point at EmptyArray; its shift and mask are then each
 * moved up by base_shift.
 *
 * sarray must have a reference count of exactly 1.
 */
SparseArray *SparseArrayExpandingArray(SparseArray *sarray)
{
	// Expanding a child sarray has undefined results, so insist on a single
	// reference.
	assert(sarray->refCount == 1);

	// The child inherits the current geometry of sarray.
	SparseArray *child = calloc(1, sizeof(SparseArray));
	child->mask = sarray->mask;
	child->shift = sarray->shift;
	child->refCount = 1;

	// Build the replacement top-level table.  DATA_SIZE and MAX_INDEX are
	// evaluated here, before sarray's shift and mask are changed below.
	void **slots = malloc(DATA_SIZE(sarray) * sizeof(void*));
	unsigned slot = 0;
	while (slot <= MAX_INDEX(sarray))
	{
		slots[slot] = &EmptyArray;
		slot++;
	}

	// Hand the old table to the child; it is now an exact copy of sarray.
	child->data = sarray->data;

	// Hang the child off slot 0 of the new table and install that table.
	slots[0] = child;
	sarray->data = slots;

	// With the larger shift, any index below the old capacity has all of its
	// non-zero bits shifted away at this level and therefore selects slot 0,
	// i.e. the child.  The mask is moved up by the same amount so that it
	// matches the new shift.
	sarray->mask <<= base_shift;
	sarray->shift += base_shift;

	return child;
}
void * SparseArrayNext(SparseArray * sarray, uint32_t * index)
{
@ -106,7 +136,7 @@ void SparseArrayInsert(SparseArray * sarray, uint32_t index, void *value)
sarray->data[i] = newsarray;
child = newsarray;
fprintf(stderr, "Created child: %p\n", child);
}
}// FIXME: Concurrency (don't CoW twice)
else if (child->refCount > 1)
{
// Copy the copy-on-write part of the tree

@ -44,6 +44,32 @@ typedef struct
*/
static inline void* SparseArrayLookup(SparseArray * sarray, uint32_t index)
{
// Hand-unrolled replacement for the generic loop that is kept, commented out,
// at the end of this function.  It switches once on the root's shift and then
// follows a fixed chain of data[] tables, one per level.
// The 0xff masks and the 0/8/16/24 shifts are hard-coded, so this only works
// when every level consumes exactly 8 bits of the index.
// Author's note: it is really ugly, but seems to be faster.  With this
// version we get the same performance as the old GNU code, but with about
// half the memory usage.
uint32_t i = index;
switch (sarray->shift)
{
// Any shift other than 0, 8, 16 or 24 trips the assert.  If assert() is
// compiled out (NDEBUG), control falls through into case 0.
default: assert(0 && "broken sarray");
case 0:
// One level: the low byte indexes the root table directly.
return sarray->data[i & 0xff];
case 8:
// Two levels: byte 1 selects the child, byte 0 selects the value.
return
((SparseArray*)sarray->data[(i & 0xff00)>>8])->data[(i & 0xff)];
case 16:
// Three levels: bytes 2, 1 and 0 are consumed from the root downwards.
return
((SparseArray*)((SparseArray*)
sarray->data[(i & 0xff0000)>>16])->
data[(i & 0xff00)>>8])->data[(i & 0xff)];
case 24:
// Four levels: bytes 3, 2, 1 and 0 are consumed from the root downwards.
// Every intermediate slot is cast to SparseArray* and dereferenced without
// any check.
return
((SparseArray*)((SparseArray*)((SparseArray*)
sarray->data[(i & 0xff000000)>>24])->
data[(i & 0xff0000)>>16])->
data[(i & 0xff00)>>8])->data[(i & 0xff)];
}
/*
while(sarray->shift > 0)
{
uint32_t i = MASK_INDEX(index);
@ -51,11 +77,23 @@ static inline void* SparseArrayLookup(SparseArray * sarray, uint32_t index)
}
uint32_t i = index & sarray->mask;
return sarray->data[i];
*/
}
/**
 * Create a new sparse array with the default depth of 32 bits; equivalent to
 * SparseArrayNewWithDepth(32).
 */
SparseArray * SparseArrayNew();
SparseArray *SparseArrayNew();
/**
 * Creates a new sparse array with the specified capacity.  The depth is the
 * number of bits used for the key; it must be a value between 8 and 32 and
 * should ideally be a multiple of base_shift.  The returned array has a
 * reference count of 1.
 */
SparseArray *SparseArrayNewWithDepth(uint32_t depth);
/**
 * Grows sarray by one level, in place.  A newly allocated node takes over
 * sarray's current contents and becomes the first child of sarray, whose
 * shift and mask are moved up by base_shift.  Returns that new child node,
 * not the expanded root.  sarray must have a reference count of 1.
 */
SparseArray *SparseArrayExpandingArray(SparseArray *sarray);
/**
* Insert a value at the specified index.
*/

Loading…
Cancel
Save