Skip to content
This repository has been archived by the owner on May 4, 2021. It is now read-only.

Commit

Permalink
move hashtable and utils to new files
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdavis committed Jul 19, 2017
1 parent 97a97c7 commit 64aefb5
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 155 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ include_directories(
bson-numpy/bson
bson-numpy/jsonsl)

file(GLOB bsonnumpy_SRC "bson-numpy/bson/*.c" "bson-numpy/jsonsl/*.c")
add_library(bsonnumpy SHARED bson-numpy/bsonnumpy.c ${bsonnumpy_SRC})
file(GLOB bsonnumpy_SRC "bson-numpy/*.c" "bson-numpy/bson/*.c" "bson-numpy/jsonsl/*.c")
add_library(bsonnumpy SHARED ${bsonnumpy_SRC})
target_include_directories(bsonnumpy PRIVATE bson-numpy)
target_link_libraries(bsonnumpy ${PYTHON_LIBRARIES} ${BSON_LIBRARIES})
target_compile_definitions(bsonnumpy PRIVATE BSON_COMPILATION)
Expand Down
3 changes: 3 additions & 0 deletions bson-numpy/bson/bson-iter.h
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,9 @@ bson_iter_visit_all (bson_iter_t *iter,
const bson_visitor_t *visitor,
void *data);

/*
 * NOTE(review): internal iterator helper exposed through this header; its
 * definition is not visible here. Judging by the name it is a variant of the
 * iterator "next" step, and @keylen is presumably a key length already known
 * to the caller -- TODO confirm against the definition in bson-iter.c.
 */
bool
_bson_iter_next_internal2 (bson_iter_t *iter,
size_t keylen);

BSON_END_DECLS

Expand Down
10 changes: 5 additions & 5 deletions bson-numpy/bson/bson-memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ bson_malloc0 (size_t num_bytes) /* IN */
*
* Parameters:
* @mem: The memory to realloc, or NULL.
* @num_bytes: The size of the new allocation or 0 to free.
* @num_bytes: The size of the new allocation or 0 to free.
*
* Returns:
* The new allocation if successful; otherwise abort() is called and
Expand All @@ -139,7 +139,7 @@ bson_realloc (void *mem, /* IN */
{
/*
* Not all platforms are guaranteed to free() the memory if a call to
* realloc() with a size of zero occurs. Windows, Linux, and FreeBSD do,
* realloc() with a size of zero occurs. Windows, Linux, and FreeBSD do,
* however, OS X does not.
*/
if (BSON_UNLIKELY (num_bytes == 0)) {
Expand Down Expand Up @@ -167,7 +167,7 @@ bson_realloc (void *mem, /* IN */
*
* Parameters:
* @mem: The memory to realloc, or NULL.
* @num_bytes: The size of the new allocation or 0 to free.
* @num_bytes: The size of the new allocation or 0 to free.
* @ctx: Ignored
*
* Returns:
Expand Down Expand Up @@ -224,13 +224,13 @@ bson_free (void *mem) /* IN */
*
* bson_zero_free --
*
* Frees @mem using the underlying allocator. @size bytes of @mem will
* Frees @mem using the underlying allocator. @size bytes of @mem will
* be zeroed before freeing the memory. This is useful in scenarios
* where @mem contains passwords or other sensitive information.
*
* Parameters:
* @mem: An allocation to free.
* @size: The number of bytes in @mem.
* @size: The number of bytes in @mem.
*
* Returns:
* None.
Expand Down
152 changes: 5 additions & 147 deletions bson-numpy/bsonnumpy.c
Original file line number Diff line number Diff line change
@@ -1,162 +1,20 @@
#include <Python.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "bsonnumpy.h"
#include "bsonnumpy_hashtable.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
/* #include <numpy/arrayobject.h> */
/* #include <numpy/npy_common.h> */
#include <numpy/ndarrayobject.h>

#include "bson/bson.h"

static PyObject *BsonNumpyError;

typedef enum {
typedef enum
{
DTYPE_NESTED, /* like np.dtype([('a', np.int64), ('b', np.double)]) */
DTYPE_SCALAR, /* like np.int64 */
DTYPE_ARRAY, /* like np.dtype('3i') */
} node_type_t;


/* how much larger the table is than the number of entries */
const Py_ssize_t TABLE_MULTIPLE = 4;

/*
 * One slot of the open-addressed hash table.
 */
typedef struct {
/* key string; lookups compare it with strcmp() */
const char *key;
/* strlen() of key, computed once at insertion */
size_t keylen;
/* slot the key hashes to: hash & (size - 1) */
Py_ssize_t ideal_pos;
/* caller-supplied value, or EMPTY when the slot is unused */
Py_ssize_t value;
} hash_table_entry_t;


/*
 * Fixed-size hash table mapping string keys to Py_ssize_t values.
 */
typedef struct {
/* array of `size` slots */
hash_table_entry_t *entries;
/* number of slots; a power of two so (size - 1) can be used as a mask */
Py_ssize_t size;
/* incremented by table_insert for every key stored */
Py_ssize_t used;
} hash_table_t;


/* Marks an unused slot and is returned by table_lookup for a missing key;
 * a stored value equal to EMPTY would be indistinguishable from a free slot. */
static const Py_ssize_t EMPTY = -1;


static Py_ssize_t
table_next_power_of_two (Py_ssize_t v)
typedef struct _parsed_dtype_t
{
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
#if BSON_WORD_SIZE == 64
v |= v >> 32;
#endif
v++;

return v;
}


/*
 * table_init --
 *
 *       Allocate the slot array of @table so it can hold @n_entries keys.
 *       The slot count is TABLE_MULTIPLE times @n_entries, rounded by
 *       table_next_power_of_two(), and every slot is marked EMPTY.
 *
 * Parameters:
 *       @table: The table to initialise; its previous contents are ignored.
 *       @n_entries: The number of keys the caller intends to insert.
 *
 * Returns:
 *       None.
 */
static void
table_init(hash_table_t *table, Py_ssize_t n_entries)
{
    Py_ssize_t i;
    Py_ssize_t wanted;

    /* A request for zero (or fewer) entries would otherwise round to a size
     * that is not a positive power of two, which breaks the (size - 1) mask
     * used by table_insert and table_lookup. Keep at least one slot. */
    wanted = n_entries > 0 ? n_entries * TABLE_MULTIPLE : 1;

    table->size = table_next_power_of_two(wanted);

    /* table_insert increments `used`, so it must start from a known value
     * even when the caller did not zero the struct. */
    table->used = 0;

    table->entries = bson_malloc0(
        table->size * sizeof(hash_table_entry_t));

    for (i = 0; i < table->size; i++) {
        table->entries[i].value = EMPTY;
    }
}


/*
 * table_insert --
 *
 *       Store @value under @key using Robin Hood probing. Keys are assumed
 *       to be unique and the table is never resized, so the caller must have
 *       sized it with table_init beforehand. The @key pointer itself is
 *       stored, not a copy of the string.
 *
 * Parameters:
 *       @table: An initialised table with at least one EMPTY slot.
 *       @key: NUL-terminated key string.
 *       @value: Value to associate with @key; must not equal EMPTY.
 *
 * Returns:
 *       None.
 */
static void
table_insert(hash_table_t *table, const char *key, Py_ssize_t value)
{
    /* size is a power of two, so masking replaces the modulo */
    const Py_ssize_t mask = table->size - 1;
    hash_table_entry_t carried;
    Py_ssize_t carried_dist = 0;
    Py_ssize_t slot;

    carried.key = key;
    carried.keylen = strlen(key);
    carried.value = value;
    carried.ideal_pos = _Py_HashBytes(key, carried.keylen) & mask;

    for (slot = carried.ideal_pos;; slot = (slot + 1) & mask, carried_dist++) {
        hash_table_entry_t *resident = &table->entries[slot];
        Py_ssize_t resident_dist;

        if (resident->value == EMPTY) {
            /* free slot: drop the carried entry here and finish */
            *resident = carried;
            table->used++;
            return;
        }

        /* Occupied. If the resident sits closer to its ideal slot than the
         * carried entry does, it takes the carried entry's place and we go
         * on looking for a home for the evicted resident instead. */
        resident_dist = (slot - resident->ideal_pos) & mask;
        if (resident_dist < carried_dist) {
            hash_table_entry_t evicted = *resident;

            *resident = carried;
            carried = evicted;
            carried_dist = resident_dist;
        }
    }
}


/*
 * table_lookup --
 *
 *       Find the value stored under @key.
 *
 * Parameters:
 *       @table: An initialised table.
 *       @key: NUL-terminated key string to search for.
 *
 * Returns:
 *       The value inserted for @key, or EMPTY if the key is not present.
 */
static Py_ssize_t
table_lookup(hash_table_t* table, const char *key)
{
    const Py_ssize_t mask = table->size - 1;
    Py_ssize_t probes = 0;
    Py_ssize_t slot = _Py_HashBytes(key, strlen(key)) & mask;

    for (;;) {
        const hash_table_entry_t *candidate = &table->entries[slot];

        if (candidate->value == EMPTY) {
            return EMPTY;
        }

        if (strcmp(candidate->key, key) == 0) {
            return candidate->value;
        }

        /* The candidate is nearer to its ideal slot than @key would be at
         * this position; insertion would have displaced it in favour of
         * @key, so @key cannot be further along the probe sequence. */
        if (probes > ((slot - candidate->ideal_pos) & mask)) {
            return EMPTY;
        }

        probes++;
        slot = (slot + 1) & mask;
    }
}


typedef struct _parsed_dtype_t {
node_type_t node_type;
char *field_name;
char *repr;
Expand Down
28 changes: 28 additions & 0 deletions bson-numpy/bsonnumpy.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef BSONNUMPY_H
#define BSONNUMPY_H

#include <Python.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>

#include "bson/bson.h"

/*
 * A string pointer paired with a length. The hash table uses it as its key
 * type: `s` is compared with strcmp() and `len` is passed to the hash
 * function as the number of bytes to hash.
 */
typedef struct
{
/* string data; NOTE(review): presumably not owned by this struct -- confirm */
const char *s;
/* length of `s` in bytes */
size_t len;
} bsnp_string_t;


/*
 * Used by table_init to turn a requested slot count into the table size,
 * which the hash table treats as a power of two.
 * NOTE(review): definition not visible here; presumably rounds @v up to the
 * next power of two -- confirm, including the result for v <= 0.
 */
ssize_t
bsnp_next_power_of_two(ssize_t v);

/*
 * Fill in @string from the C string @s. The hash table relies on
 * string->len being usable as the byte length of @s afterwards.
 * NOTE(review): definition not visible here; presumably stores the pointer
 * and strlen(s) without copying -- confirm.
 */
void
bsnp_string_init(bsnp_string_t *string, const char *s);


#endif //BSONNUMPY_H
102 changes: 102 additions & 0 deletions bson-numpy/bsonnumpy_hashtable.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#include "bsonnumpy_hashtable.h"


/* How many slots the table allocates per expected entry. Private to this
 * file (no header declares it), so it gets internal linkage. */
static const ssize_t TABLE_MULTIPLE = 4;


/*
 * table_init --
 *
 *       Allocate the slot array of @table so it can hold @n_entries keys.
 *       The slot count is TABLE_MULTIPLE times @n_entries, rounded by
 *       bsnp_next_power_of_two(), and every slot is marked EMPTY.
 *
 * Parameters:
 *       @table: The table to initialise; its previous contents are ignored.
 *       @n_entries: The number of keys the caller intends to insert.
 *
 * Returns:
 *       None.
 */
void
table_init(hash_table_t *table, ssize_t n_entries)
{
    ssize_t i;
    ssize_t wanted;

    /* A request for zero (or fewer) entries must still yield a positive
     * power-of-two size: table_insert and table_lookup use (size - 1) as an
     * index mask. Keep at least one slot. */
    wanted = n_entries > 0 ? n_entries * TABLE_MULTIPLE : 1;

    table->size = bsnp_next_power_of_two(wanted);

    /* table_insert increments `used`, so it must start from a known value
     * even when the caller did not zero the struct. */
    table->used = 0;

    table->entries = bson_malloc0(table->size * sizeof(hash_table_entry_t));

    for (i = 0; i < table->size; i++) {
        table->entries[i].value = EMPTY;
    }
}


/*
 * table_insert --
 *
 *       Store @value under @key using Robin Hood probing. Keys are assumed
 *       to be unique and the table is never resized, so the caller must have
 *       sized it with table_init beforehand.
 *
 * Parameters:
 *       @table: An initialised table with at least one EMPTY slot.
 *       @key: Key string, recorded in the entry via bsnp_string_init().
 *       @value: Value to associate with @key; must not equal EMPTY.
 *
 * Returns:
 *       None.
 */
void
table_insert(hash_table_t *table, const char *key, ssize_t value)
{
    /* size is a power of two, so masking replaces the modulo */
    const ssize_t mask = table->size - 1;
    hash_table_entry_t carried;
    ssize_t carried_dist = 0;
    ssize_t slot;

    bsnp_string_init(&carried.key, key);
    carried.value = value;
    carried.ideal_pos = _Py_HashBytes(key, carried.key.len) & mask;

    for (slot = carried.ideal_pos;; slot = (slot + 1) & mask, carried_dist++) {
        hash_table_entry_t *resident = &table->entries[slot];
        ssize_t resident_dist;

        if (resident->value == EMPTY) {
            /* free slot: drop the carried entry here and finish */
            *resident = carried;
            table->used++;
            return;
        }

        /* Occupied. If the resident sits closer to its ideal slot than the
         * carried entry does, it takes the carried entry's place and we go
         * on looking for a home for the evicted resident instead. */
        resident_dist = (slot - resident->ideal_pos) & mask;
        if (resident_dist < carried_dist) {
            hash_table_entry_t evicted = *resident;

            *resident = carried;
            carried = evicted;
            carried_dist = resident_dist;
        }
    }
}


/*
 * table_lookup --
 *
 *       Find the value stored under @key.
 *
 * Parameters:
 *       @table: An initialised table.
 *       @key: NUL-terminated key string to search for.
 *
 * Returns:
 *       The value inserted for @key, or EMPTY if the key is not present.
 */
ssize_t
table_lookup(hash_table_t *table, const char *key)
{
    const ssize_t mask = table->size - 1;
    ssize_t probes = 0;
    ssize_t slot = _Py_HashBytes(key, strlen(key)) & mask;

    for (;;) {
        const hash_table_entry_t *candidate = &table->entries[slot];

        if (candidate->value == EMPTY) {
            return EMPTY;
        }

        if (strcmp(candidate->key.s, key) == 0) {
            return candidate->value;
        }

        /* The candidate is nearer to its ideal slot than @key would be at
         * this position; insertion would have displaced it in favour of
         * @key, so @key cannot be further along the probe sequence. */
        if (probes > ((slot - candidate->ideal_pos) & mask)) {
            return EMPTY;
        }

        probes++;
        slot = (slot + 1) & mask;
    }
}
32 changes: 32 additions & 0 deletions bson-numpy/bsonnumpy_hashtable.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef BSONNUMPY_HASHTABLE_H
#define BSONNUMPY_HASHTABLE_H

#include "bsonnumpy.h"


/*
 * One slot of the open-addressed hash table.
 */
typedef struct
{
/* key string and its length; lookups compare key.s with strcmp() */
bsnp_string_t key;
/* slot the key hashes to: hash & (size - 1) */
ssize_t ideal_pos;
/* caller-supplied value, or EMPTY when the slot is unused */
ssize_t value;
} hash_table_entry_t;

/*
 * Fixed-size hash table mapping string keys to ssize_t values.
 */
typedef struct
{
/* array of `size` slots, allocated by table_init */
hash_table_entry_t *entries;
/* number of slots; a power of two so (size - 1) can be used as a mask */
ssize_t size;
/* incremented by table_insert for every key stored */
ssize_t used;
} hash_table_t;

/* Marks an unused slot and is returned by table_lookup for a missing key;
 * a stored value equal to EMPTY would be indistinguishable from a free slot. */
static const ssize_t EMPTY = -1;

/*
 * Allocate the slots of @table for @n_entries keys and mark them all EMPTY.
 * The table is never resized afterwards.
 */
void
table_init(hash_table_t *table, ssize_t n_entries);

/*
 * Store @value under @key. Keys must be unique, @value must not be EMPTY,
 * and the table must still have a free slot.
 */
void
table_insert(hash_table_t *table, const char *key, ssize_t value);

/*
 * Return the value stored under @key, or EMPTY if @key is not in the table.
 */
ssize_t
table_lookup(hash_table_t *table, const char *key);

#endif //BSONNUMPY_HASHTABLE_H
Loading

0 comments on commit 64aefb5

Please sign in to comment.