blender/tests/gtests/blenlib/BLI_ghash_performance_test.cc
2020-03-19 09:33:58 +01:00

664 lines
16 KiB
C++

/* Apache License, Version 2.0 */
#include "BLI_ressource_strings.h"
#include "testing/testing.h"
#define GHASH_INTERNAL_API
#include "MEM_guardedalloc.h"
extern "C" {
#include "BLI_ghash.h"
#include "BLI_rand.h"
#include "BLI_string.h"
#include "BLI_utildefines.h"
#include "PIL_time_utildefines.h"
}
/* Using http://corpora.uni-leipzig.de/downloads/eng_wikipedia_2010_1M-text.tar.gz
* (1 million of words, about 122MB of text) from
* http://corpora.informatik.uni-leipzig.de/download.html */
#if 0
# define TEXT_CORPUS_PATH \
"/path/to/Téléchargements/eng_wikipedia_2010_1M-text/eng_wikipedia_2010_1M-sentences.txt"
#endif
/* Resizing the hash has a huge cost over global filling operation! */
//#define GHASH_RESERVE
/* Run the longest tests! */
//#define GHASH_RUN_BIG
/* Size of 'small case' ghash (number of entries). */
#define TESTCASE_SIZE_SMALL 17
#define PRINTF_GHASH_STATS(_gh) \
{ \
double q, lf, var, pempty, poverloaded; \
int bigb; \
q = BLI_ghash_calc_quality_ex((_gh), &lf, &var, &pempty, &poverloaded, &bigb); \
printf( \
"GHash stats (%u entries):\n\t" \
"Quality (the lower the better): %f\n\tVariance (the lower the better): %f\n\tLoad: " \
"%f\n\t" \
"Empty buckets: %.2f%%\n\tOverloaded buckets: %.2f%% (biggest bucket: %d)\n", \
BLI_ghash_len(_gh), \
q, \
var, \
lf, \
pempty * 100.0, \
poverloaded * 100.0, \
bigb); \
} \
void(0)
/* Str: whole text, lines and words from a 'corpus' text. */
static void str_ghash_tests(GHash *ghash, const char *id)
{
printf("\n========== STARTING %s ==========\n", id);
#ifdef TEXT_CORPUS_PATH
size_t sz = 0;
char *data;
{
struct stat st;
if (stat(TEXT_CORPUS_PATH, &st) == 0)
sz = st.st_size;
}
if (sz != 0) {
FILE *f = fopen(TEXT_CORPUS_PATH, "r");
data = (char *)MEM_mallocN(sz + 1, __func__);
if (fread(data, sizeof(*data), sz, f) != sz) {
printf("ERROR in reading file %s!", TEXT_CORPUS_PATH);
MEM_freeN(data);
data = BLI_strdup(words10k);
}
data[sz] = '\0';
fclose(f);
}
else {
data = BLI_strdup(words10k);
}
#else
char *data = BLI_strdup(words10k);
#endif
char *data_p = BLI_strdup(data);
char *data_w = BLI_strdup(data);
char *data_bis = BLI_strdup(data);
{
char *p, *w, *c_p, *c_w;
TIMEIT_START(string_insert);
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, strlen(data) / 32); /* rough estimation... */
#endif
BLI_ghash_insert(ghash, data, POINTER_FROM_INT(data[0]));
for (p = c_p = data_p, w = c_w = data_w; *c_w; c_w++, c_p++) {
if (*c_p == '.') {
*c_p = *c_w = '\0';
if (!BLI_ghash_haskey(ghash, p)) {
BLI_ghash_insert(ghash, p, POINTER_FROM_INT(p[0]));
}
if (!BLI_ghash_haskey(ghash, w)) {
BLI_ghash_insert(ghash, w, POINTER_FROM_INT(w[0]));
}
p = c_p + 1;
w = c_w + 1;
}
else if (*c_w == ' ') {
*c_w = '\0';
if (!BLI_ghash_haskey(ghash, w)) {
BLI_ghash_insert(ghash, w, POINTER_FROM_INT(w[0]));
}
w = c_w + 1;
}
}
TIMEIT_END(string_insert);
}
PRINTF_GHASH_STATS(ghash);
{
char *p, *w, *c;
void *v;
TIMEIT_START(string_lookup);
v = BLI_ghash_lookup(ghash, data_bis);
EXPECT_EQ(POINTER_AS_INT(v), data_bis[0]);
for (p = w = c = data_bis; *c; c++) {
if (*c == '.') {
*c = '\0';
v = BLI_ghash_lookup(ghash, w);
EXPECT_EQ(POINTER_AS_INT(v), w[0]);
v = BLI_ghash_lookup(ghash, p);
EXPECT_EQ(POINTER_AS_INT(v), p[0]);
p = w = c + 1;
}
else if (*c == ' ') {
*c = '\0';
v = BLI_ghash_lookup(ghash, w);
EXPECT_EQ(POINTER_AS_INT(v), w[0]);
w = c + 1;
}
}
TIMEIT_END(string_lookup);
}
BLI_ghash_free(ghash, NULL, NULL);
MEM_freeN(data);
MEM_freeN(data_p);
MEM_freeN(data_w);
MEM_freeN(data_bis);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, TextGHash)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_strhash_p, BLI_ghashutil_strcmp, __func__);
str_ghash_tests(ghash, "StrGHash - GHash");
}
TEST(ghash, TextMurmur2a)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_strhash_p_murmur, BLI_ghashutil_strcmp, __func__);
str_ghash_tests(ghash, "StrGHash - Murmur");
}
/* Int: uniform 100M first integers. */
static void int_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
{
unsigned int i = nbr;
TIMEIT_START(int_insert);
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, nbr);
#endif
while (i--) {
BLI_ghash_insert(ghash, POINTER_FROM_UINT(i), POINTER_FROM_UINT(i));
}
TIMEIT_END(int_insert);
}
PRINTF_GHASH_STATS(ghash);
{
unsigned int i = nbr;
TIMEIT_START(int_lookup);
while (i--) {
void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(i));
EXPECT_EQ(POINTER_AS_UINT(v), i);
}
TIMEIT_END(int_lookup);
}
{
void *k, *v;
TIMEIT_START(int_pop);
GHashIterState pop_state = {0};
while (BLI_ghash_pop(ghash, &pop_state, &k, &v)) {
EXPECT_EQ(k, v);
}
TIMEIT_END(int_pop);
}
EXPECT_EQ(BLI_ghash_len(ghash), 0);
BLI_ghash_free(ghash, NULL, NULL);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, IntGHash12000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
int_ghash_tests(ghash, "IntGHash - GHash - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, IntGHash100000000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
int_ghash_tests(ghash, "IntGHash - GHash - 100000000", 100000000);
}
#endif
TEST(ghash, IntMurmur2a12000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
int_ghash_tests(ghash, "IntGHash - Murmur - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, IntMurmur2a100000000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
int_ghash_tests(ghash, "IntGHash - Murmur - 100000000", 100000000);
}
#endif
/* Int: random 50M integers. */
static void randint_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
unsigned int *dt;
unsigned int i;
{
RNG *rng = BLI_rng_new(0);
for (i = nbr, dt = data; i--; dt++) {
*dt = BLI_rng_get_uint(rng);
}
BLI_rng_free(rng);
}
{
TIMEIT_START(int_insert);
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, nbr);
#endif
for (i = nbr, dt = data; i--; dt++) {
BLI_ghash_insert(ghash, POINTER_FROM_UINT(*dt), POINTER_FROM_UINT(*dt));
}
TIMEIT_END(int_insert);
}
PRINTF_GHASH_STATS(ghash);
{
TIMEIT_START(int_lookup);
for (i = nbr, dt = data; i--; dt++) {
void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(*dt));
EXPECT_EQ(POINTER_AS_UINT(v), *dt);
}
TIMEIT_END(int_lookup);
}
BLI_ghash_free(ghash, NULL, NULL);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, IntRandGHash12000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
randint_ghash_tests(ghash, "RandIntGHash - GHash - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, IntRandGHash50000000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
randint_ghash_tests(ghash, "RandIntGHash - GHash - 50000000", 50000000);
}
#endif
TEST(ghash, IntRandMurmur2a12000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
randint_ghash_tests(ghash, "RandIntGHash - Murmur - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, IntRandMurmur2a50000000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
randint_ghash_tests(ghash, "RandIntGHash - Murmur - 50000000", 50000000);
}
#endif
static unsigned int ghashutil_tests_nohash_p(const void *p)
{
return POINTER_AS_UINT(p);
}
static bool ghashutil_tests_cmp_p(const void *a, const void *b)
{
return a != b;
}
TEST(ghash, Int4NoHash12000)
{
GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
randint_ghash_tests(ghash, "RandIntGHash - No Hash - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int4NoHash50000000)
{
GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
randint_ghash_tests(ghash, "RandIntGHash - No Hash - 50000000", 50000000);
}
#endif
/* Int_v4: 20M of randomly-generated integer vectors. */
static void int4_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
void *data_v = MEM_mallocN(sizeof(unsigned int[4]) * (size_t)nbr, __func__);
unsigned int(*data)[4] = (unsigned int(*)[4])data_v;
unsigned int(*dt)[4];
unsigned int i, j;
{
RNG *rng = BLI_rng_new(0);
for (i = nbr, dt = data; i--; dt++) {
for (j = 4; j--;) {
(*dt)[j] = BLI_rng_get_uint(rng);
}
}
BLI_rng_free(rng);
}
{
TIMEIT_START(int_v4_insert);
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, nbr);
#endif
for (i = nbr, dt = data; i--; dt++) {
BLI_ghash_insert(ghash, *dt, POINTER_FROM_UINT(i));
}
TIMEIT_END(int_v4_insert);
}
PRINTF_GHASH_STATS(ghash);
{
TIMEIT_START(int_v4_lookup);
for (i = nbr, dt = data; i--; dt++) {
void *v = BLI_ghash_lookup(ghash, (void *)(*dt));
EXPECT_EQ(POINTER_AS_UINT(v), i);
}
TIMEIT_END(int_v4_lookup);
}
BLI_ghash_free(ghash, NULL, NULL);
MEM_freeN(data);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, Int4GHash2000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
int4_ghash_tests(ghash, "Int4GHash - GHash - 2000", 2000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int4GHash20000000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v4_p, BLI_ghashutil_uinthash_v4_cmp, __func__);
int4_ghash_tests(ghash, "Int4GHash - GHash - 20000000", 20000000);
}
#endif
TEST(ghash, Int4Murmur2a2000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
int4_ghash_tests(ghash, "Int4GHash - Murmur - 2000", 2000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int4Murmur2a20000000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v4_p_murmur, BLI_ghashutil_uinthash_v4_cmp, __func__);
int4_ghash_tests(ghash, "Int4GHash - Murmur - 20000000", 20000000);
}
#endif
/* GHash inthash_v2 tests */
TEST(ghash, Int2NoHash12000)
{
GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
randint_ghash_tests(ghash, "RandIntGHash - No Hash - 12000", 12000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int2NoHash50000000)
{
GHash *ghash = BLI_ghash_new(ghashutil_tests_nohash_p, ghashutil_tests_cmp_p, __func__);
randint_ghash_tests(ghash, "RandIntGHash - No Hash - 50000000", 50000000);
}
#endif
/* Int_v2: 20M of randomly-generated integer vectors. */
static void int2_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
void *data_v = MEM_mallocN(sizeof(unsigned int[2]) * (size_t)nbr, __func__);
unsigned int(*data)[2] = (unsigned int(*)[2])data_v;
unsigned int(*dt)[2];
unsigned int i, j;
{
RNG *rng = BLI_rng_new(0);
for (i = nbr, dt = data; i--; dt++) {
for (j = 2; j--;) {
(*dt)[j] = BLI_rng_get_uint(rng);
}
}
BLI_rng_free(rng);
}
{
TIMEIT_START(int_v2_insert);
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, nbr);
#endif
for (i = nbr, dt = data; i--; dt++) {
BLI_ghash_insert(ghash, *dt, POINTER_FROM_UINT(i));
}
TIMEIT_END(int_v2_insert);
}
PRINTF_GHASH_STATS(ghash);
{
TIMEIT_START(int_v2_lookup);
for (i = nbr, dt = data; i--; dt++) {
void *v = BLI_ghash_lookup(ghash, (void *)(*dt));
EXPECT_EQ(POINTER_AS_UINT(v), i);
}
TIMEIT_END(int_v2_lookup);
}
BLI_ghash_free(ghash, NULL, NULL);
MEM_freeN(data);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, Int2GHash2000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v2_p, BLI_ghashutil_uinthash_v2_cmp, __func__);
int2_ghash_tests(ghash, "Int2GHash - GHash - 2000", 2000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int2GHash20000000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v2_p, BLI_ghashutil_uinthash_v2_cmp, __func__);
int2_ghash_tests(ghash, "Int2GHash - GHash - 20000000", 20000000);
}
#endif
TEST(ghash, Int2Murmur2a2000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v2_p_murmur, BLI_ghashutil_uinthash_v2_cmp, __func__);
int2_ghash_tests(ghash, "Int2GHash - Murmur - 2000", 2000);
}
#ifdef GHASH_RUN_BIG
TEST(ghash, Int2Murmur2a20000000)
{
GHash *ghash = BLI_ghash_new(
BLI_ghashutil_uinthash_v2_p_murmur, BLI_ghashutil_uinthash_v2_cmp, __func__);
int2_ghash_tests(ghash, "Int2GHash - Murmur - 20000000", 20000000);
}
#endif
/* MultiSmall: create and manipulate a lot of very small ghashes
* (90% < 10 items, 9% < 100 items, 1% < 1000 items). */
static void multi_small_ghash_tests_one(GHash *ghash, RNG *rng, const unsigned int nbr)
{
unsigned int *data = (unsigned int *)MEM_mallocN(sizeof(*data) * (size_t)nbr, __func__);
unsigned int *dt;
unsigned int i;
for (i = nbr, dt = data; i--; dt++) {
*dt = BLI_rng_get_uint(rng);
}
#ifdef GHASH_RESERVE
BLI_ghash_reserve(ghash, nbr);
#endif
for (i = nbr, dt = data; i--; dt++) {
BLI_ghash_insert(ghash, POINTER_FROM_UINT(*dt), POINTER_FROM_UINT(*dt));
}
for (i = nbr, dt = data; i--; dt++) {
void *v = BLI_ghash_lookup(ghash, POINTER_FROM_UINT(*dt));
EXPECT_EQ(POINTER_AS_UINT(v), *dt);
}
BLI_ghash_clear(ghash, NULL, NULL);
}
static void multi_small_ghash_tests(GHash *ghash, const char *id, const unsigned int nbr)
{
printf("\n========== STARTING %s ==========\n", id);
RNG *rng = BLI_rng_new(0);
TIMEIT_START(multi_small_ghash);
unsigned int i = nbr;
while (i--) {
const int nbr = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) *
(!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
multi_small_ghash_tests_one(ghash, rng, nbr);
}
TIMEIT_END(multi_small_ghash);
TIMEIT_START(multi_small2_ghash);
unsigned int i = nbr;
while (i--) {
const int nbr = 1 + (BLI_rng_get_int(rng) % TESTCASE_SIZE_SMALL) / 2 *
(!(i % 100) ? 100 : (!(i % 10) ? 10 : 1));
multi_small_ghash_tests_one(ghash, rng, nbr);
}
TIMEIT_END(multi_small2_ghash);
BLI_ghash_free(ghash, NULL, NULL);
BLI_rng_free(rng);
printf("========== ENDED %s ==========\n\n", id);
}
TEST(ghash, MultiRandIntGHash2000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 2000", 2000);
}
TEST(ghash, MultiRandIntGHash200000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p, BLI_ghashutil_intcmp, __func__);
multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - GHash - 200000", 200000);
}
TEST(ghash, MultiRandIntMurmur2a2000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 2000", 2000);
}
TEST(ghash, MultiRandIntMurmur2a200000)
{
GHash *ghash = BLI_ghash_new(BLI_ghashutil_inthash_p_murmur, BLI_ghashutil_intcmp, __func__);
multi_small_ghash_tests(ghash, "MultiSmall RandIntGHash - Murmur2a - 200000", 200000);
}