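Support compressed signed integers in NumberUtils: add encode_ci32/decode_ci32 and encode_ci64/decode_ci64 (implemented as casts to and from the existing cu32/cu64 routines), the CI32_MAX_BYTES/CI64_MAX_BYTES constants, and round-trip tests test_ci32/test_ci64.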
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/cef8cbc1 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/cef8cbc1 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/cef8cbc1 Branch: refs/heads/master Commit: cef8cbc1dc507196ac0881f72709031b2889abff Parents: ce89b9c Author: Marvin Humphrey <[email protected]> Authored: Mon Apr 18 17:12:14 2016 -0700 Committer: Marvin Humphrey <[email protected]> Committed: Mon Apr 18 17:12:14 2016 -0700 ---------------------------------------------------------------------- core/Lucy/Test/Util/TestNumberUtils.c | 104 ++++++++++++++++++++++++++++- core/Lucy/Util/NumberUtils.cfh | 52 +++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/cef8cbc1/core/Lucy/Test/Util/TestNumberUtils.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestNumberUtils.c b/core/Lucy/Test/Util/TestNumberUtils.c index ffd3079..125f325 100644 --- a/core/Lucy/Test/Util/TestNumberUtils.c +++ b/core/Lucy/Test/Util/TestNumberUtils.c @@ -94,6 +94,55 @@ test_u4(TestBatchRunner *runner) { } static void +test_ci32(TestBatchRunner *runner) { + int64_t mins[] = { -500, -0x4000 - 100, INT32_MIN }; + int64_t limits[] = { 500, -0x4000 + 100, INT32_MIN + 10 }; + int32_t set_num; + int32_t num_sets = sizeof(mins) / sizeof(int64_t); + size_t count = 64; + int64_t *ints = NULL; + size_t amount = count * CI32_MAX_BYTES; + char *encoded = (char*)CALLOCATE(amount, sizeof(char)); + char *target = encoded; + char *limit = target + amount; + const char *decode; + + for (set_num = 0; set_num < num_sets; set_num++) { + const char *skip; + ints = TestUtils_random_i64s(ints, count, + mins[set_num], limits[set_num]); + target = encoded; + for (size_t i = 0; i < count; i++) { + NumUtil_encode_ci32((int32_t)ints[i], &target); + 
} + decode = encoded; + skip = encoded; + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_decode_ci32(&decode), ints[i], + "ci32 %" PRId64, ints[i]); + NumUtil_skip_cint(&skip); + if (decode > limit) { THROW(ERR, "overrun"); } + } + TEST_TRUE(runner, skip == decode, "skip %" PRIu64 " == %" PRIu64, + (uint64_t)skip, (uint64_t)decode); + } + + target = encoded; + NumUtil_encode_ci32(INT32_MAX, &target); + decode = encoded; + TEST_INT_EQ(runner, NumUtil_decode_ci32(&decode), INT32_MAX, + "ci32 INT32_MAX"); + target = encoded; + NumUtil_encode_ci32(INT32_MIN, &target); + decode = encoded; + TEST_INT_EQ(runner, NumUtil_decode_ci32(&decode), INT32_MIN, + "ci32 INT32_MIN"); + + FREEMEM(encoded); + FREEMEM(ints); +} + +static void test_cu32(TestBatchRunner *runner) { uint64_t mins[] = { 0, 0x4000 - 100, (uint32_t)INT32_MAX - 100, UINT32_MAX - 10 }; uint64_t limits[] = { 500, 0x4000 + 100, (uint32_t)INT32_MAX + 100, UINT32_MAX }; @@ -156,6 +205,57 @@ test_cu32(TestBatchRunner *runner) { } static void +test_ci64(TestBatchRunner *runner) { + int64_t mins[] = { -500, -0x4000 - 100, (int64_t)INT32_MIN - 100, INT64_MIN }; + int64_t limits[] = { 500, -0x4000 + 100, (int64_t)INT32_MIN + 1000, INT64_MIN + 10 }; + int32_t set_num; + int32_t num_sets = sizeof(mins) / sizeof(int64_t); + size_t count = 64; + int64_t *ints = NULL; + size_t amount = count * CI64_MAX_BYTES; + char *encoded = (char*)CALLOCATE(amount, sizeof(char)); + char *target = encoded; + char *limit = target + amount; + const char *decode; + + for (set_num = 0; set_num < num_sets; set_num++) { + const char *skip; + ints = TestUtils_random_i64s(ints, count, + mins[set_num], limits[set_num]); + target = encoded; + for (size_t i = 0; i < count; i++) { + NumUtil_encode_ci64(ints[i], &target); + } + decode = encoded; + skip = encoded; + for (size_t i = 0; i < count; i++) { + int64_t got = NumUtil_decode_ci64(&decode); + TEST_INT_EQ(runner, got, ints[i], + "ci64 %" PRId64 " == %" PRId64, got, ints[i]); + 
if (decode > limit) { THROW(ERR, "overrun"); } + NumUtil_skip_cint(&skip); + } + TEST_TRUE(runner, skip == decode, "skip %lu == %lu", + (unsigned long)skip, (unsigned long)decode); + } + + target = encoded; + NumUtil_encode_ci64(INT64_MAX, &target); + decode = encoded; + int64_t got = NumUtil_decode_ci64(&decode); + TEST_INT_EQ(runner, got, INT64_MAX, "ci64 INT64_MAX"); + + target = encoded; + NumUtil_encode_ci64(INT64_MIN, &target); + decode = encoded; + got = NumUtil_decode_ci64(&decode); + TEST_INT_EQ(runner, got, INT64_MIN, "ci64 INT64_MIN"); + + FREEMEM(encoded); + FREEMEM(ints); +} + +static void test_cu64(TestBatchRunner *runner) { uint64_t mins[] = { 0, 0x4000 - 100, (uint64_t)UINT32_MAX - 100, UINT64_MAX - 10 }; uint64_t limits[] = { 500, 0x4000 + 100, (uint64_t)UINT32_MAX + 1000, UINT64_MAX }; @@ -359,12 +459,14 @@ test_bigend_f64(TestBatchRunner *runner) { void TestNumUtil_Run_IMP(TestNumberUtils *self, TestBatchRunner *runner) { - TestBatchRunner_Plan(runner, (TestBatch*)self, 1196); + TestBatchRunner_Plan(runner, (TestBatch*)self, 1655); srand((unsigned int)time((time_t*)NULL)); test_u1(runner); test_u2(runner); test_u4(runner); + test_ci32(runner); test_cu32(runner); + test_ci64(runner); test_cu64(runner); test_bigend_u16(runner); test_bigend_u32(runner); http://git-wip-us.apache.org/repos/asf/lucy/blob/cef8cbc1/core/Lucy/Util/NumberUtils.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/NumberUtils.cfh b/core/Lucy/Util/NumberUtils.cfh index 3d9c79d..8d1b0b1 100644 --- a/core/Lucy/Util/NumberUtils.cfh +++ b/core/Lucy/Util/NumberUtils.cfh @@ -87,6 +87,13 @@ inert class Lucy::Util::NumberUtils nickname NumUtil { inert inline void encode_c32(uint32_t value, char **dest); + /** Encode a compressed 32-bit signed integer at the space pointed to by + * `dest`. As a side effect, `dest` will be advanced to immediately after + * the end of the compressed data. 
+ */ + inert inline void + encode_ci32(int32_t value, char **dest); + /** Encode a compressed 32-bit unsigned integer at the space pointed to by * `dest`. As a side effect, `dest` will be advanced to immediately after * the end of the compressed data. @@ -117,6 +124,13 @@ inert class Lucy::Util::NumberUtils nickname NumUtil { inert inline void encode_c64(uint64_t value, char **dest); + /** Encode a compressed 64-bit signed integer at the space pointed to by + * `dest`. As a side effect, `dest` will be advanced to immediately after + * the end of the compressed data. + */ + inert inline void + encode_ci64(int64_t value, char **dest); + /** Encode a compressed 64-bit unsigned integer at the space pointed to by * `dest`. As a side effect, `dest` will be advanced to immediately after * the end of the compressed data. @@ -131,6 +145,13 @@ inert class Lucy::Util::NumberUtils nickname NumUtil { inert inline uint32_t decode_c32(const char **source); + /** Read a compressed 32-bit signed integer from the buffer pointed to + * by `source`. As a side effect, advance the pointer, consuming the + * bytes occupied by the compressed number. + */ + inert inline int32_t + decode_ci32(const char **source); + /** Read a compressed 32-bit unsigned integer from the buffer pointed to * by `source`. As a side effect, advance the pointer, consuming the * bytes occupied by the compressed number. @@ -145,6 +166,13 @@ inert class Lucy::Util::NumberUtils nickname NumUtil { inert inline uint64_t decode_c64(const char **source); + /** Read a compressed 64-bit signed integer from the buffer pointed to + * by `source`. As a side effect, advance the pointer, consuming the + * bytes occupied by the compressed number. + */ + inert inline int64_t + decode_ci64(const char **source); + /** Read a compressed 64-bit unsigned integer from the buffer pointed to * by `source`. As a side effect, advance the pointer, consuming the * bytes occupied by the compressed number. 
@@ -332,7 +360,9 @@ lucy_NumUtil_decode_bigend_f64(const void *source) { #define LUCY_NUMUTIL_C32_MAX_BYTES ((sizeof(uint32_t) * 8 / 7) + 1) /* 5 */ #define LUCY_NUMUTIL_C64_MAX_BYTES ((sizeof(uint64_t) * 8 / 7) + 1) /* 10 */ +#define LUCY_NUMUTIL_CI32_MAX_BYTES ((sizeof(int32_t) * 8 / 7) + 1) /* 5 */ #define LUCY_NUMUTIL_CU32_MAX_BYTES ((sizeof(uint32_t) * 8 / 7) + 1) /* 5 */ +#define LUCY_NUMUTIL_CI64_MAX_BYTES ((sizeof(int64_t) * 8 / 7) + 1) /* 10 */ #define LUCY_NUMUTIL_CU64_MAX_BYTES ((sizeof(uint64_t) * 8 / 7) + 1) /* 10 */ static CFISH_INLINE void @@ -341,6 +371,11 @@ lucy_NumUtil_encode_c32(uint32_t value, char **out_buf) { } static CFISH_INLINE void +lucy_NumUtil_encode_ci32(int32_t value, char **out_buf) { + lucy_NumUtil_encode_cu32((uint32_t)value, out_buf); +} + +static CFISH_INLINE void lucy_NumUtil_encode_cu32(uint32_t value, char **out_buf) { uint8_t buf[LUCY_NUMUTIL_CU32_MAX_BYTES]; uint8_t *const limit = buf + sizeof(buf); @@ -365,6 +400,11 @@ lucy_NumUtil_encode_c64(uint64_t value, char **out_buf) { } static CFISH_INLINE void +lucy_NumUtil_encode_ci64(int64_t value, char **out_buf) { + lucy_NumUtil_encode_cu64((uint64_t)value, out_buf); +} + +static CFISH_INLINE void lucy_NumUtil_encode_cu64(uint64_t value, char **out_buf) { uint8_t buf[LUCY_NUMUTIL_CU64_MAX_BYTES]; uint8_t *const limit = buf + sizeof(buf); @@ -424,6 +464,11 @@ lucy_NumUtil_decode_c32(const char **source_ptr) { return decoded; } +static CFISH_INLINE int32_t +lucy_NumUtil_decode_ci32(const char **source_ptr) { + return (int32_t)lucy_NumUtil_decode_cu32(source_ptr); +} + static CFISH_INLINE uint32_t lucy_NumUtil_decode_cu32(const char **source_ptr) { const char *source = *source_ptr; @@ -442,6 +487,11 @@ lucy_NumUtil_decode_c64(const char **source_ptr) { return decoded; } +static CFISH_INLINE int64_t +lucy_NumUtil_decode_ci64(const char **source_ptr) { + return (int64_t)lucy_NumUtil_decode_cu64(source_ptr); +} + static CFISH_INLINE uint64_t lucy_NumUtil_decode_cu64(const char 
**source_ptr) { const char *source = *source_ptr; @@ -530,6 +580,8 @@ lucy_NumUtil_u4set(void *array, uint32_t tick, uint8_t value) { #ifdef LUCY_USE_SHORT_NAMES #define C32_MAX_BYTES LUCY_NUMUTIL_C32_MAX_BYTES #define C64_MAX_BYTES LUCY_NUMUTIL_C64_MAX_BYTES + #define CI32_MAX_BYTES LUCY_NUMUTIL_CI32_MAX_BYTES + #define CI64_MAX_BYTES LUCY_NUMUTIL_CI64_MAX_BYTES #define CU32_MAX_BYTES LUCY_NUMUTIL_CU32_MAX_BYTES #define CU64_MAX_BYTES LUCY_NUMUTIL_CU64_MAX_BYTES #endif
