Copy NumberUtils from Clownfish to Lucy
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/8782bd56 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/8782bd56 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/8782bd56 Branch: refs/heads/master Commit: 8782bd56b4d204116400cc9729ad5de11260bb3d Parents: 51825fd Author: Nick Wellnhofer <[email protected]> Authored: Sat May 9 17:50:14 2015 +0200 Committer: Nick Wellnhofer <[email protected]> Committed: Tue May 12 19:55:15 2015 +0200 ---------------------------------------------------------------------- core/Lucy/Index/DocVector.c | 1 + core/Lucy/Index/HighlightWriter.c | 1 + core/Lucy/Index/LexIndex.c | 1 + core/Lucy/Index/Posting/RichPosting.c | 1 + core/Lucy/Index/Posting/ScorePosting.c | 1 + core/Lucy/Index/SortCache.c | 1 + core/Lucy/Index/SortFieldWriter.c | 1 + core/Lucy/Object/BitVector.c | 1 + core/Lucy/Search/Collector/SortCollector.c | 1 + core/Lucy/Store/InStream.c | 1 + core/Lucy/Store/OutStream.c | 1 + core/Lucy/Test.c | 2 + core/Lucy/Test/Store/TestIOChunks.c | 1 - core/Lucy/Test/Store/TestIOPrimitives.c | 2 +- core/Lucy/Test/Store/TestInStream.c | 1 - core/Lucy/Test/Util/TestNumberUtils.c | 375 +++++++++++++++++++ core/Lucy/Test/Util/TestNumberUtils.cfh | 29 ++ core/Lucy/Util/NumberUtils.cfh | 466 ++++++++++++++++++++++++ core/Lucy/Util/ToolSet.h | 1 - perl/t/core/030-number_utils.t | 25 ++ 20 files changed, 909 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/DocVector.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/DocVector.c b/core/Lucy/Index/DocVector.c index cc08125..4868890 100644 --- a/core/Lucy/Index/DocVector.c +++ b/core/Lucy/Index/DocVector.c @@ -25,6 +25,7 @@ #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" #include "Lucy/Util/Freezer.h" +#include 
"Lucy/Util/NumberUtils.h" // Extract a document's compressed TermVector data into (term_text => // compressed positional data) pairs. http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/HighlightWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/HighlightWriter.c b/core/Lucy/Index/HighlightWriter.c index fbb4e91..a8115ae 100644 --- a/core/Lucy/Index/HighlightWriter.c +++ b/core/Lucy/Index/HighlightWriter.c @@ -37,6 +37,7 @@ #include "Lucy/Store/OutStream.h" #include "Lucy/Store/InStream.h" #include "Lucy/Util/Freezer.h" +#include "Lucy/Util/NumberUtils.h" static OutStream* S_lazy_init(HighlightWriter *self); http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/LexIndex.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/LexIndex.c b/core/Lucy/Index/LexIndex.c index 7cbe022..f9f6064 100644 --- a/core/Lucy/Index/LexIndex.c +++ b/core/Lucy/Index/LexIndex.c @@ -26,6 +26,7 @@ #include "Lucy/Plan/Schema.h" #include "Lucy/Store/Folder.h" #include "Lucy/Store/InStream.h" +#include "Lucy/Util/NumberUtils.h" // Read the data we've arrived at after a seek operation. 
static void http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/Posting/RichPosting.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/Posting/RichPosting.c b/core/Lucy/Index/Posting/RichPosting.c index a008514..17e3e95 100644 --- a/core/Lucy/Index/Posting/RichPosting.c +++ b/core/Lucy/Index/Posting/RichPosting.c @@ -31,6 +31,7 @@ #include "Lucy/Search/Compiler.h" #include "Lucy/Store/InStream.h" #include "Lucy/Util/MemoryPool.h" +#include "Lucy/Util/NumberUtils.h" #define FREQ_MAX_LEN C32_MAX_BYTES #define MAX_RAW_POSTING_LEN(_raw_posting_size, _text_len, _freq) \ http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/Posting/ScorePosting.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/Posting/ScorePosting.c b/core/Lucy/Index/Posting/ScorePosting.c index 18b4692..3e995a8 100644 --- a/core/Lucy/Index/Posting/ScorePosting.c +++ b/core/Lucy/Index/Posting/ScorePosting.c @@ -32,6 +32,7 @@ #include "Lucy/Search/Matcher.h" #include "Lucy/Store/InStream.h" #include "Lucy/Util/MemoryPool.h" +#include "Lucy/Util/NumberUtils.h" #define FIELD_BOOST_LEN 1 #define FREQ_MAX_LEN C32_MAX_BYTES http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/SortCache.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortCache.c b/core/Lucy/Index/SortCache.c index c451da6..fe25049 100644 --- a/core/Lucy/Index/SortCache.c +++ b/core/Lucy/Index/SortCache.c @@ -19,6 +19,7 @@ #include "Lucy/Index/SortCache.h" #include "Lucy/Plan/FieldType.h" +#include "Lucy/Util/NumberUtils.h" SortCache* SortCache_init(SortCache *self, String *field, FieldType *type, http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Index/SortFieldWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortFieldWriter.c 
b/core/Lucy/Index/SortFieldWriter.c index b40fae7..d63bed2 100644 --- a/core/Lucy/Index/SortFieldWriter.c +++ b/core/Lucy/Index/SortFieldWriter.c @@ -35,6 +35,7 @@ #include "Lucy/Store/Folder.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" +#include "Lucy/Util/NumberUtils.h" #include "Clownfish/Blob.h" #include "Clownfish/Util/Memory.h" #include "Clownfish/Util/SortUtils.h" http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Object/BitVector.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Object/BitVector.c b/core/Lucy/Object/BitVector.c index efc7278..8b7dd9e 100644 --- a/core/Lucy/Object/BitVector.c +++ b/core/Lucy/Object/BitVector.c @@ -20,6 +20,7 @@ #include <math.h> #include "Lucy/Object/BitVector.h" +#include "Lucy/Util/NumberUtils.h" // Shared subroutine for performing both OR and XOR ops. #define DO_OR 1 http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Search/Collector/SortCollector.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Search/Collector/SortCollector.c b/core/Lucy/Search/Collector/SortCollector.c index 5354642..e58e80a 100644 --- a/core/Lucy/Search/Collector/SortCollector.c +++ b/core/Lucy/Search/Collector/SortCollector.c @@ -33,6 +33,7 @@ #include "Lucy/Search/Matcher.h" #include "Lucy/Search/SortRule.h" #include "Lucy/Search/SortSpec.h" +#include "Lucy/Util/NumberUtils.h" #define COMPARE_BY_SCORE 0x1 #define COMPARE_BY_SCORE_REV 0x2 http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Store/InStream.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Store/InStream.c b/core/Lucy/Store/InStream.c index 43881e8..fd5b36f 100644 --- a/core/Lucy/Store/InStream.c +++ b/core/Lucy/Store/InStream.c @@ -25,6 +25,7 @@ #include "Lucy/Store/FileWindow.h" #include "Lucy/Store/RAMFile.h" #include "Lucy/Store/RAMFileHandle.h" +#include 
"Lucy/Util/NumberUtils.h" // Inlined version of InStream_Tell. static CFISH_INLINE int64_t http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Store/OutStream.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Store/OutStream.c b/core/Lucy/Store/OutStream.c index f61466e..ca4e676 100644 --- a/core/Lucy/Store/OutStream.c +++ b/core/Lucy/Store/OutStream.c @@ -26,6 +26,7 @@ #include "Lucy/Store/InStream.h" #include "Lucy/Store/RAMFile.h" #include "Lucy/Store/RAMFileHandle.h" +#include "Lucy/Util/NumberUtils.h" // Inlined version of OutStream_Write_Bytes. static CFISH_INLINE void http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test.c b/core/Lucy/Test.c index 6817911..3046494 100644 --- a/core/Lucy/Test.c +++ b/core/Lucy/Test.c @@ -81,6 +81,7 @@ #include "Lucy/Test/Util/TestIndexFileNames.h" #include "Lucy/Test/Util/TestJson.h" #include "Lucy/Test/Util/TestMemoryPool.h" +#include "Lucy/Test/Util/TestNumberUtils.h" #include "Lucy/Test/Util/TestPriorityQueue.h" #include "Lucy/Test/Util/TestSortExternal.h" @@ -92,6 +93,7 @@ Test_create_test_suite() { TestSuite_Add_Batch(suite, (TestBatch*)TestBitVector_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestSortExternal_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestMemPool_new()); + TestSuite_Add_Batch(suite, (TestBatch*)TestNumUtil_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestIxFileNames_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestJson_new()); TestSuite_Add_Batch(suite, (TestBatch*)TestFreezer_new()); http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test/Store/TestIOChunks.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Store/TestIOChunks.c b/core/Lucy/Test/Store/TestIOChunks.c index c09d665..bf35d0b 100644 --- a/core/Lucy/Test/Store/TestIOChunks.c +++ 
b/core/Lucy/Test/Store/TestIOChunks.c @@ -33,7 +33,6 @@ #include "Lucy/Store/OutStream.h" #include "Lucy/Store/RAMFile.h" #include "Lucy/Store/RAMFileHandle.h" -#include "Clownfish/Util/NumberUtils.h" TestIOChunks* TestIOChunks_new() { http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test/Store/TestIOPrimitives.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Store/TestIOPrimitives.c b/core/Lucy/Test/Store/TestIOPrimitives.c index 30008d6..551f1ac 100644 --- a/core/Lucy/Test/Store/TestIOPrimitives.c +++ b/core/Lucy/Test/Store/TestIOPrimitives.c @@ -33,7 +33,7 @@ #include "Lucy/Store/OutStream.h" #include "Lucy/Store/RAMFile.h" #include "Lucy/Store/RAMFileHandle.h" -#include "Clownfish/Util/NumberUtils.h" +#include "Lucy/Util/NumberUtils.h" TestIOPrimitives* TestIOPrimitives_new() { http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test/Store/TestInStream.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Store/TestInStream.c b/core/Lucy/Test/Store/TestInStream.c index 248e4f2..764fa83 100644 --- a/core/Lucy/Test/Store/TestInStream.c +++ b/core/Lucy/Test/Store/TestInStream.c @@ -30,7 +30,6 @@ #include "Lucy/Store/OutStream.h" #include "Lucy/Store/RAMFile.h" #include "Lucy/Store/RAMFileHandle.h" -#include "Clownfish/Util/NumberUtils.h" TestInStream* TestInStream_new() { http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test/Util/TestNumberUtils.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestNumberUtils.c b/core/Lucy/Test/Util/TestNumberUtils.c new file mode 100644 index 0000000..c639daf --- /dev/null +++ b/core/Lucy/Test/Util/TestNumberUtils.c @@ -0,0 +1,375 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdlib.h> +#include <time.h> + +#define TESTLUCY_USE_SHORT_NAMES +#include "Lucy/Util/ToolSet.h" + +#include "charmony.h" + +#include "Lucy/Test/Util/TestNumberUtils.h" + +#include "Clownfish/TestHarness/TestBatchRunner.h" +#include "Clownfish/TestHarness/TestUtils.h" +#include "Lucy/Util/NumberUtils.h" + +TestNumberUtils* +TestNumUtil_new() { + return (TestNumberUtils*)Class_Make_Obj(TESTNUMBERUTILS); +} + +static void +test_u1(TestBatchRunner *runner) { + size_t count = 64; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, 2); + size_t amount = count / 8; + uint8_t *bits = (uint8_t*)CALLOCATE(amount, sizeof(uint8_t)); + + for (size_t i = 0; i < count; i++) { + if (ints[i]) { NumUtil_u1set(bits, i); } + } + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_u1get(bits, i), (long)ints[i], + "u1 set/get"); + } + + for (size_t i = 0; i < count; i++) { + NumUtil_u1flip(bits, i); + } + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_u1get(bits, i), !ints[i], "u1 flip"); + } + + FREEMEM(bits); + FREEMEM(ints); +} + +static void +test_u2(TestBatchRunner *runner) { + size_t count = 32; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, 4); + uint8_t *bits = (uint8_t*)CALLOCATE((count / 4), sizeof(uint8_t)); + + for (size_t i 
= 0; i < count; i++) { + NumUtil_u2set(bits, i, (uint8_t)ints[i]); + } + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_u2get(bits, i), (long)ints[i], "u2"); + } + + FREEMEM(bits); + FREEMEM(ints); +} + +static void +test_u4(TestBatchRunner *runner) { + size_t count = 128; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, 16); + uint8_t *bits = (uint8_t*)CALLOCATE((count / 2), sizeof(uint8_t)); + + for (size_t i = 0; i < count; i++) { + NumUtil_u4set(bits, i, (uint8_t)ints[i]); + } + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_u4get(bits, i), (long)ints[i], "u4"); + } + + FREEMEM(bits); + FREEMEM(ints); +} + +static void +test_c32(TestBatchRunner *runner) { + uint64_t mins[] = { 0, 0x4000 - 100, (uint32_t)INT32_MAX - 100, UINT32_MAX - 10 }; + uint64_t limits[] = { 500, 0x4000 + 100, (uint32_t)INT32_MAX + 100, UINT32_MAX }; + uint32_t set_num; + uint32_t num_sets = sizeof(mins) / sizeof(uint64_t); + size_t count = 64; + uint64_t *ints = NULL; + size_t amount = count * C32_MAX_BYTES; + char *encoded = (char*)CALLOCATE(amount, sizeof(char)); + char *target = encoded; + char *limit = target + amount; + const char *decode; + + for (set_num = 0; set_num < num_sets; set_num++) { + const char *skip; + ints = TestUtils_random_u64s(ints, count, + mins[set_num], limits[set_num]); + target = encoded; + for (size_t i = 0; i < count; i++) { + NumUtil_encode_c32((uint32_t)ints[i], &target); + } + decode = encoded; + skip = encoded; + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_decode_c32(&decode), (long)ints[i], + "c32 %lu", (long)ints[i]); + NumUtil_skip_cint(&skip); + if (decode > limit) { THROW(ERR, "overrun"); } + } + TEST_TRUE(runner, skip == decode, "skip %lu == %lu", + (unsigned long)skip, (unsigned long)decode); + + target = encoded; + for (size_t i = 0; i < count; i++) { + NumUtil_encode_padded_c32((uint32_t)ints[i], &target); + } + TEST_TRUE(runner, target == limit, + "padded c32 uses 5 bytes (%lu 
== %lu)", (unsigned long)target, + (unsigned long)limit); + decode = encoded; + skip = encoded; + for (size_t i = 0; i < count; i++) { + TEST_INT_EQ(runner, NumUtil_decode_c32(&decode), (long)ints[i], + "padded c32 %lu", (long)ints[i]); + NumUtil_skip_cint(&skip); + if (decode > limit) { THROW(ERR, "overrun"); } + } + TEST_TRUE(runner, skip == decode, "skip padded %lu == %lu", + (unsigned long)skip, (unsigned long)decode); + } + + target = encoded; + NumUtil_encode_c32(UINT32_MAX, &target); + decode = encoded; + TEST_INT_EQ(runner, NumUtil_decode_c32(&decode), UINT32_MAX, "c32 UINT32_MAX"); + + FREEMEM(encoded); + FREEMEM(ints); +} + +static void +test_c64(TestBatchRunner *runner) { + uint64_t mins[] = { 0, 0x4000 - 100, (uint64_t)UINT32_MAX - 100, UINT64_MAX - 10 }; + uint64_t limits[] = { 500, 0x4000 + 100, (uint64_t)UINT32_MAX + 1000, UINT64_MAX }; + uint32_t set_num; + uint32_t num_sets = sizeof(mins) / sizeof(uint64_t); + size_t count = 64; + uint64_t *ints = NULL; + size_t amount = count * C64_MAX_BYTES; + char *encoded = (char*)CALLOCATE(amount, sizeof(char)); + char *target = encoded; + char *limit = target + amount; + const char *decode; + + for (set_num = 0; set_num < num_sets; set_num++) { + const char *skip; + ints = TestUtils_random_u64s(ints, count, + mins[set_num], limits[set_num]); + target = encoded; + for (size_t i = 0; i < count; i++) { + NumUtil_encode_c64(ints[i], &target); + } + decode = encoded; + skip = encoded; + for (size_t i = 0; i < count; i++) { + uint64_t got = NumUtil_decode_c64(&decode); + TEST_TRUE(runner, got == ints[i], + "c64 %" PRIu64 " == %" PRIu64, got, ints[i]); + if (decode > limit) { THROW(ERR, "overrun"); } + NumUtil_skip_cint(&skip); + } + TEST_TRUE(runner, skip == decode, "skip %lu == %lu", + (unsigned long)skip, (unsigned long)decode); + } + + target = encoded; + NumUtil_encode_c64(UINT64_MAX, &target); + + decode = encoded; + uint64_t got = NumUtil_decode_c64(&decode); + TEST_TRUE(runner, got == UINT64_MAX, "c64 
UINT64_MAX"); + + FREEMEM(encoded); + FREEMEM(ints); +} + +static void +test_bigend_u16(TestBatchRunner *runner) { + size_t count = 32; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, UINT16_MAX + 1); + size_t amount = (count + 1) * sizeof(uint16_t); + char *allocated = (char*)CALLOCATE(amount, sizeof(char)); + char *encoded = allocated + 1; // Intentionally misaligned. + char *target = encoded; + + for (size_t i = 0; i < count; i++) { + NumUtil_encode_bigend_u16((uint16_t)ints[i], &target); + target += sizeof(uint16_t); + } + target = encoded; + for (size_t i = 0; i < count; i++) { + uint16_t got = NumUtil_decode_bigend_u16(target); + TEST_INT_EQ(runner, got, (long)ints[i], "bigend u16"); + target += sizeof(uint16_t); + } + + target = encoded; + NumUtil_encode_bigend_u16(1, &target); + TEST_INT_EQ(runner, encoded[0], 0, "Truly big-endian u16"); + TEST_INT_EQ(runner, encoded[1], 1, "Truly big-endian u16"); + + FREEMEM(allocated); + FREEMEM(ints); +} + +static void +test_bigend_u32(TestBatchRunner *runner) { + size_t count = 32; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, UINT64_C(1) + UINT32_MAX); + size_t amount = (count + 1) * sizeof(uint32_t); + char *allocated = (char*)CALLOCATE(amount, sizeof(char)); + char *encoded = allocated + 1; // Intentionally misaligned. 
+ char *target = encoded; + + for (size_t i = 0; i < count; i++) { + NumUtil_encode_bigend_u32((uint32_t)ints[i], &target); + target += sizeof(uint32_t); + } + target = encoded; + for (size_t i = 0; i < count; i++) { + uint32_t got = NumUtil_decode_bigend_u32(target); + TEST_INT_EQ(runner, got, (long)ints[i], "bigend u32"); + target += sizeof(uint32_t); + } + + target = encoded; + NumUtil_encode_bigend_u32(1, &target); + TEST_INT_EQ(runner, encoded[0], 0, "Truly big-endian u32"); + TEST_INT_EQ(runner, encoded[3], 1, "Truly big-endian u32"); + + FREEMEM(allocated); + FREEMEM(ints); +} + +static void +test_bigend_u64(TestBatchRunner *runner) { + size_t count = 32; + uint64_t *ints = TestUtils_random_u64s(NULL, count, 0, UINT64_MAX); + size_t amount = (count + 1) * sizeof(uint64_t); + char *allocated = (char*)CALLOCATE(amount, sizeof(char)); + char *encoded = allocated + 1; // Intentionally misaligned. + char *target = encoded; + + for (size_t i = 0; i < count; i++) { + NumUtil_encode_bigend_u64(ints[i], &target); + target += sizeof(uint64_t); + } + target = encoded; + for (size_t i = 0; i < count; i++) { + uint64_t got = NumUtil_decode_bigend_u64(target); + TEST_TRUE(runner, got == ints[i], "bigend u64"); + target += sizeof(uint64_t); + } + + target = encoded; + NumUtil_encode_bigend_u64(1, &target); + TEST_INT_EQ(runner, encoded[0], 0, "Truly big-endian"); + TEST_INT_EQ(runner, encoded[7], 1, "Truly big-endian"); + + FREEMEM(allocated); + FREEMEM(ints); +} + +static void +test_bigend_f32(TestBatchRunner *runner) { + float source[] = { -1.3f, 0.0f, 100.2f }; + size_t count = 3; + size_t amount = (count + 1) * sizeof(float); + uint8_t *allocated = (uint8_t*)CALLOCATE(amount, sizeof(uint8_t)); + uint8_t *encoded = allocated + 1; // Intentionally misaligned. 
+ uint8_t *target = encoded; + + for (size_t i = 0; i < count; i++) { + NumUtil_encode_bigend_f32(source[i], &target); + target += sizeof(float); + } + target = encoded; + for (size_t i = 0; i < count; i++) { + float got = NumUtil_decode_bigend_f32(target); + TEST_TRUE(runner, got == source[i], "bigend f32"); + target += sizeof(float); + } + + target = encoded; + NumUtil_encode_bigend_f32(-2.0f, &target); + TEST_INT_EQ(runner, (encoded[0] & 0x80), 0x80, + "Truly big-endian (IEEE 754 sign bit set for negative number)"); + TEST_INT_EQ(runner, encoded[0], 0xC0, + "IEEE 754 representation of -2.0f, byte 0"); + for (size_t i = 1; i < sizeof(float); i++) { + TEST_INT_EQ(runner, encoded[i], 0, + "IEEE 754 representation of -2.0f, byte %d", (int)i); + } + + FREEMEM(allocated); +} + +static void +test_bigend_f64(TestBatchRunner *runner) { + double source[] = { -1.3, 0.0, 100.2 }; + size_t count = 3; + size_t amount = (count + 1) * sizeof(double); + uint8_t *allocated = (uint8_t*)CALLOCATE(amount, sizeof(uint8_t)); + uint8_t *encoded = allocated + 1; // Intentionally misaligned. 
+ uint8_t *target = encoded; + + for (size_t i = 0; i < count; i++) { + NumUtil_encode_bigend_f64(source[i], &target); + target += sizeof(double); + } + target = encoded; + for (size_t i = 0; i < count; i++) { + double got = NumUtil_decode_bigend_f64(target); + TEST_TRUE(runner, got == source[i], "bigend f64"); + target += sizeof(double); + } + + target = encoded; + NumUtil_encode_bigend_f64(-2.0, &target); + TEST_INT_EQ(runner, (encoded[0] & 0x80), 0x80, + "Truly big-endian (IEEE 754 sign bit set for negative number)"); + TEST_INT_EQ(runner, encoded[0], 0xC0, + "IEEE 754 representation of -2.0, byte 0"); + for (size_t i = 1; i < sizeof(double); i++) { + TEST_INT_EQ(runner, encoded[i], 0, + "IEEE 754 representation of -2.0, byte %d", (int)i); + } + + FREEMEM(allocated); +} + +void +TestNumUtil_Run_IMP(TestNumberUtils *self, TestBatchRunner *runner) { + TestBatchRunner_Plan(runner, (TestBatch*)self, 1196); + srand((unsigned int)time((time_t*)NULL)); + test_u1(runner); + test_u2(runner); + test_u4(runner); + test_c32(runner); + test_c64(runner); + test_bigend_u16(runner); + test_bigend_u32(runner); + test_bigend_u64(runner); + test_bigend_f32(runner); + test_bigend_f64(runner); +} + + + http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Test/Util/TestNumberUtils.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Test/Util/TestNumberUtils.cfh b/core/Lucy/Test/Util/TestNumberUtils.cfh new file mode 100644 index 0000000..fcb20a9 --- /dev/null +++ b/core/Lucy/Test/Util/TestNumberUtils.cfh @@ -0,0 +1,29 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +parcel TestLucy; + +class Lucy::Test::Util::TestNumberUtils nickname TestNumUtil + inherits Clownfish::TestHarness::TestBatch { + + inert incremented TestNumberUtils* + new(); + + void + Run(TestNumberUtils *self, TestBatchRunner *runner); +} + + http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Util/NumberUtils.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/NumberUtils.cfh b/core/Lucy/Util/NumberUtils.cfh new file mode 100644 index 0000000..b7c08b4 --- /dev/null +++ b/core/Lucy/Util/NumberUtils.cfh @@ -0,0 +1,466 @@ +/* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +parcel Lucy; + +/** Provide various number-related utilities. 
+ * + * Provide utilities for dealing with endian issues, sub-byte-width arrays, + * compressed integers, and so on. + */ +inert class Lucy::Util::NumberUtils nickname NumUtil { + + /** Encode an unsigned 16-bit integer as 2 bytes in the buffer provided, + * using big-endian byte order. + */ + inert inline void + encode_bigend_u16(uint16_t value, void *dest); + + /** Encode an unsigned 32-bit integer as 4 bytes in the buffer provided, + * using big-endian byte order. + */ + inert inline void + encode_bigend_u32(uint32_t value, void *dest); + + /** Encode an unsigned 64-bit integer as 8 bytes in the buffer provided, + * using big-endian byte order. + */ + inert inline void + encode_bigend_u64(uint64_t value, void *dest); + + /** Interpret a sequence of bytes as a big-endian unsigned 16-bit int. + */ + inert inline uint16_t + decode_bigend_u16(const void *source); + + /** Interpret a sequence of bytes as a big-endian unsigned 32-bit int. + */ + inert inline uint32_t + decode_bigend_u32(const void *source); + + /** Interpret a sequence of bytes as a big-endian unsigned 64-bit int. + */ + inert inline uint64_t + decode_bigend_u64(const void *source); + + /** Encode a 32-bit floating point number as 4 bytes in the buffer + * provided, using big-endian byte order. + */ + inert inline void + encode_bigend_f32(float value, void *dest); + + /** Encode a 64-bit floating point number as 8 bytes in the buffer + * provided, using big-endian byte order. + */ + inert inline void + encode_bigend_f64(double value, void *dest); + + /** Interpret a sequence of bytes as a 32-bit float stored in big-endian + * byte order. + */ + inert inline float + decode_bigend_f32(const void *source); + + /** Interpret a sequence of bytes as a 64-bit float stored in big-endian + * byte order. + */ + inert inline double + decode_bigend_f64(const void *source); + + /** Encode a C32 at the space pointed to by `dest`. 
As a side + * effect, `dest` will be advanced to immediately after the end + * of the C32. + */ + inert inline void + encode_c32(uint32_t value, char **dest); + + /** Encode a C32 at the space pointed to by `dest`, but add + * "leading zeroes" so that the space consumed will always be 5 bytes. As + * a side effect, `dest` will be advanced to immediately after + * the end of the C32. + */ + inert inline void + encode_padded_c32(uint32_t value, char **dest); + + /** Encode a C64 at the space pointed to by `dest`. As a side + * effect, `dest` will be advanced to immediately after the end + * of the C64. + */ + inert inline void + encode_c64(uint64_t value, char **dest); + + /** Read a C32 from the buffer pointed to by `source`. As a + * side effect, advance the pointer, consuming the bytes occupied by the + * C32. + */ + inert inline uint32_t + decode_c32(const char **source); + + /** Read a C64 from the buffer pointed to by `source`. As a + * side effect, advance the pointer, consuming the bytes occupied by the + * C64. + */ + inert inline uint64_t + decode_c64(const char **source); + + /** Advance `source` past one encoded C32 or C64. + */ + inert inline void + skip_cint(const char **source); + + /** Interpret `array` as an array of bits; return true if the + * bit at `tick` is set, false otherwise. + */ + inert inline bool + u1get(const void *array, uint32_t tick); + + /** Interpret `array` as an array of bits; set the bit at + * `tick`. + */ + inert inline void + u1set(void *array, uint32_t tick); + + /** Interpret `array` as an array of bits; clear the bit at + * `tick`. + */ + inert inline void + u1clear(void *array, uint32_t tick); + + /** Interpret `array` as an array of bits; flip the bit at + * `tick`. + */ + inert inline void + u1flip(void *array, uint32_t tick); + + /** Interpret `array` as an array of two-bit integers; return + * the value at `tick`. 
+ */ + inert inline uint8_t + u2get(const void *array, uint32_t tick); + + /** Interpret `array` as an array of two-bit integers; set the + * element at `tick` to `value`. + */ + inert inline void + u2set(void *array, uint32_t tick, uint8_t value); + + /** Interpret `array` as an array of four-bit integers; return + * the value at `tick`. + */ + inert inline uint8_t + u4get(const void *array, uint32_t tick); + + /** Interpret `array` as an array of four-bit integers; set the + * element at `tick` to `value`. + */ + inert inline void + u4set(void *array, uint32_t tick, uint8_t value); +} + +__C__ + +#include <string.h> + +static CFISH_INLINE void +lucy_NumUtil_encode_bigend_u16(uint16_t value, void *dest_ptr) { + uint8_t *dest = *(uint8_t**)dest_ptr; +#ifdef CFISH_BIG_END + memcpy(dest, &value, sizeof(uint16_t)); +#else /* little endian */ + uint8_t *source = (uint8_t*)&value; + dest[0] = source[1]; + dest[1] = source[0]; +#endif /* CFISH_BIG_END (and little endian) */ +} + +static CFISH_INLINE void +lucy_NumUtil_encode_bigend_u32(uint32_t value, void *dest_ptr) { + uint8_t *dest = *(uint8_t**)dest_ptr; +#ifdef CFISH_BIG_END + memcpy(dest, &value, sizeof(uint32_t)); +#else /* little endian */ + uint8_t *source = (uint8_t*)&value; + dest[0] = source[3]; + dest[1] = source[2]; + dest[2] = source[1]; + dest[3] = source[0]; +#endif /* CFISH_BIG_END (and little endian) */ +} + +static CFISH_INLINE void +lucy_NumUtil_encode_bigend_u64(uint64_t value, void *dest_ptr) { + uint8_t *dest = *(uint8_t**)dest_ptr; +#ifdef CFISH_BIG_END + memcpy(dest, &value, sizeof(uint64_t)); +#else /* little endian */ + uint8_t *source = (uint8_t*)&value; + dest[0] = source[7]; + dest[1] = source[6]; + dest[2] = source[5]; + dest[3] = source[4]; + dest[4] = source[3]; + dest[5] = source[2]; + dest[6] = source[1]; + dest[7] = source[0]; +#endif /* CFISH_BIG_END (and little endian) */ +} + +static CFISH_INLINE uint16_t +lucy_NumUtil_decode_bigend_u16(const void *source) { + const uint8_t *const 
buf = (const uint8_t*)source; + return ((uint16_t)buf[0] << 8) | + ((uint16_t)buf[1]); +} + +static CFISH_INLINE uint32_t +lucy_NumUtil_decode_bigend_u32(const void *source) { + const uint8_t *const buf = (const uint8_t*)source; + return ((uint32_t)buf[0] << 24) | + ((uint32_t)buf[1] << 16) | + ((uint32_t)buf[2] << 8) | + ((uint32_t)buf[3]); +} + +static CFISH_INLINE uint64_t +lucy_NumUtil_decode_bigend_u64(const void *source) { + const uint8_t *const buf = (const uint8_t*)source; + uint64_t high_bits = ((uint32_t)buf[0] << 24) | + ((uint32_t)buf[1] << 16) | + ((uint32_t)buf[2] << 8) | + ((uint32_t)buf[3]); + uint32_t low_bits = ((uint32_t)buf[4] << 24) | + ((uint32_t)buf[5] << 16) | + ((uint32_t)buf[6] << 8) | + ((uint32_t)buf[7]); + uint64_t retval = high_bits << 32; + retval |= low_bits; + return retval; +} + +static CFISH_INLINE void +lucy_NumUtil_encode_bigend_f32(float value, void *dest_ptr) { + uint8_t *dest = *(uint8_t**)dest_ptr; +#ifdef CFISH_BIG_END + memcpy(dest, &value, sizeof(float)); +#else + union { float f; uint32_t u32; } duo; + duo.f = value; + lucy_NumUtil_encode_bigend_u32(duo.u32, &dest); +#endif +} + +static CFISH_INLINE void +lucy_NumUtil_encode_bigend_f64(double value, void *dest_ptr) { + uint8_t *dest = *(uint8_t**)dest_ptr; +#ifdef CFISH_BIG_END + memcpy(dest, &value, sizeof(double)); +#else + union { double d; uint64_t u64; } duo; + duo.d = value; + lucy_NumUtil_encode_bigend_u64(duo.u64, &dest); +#endif +} + +static CFISH_INLINE float +lucy_NumUtil_decode_bigend_f32(const void *source) { + union { float f; uint32_t u32; } duo; + memcpy(&duo, source, sizeof(float)); +#ifdef CFISH_LITTLE_END + duo.u32 = lucy_NumUtil_decode_bigend_u32(&duo.u32); +#endif + return duo.f; +} + +static CFISH_INLINE double +lucy_NumUtil_decode_bigend_f64(const void *source) { + union { double d; uint64_t u64; } duo; + memcpy(&duo, source, sizeof(double)); +#ifdef CFISH_LITTLE_END + duo.u64 = lucy_NumUtil_decode_bigend_u64(&duo.u64); +#endif + return duo.d; +} + 
+#define LUCY_NUMUTIL_C32_MAX_BYTES ((sizeof(uint32_t) * 8 / 7) + 1) /* 5 */ +#define LUCY_NUMUTIL_C64_MAX_BYTES ((sizeof(uint64_t) * 8 / 7) + 1) /* 10 */ + +static CFISH_INLINE void +lucy_NumUtil_encode_c32(uint32_t value, char **out_buf) { + uint8_t buf[LUCY_NUMUTIL_C32_MAX_BYTES]; + uint8_t *const limit = buf + sizeof(buf); + uint8_t *ptr = limit - 1; + int num_bytes; + /* Write last byte first, which has no continue bit. */ + *ptr = value & 0x7f; + value >>= 7; + while (value) { + /* Work backwards, writing bytes with continue bits set. */ + *--ptr = ((value & 0x7f) | 0x80); + value >>= 7; + } + num_bytes = limit - ptr; + memcpy(*out_buf, ptr, num_bytes); + *out_buf += num_bytes; +} + +static CFISH_INLINE void +lucy_NumUtil_encode_c64(uint64_t value, char **out_buf) { + uint8_t buf[LUCY_NUMUTIL_C64_MAX_BYTES]; + uint8_t *const limit = buf + sizeof(buf); + uint8_t *ptr = limit - 1; + int num_bytes; + /* Write last byte first, which has no continue bit. */ + *ptr = value & 0x7f; + value >>= 7; + while (value) { + /* Work backwards, writing bytes with continue bits set. */ + *--ptr = ((value & 0x7f) | 0x80); + value >>= 7; + } + num_bytes = limit - ptr; + memcpy(*out_buf, ptr, num_bytes); + *out_buf += num_bytes; +} + +static CFISH_INLINE void +lucy_NumUtil_encode_padded_c32(uint32_t value, char **out_buf) { + uint8_t buf[LUCY_NUMUTIL_C32_MAX_BYTES] + = { 0x80, 0x80, 0x80, 0x80, 0x80 }; + uint8_t *const limit = buf + sizeof(buf); + uint8_t *ptr = limit - 1; + /* Write last byte first, which has no continue bit. */ + *ptr = value & 0x7f; + value >>= 7; + while (value) { + /* Work backwards, writing bytes with continue bits set. 
*/ + *--ptr = ((value & 0x7f) | 0x80); + value >>= 7; + } + memcpy(*out_buf, buf, LUCY_NUMUTIL_C32_MAX_BYTES); + *out_buf += sizeof(buf); +} + +/* Decode a compressed integer up to size of 'var', advancing 'source' */ +#define LUCY_NUMUTIL_DECODE_CINT(var, source) \ + do { \ + var = (*source & 0x7f); \ + while (*source++ & 0x80) { \ + var = (*source & 0x7f) | (var << 7); \ + } \ + } while (0) + +static CFISH_INLINE uint32_t +lucy_NumUtil_decode_c32(const char **source_ptr) { + const char *source = *source_ptr; + uint32_t decoded; + LUCY_NUMUTIL_DECODE_CINT(decoded, source); + *source_ptr = source; + return decoded; +} + +static CFISH_INLINE uint64_t +lucy_NumUtil_decode_c64(const char **source_ptr) { + const char *source = *source_ptr; + uint64_t decoded; + LUCY_NUMUTIL_DECODE_CINT(decoded, source); + *source_ptr = source; + return decoded; +} + +static CFISH_INLINE void +lucy_NumUtil_skip_cint(const char **source_ptr) { + const uint8_t *ptr = *(const uint8_t**)source_ptr; + while ((*ptr++ & 0x80) != 0) { } + *source_ptr = (const char*)ptr; +} + +static CFISH_INLINE bool +lucy_NumUtil_u1get(const void *array, uint32_t tick) { + uint8_t *const u8bits = (uint8_t*)array; + const uint32_t byte_offset = tick >> 3; + const uint8_t mask = 1 << (tick & 0x7); + return !((u8bits[byte_offset] & mask) == 0); +} + +static CFISH_INLINE void +lucy_NumUtil_u1set(void *array, uint32_t tick) { + uint8_t *const u8bits = (uint8_t*)array; + const uint32_t byte_offset = tick >> 3; + const uint8_t mask = 1 << (tick & 0x7); + u8bits[byte_offset] |= mask; +} + +static CFISH_INLINE void +lucy_NumUtil_u1clear(void *array, uint32_t tick) { + uint8_t *const u8bits = (uint8_t*)array; + const uint32_t byte_offset = tick >> 3; + const uint8_t mask = 1 << (tick & 0x7); + u8bits[byte_offset] &= ~mask; +} + +static CFISH_INLINE void +lucy_NumUtil_u1flip(void *array, uint32_t tick) { + uint8_t *const u8bits = (uint8_t*)array; + const uint32_t byte_offset = tick >> 3; + const uint8_t mask = 1 << (tick 
& 0x7); + u8bits[byte_offset] ^= mask; +} + +static CFISH_INLINE uint8_t +lucy_NumUtil_u2get(const void *array, uint32_t tick) { + uint8_t *ints = (uint8_t*)array; + uint8_t byte = ints[(tick >> 2)]; + int shift = 2 * (tick & 0x3); + return (byte >> shift) & 0x3; +} + +static CFISH_INLINE void +lucy_NumUtil_u2set(void *array, uint32_t tick, uint8_t value) { + uint8_t *ints = (uint8_t*)array; + int shift = 2 * (tick & 0x3); + uint8_t mask = 0x3 << shift; + uint8_t new_val = value & 0x3; + uint8_t new_bits = new_val << shift; + ints[(tick >> 2)] = (ints[(tick >> 2)] & ~mask) | new_bits; +} + + +static CFISH_INLINE uint8_t +lucy_NumUtil_u4get(const void *array, uint32_t tick) { + uint8_t *ints = (uint8_t*)array; + uint8_t byte = ints[(tick >> 1)]; + int shift = 4 * (tick & 1); + return (byte >> shift) & 0xF; +} + +static CFISH_INLINE void +lucy_NumUtil_u4set(void *array, uint32_t tick, uint8_t value) { + uint8_t *ints = (uint8_t*)array; + int shift = 4 * (tick & 0x1); + uint8_t mask = 0xF << shift; + uint8_t new_val = value & 0xF; + uint8_t new_bits = new_val << shift; + ints[(tick >> 1)] = (ints[(tick >> 1)] & ~mask) | new_bits; +} + +#ifdef LUCY_USE_SHORT_NAMES + #define C32_MAX_BYTES LUCY_NUMUTIL_C32_MAX_BYTES + #define C64_MAX_BYTES LUCY_NUMUTIL_C64_MAX_BYTES +#endif + +__END_C__ + + http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/core/Lucy/Util/ToolSet.h ---------------------------------------------------------------------- diff --git a/core/Lucy/Util/ToolSet.h b/core/Lucy/Util/ToolSet.h index 853d51a..ee1b810 100644 --- a/core/Lucy/Util/ToolSet.h +++ b/core/Lucy/Util/ToolSet.h @@ -45,7 +45,6 @@ extern "C" { #include "Clownfish/Num.h" #include "Clownfish/Vector.h" #include "Clownfish/Class.h" -#include "Clownfish/Util/NumberUtils.h" #include "Clownfish/Util/Memory.h" #include "Clownfish/Util/StringHelper.h" http://git-wip-us.apache.org/repos/asf/lucy/blob/8782bd56/perl/t/core/030-number_utils.t 
---------------------------------------------------------------------- diff --git a/perl/t/core/030-number_utils.t b/perl/t/core/030-number_utils.t new file mode 100644 index 0000000..2960c4e --- /dev/null +++ b/perl/t/core/030-number_utils.t @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +use strict; +use warnings; + +use Lucy::Test; +my $success = Lucy::Test::run_tests( + "Lucy::Test::Util::TestNumberUtils" +); + +exit($success ? 0 : 1); +
