Repository: lucy Updated Branches: refs/heads/264_dont_subclass_hash 1622b4e4f -> d136b8675
Track memory consumption in a Counter. Anticipating the elimination of MemoryPool, duplicate tracking of memory consumption in a simple "Counter" object. Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/1f159329 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/1f159329 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/1f159329 Branch: refs/heads/264_dont_subclass_hash Commit: 1f159329056d0bc549f799f2dd5ed0461f65679e Parents: 1622b4e Author: Marvin Humphrey <[email protected]> Authored: Wed Jul 2 13:22:31 2014 -0700 Committer: Marvin Humphrey <[email protected]> Committed: Wed Jul 2 18:07:40 2014 -0700 ---------------------------------------------------------------------- core/Lucy/Index/SortFieldWriter.c | 47 ++++++++++++++++++++++++-------- core/Lucy/Index/SortFieldWriter.cfh | 5 ++-- core/Lucy/Index/SortWriter.c | 39 ++++++++++++++++++++++++-- core/Lucy/Index/SortWriter.cfh | 19 +++++++++++++ 4 files changed, 94 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/1f159329/core/Lucy/Index/SortFieldWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortFieldWriter.c b/core/Lucy/Index/SortFieldWriter.c index b6faec5..d0c2291 100644 --- a/core/Lucy/Index/SortFieldWriter.c +++ b/core/Lucy/Index/SortFieldWriter.c @@ -29,6 +29,7 @@ #include "Lucy/Index/SortCache/NumericSortCache.h" #include "Lucy/Index/SortCache/TextSortCache.h" #include "Lucy/Index/SortReader.h" +#include "Lucy/Index/SortWriter.h" #include "Lucy/Index/ZombieKeyedHash.h" #include "Lucy/Plan/FieldType.h" #include "Lucy/Plan/Schema.h" @@ -54,18 +55,28 @@ S_write_files(SortFieldWriter *self, OutStream *ord_out, OutStream *ix_out, // allocation itself will come from the MemoryPool, so the the element will be // deallocated via MemPool_Release_All(). static SFWriterElem* -S_SFWriterElem_create(MemoryPool *mem_pool, Obj *value, int32_t doc_id); +S_SFWriterElem_create(MemoryPool *mem_pool, Counter *counter, Obj *value, int32_t doc_id); + +static int64_t +SI_increase_to_word_multiple(int64_t amount) { + const int64_t remainder = amount % sizeof(void*); + if (remainder) { + amount += sizeof(void*); + amount -= remainder; + } + return amount; +} SortFieldWriter* SortFieldWriter_new(Schema *schema, Snapshot *snapshot, Segment *segment, PolyReader *polyreader, String *field, - MemoryPool *memory_pool, size_t mem_thresh, + MemoryPool *memory_pool, Counter *counter, size_t mem_thresh, OutStream *temp_ord_out, OutStream *temp_ix_out, OutStream *temp_dat_out) { SortFieldWriter *self = (SortFieldWriter*)VTable_Make_Obj(SORTFIELDWRITER); return SortFieldWriter_init(self, schema, snapshot, segment, polyreader, - field, memory_pool, mem_thresh, temp_ord_out, + field, memory_pool, counter, mem_thresh, temp_ord_out, temp_ix_out, temp_dat_out); } @@ -73,7 +84,7 @@ SortFieldWriter* SortFieldWriter_init(SortFieldWriter *self, Schema *schema, Snapshot *snapshot, Segment *segment, PolyReader *polyreader, String *field, - MemoryPool *memory_pool, size_t mem_thresh, + MemoryPool *memory_pool, Counter *counter, size_t mem_thresh, OutStream *temp_ord_out, OutStream *temp_ix_out, OutStream *temp_dat_out) { // Init. @@ -103,6 +114,7 @@ SortFieldWriter_init(SortFieldWriter *self, Schema *schema, ivars->segment = (Segment*)INCREF(segment); ivars->polyreader = (PolyReader*)INCREF(polyreader); ivars->mem_pool = (MemoryPool*)INCREF(memory_pool); + ivars->counter = (Counter*)INCREF(counter); ivars->temp_ord_out = (OutStream*)INCREF(temp_ord_out); ivars->temp_ix_out = (OutStream*)INCREF(temp_ix_out); ivars->temp_dat_out = (OutStream*)INCREF(temp_dat_out); @@ -158,6 +170,7 @@ SortFieldWriter_Destroy_IMP(SortFieldWriter *self) { DECREF(ivars->polyreader); DECREF(ivars->type); DECREF(ivars->mem_pool); + DECREF(ivars->counter); DECREF(ivars->temp_ord_out); DECREF(ivars->temp_ix_out); DECREF(ivars->temp_dat_out); @@ -181,11 +194,18 @@ SortFieldWriter_Get_Ord_Width_IMP(SortFieldWriter *self) { } static Obj* -S_find_unique_value(Hash *uniq_vals, Obj *val) { +S_find_unique_value(Hash *uniq_vals, Counter *counter, Obj *val) { int32_t hash_sum = Obj_Hash_Sum(val); Obj *uniq_val = Hash_Find_Key(uniq_vals, val, hash_sum); if (!uniq_val) { Hash_Store(uniq_vals, val, (Obj*)CFISH_TRUE); + VTable *vtable = Obj_Get_VTable(val); + Counter_Add(counter, VTable_Get_Obj_Alloc_Size(vtable)); + if (vtable == STRING) { + int64_t size = Str_Get_Size((String*)val) + 1; + size = SI_increase_to_word_multiple(size); + Counter_Add(counter, size); + } uniq_val = Hash_Find_Key(uniq_vals, val, hash_sum); } return uniq_val; @@ -196,8 +216,8 @@ SortFieldWriter_Add_IMP(SortFieldWriter *self, int32_t doc_id, Obj *value) { SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self); // Uniq-ify the value, and record it for this document. - Obj *copy = S_find_unique_value(ivars->uniq_vals, value); - SFWriterElem *elem = S_SFWriterElem_create(ivars->mem_pool, copy, doc_id); + Obj *copy = S_find_unique_value(ivars->uniq_vals, ivars->counter, value); + SFWriterElem *elem = S_SFWriterElem_create(ivars->mem_pool, ivars->counter, copy, doc_id); SortFieldWriter_Feed(self, (Obj*)elem); ivars->count++; } @@ -209,7 +229,7 @@ SortFieldWriter_Add_Segment_IMP(SortFieldWriter *self, SegReader *reader, SortFieldWriterIVARS *const ivars = SortFieldWriter_IVARS(self); SortFieldWriter *run = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment, - ivars->polyreader, ivars->field, ivars->mem_pool, + ivars->polyreader, ivars->field, ivars->mem_pool, ivars->counter, ivars->mem_thresh, NULL, NULL, NULL); SortFieldWriterIVARS *const run_ivars = SortFieldWriter_IVARS(run); run_ivars->sort_cache = (SortCache*)INCREF(sort_cache); @@ -400,7 +420,7 @@ SortFieldWriter_Flush_IMP(SortFieldWriter *self) { SortFieldWriter_Sort_Buffer(self); SortFieldWriter *run = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment, - ivars->polyreader, ivars->field, ivars->mem_pool, + ivars->polyreader, ivars->field, ivars->mem_pool, ivars->counter, ivars->mem_thresh, NULL, NULL, NULL); SortFieldWriterIVARS *const run_ivars = SortFieldWriter_IVARS(run); @@ -454,16 +474,16 @@ SortFieldWriter_Refill_IMP(SortFieldWriter *self) { } SortFieldWriter_Clear_Buffer(self); MemPool_Release_All(ivars->mem_pool); + Counter_Reset(ivars->counter); S_lazy_init_sorted_ids(self); const int32_t null_ord = ivars->null_ord; - Hash *const uniq_vals = ivars->uniq_vals; I32Array *const doc_map = ivars->doc_map; SortCache *const sort_cache = ivars->sort_cache; uint32_t count = 0; while (ivars->run_tick <= ivars->run_max - && MemPool_Get_Consumed(ivars->mem_pool) < ivars->mem_thresh + && Counter_Get_Value(ivars->counter) < ivars->mem_thresh ) { int32_t raw_doc_id = ivars->sorted_ids[ivars->run_tick]; int32_t ord = SortCache_Ordinal(sort_cache, raw_doc_id); @@ -675,7 +695,9 @@ S_flip_run(SortFieldWriter *run, size_t sub_thresh, InStream *ord_in, // Get our own MemoryPool, ZombieKeyedHash, and slice of mem_thresh. DECREF(run_ivars->uniq_vals); DECREF(run_ivars->mem_pool); + DECREF(run_ivars->counter); run_ivars->mem_pool = MemPool_new(0); + run_ivars->counter = Counter_new(); run_ivars->uniq_vals = (Hash*)ZKHash_new(run_ivars->mem_pool, run_ivars->prim_id); run_ivars->mem_thresh = sub_thresh; @@ -755,8 +777,9 @@ S_flip_run(SortFieldWriter *run, size_t sub_thresh, InStream *ord_in, /***************************************************************************/ static SFWriterElem* -S_SFWriterElem_create(MemoryPool *mem_pool, Obj *value, int32_t doc_id) { +S_SFWriterElem_create(MemoryPool *mem_pool, Counter *counter, Obj *value, int32_t doc_id) { size_t size = VTable_Get_Obj_Alloc_Size(SFWRITERELEM); + Counter_Add(counter, size); SFWriterElem *self = (SFWriterElem*)MemPool_Grab(mem_pool, size); VTable_Init_Obj(SFWRITERELEM, (Obj*)self); SFWriterElemIVARS *ivars = SFWriterElem_IVARS(self); http://git-wip-us.apache.org/repos/asf/lucy/blob/1f159329/core/Lucy/Index/SortFieldWriter.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortFieldWriter.cfh b/core/Lucy/Index/SortFieldWriter.cfh index 0128c61..7ca6b5a 100644 --- a/core/Lucy/Index/SortFieldWriter.cfh +++ b/core/Lucy/Index/SortFieldWriter.cfh @@ -27,6 +27,7 @@ class Lucy::Index::SortFieldWriter FieldType *type; I32Array *doc_map; MemoryPool *mem_pool; + Counter *counter; int32_t field_num; int32_t null_ord; int8_t prim_id; @@ -54,14 +55,14 @@ class Lucy::Index::SortFieldWriter inert incremented SortFieldWriter* new(Schema *schema, Snapshot *snapshot, Segment *segment, - PolyReader *polyreader, String *field, MemoryPool *memory_pool, + PolyReader *polyreader, String *field, MemoryPool *memory_pool, Counter *counter, size_t mem_thresh, OutStream *temp_ord_out, OutStream *temp_ix_out, OutStream *temp_dat_out); inert SortFieldWriter* init(SortFieldWriter *self, Schema *schema, Snapshot *snapshot, Segment *segment, PolyReader *polyreader, String *field, - MemoryPool *memory_pool, size_t mem_thresh, OutStream *temp_ord_out, + MemoryPool *memory_pool, Counter *counter, size_t mem_thresh, OutStream *temp_ord_out, OutStream *temp_ix_out, OutStream *temp_dat_out); void http://git-wip-us.apache.org/repos/asf/lucy/blob/1f159329/core/Lucy/Index/SortWriter.c ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortWriter.c b/core/Lucy/Index/SortWriter.c index fe1641c..fc323d0 100644 --- a/core/Lucy/Index/SortWriter.c +++ b/core/Lucy/Index/SortWriter.c @@ -15,6 +15,7 @@ */ #define C_LUCY_SORTWRITER +#define C_LUCY_COUNTER #include "Lucy/Util/ToolSet.h" #include <math.h> @@ -63,6 +64,7 @@ SortWriter_init(SortWriter *self, Schema *schema, Snapshot *snapshot, ivars->temp_ix_out = NULL; ivars->temp_dat_out = NULL; ivars->mem_pool = MemPool_new(0); + ivars->counter = Counter_new(); ivars->mem_thresh = default_mem_thresh; ivars->flush_at_finish = false; @@ -80,6 +82,7 @@ SortWriter_Destroy_IMP(SortWriter *self) { DECREF(ivars->temp_ix_out); DECREF(ivars->temp_dat_out); DECREF(ivars->mem_pool); + DECREF(ivars->counter); SUPER_DESTROY(self, SORTWRITER); } @@ -123,7 +126,7 @@ S_lazy_init_field_writer(SortWriter *self, int32_t field_num) { String *field = Seg_Field_Name(ivars->segment, field_num); field_writer = SortFieldWriter_new(ivars->schema, ivars->snapshot, ivars->segment, - ivars->polyreader, field, ivars->mem_pool, + ivars->polyreader, field, ivars->mem_pool, ivars->counter, ivars->mem_thresh, ivars->temp_ord_out, ivars->temp_ix_out, ivars->temp_dat_out); VA_Store(ivars->field_writers, field_num, (Obj*)field_writer); @@ -150,13 +153,14 @@ SortWriter_Add_Inverted_Doc_IMP(SortWriter *self, Inverter *inverter, // If our SortFieldWriters have collectively passed the memory threshold, // flush all of them, then release all unique values with a single action. - if (MemPool_Get_Consumed(ivars->mem_pool) > ivars->mem_thresh) { + if (Counter_Get_Value(ivars->counter) > ivars->mem_thresh) { for (uint32_t i = 0; i < VA_Get_Size(ivars->field_writers); i++) { SortFieldWriter *const field_writer = (SortFieldWriter*)VA_Fetch(ivars->field_writers, i); if (field_writer) { SortFieldWriter_Flush(field_writer); } } MemPool_Release_All(ivars->mem_pool); + Counter_Reset(ivars->counter); ivars->flush_at_finish = true; } } @@ -272,4 +276,35 @@ SortWriter_Format_IMP(SortWriter *self) { return SortWriter_current_file_format; } +/*************************************************************************/ + +Counter* +Counter_new() { + Counter *self = (Counter*)VTable_Make_Obj(COUNTER); + return Counter_init(self); +} + +Counter* +Counter_init(Counter *self) { + CounterIVARS *ivars = Counter_IVARS(self); + ivars->value = 0; + return self; +} + +int64_t +Counter_Add_IMP(Counter *self, int64_t amount) { + CounterIVARS *ivars = Counter_IVARS(self); + ivars->value += amount; + return ivars->value; +} + +int64_t +Counter_Get_Value_IMP(Counter *self) { + return Counter_IVARS(self)->value; +} + +void +Counter_Reset_IMP(Counter *self) { + Counter_IVARS(self)->value = 0; +} http://git-wip-us.apache.org/repos/asf/lucy/blob/1f159329/core/Lucy/Index/SortWriter.cfh ---------------------------------------------------------------------- diff --git a/core/Lucy/Index/SortWriter.cfh b/core/Lucy/Index/SortWriter.cfh index ab3ec91..ba9bd35 100644 --- a/core/Lucy/Index/SortWriter.cfh +++ b/core/Lucy/Index/SortWriter.cfh @@ -36,6 +36,7 @@ class Lucy::Index::SortWriter inherits Lucy::Index::DataWriter { OutStream *temp_ix_out; OutStream *temp_dat_out; MemoryPool *mem_pool; + Counter *counter; size_t mem_thresh; bool flush_at_finish; @@ -73,4 +74,22 @@ class Lucy::Index::SortWriter inherits Lucy::Index::DataWriter { Destroy(SortWriter *self); } +final class Lucy::Index::Counter inherits Clownfish::Obj { + int64_t value; + + inert incremented Counter* + new(); + + inert Counter* + init(Counter *self); + + int64_t + Add(Counter *self, int64_t amount); + + int64_t + Get_Value(Counter *self); + + void + Reset(Counter *self); +}
