Updated Branches: refs/heads/c-bindings-wip1 [created] b9b16b0a2
Implement Index methods for C bindings

Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/b9b16b0a
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/b9b16b0a
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/b9b16b0a

Branch: refs/heads/c-bindings-wip1
Commit: b9b16b0a264a15574c71f6eded1db475091d6615
Parents: 61483f1
Author: Nick Wellnhofer <[email protected]>
Authored: Tue Dec 4 23:57:22 2012 +0100
Committer: Nick Wellnhofer <[email protected]>
Committed: Wed Dec 5 00:00:32 2012 +0100

----------------------------------------------------------------------
 c/src/Lucy/Index/DocReader.c | 89 ++++++++++++++++++++++++++++++++++-
 c/src/Lucy/Index/Inverter.c  | 93 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 179 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/lucy/blob/b9b16b0a/c/src/Lucy/Index/DocReader.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/DocReader.c b/c/src/Lucy/Index/DocReader.c
index 430ccbe..a20ecbb 100644
--- a/c/src/Lucy/Index/DocReader.c
+++ b/c/src/Lucy/Index/DocReader.c
@@ -19,11 +19,96 @@
 
 #include "CFBind.h"
 #include "Lucy/Index/DocReader.h"
+#include "Clownfish/Util/Memory.h"
 #include "Lucy/Document/HitDoc.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+#include "Lucy/Store/InStream.h"
 
 lucy_HitDoc*
 lucy_DefDocReader_fetch_doc(lucy_DefaultDocReader *self, int32_t doc_id) {
-    THROW(LUCY_ERR, "TODO");
-    UNREACHABLE_RETURN(lucy_HitDoc*);
+    lucy_Schema *const schema = self->schema;
+    lucy_InStream *const dat_in = self->dat_in;
+    lucy_InStream *const ix_in = self->ix_in;
+    lucy_Hash *const fields = lucy_Hash_new(1);
+    int64_t start;
+    uint32_t num_fields;
+    uint32_t field_name_cap = 31;
+    char *field_name = (char*)LUCY_MALLOCATE(field_name_cap + 1);
+
+    // Get data file pointer from index, read number of fields.
+    Lucy_InStream_Seek(ix_in, (int64_t)doc_id * 8);
+    start = Lucy_InStream_Read_U64(ix_in);
+    Lucy_InStream_Seek(dat_in, start);
+    num_fields = Lucy_InStream_Read_C32(dat_in);
+
+    // Decode stored data and build up the doc field by field.
+    while (num_fields--) {
+        uint32_t field_name_len;
+        lucy_Obj *value;
+        lucy_FieldType *type;
+
+        // Read field name.
+        field_name_len = Lucy_InStream_Read_C32(dat_in);
+        if (field_name_len > field_name_cap) {
+            field_name_cap = field_name_len;
+            field_name = (char*)LUCY_REALLOCATE(field_name,
+                                                field_name_cap + 1);
+        }
+        Lucy_InStream_Read_Bytes(dat_in, field_name, field_name_len);
+
+        // Find the Field's FieldType.
+        lucy_ZombieCharBuf *field_name_zcb
+            = CFISH_ZCB_WRAP_STR(field_name, field_name_len);
+        type = Lucy_Schema_Fetch_Type(schema, (lucy_CharBuf*)field_name_zcb);
+
+        // Read the field value.
+        switch (Lucy_FType_Primitive_ID(type) & lucy_FType_PRIMITIVE_ID_MASK) {
+            case lucy_FType_TEXT: {
+                uint32_t value_len = Lucy_InStream_Read_C32(dat_in);
+                char *buf = (char*)LUCY_MALLOCATE(value_len + 1);
+                Lucy_InStream_Read_Bytes(dat_in, buf, value_len);
+                buf[value_len] = '\0';
+                value = (lucy_Obj*)lucy_CB_new_steal_from_trusted_str(
+                            buf, value_len, value_len + 1);
+                break;
+            }
+            case lucy_FType_BLOB: {
+                uint32_t value_len = Lucy_InStream_Read_C32(dat_in);
+                char *buf = (char*)LUCY_MALLOCATE(value_len);
+                Lucy_InStream_Read_Bytes(dat_in, buf, value_len);
+                value = (lucy_Obj*)lucy_BB_new_steal_bytes(
+                            buf, value_len, value_len);
+                break;
+            }
+            case lucy_FType_FLOAT32:
+                value = (lucy_Obj*)lucy_Float32_new(
+                            Lucy_InStream_Read_F32(dat_in));
+                break;
+            case lucy_FType_FLOAT64:
+                value = (lucy_Obj*)lucy_Float64_new(
+                            Lucy_InStream_Read_F64(dat_in));
+                break;
+            case lucy_FType_INT32:
+                value = (lucy_Obj*)lucy_Int32_new(
+                            (int32_t)Lucy_InStream_Read_C32(dat_in));
+                break;
+            case lucy_FType_INT64:
+                value = (lucy_Obj*)lucy_Int64_new(
+                            (int64_t)Lucy_InStream_Read_C64(dat_in));
+                break;
+            default:
+                value = NULL;
+                CFISH_THROW(LUCY_ERR, "Unrecognized type: %o", type);
+        }
+
+        // Store the value.
+        Lucy_Hash_Store_Str(fields, field_name, field_name_len, value);
+    }
+    LUCY_FREEMEM(field_name);
+
+    lucy_HitDoc *retval = lucy_HitDoc_new(fields, doc_id, 0.0);
+    CFISH_DECREF(fields);
+    return retval;
 }
 

http://git-wip-us.apache.org/repos/asf/lucy/blob/b9b16b0a/c/src/Lucy/Index/Inverter.c
----------------------------------------------------------------------
diff --git a/c/src/Lucy/Index/Inverter.c b/c/src/Lucy/Index/Inverter.c
index e77c067..9dc6529 100644
--- a/c/src/Lucy/Index/Inverter.c
+++ b/c/src/Lucy/Index/Inverter.c
@@ -20,10 +20,101 @@
 #include "CFBind.h"
 #include "Lucy/Index/Inverter.h"
 #include "Lucy/Document/Doc.h"
+#include "Lucy/Index/Segment.h"
+#include "Lucy/Plan/FieldType.h"
+#include "Lucy/Plan/Schema.h"
+
+static lucy_InverterEntry*
+S_fetch_entry(lucy_Inverter *self, lucy_CharBuf *field) {
+    lucy_Schema *const schema = self->schema;
+    int32_t field_num = Lucy_Seg_Field_Num(self->segment, field);
+    if (!field_num) {
+        // This field seems not to be in the segment yet. Try to find it in
+        // the Schema.
+        if (Lucy_Schema_Fetch_Type(schema, field)) {
+            // The field is in the Schema. Get a field num from the Segment.
+            field_num = Lucy_Seg_Add_Field(self->segment, field);
+        }
+        else {
+            // We've truly failed to find the field. The user must
+            // not have spec'd it.
+            THROW(LUCY_ERR, "Unknown field name: '%o'", field);
+        }
+    }
+
+    lucy_InverterEntry *entry
+        = (lucy_InverterEntry*)Lucy_VA_Fetch(self->entry_pool, field_num);
+    if (!entry) {
+        entry = lucy_InvEntry_new(schema, (lucy_CharBuf*)field, field_num);
+        Lucy_VA_Store(self->entry_pool, field_num, (lucy_Obj*)entry);
+    }
+    return entry;
+}
 
 void
 lucy_Inverter_invert_doc(lucy_Inverter *self, lucy_Doc *doc) {
-    THROW(LUCY_ERR, "TODO");
+    lucy_Hash *const fields = (lucy_Hash*)Lucy_Doc_Get_Fields(doc);
+    uint32_t num_keys = Lucy_Hash_Iterate(fields);
+
+    // Prepare for the new doc.
+    Lucy_Inverter_Set_Doc(self, doc);
+
+    // Extract and invert the doc's fields.
+    while (num_keys--) {
+        lucy_Obj *key, *obj;
+        Lucy_Hash_Next(fields, &key, &obj);
+        lucy_CharBuf *field = (lucy_CharBuf*)CFISH_CERTIFY(key, LUCY_CHARBUF);
+        lucy_InverterEntry *inv_entry = S_fetch_entry(self, field);
+        lucy_FieldType *type = inv_entry->type;
+
+        // Get the field value.
+        switch (Lucy_FType_Primitive_ID(type) & lucy_FType_PRIMITIVE_ID_MASK) {
+            case lucy_FType_TEXT: {
+                lucy_CharBuf *char_buf
+                    = (lucy_CharBuf*)CFISH_CERTIFY(obj, LUCY_CHARBUF);
+                lucy_ViewCharBuf *value
+                    = (lucy_ViewCharBuf*)inv_entry->value;
+                Lucy_ViewCB_Assign(value, char_buf);
+                break;
+            }
+            case lucy_FType_BLOB: {
+                lucy_ByteBuf *byte_buf
+                    = (lucy_ByteBuf*)CFISH_CERTIFY(obj, LUCY_BYTEBUF);
+                lucy_ViewByteBuf *value
+                    = (lucy_ViewByteBuf*)inv_entry->value;
+                Lucy_ViewBB_Assign(value, byte_buf);
+                break;
+            }
+            case lucy_FType_INT32: {
+                int32_t int_val = (int32_t)Lucy_Obj_To_I64(obj);
+                lucy_Integer32* value = (lucy_Integer32*)inv_entry->value;
+                Lucy_Int32_Set_Value(value, int_val);
+                break;
+            }
+            case lucy_FType_INT64: {
+                int64_t int_val = Lucy_Obj_To_I64(obj);
+                lucy_Integer64* value = (lucy_Integer64*)inv_entry->value;
+                Lucy_Int64_Set_Value(value, int_val);
+                break;
+            }
+            case lucy_FType_FLOAT32: {
+                float float_val = (float)Lucy_Obj_To_F64(obj);
+                lucy_Float32* value = (lucy_Float32*)inv_entry->value;
+                Lucy_Float32_Set_Value(value, float_val);
+                break;
+            }
+            case lucy_FType_FLOAT64: {
+                double float_val = Lucy_Obj_To_F64(obj);
+                lucy_Float64* value = (lucy_Float64*)inv_entry->value;
+                Lucy_Float64_Set_Value(value, float_val);
+                break;
+            }
+            default:
+                THROW(LUCY_ERR, "Unrecognized type: %o", type);
+        }
+
+        Lucy_Inverter_Add_Field(self, inv_entry);
+    }
 }
