Start to implement Lucy methods for C bindings
Project: http://git-wip-us.apache.org/repos/asf/lucy/repo Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/e737b717 Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/e737b717 Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/e737b717 Branch: refs/heads/master Commit: e737b7170f85c44081715be26869d50792299648 Parents: 4025736 Author: Nick Wellnhofer <[email protected]> Authored: Sun Nov 25 20:39:34 2012 +0100 Committer: Nick Wellnhofer <[email protected]> Committed: Sat Mar 9 17:51:54 2013 +0100 ---------------------------------------------------------------------- c/src/Lucy/Document/Doc.c | 110 ++++++++++++++++++++++++------------- c/src/Lucy/Index/DocReader.c | 101 ++++++++++++++++++++++++++++++++-- c/src/Lucy/Index/Inverter.c | 104 ++++++++++++++++++++++++++++++++++- 3 files changed, 269 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Document/Doc.c ---------------------------------------------------------------------- diff --git a/c/src/Lucy/Document/Doc.c b/c/src/Lucy/Document/Doc.c index db1d8f7..2cf88ec 100644 --- a/c/src/Lucy/Document/Doc.c +++ b/c/src/Lucy/Document/Doc.c @@ -15,79 +15,113 @@ */ #define C_LUCY_DOC +#define CHY_USE_SHORT_NAMES +#define LUCY_USE_SHORT_NAMES -#include "CFBind.h" #include "Lucy/Document/Doc.h" +#include "Clownfish/CharBuf.h" +#include "Clownfish/Err.h" +#include "Clownfish/Hash.h" +#include "Clownfish/VTable.h" #include "Lucy/Store/InStream.h" #include "Lucy/Store/OutStream.h" -lucy_Doc* -lucy_Doc_init(lucy_Doc *self, void *fields, int32_t doc_id) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_Doc*); +Doc* +Doc_init(Doc *self, void *fields, int32_t doc_id) { + Hash *hash; + + if (fields) { + hash = (Hash *)CERTIFY(fields, HASH); + INCREF(hash); + } + else { + hash = Hash_new(0); + } + self->fields = hash; + self->doc_id = doc_id; + + return self; } void -lucy_Doc_set_fields(lucy_Doc *self, void *fields) { - THROW(LUCY_ERR, "TODO"); +Doc_set_fields(Doc *self, void *fields) { + DECREF(self->fields); + self->fields = CERTIFY(fields, HASH); } uint32_t -lucy_Doc_get_size(lucy_Doc *self) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(uint32_t); +Doc_get_size(Doc *self) { + Hash *hash = (Hash *)self->fields; + return Hash_Get_Size(hash); } void -lucy_Doc_store(lucy_Doc *self, const lucy_CharBuf *field, lucy_Obj *value) { - THROW(LUCY_ERR, "TODO"); +Doc_store(Doc *self, const CharBuf *field, Obj *value) { + Hash *hash = (Hash *)self->fields; + Hash_Store(hash, (Obj *)field, value); + INCREF(value); } void -lucy_Doc_serialize(lucy_Doc *self, lucy_OutStream *outstream) { - THROW(LUCY_ERR, "TODO"); +Doc_serialize(Doc *self, OutStream *outstream) { + Hash *hash = (Hash *)self->fields; + Hash_Serialize(hash, outstream); + OutStream_Write_C32(outstream, self->doc_id); } -lucy_Doc* -lucy_Doc_deserialize(lucy_Doc *self, lucy_InStream *instream) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_Doc*); +Doc* +Doc_deserialize(Doc *self, InStream *instream) { + Hash *hash = (Hash*)VTable_Make_Obj(HASH); + self->fields = Hash_Deserialize(hash, instream); + self->doc_id = InStream_Read_C32(instream); + return self; } -lucy_Obj* -lucy_Doc_extract(lucy_Doc *self, lucy_CharBuf *field, - lucy_ViewCharBuf *target) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_Obj*); +Obj* +Doc_extract(Doc *self, CharBuf *field, + ViewCharBuf *target) { + Hash *hash = (Hash *)self->fields; + Obj *obj = Hash_Fetch(hash, (Obj *)field); + + if (obj && Obj_Is_A(obj, CHARBUF)) { + ViewCB_Assign(target, (CharBuf *)obj); + } + + return obj; } void* -lucy_Doc_to_host(lucy_Doc *self) { - THROW(LUCY_ERR, "TODO"); +Doc_to_host(Doc *self) { + THROW(ERR, "TODO"); UNREACHABLE_RETURN(void*); } -lucy_Hash* -lucy_Doc_dump(lucy_Doc *self) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_Hash*); +Hash* +Doc_dump(Doc *self) { + THROW(ERR, "TODO"); + UNREACHABLE_RETURN(Hash*); } -lucy_Doc* -lucy_Doc_load(lucy_Doc *self, lucy_Obj *dump) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_Doc*); +Doc* +Doc_load(Doc *self, Obj *dump) { + THROW(ERR, "TODO"); + UNREACHABLE_RETURN(Doc*); } bool -lucy_Doc_equals(lucy_Doc *self, lucy_Obj *other) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(bool); +Doc_equals(Doc *self, Obj *other) { + Doc *twin = (Doc*)other; + + if (twin == self) { return true; } + if (!Obj_Is_A(other, DOC)) { return false; } + + return Hash_Equals(self->fields, twin->fields); } void -lucy_Doc_destroy(lucy_Doc *self) { - THROW(LUCY_ERR, "TODO"); +Doc_destroy(Doc *self) { + DECREF(self->fields); + SUPER_DESTROY(self, DOC); } http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/DocReader.c ---------------------------------------------------------------------- diff --git a/c/src/Lucy/Index/DocReader.c b/c/src/Lucy/Index/DocReader.c index 430ccbe..e06e7ee 100644 --- a/c/src/Lucy/Index/DocReader.c +++ b/c/src/Lucy/Index/DocReader.c @@ -16,14 +16,105 @@ #define C_LUCY_DOCREADER #define C_LUCY_DEFAULTDOCREADER +#define CHY_USE_SHORT_NAMES +#define LUCY_USE_SHORT_NAMES -#include "CFBind.h" #include "Lucy/Index/DocReader.h" +#include "Clownfish/ByteBuf.h" +#include "Clownfish/CharBuf.h" +#include "Clownfish/Err.h" +#include "Clownfish/Hash.h" +#include "Clownfish/Num.h" +#include "Clownfish/Util/Memory.h" #include "Lucy/Document/HitDoc.h" +#include "Lucy/Plan/FieldType.h" +#include "Lucy/Plan/Schema.h" +#include "Lucy/Store/InStream.h" -lucy_HitDoc* -lucy_DefDocReader_fetch_doc(lucy_DefaultDocReader *self, int32_t doc_id) { - THROW(LUCY_ERR, "TODO"); - UNREACHABLE_RETURN(lucy_HitDoc*); +HitDoc* +DefDocReader_fetch_doc(DefaultDocReader *self, int32_t doc_id) { + Schema *const schema = self->schema; + InStream *const dat_in = self->dat_in; + InStream *const ix_in = self->ix_in; + Hash *const fields = Hash_new(1); + int64_t start; + uint32_t num_fields; + uint32_t field_name_cap = 31; + char *field_name = (char*)MALLOCATE(field_name_cap + 1); + + // Get data file pointer from index, read number of fields. + InStream_Seek(ix_in, (int64_t)doc_id * 8); + start = InStream_Read_U64(ix_in); + InStream_Seek(dat_in, start); + num_fields = InStream_Read_C32(dat_in); + + // Decode stored data and build up the doc field by field. + while (num_fields--) { + uint32_t field_name_len; + Obj *value; + FieldType *type; + + // Read field name. + field_name_len = InStream_Read_C32(dat_in); + if (field_name_len > field_name_cap) { + field_name_cap = field_name_len; + field_name = (char*)REALLOCATE(field_name, + field_name_cap + 1); + } + InStream_Read_Bytes(dat_in, field_name, field_name_len); + + // Find the Field's FieldType. + ZombieCharBuf *field_name_zcb + = ZCB_WRAP_STR(field_name, field_name_len); + type = Schema_Fetch_Type(schema, (CharBuf*)field_name_zcb); + + // Read the field value. + switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) { + case FType_TEXT: { + uint32_t value_len = InStream_Read_C32(dat_in); + char *buf = (char*)MALLOCATE(value_len + 1); + InStream_Read_Bytes(dat_in, buf, value_len); + buf[value_len] = '\0'; + value = (Obj*)CB_new_steal_from_trusted_str( + buf, value_len, value_len + 1); + break; + } + case FType_BLOB: { + uint32_t value_len = InStream_Read_C32(dat_in); + char *buf = (char*)MALLOCATE(value_len); + InStream_Read_Bytes(dat_in, buf, value_len); + value = (Obj*)BB_new_steal_bytes( + buf, value_len, value_len); + break; + } + case FType_FLOAT32: + value = (Obj*)Float32_new( + InStream_Read_F32(dat_in)); + break; + case FType_FLOAT64: + value = (Obj*)Float64_new( + InStream_Read_F64(dat_in)); + break; + case FType_INT32: + value = (Obj*)Int32_new( + (int32_t)InStream_Read_C32(dat_in)); + break; + case FType_INT64: + value = (Obj*)Int64_new( + (int64_t)InStream_Read_C64(dat_in)); + break; + default: + value = NULL; + THROW(ERR, "Unrecognized type: %o", type); + } + + // Store the value. + Hash_Store_Str(fields, field_name, field_name_len, value); + } + FREEMEM(field_name); + + HitDoc *retval = HitDoc_new(fields, doc_id, 0.0); + DECREF(fields); + return retval; } http://git-wip-us.apache.org/repos/asf/lucy/blob/e737b717/c/src/Lucy/Index/Inverter.c ---------------------------------------------------------------------- diff --git a/c/src/Lucy/Index/Inverter.c b/c/src/Lucy/Index/Inverter.c index e77c067..cfaafcd 100644 --- a/c/src/Lucy/Index/Inverter.c +++ b/c/src/Lucy/Index/Inverter.c @@ -16,14 +16,112 @@ #define C_LUCY_INVERTER #define C_LUCY_INVERTERENTRY +#define CHY_USE_SHORT_NAMES +#define LUCY_USE_SHORT_NAMES -#include "CFBind.h" #include "Lucy/Index/Inverter.h" +#include "Clownfish/ByteBuf.h" +#include "Clownfish/CharBuf.h" +#include "Clownfish/Err.h" +#include "Clownfish/Hash.h" +#include "Clownfish/Num.h" +#include "Clownfish/VArray.h" #include "Lucy/Document/Doc.h" +#include "Lucy/Index/Segment.h" +#include "Lucy/Plan/FieldType.h" +#include "Lucy/Plan/Schema.h" + +static InverterEntry* +S_fetch_entry(Inverter *self, CharBuf *field) { + Schema *const schema = self->schema; + int32_t field_num = Seg_Field_Num(self->segment, field); + if (!field_num) { + // This field seems not to be in the segment yet. Try to find it in + // the Schema. + if (Schema_Fetch_Type(schema, field)) { + // The field is in the Schema. Get a field num from the Segment. + field_num = Seg_Add_Field(self->segment, field); + } + else { + // We've truly failed to find the field. The user must + // not have spec'd it. + THROW(ERR, "Unknown field name: '%o'", field); + } + } + + InverterEntry *entry + = (InverterEntry*)VA_Fetch(self->entry_pool, field_num); + if (!entry) { + entry = InvEntry_new(schema, (CharBuf*)field, field_num); + VA_Store(self->entry_pool, field_num, (Obj*)entry); + } + return entry; +} void -lucy_Inverter_invert_doc(lucy_Inverter *self, lucy_Doc *doc) { - THROW(LUCY_ERR, "TODO"); +Inverter_invert_doc(Inverter *self, Doc *doc) { + Hash *const fields = (Hash*)Doc_Get_Fields(doc); + uint32_t num_keys = Hash_Iterate(fields); + + // Prepare for the new doc. + Inverter_Set_Doc(self, doc); + + // Extract and invert the doc's fields. + while (num_keys--) { + Obj *key, *obj; + Hash_Next(fields, &key, &obj); + CharBuf *field = (CharBuf*)CERTIFY(key, CHARBUF); + InverterEntry *inv_entry = S_fetch_entry(self, field); + FieldType *type = inv_entry->type; + + // Get the field value. + switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) { + case FType_TEXT: { + CharBuf *char_buf + = (CharBuf*)CERTIFY(obj, CHARBUF); + ViewCharBuf *value + = (ViewCharBuf*)inv_entry->value; + ViewCB_Assign(value, char_buf); + break; + } + case FType_BLOB: { + ByteBuf *byte_buf + = (ByteBuf*)CERTIFY(obj, BYTEBUF); + ViewByteBuf *value + = (ViewByteBuf*)inv_entry->value; + ViewBB_Assign(value, byte_buf); + break; + } + case FType_INT32: { + int32_t int_val = (int32_t)Obj_To_I64(obj); + Integer32* value = (Integer32*)inv_entry->value; + Int32_Set_Value(value, int_val); + break; + } + case FType_INT64: { + int64_t int_val = Obj_To_I64(obj); + Integer64* value = (Integer64*)inv_entry->value; + Int64_Set_Value(value, int_val); + break; + } + case FType_FLOAT32: { + float float_val = (float)Obj_To_F64(obj); + Float32* value = (Float32*)inv_entry->value; + Float32_Set_Value(value, float_val); + break; + } + case FType_FLOAT64: { + double float_val = Obj_To_F64(obj); + Float64* value = (Float64*)inv_entry->value; + Float64_Set_Value(value, float_val); + break; + } + default: + THROW(ERR, "Unrecognized type: %o", type); + } + + Inverter_Add_Field(self, inv_entry); + } }
