[7/9] lucy git commit: Port DefDocReader code to CGO.

marvin Mon, 03 Aug 2015 14:59:31 -0700

Port DefDocReader code to CGO.


Project: http://git-wip-us.apache.org/repos/asf/lucy/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucy/commit/7749e595
Tree: http://git-wip-us.apache.org/repos/asf/lucy/tree/7749e595
Diff: http://git-wip-us.apache.org/repos/asf/lucy/diff/7749e595

Branch: refs/heads/master
Commit: 7749e595b2a6c32af57b904d1a00b066691ace37
Parents: acd74d2
Author: Marvin Humphrey <[email protected]>
Authored: Sun Jul 19 12:57:13 2015 -0700
Committer: Marvin Humphrey <[email protected]>
Committed: Fri Jul 31 17:59:21 2015 -0700

----------------------------------------------------------------------
 go/cfext/lucy.c | 80 ++------------------------------------------
 go/lucy/lucy.go | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+), 77 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucy/blob/7749e595/go/cfext/lucy.c
----------------------------------------------------------------------
diff --git a/go/cfext/lucy.c b/go/cfext/lucy.c
index f10a23f..9e9b840 100644
--- a/go/cfext/lucy.c
+++ b/go/cfext/lucy.c
@@ -166,85 +166,11 @@ Doc_Destroy_IMP(Doc *self) {
 
 /**************************** DocReader *****************************/
 
+DefDocReader_Fetch_Doc_t GOLUCY_DefDocReader_Fetch_Doc_BRIDGE;
+
 HitDoc*
 DefDocReader_Fetch_Doc_IMP(DefaultDocReader *self, int32_t doc_id) {
-    DefaultDocReaderIVARS *const ivars = DefDocReader_IVARS(self);
-    Schema   *const schema = ivars->schema;
-    InStream *const dat_in = ivars->dat_in;
-    InStream *const ix_in  = ivars->ix_in;
-    Hash     *const fields = Hash_new(1);
-    int64_t   start;
-    uint32_t  num_fields;
-    uint32_t  field_name_cap = 31;
-    char     *field_name = (char*)MALLOCATE(field_name_cap + 1);
-
-    // Get data file pointer from index, read number of fields.
-    InStream_Seek(ix_in, (int64_t)doc_id * 8);
-    start = InStream_Read_U64(ix_in);
-    InStream_Seek(dat_in, start);
-    num_fields = InStream_Read_C32(dat_in);
-
-    // Decode stored data and build up the doc field by field.
-    while (num_fields--) {
-        uint32_t        field_name_len;
-        Obj       *value;
-        FieldType *type;
-
-        // Read field name.
-        field_name_len = InStream_Read_C32(dat_in);
-        if (field_name_len > field_name_cap) {
-            field_name_cap = field_name_len;
-            field_name     = (char*)REALLOCATE(field_name,
-                                                    field_name_cap + 1);
-        }
-        InStream_Read_Bytes(dat_in, field_name, field_name_len);
-
-        // Find the Field's FieldType.
-        String *field_name_str = SSTR_WRAP_UTF8(field_name, field_name_len);
-        type = Schema_Fetch_Type(schema, field_name_str);
-
-        // Read the field value.
-        switch (FType_Primitive_ID(type) & FType_PRIMITIVE_ID_MASK) {
-            case FType_TEXT: {
-                    uint32_t value_len = InStream_Read_C32(dat_in);
-                    char *buf = (char*)MALLOCATE(value_len + 1);
-                    InStream_Read_Bytes(dat_in, buf, value_len);
-                    buf[value_len] = '\0';
-                    value = (Obj*)Str_new_steal_utf8(buf, value_len);
-                    break;
-                }
-            case FType_BLOB: {
-                    uint32_t value_len = InStream_Read_C32(dat_in);
-                    char *buf = (char*)MALLOCATE(value_len);
-                    InStream_Read_Bytes(dat_in, buf, value_len);
-                    value = (Obj*)Blob_new_steal(buf, value_len);
-                    break;
-                }
-            case FType_FLOAT32:
-                value = (Obj*)Float_new(InStream_Read_F32(dat_in));
-                break;
-            case FType_FLOAT64:
-                value = (Obj*)Float_new(InStream_Read_F64(dat_in));
-                break;
-            case FType_INT32:
-                value = (Obj*)Int_new((int32_t)InStream_Read_C32(dat_in));
-                break;
-            case FType_INT64:
-                value = (Obj*)Int_new((int64_t)InStream_Read_C64(dat_in));
-                break;
-            default:
-                value = NULL;
-                THROW(ERR, "Unrecognized type: %o", type);
-        }
-
-        // Store the value.
-        Hash_Store_Utf8(fields, field_name, field_name_len, value);
-    }
-    FREEMEM(field_name);
-
-    HitDoc *retval = HitDoc_new(fields, doc_id, 0.0);
-    DECREF(fields);
-    return retval;
+    return GOLUCY_DefDocReader_Fetch_Doc_BRIDGE(self, doc_id);
 }
 
 /**************************** Inverter *****************************/

http://git-wip-us.apache.org/repos/asf/lucy/blob/7749e595/go/lucy/lucy.go
----------------------------------------------------------------------
diff --git a/go/lucy/lucy.go b/go/lucy/lucy.go
index 664a200..556235e 100644
--- a/go/lucy/lucy.go
+++ b/go/lucy/lucy.go
@@ -17,19 +17,28 @@
 package lucy
 
 /*
+
+#include <stdlib.h>
+
 #define C_LUCY_DOC
 #define C_LUCY_REGEXTOKENIZER
+#define C_LUCY_DEFAULTDOCREADER
 #define C_LUCY_INVERTER
 #define C_LUCY_INVERTERENTRY
 
 #include "lucy_parcel.h"
 #include "Lucy/Analysis/RegexTokenizer.h"
 #include "Lucy/Document/Doc.h"
+#include "Lucy/Index/DocReader.h"
 #include "Lucy/Index/Inverter.h"
 
+#include "Clownfish/String.h"
+#include "Clownfish/Blob.h"
+#include "Clownfish/Num.h"
 #include "Clownfish/Hash.h"
 #include "Clownfish/HashIterator.h"
 #include "Clownfish/Vector.h"
+#include "Lucy/Document/HitDoc.h"
 #include "Lucy/Plan/FieldType.h"
 #include "Lucy/Plan/Schema.h"
 #include "Lucy/Index/Segment.h"
@@ -90,6 +99,11 @@ GOLUCY_Doc_Destroy(lucy_Doc *self);
 extern void
 (*GOLUCY_Doc_Destroy_BRIDGE)(lucy_Doc *self);
 
+extern lucy_HitDoc*
+GOLUCY_DefDocReader_Fetch_Doc(lucy_DefaultDocReader *self, int32_t doc_id); // Implemented on the Go side via an export directive.
+extern lucy_HitDoc*
+(*GOLUCY_DefDocReader_Fetch_Doc_BRIDGE)(lucy_DefaultDocReader *self, int32_t doc_id); // Function pointer assigned in GOLUCY_glue_exported_symbols.
+
 extern void
 GOLUCY_Inverter_Invert_Doc(lucy_Inverter *self, lucy_Doc *doc);
 extern void
@@ -115,9 +129,16 @@ GOLUCY_glue_exported_symbols() {
        GOLUCY_Doc_Extract_BRIDGE = GOLUCY_Doc_Extract;
        GOLUCY_Doc_Equals_BRIDGE = GOLUCY_Doc_Equals;
        GOLUCY_Doc_Destroy_BRIDGE = GOLUCY_Doc_Destroy;
+       GOLUCY_DefDocReader_Fetch_Doc_BRIDGE = GOLUCY_DefDocReader_Fetch_Doc;
        GOLUCY_Inverter_Invert_Doc_BRIDGE = GOLUCY_Inverter_Invert_Doc;
 }
 
+
+static void
+null_terminate_string(char *string, size_t len) { // Buffer must hold at least len + 1 bytes.
+       string[len] = '\0'; // Write the NUL terminator just past the len payload bytes.
+}
+
 */
 import "C"
 import "unsafe"
@@ -256,6 +277,79 @@ func fetchEntry(ivars *C.lucy_InverterIVARS, field 
*C.cfish_String) *C.lucy_Inve
        return (*C.lucy_InverterEntry)(unsafe.Pointer(entry))
 }
 
+// GOLUCY_DefDocReader_Fetch_Doc reads the stored fields of document docID
+// and returns them wrapped in a new HitDoc created with a score of 0.0.
+//
+// The document's offset is looked up in the index stream (one 8-byte entry
+// per doc ID).  At that offset the data stream holds a field count followed,
+// for each field, by a length-prefixed name and a value whose encoding is
+// chosen by the primitive ID of the field's FieldType in the schema.
+//
+// Panics with the value built by clownfish.NewErr when a field has no
+// FieldType or its primitive type ID is not recognized.  The scratch name
+// buffer and the local reference to the fields hash are released on every
+// exit path, including a panic.
+//
+//export GOLUCY_DefDocReader_Fetch_Doc
+func GOLUCY_DefDocReader_Fetch_Doc(ddr *C.lucy_DefaultDocReader,
+       docID C.int32_t) *C.lucy_HitDoc {
+       ivars := C.lucy_DefDocReader_IVARS(ddr)
+       schema := ivars.schema
+       datInstream := ivars.dat_in
+       ixInstream := ivars.ix_in
+
+       // Drop our reference to the hash on every exit path; on success this
+       // runs after the hash has been handed to HitDoc_new.
+       fields := C.cfish_Hash_new(1)
+       defer C.cfish_dec_refcount(unsafe.Pointer(fields))
+
+       // Scratch buffer for field names, grown on demand.  The deferred free
+       // is a closure because realloc below may move the buffer.
+       fieldNameCap := C.size_t(31)
+       var fieldName *C.char = ((*C.char)(C.malloc(fieldNameCap + 1)))
+       defer func() { C.free(unsafe.Pointer(fieldName)) }()
+
+       // Get data file pointer from index, read number of fields.  Widen the
+       // doc ID before multiplying so that large IDs cannot overflow int32.
+       C.LUCY_InStream_Seek(ixInstream, C.int64_t(docID)*8)
+       start := C.LUCY_InStream_Read_U64(ixInstream)
+       C.LUCY_InStream_Seek(datInstream, C.int64_t(start))
+       numFields := uint32(C.LUCY_InStream_Read_C32(datInstream))
+
+       // Decode stored data and build up the doc field by field.
+       for i := uint32(0); i < numFields; i++ {
+               // Read field name.
+               fieldNameLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream))
+               if fieldNameLen > fieldNameCap {
+                       fieldNameCap = fieldNameLen
+                       fieldName = ((*C.char)(C.realloc(unsafe.Pointer(fieldName),
+                               fieldNameCap+1)))
+               }
+               C.LUCY_InStream_Read_Bytes(datInstream, fieldName, fieldNameLen)
+
+               // Find the Field's FieldType.
+               // TODO: Creating and destroying a new string each time is
+               // inefficient.  The solution should be to add a private
+               // Schema_Fetch_Type_Utf8 method which takes char* and size_t.
+               fieldNameStr := C.cfish_Str_new_from_utf8(fieldName, fieldNameLen)
+               fieldType := C.LUCY_Schema_Fetch_Type(schema, fieldNameStr)
+               C.cfish_dec_refcount(unsafe.Pointer(fieldNameStr))
+
+               // Fail with a readable error instead of dispatching a method on
+               // a null FieldType (for example, a stored field that the schema
+               // does not describe).
+               if fieldType == nil {
+                       panic(clownfish.NewErr("Internal Lucy error: no type for field " +
+                               C.GoStringN(fieldName, C.int(fieldNameLen))))
+               }
+
+               // Read the field value.
+               var value *C.cfish_Obj
+               switch C.LUCY_FType_Primitive_ID(fieldType) &
+                       C.lucy_FType_PRIMITIVE_ID_MASK {
+               case C.lucy_FType_TEXT:
+                       // One extra byte for the NUL terminator; the new String
+                       // takes ownership of buf.
+                       valueLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream))
+                       buf := ((*C.char)(C.malloc(valueLen + 1)))
+                       C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen)
+                       C.null_terminate_string(buf, valueLen)
+                       value = ((*C.cfish_Obj)(C.cfish_Str_new_steal_utf8(buf, valueLen)))
+               case C.lucy_FType_BLOB:
+                       // The new Blob takes ownership of buf.
+                       valueLen := C.size_t(C.LUCY_InStream_Read_C32(datInstream))
+                       buf := ((*C.char)(C.malloc(valueLen)))
+                       C.LUCY_InStream_Read_Bytes(datInstream, buf, valueLen)
+                       value = ((*C.cfish_Obj)(C.cfish_Blob_new_steal(buf, valueLen)))
+               case C.lucy_FType_FLOAT32:
+                       value = ((*C.cfish_Obj)(C.cfish_Float_new(
+                               C.double(C.LUCY_InStream_Read_F32(datInstream)))))
+               case C.lucy_FType_FLOAT64:
+                       value = ((*C.cfish_Obj)(C.cfish_Float_new(
+                               C.LUCY_InStream_Read_F64(datInstream))))
+               case C.lucy_FType_INT32:
+                       // Reinterpret the unsigned C32 payload as int32 before
+                       // widening so that negative values keep their sign.
+                       value = ((*C.cfish_Obj)(C.cfish_Int_new(
+                               C.int64_t(C.int32_t(C.LUCY_InStream_Read_C32(datInstream))))))
+               case C.lucy_FType_INT64:
+                       value = ((*C.cfish_Obj)(C.cfish_Int_new(
+                               C.int64_t(C.LUCY_InStream_Read_C64(datInstream)))))
+               default:
+                       panic(clownfish.NewErr("Internal Lucy error: bad type id for field " +
+                               C.GoStringN(fieldName, C.int(fieldNameLen))))
+               }
+
+               // Store the value.
+               C.CFISH_Hash_Store_Utf8(fields, fieldName, fieldNameLen, value)
+       }
+
+       return C.lucy_HitDoc_new(unsafe.Pointer(fields), docID, 0.0)
+}
+
 //export GOLUCY_Inverter_Invert_Doc
 func GOLUCY_Inverter_Invert_Doc(inverter *C.lucy_Inverter, doc *C.lucy_Doc) {
        ivars := C.lucy_Inverter_IVARS(inverter)

[7/9] lucy git commit: Port DefDocReader code to CGO.

Reply via email to