Changeset: e1d0cb52187a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=e1d0cb52187a
Modified Files:
        sql/backends/monet5/UDF/capi/capi.c
        sql/backends/monet5/UDF/capi/cheader.h
Branch: jitudf
Log Message:

For numeric columns, create result BATs immediately instead of manually 
allocating a region and then assigning that region to a BAT.

This is to allow the GDK to choose whether to malloc or mmap for a specific 
BAT, rather than always mallocing.


diffs (213 lines):

diff --git a/sql/backends/monet5/UDF/capi/capi.c b/sql/backends/monet5/UDF/capi/capi.c
--- a/sql/backends/monet5/UDF/capi/capi.c
+++ b/sql/backends/monet5/UDF/capi/capi.c
@@ -195,6 +195,27 @@ static void *wrapped_GDK_zalloc_nojump(s
        return add_allocated_region(ptr);
 }
 
+#define GENERATE_NUMERIC_FUNCTIONS(type, tpename) \
+       static void tpename##_initialize(struct cudf_data_struct_##tpename 
*self,  \
+                                                                        size_t 
count)                             \
+       {                                                                       
   \
+               BAT* b;                                                         
       \
+               if (self->bat) {                                                
       \
+                       BBPunfix(((BAT*)self->bat)->batCacheid);                
           \
+                       self->bat = NULL;                                       
           \
+               }                                                               
       \
+               b = COLnew(0, TYPE_##tpename, count, TRANSIENT);                
       \
+               if (!b) {                                                       
       \
+                       longjmp(jump_buffer[THRgettid()], 2);                   
           \
+               }                                                               
       \
+               self->bat = (void*) b;                                          
       \
+               self->count = count;                                            
       \
+               self->data = (type*) b->theap.base;                             
       \
+               BATsetcount(b, count);                                          
       \
+       }                                                                       
   \
+       static int tpename##_is_null(type value) { return value == 
tpename##_nil; }
+
+
 #define GENERATE_BASE_HEADERS(type, tpename)                                   
\
        static int tpename##_is_null(type value);                               
   \
        static void tpename##_initialize(struct cudf_data_struct_##tpename 
*self,  \
@@ -208,13 +229,14 @@ static void *wrapped_GDK_zalloc_nojump(s
        GENERATE_BASE_HEADERS(tpe, tpename);                                    
   \
        static int tpename##_is_null(tpe value) { return value == 
tpename##_nil; }
 
-GENERATE_BASE_FUNCTIONS(bte, bte);
-GENERATE_BASE_FUNCTIONS(sht, sht);
-GENERATE_BASE_FUNCTIONS(int, int);
-GENERATE_BASE_FUNCTIONS(lng, lng);
-GENERATE_BASE_FUNCTIONS(flt, flt);
-GENERATE_BASE_FUNCTIONS(dbl, dbl);
-GENERATE_BASE_FUNCTIONS(oid, oid);
+GENERATE_NUMERIC_FUNCTIONS(bit, bit);
+GENERATE_NUMERIC_FUNCTIONS(bte, bte);
+GENERATE_NUMERIC_FUNCTIONS(sht, sht);
+GENERATE_NUMERIC_FUNCTIONS(int, int);
+GENERATE_NUMERIC_FUNCTIONS(lng, lng);
+GENERATE_NUMERIC_FUNCTIONS(flt, flt);
+GENERATE_NUMERIC_FUNCTIONS(dbl, dbl);
+GENERATE_NUMERIC_FUNCTIONS(oid, oid);
 
 GENERATE_BASE_HEADERS(char *, str);
 GENERATE_BASE_HEADERS(cudf_data_date, date);
@@ -226,7 +248,7 @@ static void blob_initialize(struct cudf_
 
 #define GENERATE_BAT_INPUT_BASE(tpe)                                           
\
        struct cudf_data_struct_##tpe *bat_data =                               
   \
-               GDKmalloc(sizeof(struct cudf_data_struct_##tpe));               
       \
+               GDKzalloc(sizeof(struct cudf_data_struct_##tpe));               
       \
        if (!bat_data) {                                                        
   \
                msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);       
       \
                goto wrapup;                                                    
       \
@@ -235,6 +257,7 @@ static void blob_initialize(struct cudf_
        bat_data->is_null = tpe##_is_null;                                      
   \
        bat_data->scale =                                                       
   \
                argnode ? pow(10, ((sql_arg *)argnode->data)->type.scale) : 1;  
       \
+       bat_data->bat = NULL;                                                   
   \
        bat_data->initialize = (void (*)(void *, size_t))tpe##_initialize;
 
 #define GENERATE_BAT_INPUT(b, tpe)                                             
\
@@ -282,7 +305,7 @@ static void blob_initialize(struct cudf_
 
 #define GENERATE_BAT_OUTPUT_BASE(tpe)                                          
\
        struct cudf_data_struct_##tpe *bat_data =                               
   \
-               GDKmalloc(sizeof(struct cudf_data_struct_##tpe));               
       \
+               GDKzalloc(sizeof(struct cudf_data_struct_##tpe));               
       \
        if (!bat_data) {                                                        
   \
                msg = createException(MAL, "cudf.eval", MAL_MALLOC_FAIL);       
       \
                goto wrapup;                                                    
       \
@@ -320,6 +343,7 @@ const char *ldflags_pragma = "#pragma LD
 
 static size_t GetTypeCount(int type, void *struct_ptr);
 static void *GetTypeData(int type, void *struct_ptr);
+static void *GetTypeBat(int type, void *struct_ptr);
 static const char *GetTypeName(int type);
 
 static void data_from_date(date d, cudf_data_date *ptr);
@@ -912,7 +936,9 @@ static str CUDFeval(Client cntxt, MalBlk
                                BATdescriptor(*getArgReference_bat(stk, pci, 
i));
                }
 
-               if (bat_type == TYPE_bit || bat_type == TYPE_bte) {
+               if (bat_type == TYPE_bit) {
+                       GENERATE_BAT_INPUT(input_bats[index], bit);
+               } else if (bat_type == TYPE_bte) {
                        GENERATE_BAT_INPUT(input_bats[index], bte);
                } else if (bat_type == TYPE_sht) {
                        GENERATE_BAT_INPUT(input_bats[index], sht);
@@ -1142,7 +1168,9 @@ static str CUDFeval(Client cntxt, MalBlk
        for (i = 0; i < output_count; i++) {
                index = i;
                bat_type = getBatType(getArgType(mb, pci, i));
-               if (bat_type == TYPE_bit || bat_type == TYPE_bte) {
+               if (bat_type == TYPE_bit) {
+                       GENERATE_BAT_OUTPUT(bit);
+               } else if (bat_type == TYPE_bte) {
                        GENERATE_BAT_OUTPUT(bte);
                } else if (bat_type == TYPE_sht) {
                        GENERATE_BAT_OUTPUT(sht);
@@ -1286,26 +1314,13 @@ static str CUDFeval(Client cntxt, MalBlk
                        bat_type == TYPE_sht || bat_type == TYPE_int ||
                        bat_type == TYPE_oid || bat_type == TYPE_lng ||
                        bat_type == TYPE_flt || bat_type == TYPE_dbl) {
-                       b = COLnew(0, bat_type, 0, TRANSIENT);
+                       b = GetTypeBat(bat_type, outputs[i]);
                        if (!b) {
-                               msg = createException(MAL, "cudf.eval", 
MAL_MALLOC_FAIL);
+                               msg = createException(MAL, "cudf.eval", "Output 
column was not properly initialized.");
                                goto wrapup;
                        }
-                       // we pass the data we have directly into the BAT for simple
-                       // numeric types
-                       // this way we do not need to copy any data unnecessarily
-                       // free the current (initial) storage
-                       GDKfree(b->theap.base);
-                       // set the heap to use the new storage
-                       b->theap.base = data;
-                       b->theap.size = count * b->twidth;
-                       b->theap.free = b->theap.size;
-                       b->theap.storage = STORE_MEM;
-                       b->theap.newstorage = STORE_MEM;
-                       b->batCount = (BUN)count;
-                       b->batCapacity = (BUN)count;
-                       b->batCopiedtodisk = false;
                } else {
+                       assert(GetTypeBat(bat_type, outputs[i]) == NULL);
                        b = COLnew(0, bat_type, count, TRANSIENT);
                        if (!b) {
                                msg = createException(MAL, "cudf.eval", 
MAL_MALLOC_FAIL);
@@ -1541,9 +1556,14 @@ wrapup:
                                                           ? 
getBatType(getArgType(mb, pci, i))
                                                           : getArgType(mb, 
pci, i);
                        if (outputs[i]) {
-                               void *data = GetTypeData(bat_type, outputs[i]);
-                               if (data) {
-                                       GDKfree(data);
+                               void* b = GetTypeBat(bat_type, outputs[i]);
+                               if (b) {
+                                       BBPunfix(((BAT*)b)->batCacheid);
+                               } else {
+                                       void *data = GetTypeData(bat_type, 
outputs[i]);
+                                       if (data) {
+                                               GDKfree(data);
+                                       }
                                }
                                GDKfree(outputs[i]);
                        }
@@ -1643,6 +1663,41 @@ void *GetTypeData(int type, void *struct
        return data;
 }
 
+void *GetTypeBat(int type, void *struct_ptr)
+{
+       void *bat = NULL;
+
+       if (type == TYPE_bit || type == TYPE_bte) {
+               bat = ((struct cudf_data_struct_bte *)struct_ptr)->bat;
+       } else if (type == TYPE_sht) {
+               bat = ((struct cudf_data_struct_sht *)struct_ptr)->bat;
+       } else if (type == TYPE_int) {
+               bat = ((struct cudf_data_struct_int *)struct_ptr)->bat;
+       } else if (type == TYPE_oid) {
+               bat = ((struct cudf_data_struct_oid *)struct_ptr)->bat;
+       } else if (type == TYPE_lng) {
+               bat = ((struct cudf_data_struct_lng *)struct_ptr)->bat;
+       } else if (type == TYPE_flt) {
+               bat = ((struct cudf_data_struct_flt *)struct_ptr)->bat;
+       } else if (type == TYPE_dbl) {
+               bat = ((struct cudf_data_struct_dbl *)struct_ptr)->bat;
+       } else if (type == TYPE_str) {
+               bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
+       } else if (type == TYPE_date) {
+               bat = ((struct cudf_data_struct_date *)struct_ptr)->bat;
+       } else if (type == TYPE_daytime) {
+               bat = ((struct cudf_data_struct_time *)struct_ptr)->bat;
+       } else if (type == TYPE_timestamp) {
+               bat = ((struct cudf_data_struct_timestamp *)struct_ptr)->bat;
+       } else if (type == TYPE_blob || type == TYPE_sqlblob) {
+               bat = ((struct cudf_data_struct_blob *)struct_ptr)->bat;
+       } else {
+               // unsupported type: string
+               bat = ((struct cudf_data_struct_str *)struct_ptr)->bat;
+       }
+       return bat;
+}
+
 size_t GetTypeCount(int type, void *struct_ptr)
 {
        size_t count = 0;
diff --git a/sql/backends/monet5/UDF/capi/cheader.h b/sql/backends/monet5/UDF/capi/cheader.h
--- a/sql/backends/monet5/UDF/capi/cheader.h
+++ b/sql/backends/monet5/UDF/capi/cheader.h
@@ -33,8 +33,10 @@ typedef struct {
                double scale;                                                   
       \
                int (*is_null)(type value);                                     
       \
                void (*initialize)(void *self, size_t count);                   
       \
+               void *bat;                                                      
       \
        }
 
+DEFAULT_STRUCT_DEFINITION(signed char, bit);
 DEFAULT_STRUCT_DEFINITION(signed char, bte);
 DEFAULT_STRUCT_DEFINITION(short, sht);
 DEFAULT_STRUCT_DEFINITION(int, int);
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to