Changeset: b414e3913f19 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB/rev/b414e3913f19
Removed Files:
        gdk/gdk_imprints.c
        gdk/gdk_imprints.h
        monetdb5/modules/mal/Tests/imprints.maltest
Modified Files:
        clients/Tests/MAL-signatures-hge.test
        clients/Tests/MAL-signatures.test
        clients/Tests/exports.stable.out
        gdk/CMakeLists.txt
        gdk/ChangeLog
        gdk/gdk.h
        gdk/gdk_aggr.c
        gdk/gdk_align.c
        gdk/gdk_bat.c
        gdk/gdk_batop.c
        gdk/gdk_bbp.c
        gdk/gdk_join.c
        gdk/gdk_private.h
        gdk/gdk_select.c
        gdk/gdk_storage.c
        monetdb5/mal/mal_profiler.c
        monetdb5/mal/mal_resource.c
        monetdb5/mal/mal_runtime.c
        monetdb5/modules/kernel/bat5.c
        monetdb5/modules/mal/Tests/All
        monetdb5/modules/mal/batExtensions.c
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_cat.c
        sql/test/emptydb/Tests/check.stable.out
        sql/test/emptydb/Tests/check.stable.out.int128
Branch: default
Log Message:

Removed imprints on numeric columns.
Creating imprints is really expensive.
TPC-H SF 10 query 6 takes roughly 230 ms when using imprints, and 130 ms
when not using imprints.  And that's when they already exist.  (Debug
build on my laptop.)


diffs (truncated from 4605 to 300 lines):

diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test
--- a/clients/Tests/MAL-signatures-hge.test
+++ b/clients/Tests/MAL-signatures-hge.test
@@ -3782,7 +3782,7 @@ bat
 getSize
 command bat.getSize(X_0:bat[:any_1]):lng
 BKCgetSize;
-Calculate the actual size of the BAT descriptor, heaps, hashes and imprint indices in bytes@rounded to the memory page size (see bbp.getPageSize()).
+Calculate the actual size of the BAT descriptor, heaps, hashes in bytes@rounded to the memory page size (see bbp.getPageSize()).
 bat
 getVHeapSize
 command bat.getVHeapSize(X_0:bat[:any_1]):lng
@@ -3799,76 +3799,6 @@ pattern bat.hasorderidx(X_0:bat[:any_1])
 OIDXhasorderidx;
 Return true if order index exists
 bat
-imprints
-command bat.imprints(X_0:bat[:bte]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:dbl]):void
-CMDBATimprints;
-Check for existence or create an imprint index on the BAT.
-bat
-imprints
-command bat.imprints(X_0:bat[:flt]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:hge]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:int]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:lng]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:sht]):void
-CMDBATimprints;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:bte]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:dbl]):lng
-CMDBATimprintsize;
-Return the storage size of the imprints index structure.
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:flt]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:hge]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:int]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:lng]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:sht]):lng
-CMDBATimprintsize;
-(empty)
-bat
 info
 command bat.info(X_0:bat[:any_1]) (X_1:bat[:str], X_2:bat[:str])
 BKCinfo;
@@ -4019,11 +3949,6 @@ command bat.setHash(X_0:bat[:any_1]):bit
 BKCsetHash;
 Create a hash structure on the column
 bat
-setImprints
-command bat.setImprints(X_0:bat[:any_1]):bit
-BKCsetImprints;
-Create an imprints structure on the column
-bat
 setName
 command bat.setName(X_0:bat[:any_1], X_1:str):void
 BKCsetName;
diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test
--- a/clients/Tests/MAL-signatures.test
+++ b/clients/Tests/MAL-signatures.test
@@ -3217,7 +3217,7 @@ bat
 getSize
 command bat.getSize(X_0:bat[:any_1]):lng
 BKCgetSize;
-Calculate the actual size of the BAT descriptor, heaps, hashes and imprint indices in bytes@rounded to the memory page size (see bbp.getPageSize()).
+Calculate the actual size of the BAT descriptor, heaps, hashes in bytes@rounded to the memory page size (see bbp.getPageSize()).
 bat
 getVHeapSize
 command bat.getVHeapSize(X_0:bat[:any_1]):lng
@@ -3234,66 +3234,6 @@ pattern bat.hasorderidx(X_0:bat[:any_1])
 OIDXhasorderidx;
 Return true if order index exists
 bat
-imprints
-command bat.imprints(X_0:bat[:bte]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:dbl]):void
-CMDBATimprints;
-Check for existence or create an imprint index on the BAT.
-bat
-imprints
-command bat.imprints(X_0:bat[:flt]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:int]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:lng]):void
-CMDBATimprints;
-(empty)
-bat
-imprints
-command bat.imprints(X_0:bat[:sht]):void
-CMDBATimprints;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:bte]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:dbl]):lng
-CMDBATimprintsize;
-Return the storage size of the imprints index structure.
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:flt]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:int]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:lng]):lng
-CMDBATimprintsize;
-(empty)
-bat
-imprintsize
-command bat.imprintsize(X_0:bat[:sht]):lng
-CMDBATimprintsize;
-(empty)
-bat
 info
 command bat.info(X_0:bat[:any_1]) (X_1:bat[:str], X_2:bat[:str])
 BKCinfo;
@@ -3444,11 +3384,6 @@ command bat.setHash(X_0:bat[:any_1]):bit
 BKCsetHash;
 Create a hash structure on the column
 bat
-setImprints
-command bat.setImprints(X_0:bat[:any_1]):bit
-BKCsetImprints;
-Create an imprints structure on the column
-bat
 setName
 command bat.setName(X_0:bat[:any_1], X_1:str):void
 BKCsetName;
diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out
--- a/clients/Tests/exports.stable.out
+++ b/clients/Tests/exports.stable.out
@@ -153,7 +153,6 @@ BUN BATguess_uniques(BAT *b, struct cand
 gdk_return BAThash(BAT *b);
 bool BAThasstrimps(BAT *b);
 void BAThseqbase(BAT *b, oid o);
-gdk_return BATimprints(BAT *b);
 BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate);
 BAT *BATintersectcand(BAT *a, BAT *b);
 gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__));
@@ -360,8 +359,6 @@ gdk_return HEAPextend(Heap *h, size_t si
 void HEAPincref(Heap *h);
 size_t HEAPmemsize(Heap *h);
 size_t HEAPvmsize(Heap *h);
-void IMPSdestroy(BAT *b);
-lng IMPSimprintsize(BAT *b);
 int MT_access(const char *pathname, int mode);
 gdk_return MT_alloc_tls(MT_TLS_t *newkey);
 int MT_check_nr_cores(void);
diff --git a/gdk/CMakeLists.txt b/gdk/CMakeLists.txt
--- a/gdk/CMakeLists.txt
+++ b/gdk/CMakeLists.txt
@@ -77,7 +77,6 @@ target_sources(bat
   gdk_private.h
   gdk_system_private.h
   gdk_group.c
-  gdk_imprints.c gdk_imprints.h
   gdk_join.c
   gdk_project.c
   gdk_time.c gdk_time.h
diff --git a/gdk/ChangeLog b/gdk/ChangeLog
--- a/gdk/ChangeLog
+++ b/gdk/ChangeLog
@@ -1,3 +1,8 @@
 # ChangeLog file for GDK
 # This file is updated with Maddlog
 
+* Fri Sep 13 2024 Sjoerd Mullender <[email protected]>
+- The implementation for the imprints index on numeric columns has
+  been removed.  It hasn't been used in years, and when it is enabled,
+  it doesn't really make queries go faster.
+
diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -565,7 +565,6 @@ typedef struct {
 } Heap;
 
 typedef struct Hash Hash;
-typedef struct Imprints Imprints;
 typedef struct Strimps Strimps;
 
 #ifdef HAVE_RTREE
@@ -688,7 +687,6 @@ gdk_export bool VALisnil(const ValRecord
  *           int    tloc;             // byte-offset in BUN for tail elements
  *           Heap   *theap;           // heap for varsized tail values
  *           Hash   *thash;           // linear chained hash table on tail
- *           Imprints *timprints;     // column imprints index on tail
  *           orderidx torderidx;      // order oid index on tail
  *  } BAT;
  * @end verbatim
@@ -735,7 +733,6 @@ typedef struct {
 #ifdef HAVE_RTREE
        RTree *rtree;           /* rtree geometric index */
 #endif
-       Imprints *imprints;     /* column imprints index */
        Heap *orderidx;         /* order oid index */
        Strimps *strimps;       /* string imprint index  */
 
@@ -804,7 +801,6 @@ typedef struct BAT {
        MT_Lock theaplock;      /* lock protecting heap reference changes */
        MT_RWLock thashlock;    /* lock specifically for hash management */
        MT_Lock batIdxLock;     /* lock to manipulate other indexes/properties */
-       MT_Sema imprsema;       /* semaphore to synchronize imprints creation */
        Heap *oldtail;          /* old tail heap, to be destroyed after commit */
 } BAT;
 
@@ -830,7 +826,6 @@ typedef struct BAT {
 #define tbaseoff       T.baseoff
 #define tvheap         T.vheap
 #define thash          T.hash
-#define timprints      T.imprints
 #define tprops         T.props
 #define tstrimps       T.strimps
 #ifdef HAVE_RTREE
@@ -1877,23 +1872,6 @@ bunfastapp_nocheckVAR(BAT *b, const void
        return rc;
 }
 
-/*
- * @- Column Imprints Functions
- *
- * @multitable @columnfractions 0.08 0.7
- * @item BAT*
- * @tab
- *  BATimprints (BAT *b)
- * @end multitable
- *
- * The column imprints index structure.
- *
- */
-
-gdk_export gdk_return BATimprints(BAT *b);
-gdk_export void IMPSdestroy(BAT *b);
-gdk_export lng IMPSimprintsize(BAT *b);
-
 /* Strimps exported functions */
_______________________________________________
checkin-list mailing list -- [email protected]
To unsubscribe send an email to [email protected]

Reply via email to