Changeset: b414e3913f19 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB/rev/b414e3913f19 Removed Files: gdk/gdk_imprints.c gdk/gdk_imprints.h monetdb5/modules/mal/Tests/imprints.maltest Modified Files: clients/Tests/MAL-signatures-hge.test clients/Tests/MAL-signatures.test clients/Tests/exports.stable.out gdk/CMakeLists.txt gdk/ChangeLog gdk/gdk.h gdk/gdk_aggr.c gdk/gdk_align.c gdk/gdk_bat.c gdk/gdk_batop.c gdk/gdk_bbp.c gdk/gdk_join.c gdk/gdk_private.h gdk/gdk_select.c gdk/gdk_storage.c monetdb5/mal/mal_profiler.c monetdb5/mal/mal_resource.c monetdb5/mal/mal_runtime.c monetdb5/modules/kernel/bat5.c monetdb5/modules/mal/Tests/All monetdb5/modules/mal/batExtensions.c sql/backends/monet5/sql.c sql/backends/monet5/sql_cat.c sql/test/emptydb/Tests/check.stable.out sql/test/emptydb/Tests/check.stable.out.int128 Branch: default Log Message:
Removed imprints on numeric columns. Creating imprints is really expensive. TPC-H SF 10 query 6 takes roughly 230 ms when using imprints, and 130 ms when not using imprints. And that's when they already exist. (Debug build on my laptop.) diffs (truncated from 4605 to 300 lines): diff --git a/clients/Tests/MAL-signatures-hge.test b/clients/Tests/MAL-signatures-hge.test --- a/clients/Tests/MAL-signatures-hge.test +++ b/clients/Tests/MAL-signatures-hge.test @@ -3782,7 +3782,7 @@ bat getSize command bat.getSize(X_0:bat[:any_1]):lng BKCgetSize; -Calculate the actual size of the BAT descriptor, heaps, hashes and imprint indices in bytes@rounded to the memory page size (see bbp.getPageSize()). +Calculate the actual size of the BAT descriptor, heaps, hashes in bytes@rounded to the memory page size (see bbp.getPageSize()). bat getVHeapSize command bat.getVHeapSize(X_0:bat[:any_1]):lng @@ -3799,76 +3799,6 @@ pattern bat.hasorderidx(X_0:bat[:any_1]) OIDXhasorderidx; Return true if order index exists bat -imprints -command bat.imprints(X_0:bat[:bte]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:dbl]):void -CMDBATimprints; -Check for existence or create an imprint index on the BAT. -bat -imprints -command bat.imprints(X_0:bat[:flt]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:hge]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:int]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:lng]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:sht]):void -CMDBATimprints; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:bte]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:dbl]):lng -CMDBATimprintsize; -Return the storage size of the imprints index structure. -bat -imprintsize -command bat.imprintsize(X_0:bat[:flt]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:hge]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:int]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:lng]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:sht]):lng -CMDBATimprintsize; -(empty) -bat info command bat.info(X_0:bat[:any_1]) (X_1:bat[:str], X_2:bat[:str]) BKCinfo; @@ -4019,11 +3949,6 @@ command bat.setHash(X_0:bat[:any_1]):bit BKCsetHash; Create a hash structure on the column bat -setImprints -command bat.setImprints(X_0:bat[:any_1]):bit -BKCsetImprints; -Create an imprints structure on the column -bat setName command bat.setName(X_0:bat[:any_1], X_1:str):void BKCsetName; diff --git a/clients/Tests/MAL-signatures.test b/clients/Tests/MAL-signatures.test --- a/clients/Tests/MAL-signatures.test +++ b/clients/Tests/MAL-signatures.test @@ -3217,7 +3217,7 @@ bat getSize command bat.getSize(X_0:bat[:any_1]):lng BKCgetSize; -Calculate the actual size of the BAT descriptor, heaps, hashes and imprint indices in bytes@rounded to the memory page size (see bbp.getPageSize()). +Calculate the actual size of the BAT descriptor, heaps, hashes in bytes@rounded to the memory page size (see bbp.getPageSize()). bat getVHeapSize command bat.getVHeapSize(X_0:bat[:any_1]):lng @@ -3234,66 +3234,6 @@ pattern bat.hasorderidx(X_0:bat[:any_1]) OIDXhasorderidx; Return true if order index exists bat -imprints -command bat.imprints(X_0:bat[:bte]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:dbl]):void -CMDBATimprints; -Check for existence or create an imprint index on the BAT. -bat -imprints -command bat.imprints(X_0:bat[:flt]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:int]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:lng]):void -CMDBATimprints; -(empty) -bat -imprints -command bat.imprints(X_0:bat[:sht]):void -CMDBATimprints; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:bte]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:dbl]):lng -CMDBATimprintsize; -Return the storage size of the imprints index structure. -bat -imprintsize -command bat.imprintsize(X_0:bat[:flt]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:int]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:lng]):lng -CMDBATimprintsize; -(empty) -bat -imprintsize -command bat.imprintsize(X_0:bat[:sht]):lng -CMDBATimprintsize; -(empty) -bat info command bat.info(X_0:bat[:any_1]) (X_1:bat[:str], X_2:bat[:str]) BKCinfo; @@ -3444,11 +3384,6 @@ command bat.setHash(X_0:bat[:any_1]):bit BKCsetHash; Create a hash structure on the column bat -setImprints -command bat.setImprints(X_0:bat[:any_1]):bit -BKCsetImprints; -Create an imprints structure on the column -bat setName command bat.setName(X_0:bat[:any_1], X_1:str):void BKCsetName; diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -153,7 +153,6 @@ BUN BATguess_uniques(BAT *b, struct cand gdk_return BAThash(BAT *b); bool BAThasstrimps(BAT *b); void BAThseqbase(BAT *b, oid o); -gdk_return BATimprints(BAT *b); BAT *BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one, BUN estimate); BAT *BATintersectcand(BAT *a, BAT *b); gdk_return BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); @@ -360,8 +359,6 @@ gdk_return HEAPextend(Heap *h, size_t si void HEAPincref(Heap *h); size_t HEAPmemsize(Heap *h); size_t HEAPvmsize(Heap *h); -void IMPSdestroy(BAT *b); -lng IMPSimprintsize(BAT *b); int MT_access(const char *pathname, int mode); gdk_return MT_alloc_tls(MT_TLS_t *newkey); int MT_check_nr_cores(void); diff --git a/gdk/CMakeLists.txt b/gdk/CMakeLists.txt --- a/gdk/CMakeLists.txt +++ b/gdk/CMakeLists.txt @@ -77,7 +77,6 @@ target_sources(bat gdk_private.h gdk_system_private.h gdk_group.c - gdk_imprints.c gdk_imprints.h gdk_join.c gdk_project.c gdk_time.c gdk_time.h diff --git a/gdk/ChangeLog b/gdk/ChangeLog --- a/gdk/ChangeLog +++ b/gdk/ChangeLog @@ -1,3 +1,8 @@ # ChangeLog file for GDK # This file is updated with Maddlog +* Fri Sep 13 2024 Sjoerd Mullender <[email protected]> +- The implementation for the imprints index on numeric columns has + been removed. It hasn't been used in years, and when it is enabled, + it doesn't really make queries go faster. + diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -565,7 +565,6 @@ typedef struct { } Heap; typedef struct Hash Hash; -typedef struct Imprints Imprints; typedef struct Strimps Strimps; #ifdef HAVE_RTREE @@ -688,7 +687,6 @@ gdk_export bool VALisnil(const ValRecord * int tloc; // byte-offset in BUN for tail elements * Heap *theap; // heap for varsized tail values * Hash *thash; // linear chained hash table on tail - * Imprints *timprints; // column imprints index on tail * orderidx torderidx; // order oid index on tail * } BAT; * @end verbatim @@ -735,7 +733,6 @@ typedef struct { #ifdef HAVE_RTREE RTree *rtree; /* rtree geometric index */ #endif - Imprints *imprints; /* column imprints index */ Heap *orderidx; /* order oid index */ Strimps *strimps; /* string imprint index */ @@ -804,7 +801,6 @@ typedef struct BAT { MT_Lock theaplock; /* lock protecting heap reference changes */ MT_RWLock thashlock; /* lock specifically for hash management */ MT_Lock batIdxLock; /* lock to manipulate other indexes/properties */ - MT_Sema imprsema; /* semaphore to synchronize imprints creation */ Heap *oldtail; /* old tail heap, to be destroyed after commit */ } BAT; @@ -830,7 +826,6 @@ typedef struct BAT { #define tbaseoff T.baseoff #define tvheap T.vheap #define thash T.hash -#define timprints T.imprints #define tprops T.props #define tstrimps T.strimps #ifdef HAVE_RTREE @@ -1877,23 +1872,6 @@ bunfastapp_nocheckVAR(BAT *b, const void return rc; } -/* - * @- Column Imprints Functions - * - * @multitable @columnfractions 0.08 0.7 - * @item BAT* - * @tab - * BATimprints (BAT *b) - * @end multitable - * - * The column imprints index structure. - * - */ - -gdk_export gdk_return BATimprints(BAT *b); -gdk_export void IMPSdestroy(BAT *b); -gdk_export lng IMPSimprintsize(BAT *b); - /* Strimps exported functions */ _______________________________________________ checkin-list mailing list -- [email protected] To unsubscribe send an email to [email protected]
