Changeset: fe5156c932e5 for MonetDB URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=fe5156c932e5 Added Files: gdk/xoshiro256starstar.h sql/test/sample/Tests/sample-queries.stable.err sql/test/sample/Tests/sample-queries.stable.out Removed Files: gdk/xoshiro128starstar.h Modified Files: clients/Tests/exports.stable.out gdk/gdk.h gdk/gdk_sample.c monetdb5/modules/mal/sample.c Branch: sample-with-seed Log Message:
Use the 64-bit version of the PRNG and use gdk.h types. diffs (truncated from 629 to 300 lines): diff --git a/clients/Tests/exports.stable.out b/clients/Tests/exports.stable.out --- a/clients/Tests/exports.stable.out +++ b/clients/Tests/exports.stable.out @@ -164,7 +164,7 @@ gdk_return BATreplace(BAT *b, BAT *p, BA void BATrmprop(BAT *b, int idx); gdk_return BATroles(BAT *b, const char *tnme); BAT *BATsample(BAT *b, BUN n); -BAT *BATsample_with_seed(BAT *b, BUN n, uint32_t seed); +BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed); BAT *BATselect(BAT *b, BAT *s, const void *tl, const void *th, bool li, bool hi, bool anti); gdk_return BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate) __attribute__((__warn_unused_result__)); gdk_return BATsetaccess(BAT *b, int mode); diff --git a/gdk/gdk.h b/gdk/gdk.h --- a/gdk/gdk.h +++ b/gdk/gdk.h @@ -2763,7 +2763,7 @@ gdk_export gdk_return BATfirstn(BAT **to * */ gdk_export BAT *BATsample(BAT *b, BUN n); -gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, uint32_t seed); +gdk_export BAT *BATsample_with_seed(BAT *b, BUN n, unsigned seed); /* * diff --git a/gdk/gdk_sample.c b/gdk/gdk_sample.c --- a/gdk/gdk_sample.c +++ b/gdk/gdk_sample.c @@ -26,7 +26,7 @@ #include "monetdb_config.h" #include "gdk.h" #include "gdk_private.h" -#include "xoshiro128starstar.h" +#include "xoshiro256starstar.h" #undef BATsample @@ -109,7 +109,7 @@ OIDTreeToBATAntiset(struct oidtreenode * /* BATsample implements sampling for void headed BATs */ BAT * -BATsample_with_seed(BAT *b, BUN n, uint32_t seed) +BATsample_with_seed(BAT *b, BUN n, unsigned seed) { BAT *bn; BUN cnt, slen; @@ -185,7 +185,7 @@ BATsample_with_seed(BAT *b, BUN n, uint3 BAT * BATsample(BAT *b, BUN n) { - uint32_t some_random_seed = (uint32_t) rand(); + unsigned some_random_seed = (unsigned) rand(); return BATsample_with_seed(b, n,some_random_seed); } diff --git a/gdk/xoshiro128starstar.h b/gdk/xoshiro128starstar.h deleted file mode 100644 --- 
a/gdk/xoshiro128starstar.h +++ /dev/null @@ -1,37 +0,0 @@ -#include <stdint.h> -#include <string.h> - -static inline uint32_t rotl(const uint32_t x, int k) { - return (x << k) | (x >> (32 - k)); -} - -typedef uint32_t random_state_engine[4]; - -void init_random_state_engine(random_state_engine* engine, uint32_t seed); -void init_random_state_engine(random_state_engine* engine, uint32_t seed) { - - random_state_engine s = { seed << 1, seed + 1, seed >> 1, seed - 1 }; - - memcpy(engine, &s, sizeof(random_state_engine)); -} - -static inline uint32_t next_uint32(random_state_engine rse) { - uint32_t output = rotl(rse[0] * 5, 7) * 9; - - const uint32_t t = rse[1] << 9; - - rse[2] ^= rse[0]; - rse[3] ^= rse[1]; - rse[1] ^= rse[2]; - rse[0] ^= rse[3]; - - rse[2] ^= t; - - rse[3] = rotl(rse[3], 11); - - return output; -} - -static inline double next_double(random_state_engine rse) { - return (double) next_uint32(rse) / UINT32_MAX; -} diff --git a/gdk/xoshiro256starstar.h b/gdk/xoshiro256starstar.h new file mode 100644 --- /dev/null +++ b/gdk/xoshiro256starstar.h @@ -0,0 +1,45 @@ +#include <limits.h> +#include <string.h> +#include "gdk.h" + +/* +This 64 bit pseudo random number generator is based on the engine written by +Sebastiano Vigna from Dipartimento di Informatica of the Università degli Studi di Milano. +The original source code can be found here http://xoshiro.di.unimi.it/xoshiro256starstar.c. +The engine is supposed to perform very well on various random engine benchmarks. +The original author offered his work to the public domain. 
+*/ + +static inline ulng rotl(const ulng x, int k) { + return (x << k) | (x >> (64 - k)); +} + +typedef ulng random_state_engine[4]; + +static inline void init_random_state_engine(random_state_engine* engine, const unsigned seed) { + + random_state_engine s = { seed << 1, seed + 1, seed >> 1, seed - 1 }; + + memcpy(engine, &s, sizeof(random_state_engine)); +} + +static inline ulng next(random_state_engine rse) { + const ulng output = rotl(rse[0] * 5, 7) * 9; + + const ulng t = rse[1] << 17; + + rse[2] ^= rse[0]; + rse[3] ^= rse[1]; + rse[1] ^= rse[2]; + rse[0] ^= rse[3]; + + rse[2] ^= t; + + rse[3] = rotl(rse[3], 45); + + return output; +} + +static inline double next_double(random_state_engine rse) { + return (double) next(rse) / ULONG_MAX; +} diff --git a/monetdb5/modules/mal/sample.c b/monetdb5/modules/mal/sample.c --- a/monetdb5/modules/mal/sample.c +++ b/monetdb5/modules/mal/sample.c @@ -33,7 +33,7 @@ #include "gdk.h" #include "mal_exception.h" #include "sample.h" -// TODO: Go through this documentation. +// TODO: Go through this documentation and update it with an explanation about seeds. /* * @- Uniform Sampling. 
* @@ -72,13 +72,13 @@ SAMPLEuniform(Client cntxt, MalBlkPtr mb bat *r, *b; lng sample_size; - int seed; + unsigned seed; (void) cntxt; BAT *br, *bb; r = getArgReference_bat(stk, pci, 0); - b = getArgReference_bat(stk, pci, 1);\ + b = getArgReference_bat(stk, pci, 1); if ((bb = BATdescriptor(*b)) == NULL) { throw(MAL, "sample.subuniform", INTERNAL_BAT_ACCESS); @@ -102,7 +102,7 @@ SAMPLEuniform(Client cntxt, MalBlkPtr mb } if (pci->argc == 4) { - seed = *getArgReference_int(stk, pci, 3); + seed = (unsigned) *getArgReference_int(stk, pci, 3); br = BATsample_with_seed(bb, (BUN) sample_size, seed); } else { diff --git a/sql/test/sample/Tests/sample-queries.stable.err b/sql/test/sample/Tests/sample-queries.stable.err new file mode 100644 --- /dev/null +++ b/sql/test/sample/Tests/sample-queries.stable.err @@ -0,0 +1,40 @@ +stderr of test 'sample-queries` in directory 'sql/test/sample` itself: + + +# 14:40:06 > +# 14:40:06 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=34313" "--set" "mapi_usock=/var/tmp/mtest-23651/.s.monetdb.34313" "--set" "monet_prompt=" "--forcemito" "--dbpath=/home/aris/Installations/default-env/var/MonetDB/mTests_sql_test_sample" "--set" "embedded_c=true" +# 14:40:06 > + +# builtin opt gdk_dbpath = /home/aris/Installations/default-env/var/monetdb5/dbfarm/demo +# builtin opt gdk_debug = 0 +# builtin opt gdk_vmtrim = no +# builtin opt monet_prompt = > +# builtin opt monet_daemon = no +# builtin opt mapi_port = 50000 +# builtin opt mapi_open = false +# builtin opt mapi_autosense = false +# builtin opt sql_optimizer = default_pipe +# builtin opt sql_debug = 0 +# cmdline opt gdk_nr_threads = 0 +# cmdline opt mapi_open = true +# cmdline opt mapi_port = 34313 +# cmdline opt mapi_usock = /var/tmp/mtest-23651/.s.monetdb.34313 +# cmdline opt monet_prompt = +# cmdline opt gdk_dbpath = /home/aris/Installations/default-env/var/MonetDB/mTests_sql_test_sample +# cmdline opt embedded_c = true +# cmdline opt gdk_debug 
= 553648138 + +# 14:40:06 > +# 14:40:06 > "mclient" "-lsql" "-ftest" "-tnone" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-23651" "--port=34313" +# 14:40:06 > + +MAPI = (monetdb) /var/tmp/mtest-23651/.s.monetdb.34313 +QUERY = select * from GENERATE_SERIES(1,41) sample 1.25; --should give error +ERROR = !Illegal argument p should be between 0 and 1.0 + + + +# 14:40:06 > +# 14:40:06 > "Done." +# 14:40:06 > + diff --git a/sql/test/sample/Tests/sample-queries.stable.out b/sql/test/sample/Tests/sample-queries.stable.out new file mode 100644 --- /dev/null +++ b/sql/test/sample/Tests/sample-queries.stable.out @@ -0,0 +1,396 @@ +stdout of test 'sample-queries` in directory 'sql/test/sample` itself: + + +# 14:40:06 > +# 14:40:06 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=34313" "--set" "mapi_usock=/var/tmp/mtest-23651/.s.monetdb.34313" "--set" "monet_prompt=" "--forcemito" "--dbpath=/home/aris/Installations/default-env/var/MonetDB/mTests_sql_test_sample" "--set" "embedded_c=true" +# 14:40:06 > + +# MonetDB 5 server v11.32.0 +# This is an unreleased version +# Serving database 'mTests_sql_test_sample', using 4 threads +# Compiled for x86_64-pc-linux-gnu/64bit with 128bit integers +# Found 15.558 GiB available main-memory. +# Copyright (c) 1993 - July 2008 CWI. 
+# Copyright (c) August 2008 - 2018 MonetDB B.V., all rights reserved +# Visit https://www.monetdb.org/ for further information +# Listening for connection requests on mapi:monetdb://aris-MonetDB:34313/ +# Listening for UNIX domain connection requests on mapi:monetdb:///var/tmp/mtest-23651/.s.monetdb.34313 +# SQL catalog created, loading sql scripts once +# loading sql script: 09_like.sql +# loading sql script: 10_math.sql +# loading sql script: 11_times.sql +# loading sql script: 12_url.sql +# loading sql script: 13_date.sql +# loading sql script: 14_inet.sql +# loading sql script: 15_querylog.sql +# loading sql script: 16_tracelog.sql +# loading sql script: 17_temporal.sql +# loading sql script: 18_index.sql +# loading sql script: 20_vacuum.sql +# loading sql script: 21_dependency_views.sql +# loading sql script: 22_clients.sql +# loading sql script: 23_skyserver.sql +# loading sql script: 25_debug.sql +# loading sql script: 26_sysmon.sql +# loading sql script: 27_rejects.sql +# loading sql script: 39_analytics.sql +# loading sql script: 39_analytics_hge.sql +# loading sql script: 40_json.sql +# loading sql script: 40_json_hge.sql +# loading sql script: 41_md5sum.sql +# loading sql script: 45_uuid.sql +# loading sql script: 46_profiler.sql +# loading sql script: 51_sys_schema_extension.sql +# loading sql script: 60_wlcr.sql +# loading sql script: 75_storagemodel.sql +# loading sql script: 80_statistics.sql +# loading sql script: 80_udf.sql +# loading sql script: 80_udf_hge.sql +# loading sql script: 90_generator.sql +# loading sql script: 90_generator_hge.sql +# loading sql script: 99_system.sql +# MonetDB/SQL module loaded + +Ready. 
+ +# 14:40:06 > +# 14:40:06 > "mclient" "-lsql" "-ftest" "-tnone" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-23651" "--port=34313" +# 14:40:06 > + +#select * from GENERATE_SERIES(1,41) sample 0; --empty set +% .L3 # table_name +% value # name +% tinyint # type +% 1 # length +#select * from GENERATE_SERIES(1,41) sample 1 seed 1234; --1 sample +% .L3 # table_name +% value # name _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list