This is an automated email from the ASF dual-hosted git repository.
okislal pushed a commit to branch madlib2-master
in repository https://gitbox.apache.org/repos/asf/madlib.git
The following commit(s) were added to refs/heads/madlib2-master by this push:
new f91813be Build: Add support for PG15
f91813be is described below
commit f91813bedb275bb042968a5e38d4293bbad6b325
Author: Orhan Kislal <[email protected]>
AuthorDate: Thu Jun 15 21:02:08 2023 -0400
Build: Add support for PG15
---
methods/array_ops/src/pg_gp/array_ops.c | 4 ++++
methods/sketch/src/pg_gp/sketch_support.c | 10 ++++++++--
src/madpack/madpack.py | 12 ++++++++++++
src/ports/postgres/dbconnector/UDF_impl.hpp | 4 ++++
.../modules/bayes/test/gaussian_naive_bayes.sql_in | 2 +-
src/ports/postgres/modules/crf/crf.sql_in | 14 ++++++++------
.../modules/deep_learning/madlib_keras_gpu_info.py_in | 8 +++++++-
src/ports/postgres/modules/graph/wcc.sql_in | 4 ++--
src/ports/postgres/modules/sample/balance_sample.py_in | 4 ++--
.../postgres/modules/utilities/encode_categorical.py_in | 2 +-
10 files changed, 49 insertions(+), 15 deletions(-)
diff --git a/methods/array_ops/src/pg_gp/array_ops.c
b/methods/array_ops/src/pg_gp/array_ops.c
index a842a605..574a9571 100644
--- a/methods/array_ops/src/pg_gp/array_ops.c
+++ b/methods/array_ops/src/pg_gp/array_ops.c
@@ -8,7 +8,11 @@
#include "utils/numeric.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
+#if GP_VERSION_NUM >= 70000 || PG_VERSION_NUM >= 130000
+#include "utils/fmgrprotos.h"
+#else
#include "utils/int8.h"
+#endif
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"
diff --git a/methods/sketch/src/pg_gp/sketch_support.c
b/methods/sketch/src/pg_gp/sketch_support.c
index 1cdeb904..f8baf6a6 100644
--- a/methods/sketch/src/pg_gp/sketch_support.c
+++ b/methods/sketch/src/pg_gp/sketch_support.c
@@ -306,6 +306,7 @@ bytea *sketch_md5_bytea(Datum dat, Oid typOid)
bool byval = get_typbyval(typOid);
int len = ExtractDatumLen(dat, get_typlen(typOid), byval, -1);
void *datp = DatumExtractPointer(dat, byval);
+
/*
* it's very common to be hashing 0 for countmin sketches. Rather than
* hard-code it here, we cache on first lookup. In future a bigger cache
here
@@ -318,9 +319,14 @@ bytea *sketch_md5_bytea(Datum dat, Oid typOid)
if (byval && len == sizeof(int64) && *(int64 *)datp == 0 && zero_cached) {
return md5_of_0;
}
- else
+ else{
+ #if defined(GP_VERSION_NUM) || PG_VERSION_NUM < 150000
pg_md5_hash(datp, len, outbuf);
-
+ #else
+ const char *errstr = NULL;
+ pg_md5_hash(datp, len, outbuf, &errstr);
+ #endif
+ }
hex_to_bytes(outbuf, (uint8 *)VARDATA(out), MD5_HASHLEN*2);
SET_VARSIZE(out, MD5_HASHLEN+VARHDRSZ);
if (byval && len == sizeof(int64) && *(int64 *)datp == 0 && !zero_cached) {
diff --git a/src/madpack/madpack.py b/src/madpack/madpack.py
index 6524a3de..7c91911b 100755
--- a/src/madpack/madpack.py
+++ b/src/madpack/madpack.py
@@ -188,6 +188,18 @@ def _run_m4_and_append(schema, maddir_mod_py, module,
sqlfile,
'-DMODULE_NAME=' + module,
'-I' + maddir_madpack,
sqlfile]
+ if (((portid == 'postgres') & (dbver == '15'))):
+ m4args = ['m4',
+ '-P',
+ '-DMADLIB_SCHEMA=' + schema,
+ '-DPLPYTHON_LIBDIR=' + maddir_mod_py,
+ '-DEXT_PYTHON_LIBDIR=' + maddir_ext_py,
+ '-DMODULE_PATHNAME=' + maddir_lib,
+ '-DMADLIB_LIBRARY_PATH=' + madlib_library_path,
+ '-DMODULE_NAME=' + module,
+ '-DIS_PG_15=TRUE',
+ '-I' + maddir_madpack,
+ sqlfile]
info_(this, "> ... parsing: " + " ".join(m4args), verbose)
output_filehandle.flush()
diff --git a/src/ports/postgres/dbconnector/UDF_impl.hpp
b/src/ports/postgres/dbconnector/UDF_impl.hpp
index cb377083..6674a346 100644
--- a/src/ports/postgres/dbconnector/UDF_impl.hpp
+++ b/src/ports/postgres/dbconnector/UDF_impl.hpp
@@ -13,6 +13,9 @@ namespace dbconnector {
namespace postgres {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wstringop-truncation"
+
#define MADLIB_HANDLE_STANDARD_EXCEPTION(err) \
sqlerrcode = err; \
strncpy(msg, exc.what(), sizeof(msg));
@@ -215,6 +218,7 @@ UDF::call(FunctionCallInfo fcinfo) {
#undef MADLIB_HANDLE_STANDARD_EXCEPTION
+#pragma GCC diagnostic pop
} // namespace postgres
} // namespace dbconnector
diff --git a/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in
b/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in
index 26c874f7..62dab91c 100644
--- a/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in
+++ b/src/ports/postgres/modules/bayes/test/gaussian_naive_bayes.sql_in
@@ -221,7 +221,7 @@ m4_include(`SQLCommon.m4')
m4_changequote(`<!', `!>')
m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!
SET optimizer = off;
-!>
+!>)
SELECT assert(count(*) = 10, 'Gaussian Naive Bayes produces wrong classes!')
FROM iris_clasif, iris_test
diff --git a/src/ports/postgres/modules/crf/crf.sql_in
b/src/ports/postgres/modules/crf/crf.sql_in
index acb1612b..302de8a4 100644
--- a/src/ports/postgres/modules/crf/crf.sql_in
+++ b/src/ports/postgres/modules/crf/crf.sql_in
@@ -719,14 +719,16 @@ CREATE AGGREGATE MADLIB_SCHEMA.lincrf_lbfgs_step(
INITCOND='{0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}'
);
-DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anyarray) CASCADE;
+m4_ifdef(`IS_PG_15',
+`DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anycompatiblearray)
CASCADE;',
+`DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_union(anyarray) CASCADE;')
+
CREATE m4_ifdef(`__POSTGRESQL__', `',
m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED')) AGGREGATE
-MADLIB_SCHEMA.array_union(anyarray) (
- SFUNC = array_cat,
- STYPE = anyarray
-);
-
+m4_ifdef(`IS_PG_15',
+`MADLIB_SCHEMA.array_union(anycompatiblearray) (SFUNC = array_cat, STYPE =
anycompatiblearray);',
+`MADLIB_SCHEMA.array_union(anyarray) (SFUNC = array_cat, STYPE = anyarray);'
+)
-- We only need to document the last one (unfortunately, in Greenplum we have
to
-- use function overloading instead of default arguments).
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.compute_lincrf(
diff --git
a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
index 5c550f65..7f1f381f 100644
--- a/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
+++ b/src/ports/postgres/modules/deep_learning/madlib_keras_gpu_info.py_in
@@ -65,7 +65,13 @@ class GPUInfoFunctions:
:return: list of gpu descriptions as returned by tensorflow
"""
current_working_dir = os.path.dirname(os.path.realpath(__file__))
- gpus = subprocess.check_output(["python3", "gpu_info_from_tf.py"],
+
+ try:
+ subprocess.check_output(["which", "python3.9"])
+ py_version = "python3.9"
+ except Exception:
+ py_version = "python3"
+ gpus = subprocess.check_output([py_version, "gpu_info_from_tf.py"],
cwd=current_working_dir).splitlines()
return gpus
diff --git a/src/ports/postgres/modules/graph/wcc.sql_in
b/src/ports/postgres/modules/graph/wcc.sql_in
index 9d9b4802..cd299d3d 100644
--- a/src/ports/postgres/modules/graph/wcc.sql_in
+++ b/src/ports/postgres/modules/graph/wcc.sql_in
@@ -374,7 +374,7 @@ SELECT madlib.weakly_connected_components(
'edge', -- Edge table
'src=conn_src, dest=conn_dest', -- Comma delimited string of edge arguments
'wcc_out'); -- Output table of weakly connected
components
-SELECT * FROM wcc_out ORDER BY component_id, id;
+SELECT * FROM wcc_out ORDER BY component_id, node_id;
</pre>
<pre class="result">
node_id | component_id
@@ -407,7 +407,7 @@ SELECT madlib.weakly_connected_components(
'src=conn_src, dest=conn_dest', -- Comma delimited string of edge arguments
'wcc_out', -- Output table of weakly connected
components
'user_id'); -- Grouping column name
-SELECT * FROM wcc_out ORDER BY user_id, component_id, id;
+SELECT * FROM wcc_out ORDER BY user_id, component_id, node_id;
</pre>
<pre class="result">
node_id | component_id | user_id
diff --git a/src/ports/postgres/modules/sample/balance_sample.py_in
b/src/ports/postgres/modules/sample/balance_sample.py_in
index 0385499f..63440e9d 100644
--- a/src/ports/postgres/modules/sample/balance_sample.py_in
+++ b/src/ports/postgres/modules/sample/balance_sample.py_in
@@ -73,7 +73,7 @@ def _get_level_frequency_distribution(source_table, class_col,
"""
if grp_by_cols and grp_by_cols.lower() != 'null':
is_grouping = True
- grp_by_cols_comma = grp_by_cols + ', '
+ grp_by_cols_comma = 'group_values, '
array_grp_by_cols_comma = "ARRAY[{0}]".format(grp_by_cols) + " AS
group_values, "
else:
is_grouping = False
@@ -96,7 +96,7 @@ def _get_level_frequency_distribution(source_table, class_col,
({class_col})::TEXT AS classes,
count(*) AS class_count
FROM {source_table}
- GROUP BY {grp_by_cols_comma} ({class_col})
+ GROUP BY {grp_by_cols_comma} classes
) q
{meta_grp_by}
""".format(grp_identifier="group_values" if is_grouping else "NULL",
diff --git a/src/ports/postgres/modules/utilities/encode_categorical.py_in
b/src/ports/postgres/modules/utilities/encode_categorical.py_in
index b47f8a21..8f39f0ff 100644
--- a/src/ports/postgres/modules/utilities/encode_categorical.py_in
+++ b/src/ports/postgres/modules/utilities/encode_categorical.py_in
@@ -469,7 +469,7 @@ class CategoricalEncoder(object):
count(*)::integer as c
FROM {tbl}
{filter_str}
- GROUP BY {col}
+ GROUP BY f
) q
""".format(schema_madlib=self.schema_madlib,
col=col, tbl=self.source_table,