Repository: madlib Updated Branches: refs/heads/master 7d7a069a4 -> fd4b5c242
Build: Add PG11 Support JIRA: MADLIB-1283 PG11 support required a number of minor changes in the code. - Change TRUE/FALSE to true/false - Use TupleDescAttr function instead of direct access. - Use prokind column instead of proisagg. We also added a function to check if the PG version is earlier than 11 as well as the necessary cmake files. Closes #339 Project: http://git-wip-us.apache.org/repos/asf/madlib/repo Commit: http://git-wip-us.apache.org/repos/asf/madlib/commit/fd4b5c24 Tree: http://git-wip-us.apache.org/repos/asf/madlib/tree/fd4b5c24 Diff: http://git-wip-us.apache.org/repos/asf/madlib/diff/fd4b5c24 Branch: refs/heads/master Commit: fd4b5c242d4530c9af289452aef99c81295f8d06 Parents: 7d7a069 Author: Orhan Kislal <[email protected]> Authored: Fri Dec 14 14:34:43 2018 +0300 Committer: Orhan Kislal <[email protected]> Committed: Fri Dec 14 14:34:43 2018 +0300 ---------------------------------------------------------------------- methods/array_ops/src/pg_gp/array_ops.c | 4 +- methods/sketch/src/pg_gp/fm.c | 6 +-- methods/sketch/src/pg_gp/sortasort.c | 6 +-- src/modules/recursive_partitioning/DT_impl.hpp | 4 +- src/ports/postgres/11/CMakeLists.txt | 21 ++++++++ .../postgres/cmake/FindPostgreSQL_11.cmake | 21 ++++++++ src/ports/postgres/dbconnector/AnyType_impl.hpp | 10 +++- src/ports/postgres/modules/kmeans/kmeans.sql_in | 57 +++++++++++++++----- src/ports/postgres/modules/knn/knn.py_in | 8 ++- .../postgres/modules/utilities/utilities.py_in | 9 ++++ .../postgres/modules/utilities/utilities.sql_in | 9 ++++ 11 files changed, 127 insertions(+), 28 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/methods/array_ops/src/pg_gp/array_ops.c ---------------------------------------------------------------------- diff --git a/methods/array_ops/src/pg_gp/array_ops.c b/methods/array_ops/src/pg_gp/array_ops.c index 9360983..b39d2cd 100644 --- a/methods/array_ops/src/pg_gp/array_ops.c +++ b/methods/array_ops/src/pg_gp/array_ops.c @@ -647,9 +647,9 @@ array_contains(PG_FUNCTION_ARGS){ PG_FREE_IF_COPY(v2, 1); if (DatumGetFloat8(res) == 0.) { - PG_RETURN_BOOL(TRUE); + PG_RETURN_BOOL(true); } else { - PG_RETURN_BOOL(FALSE); + PG_RETURN_BOOL(false); } } http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/methods/sketch/src/pg_gp/fm.c ---------------------------------------------------------------------- diff --git a/methods/sketch/src/pg_gp/fm.c b/methods/sketch/src/pg_gp/fm.c index 4e12029..39f3413 100644 --- a/methods/sketch/src/pg_gp/fm.c +++ b/methods/sketch/src/pg_gp/fm.c @@ -556,8 +556,6 @@ big_or(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } - - /*! OR of two big bitmaps, for gathering sketches computed in parallel. */ void big_or_internal(bytea *bitmap1, bytea *bitmap2, bytea *out) { @@ -594,7 +592,7 @@ bytea *fmsketch_sortasort_insert(bytea *transblob, Datum dat, size_t len) sortasort *s_in = (sortasort *)(transval->storage); bytea * newblob; - bool success = FALSE; + bool success = false; size_t new_storage_sz; size_t newsize; @@ -605,7 +603,7 @@ bytea *fmsketch_sortasort_insert(bytea *transblob, Datum dat, size_t len) if (success < 0) elog(ERROR, "insufficient directory capacity in sortasort"); - if (success == TRUE) return (transblob); + if (success == true) return (transblob); /* XXX THIS WHILE LOOP WILL SUCCEED THE FIRST TRY ... REMOVE IT. */ while (!success) { http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/methods/sketch/src/pg_gp/sortasort.c ---------------------------------------------------------------------- diff --git a/methods/sketch/src/pg_gp/sortasort.c b/methods/sketch/src/pg_gp/sortasort.c index 8dcf427..c052ca7 100644 --- a/methods/sketch/src/pg_gp/sortasort.c +++ b/methods/sketch/src/pg_gp/sortasort.c @@ -132,7 +132,7 @@ int sortasort_try_insert(sortasort *s_in, Datum dat, int len) int found = sortasort_find(s_in, dat); if (found >= 0 && found < (int)s_in->num_vals) { /* found! just return TRUE */ - return TRUE; + return true; } len = ExtractDatumLen(dat, len, s_in->typByVal, -1); @@ -146,7 +146,7 @@ int sortasort_try_insert(sortasort *s_in, Datum dat, int len) /* we need to insert v. return FALSE if not enough space. */ if (s_in->storage_cur + len >= s_in->storage_sz) { /* caller will have to allocate a bigger one and try again */ - return FALSE; + return false; } /* return -1 if no more capacity */ @@ -173,7 +173,7 @@ int sortasort_try_insert(sortasort *s_in, Datum dat, int len) sorta_cmp, (void *)s_in); - return TRUE; + return true; } /*! http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/modules/recursive_partitioning/DT_impl.hpp ---------------------------------------------------------------------- diff --git a/src/modules/recursive_partitioning/DT_impl.hpp b/src/modules/recursive_partitioning/DT_impl.hpp index 75e4ce4..8f36173 100644 --- a/src/modules/recursive_partitioning/DT_impl.hpp +++ b/src/modules/recursive_partitioning/DT_impl.hpp @@ -862,7 +862,7 @@ DecisionTree<Container>::expand_by_sampling(const Accumulator &state, } } - bool is_leaf_split = FALSE; + bool is_leaf_split = false; if (max_impurity_gain > 0){ // Create and update child nodes if splitting current uint64_t true_count = statCount(max_stats.segment(0, sps)); @@ -871,7 +871,7 @@ DecisionTree<Container>::expand_by_sampling(const Accumulator &state, if (shouldSplit(total_count, true_count, false_count, min_split, min_bucket, max_depth)) { - is_leaf_split = TRUE; + is_leaf_split = true; double max_threshold; if (max_is_cat) max_threshold = static_cast<double>(max_bin); http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/11/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/src/ports/postgres/11/CMakeLists.txt b/src/ports/postgres/11/CMakeLists.txt new file mode 100644 index 0000000..9c46780 --- /dev/null +++ b/src/ports/postgres/11/CMakeLists.txt @@ -0,0 +1,21 @@ +# ------------------------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------------------------ + +add_current_postgresql_version() +add_extension_support() http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/cmake/FindPostgreSQL_11.cmake ---------------------------------------------------------------------- diff --git a/src/ports/postgres/cmake/FindPostgreSQL_11.cmake b/src/ports/postgres/cmake/FindPostgreSQL_11.cmake new file mode 100644 index 0000000..43ba0e4 --- /dev/null +++ b/src/ports/postgres/cmake/FindPostgreSQL_11.cmake @@ -0,0 +1,21 @@ +# ------------------------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------------------------ + +set(_FIND_PACKAGE_FILE "${CMAKE_CURRENT_LIST_FILE}") +include("${CMAKE_CURRENT_LIST_DIR}/FindPostgreSQL.cmake") http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/dbconnector/AnyType_impl.hpp ---------------------------------------------------------------------- diff --git a/src/ports/postgres/dbconnector/AnyType_impl.hpp b/src/ports/postgres/dbconnector/AnyType_impl.hpp index ab0327a..41fcd47 100644 --- a/src/ports/postgres/dbconnector/AnyType_impl.hpp +++ b/src/ports/postgres/dbconnector/AnyType_impl.hpp @@ -337,7 +337,11 @@ AnyType::operator[](uint16_t inID) const { throw std::out_of_range("Invalid type conversion. Access behind " "end of composite object."); +#if PG_VERSION_NUM >= 110000 + typeID = TupleDescAttr(tupdesc, inID)->atttypid; +#else typeID = tupdesc->attrs[inID]->atttypid; +#endif bool isNull = false; datum = madlib_GetAttributeByNum(mTupleHeader, inID, &isNull); if (isNull) @@ -448,7 +452,11 @@ AnyType::getAsDatum(FunctionCallInfo inFnCallInfo, bool* nulls = new bool[targetTupleDesc->natts]; for (size_t pos = 0; pos < mChildren.size(); ++pos) { - Oid targetTypeID = targetTupleDesc->attrs[pos]->atttypid; +#if PG_VERSION_NUM >= 110000 + Oid targetTypeID = TupleDescAttr(targetTupleDesc, pos)->atttypid; +#else + Oid targetTypeID = targetTupleDesc->attrs[pos]->atttypid; +#endif values[pos] = mChildren[pos].getAsDatum(inFnCallInfo, targetTypeID); nulls[pos] = mChildren[pos].isNull(); http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/modules/kmeans/kmeans.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/kmeans/kmeans.sql_in b/src/ports/postgres/modules/kmeans/kmeans.sql_in index c3450e7..81dea80 100644 --- a/src/ports/postgres/modules/kmeans/kmeans.sql_in +++ b/src/ports/postgres/modules/kmeans/kmeans.sql_in @@ -766,15 +766,30 @@ BEGIN proc_fn_dist := fn_dist || '(DOUBLE PRECISION[], DOUBLE PRECISION[])'; - IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE - FROM pg_proc WHERE oid = proc_fn_dist) THEN - RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; - END IF; - proc_agg_centroid := agg_centroid || '(DOUBLE PRECISION[])'; - IF (SELECT prorettype != 'DOUBLE PRECISION[]'::regtype OR proisagg = FALSE - FROM pg_proc WHERE oid = proc_agg_centroid) THEN - RAISE EXCEPTION 'Kmeans error: Mean aggregate has wrong signature or is not an aggregate.'; + + -- Handle PG11 pg_proc table changes + IF (SELECT MADLIB_SCHEMA.is_pg_major_version_less_than(11) = TRUE) THEN + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; + proc_agg_centroid := agg_centroid || '(DOUBLE PRECISION[])'; + IF (SELECT prorettype != 'DOUBLE PRECISION[]'::regtype OR proisagg = FALSE + FROM pg_proc WHERE oid = proc_agg_centroid) THEN + RAISE EXCEPTION 'Kmeans error: Mean aggregate has wrong signature or is not an aggregate.'; + END IF; + ELSE + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR prokind = 'a' + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; + proc_agg_centroid := agg_centroid || '(DOUBLE PRECISION[])'; + IF (SELECT prorettype != 'DOUBLE PRECISION[]'::regtype OR prokind != 'a' + FROM pg_proc WHERE oid = proc_agg_centroid) THEN + RAISE EXCEPTION 'Kmeans error: Mean aggregate has wrong signature or is not an aggregate.'; + END IF; END IF; + IF (min_frac_reassigned < 0) OR (min_frac_reassigned > 1) THEN RAISE EXCEPTION 'Kmeans error: Invalid convergence threshold (must be a fraction between 0 and 1).'; END IF; @@ -965,9 +980,16 @@ BEGIN rel_source, expr_point, k, initial_centroids, seeding_sample_ratio); proc_fn_dist := fn_dist || '(DOUBLE PRECISION[], DOUBLE PRECISION[])'; - IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE - FROM pg_proc WHERE oid = proc_fn_dist) THEN - RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + IF (SELECT MADLIB_SCHEMA.is_pg_major_version_less_than(11) = TRUE) THEN + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; + ELSE + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR prokind = 'a' + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; END IF; -- Unfortunately, Greenplum and PostgreSQL <= 8.2 do not have conversion @@ -1660,9 +1682,16 @@ BEGIN proc_fn_dist := fn_dist || '(DOUBLE PRECISION[], DOUBLE PRECISION[])'; - IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE - FROM pg_proc WHERE oid = proc_fn_dist) THEN - RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + IF (SELECT MADLIB_SCHEMA.is_pg_major_version_less_than(11) = TRUE) THEN + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR proisagg = TRUE + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; + ELSE + IF (SELECT prorettype != 'DOUBLE PRECISION'::regtype OR prokind = 'a' + FROM pg_proc WHERE oid = proc_fn_dist) THEN + RAISE EXCEPTION 'Kmeans error: Distance function has wrong signature or is not a simple function.'; + END IF; END IF; ans := MADLIB_SCHEMA.internal_execute_using_silhouette_args($sql$ http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/modules/knn/knn.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/knn/knn.py_in b/src/ports/postgres/modules/knn/knn.py_in index 249c316..4db7ac1 100644 --- a/src/ports/postgres/modules/knn/knn.py_in +++ b/src/ports/postgres/modules/knn/knn.py_in @@ -39,6 +39,7 @@ from utilities.validate_args import quote_ident from utilities.validate_args import is_var_valid from utilities.utilities import NUMERIC, ONLY_ARRAY from utilities.utilities import is_valid_psql_type +from utilities.utilities import is_pg_major_version_less_than MAX_WEIGHT_ZERO_DIST = 1e6 @@ -112,12 +113,15 @@ def knn_validate_src(schema_madlib, point_source, point_column_name, point_id, 'squared_dist_norm2', 'dist_angle', 'dist_tanimoto')]) + profunc = ("proisagg = TRUE" if is_pg_major_version_less_than(schema_madlib, 11) + else "prokind = 'a'") + is_invalid_func = plpy.execute(""" SELECT prorettype != 'DOUBLE PRECISION'::regtype OR - proisagg = TRUE AS OUTPUT + {profunc} AS OUTPUT FROM pg_proc WHERE oid='{fn_dist}(DOUBLE PRECISION[], DOUBLE PRECISION[])'::regprocedure; - """.format(fn_dist=fn_dist))[0]['output'] + """.format(fn_dist=fn_dist, profunc=profunc))[0]['output'] if is_invalid_func or (fn_dist not in dist_functions): plpy.error("KNN error: Distance function has invalid signature " http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/modules/utilities/utilities.py_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/utilities.py_in b/src/ports/postgres/modules/utilities/utilities.py_in index 3bd3aaf..50c426b 100644 --- a/src/ports/postgres/modules/utilities/utilities.py_in +++ b/src/ports/postgres/modules/utilities/utilities.py_in @@ -407,6 +407,15 @@ def set_client_min_messages(new_level): return old_msg_level # ------------------------------------------------------------------------- +def is_pg_major_version_less_than(schema_madlib, compare_version, **kwargs): + version = plpy.execute("select version()")[0]["version"] + regex = re.compile('PostgreSQL\s*([0-9]+)([0-9.beta]+)', re.IGNORECASE) + version = regex.findall(version) + plpy.info("{0}".format(version)) + if len(version) > 0 and int(version[0][0]) < compare_version: + return True + else: + return False # Deal with earlier versions of PG or GPDB class __mad_version: http://git-wip-us.apache.org/repos/asf/madlib/blob/fd4b5c24/src/ports/postgres/modules/utilities/utilities.sql_in ---------------------------------------------------------------------- diff --git a/src/ports/postgres/modules/utilities/utilities.sql_in b/src/ports/postgres/modules/utilities/utilities.sql_in index 9d6538d..e598566 100644 --- a/src/ports/postgres/modules/utilities/utilities.sql_in +++ b/src/ports/postgres/modules/utilities/utilities.sql_in @@ -508,3 +508,12 @@ PythonFunctionBodyOnly(utilities, utilities) return utilities.create_table_drop_cols(source_table, out_table, cols_to_drop) $$ LANGUAGE plpythonu VOLATILE m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); + + + +CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_pg_major_version_less_than( + compare_version INTEGER) +RETURNS BOOLEAN AS $$ +PythonFunction(utilities, utilities, is_pg_major_version_less_than) +$$ LANGUAGE plpythonu VOLATILE +m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
