dimas-b commented on code in PR #3352: URL: https://github.com/apache/polaris/pull/3352#discussion_r2662091681
########## persistence/relational-jdbc/src/main/resources/cockroachdb/schema-v1.sql: ########## @@ -0,0 +1,146 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file-- +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"). You may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +-- CockroachDB schema v1 (based on PostgreSQL schema v3) +-- This is the initial schema version for CockroachDB support +-- Features: +-- * Uses INT4 explicitly for all integer columns (required for CockroachDB JDBC driver) +-- * Includes all tables: version, entities, grant_records, principal_authentication_data, +-- policy_mapping_record, events +-- * Compatible with PostgreSQL wire protocol + +CREATE SCHEMA IF NOT EXISTS POLARIS_SCHEMA; +SET search_path TO POLARIS_SCHEMA; + +CREATE TABLE IF NOT EXISTS version ( + version_key TEXT PRIMARY KEY, + version_value INT4 NOT NULL +); +INSERT INTO version (version_key, version_value) +VALUES ('version', 1) +ON CONFLICT (version_key) DO UPDATE +SET version_value = EXCLUDED.version_value; +COMMENT ON TABLE version IS 'the version of the JDBC schema in use'; + +CREATE TABLE IF NOT EXISTS entities ( + realm_id TEXT NOT NULL, + catalog_id BIGINT NOT NULL, + id BIGINT NOT NULL, + parent_id BIGINT NOT NULL, + name TEXT NOT NULL, + entity_version INT4 NOT NULL, + type_code INT4 NOT NULL, + sub_type_code INT4 NOT NULL, + create_timestamp BIGINT NOT NULL, + drop_timestamp BIGINT NOT NULL, + purge_timestamp BIGINT NOT NULL, + to_purge_timestamp BIGINT NOT NULL, + last_update_timestamp BIGINT NOT NULL, + properties JSONB not null default '{}'::JSONB, + internal_properties JSONB not null default '{}'::JSONB, + grant_records_version INT4 NOT NULL, + location_without_scheme TEXT, + PRIMARY KEY (realm_id, id), + CONSTRAINT constraint_name UNIQUE (realm_id, catalog_id, parent_id, type_code, name) +); + +-- TODO: create indexes based on all query pattern. +CREATE INDEX IF NOT EXISTS idx_entities ON entities (realm_id, catalog_id, id); +CREATE INDEX IF NOT EXISTS idx_locations + ON entities USING btree (realm_id, parent_id, location_without_scheme) + WHERE location_without_scheme IS NOT NULL; + +COMMENT ON TABLE entities IS 'all the entities'; + +COMMENT ON COLUMN entities.realm_id IS 'realm_id used for multi-tenancy'; +COMMENT ON COLUMN entities.catalog_id IS 'catalog id'; +COMMENT ON COLUMN entities.id IS 'entity id'; +COMMENT ON COLUMN entities.parent_id IS 'entity id of parent'; +COMMENT ON COLUMN entities.name IS 'entity name'; +COMMENT ON COLUMN entities.entity_version IS 'version of the entity'; +COMMENT ON COLUMN entities.type_code IS 'type code'; +COMMENT ON COLUMN entities.sub_type_code IS 'sub type of entity'; +COMMENT ON COLUMN entities.create_timestamp IS 'creation time of entity'; +COMMENT ON COLUMN entities.drop_timestamp IS 'time of drop of entity'; +COMMENT ON COLUMN entities.purge_timestamp IS 'time to start purging entity'; +COMMENT ON COLUMN entities.last_update_timestamp IS 'last time the entity is touched'; +COMMENT ON COLUMN entities.properties IS 'entities properties json'; +COMMENT ON COLUMN entities.internal_properties IS 'entities internal properties json'; +COMMENT ON COLUMN entities.grant_records_version IS 'the version of grant records change on the entity'; + +CREATE TABLE IF NOT EXISTS grant_records ( + realm_id TEXT NOT NULL, + securable_catalog_id BIGINT NOT NULL, + securable_id BIGINT NOT NULL, + grantee_catalog_id BIGINT NOT NULL, + grantee_id BIGINT NOT NULL, + privilege_code INT4, + PRIMARY KEY (realm_id, securable_catalog_id, securable_id, grantee_catalog_id, grantee_id, privilege_code) +); + +COMMENT ON TABLE grant_records IS 'grant records for entities'; + +COMMENT ON COLUMN grant_records.securable_catalog_id IS 'catalog id of the securable'; +COMMENT ON COLUMN grant_records.securable_id IS 'entity id of the securable'; +COMMENT ON COLUMN grant_records.grantee_catalog_id IS 'catalog id of the grantee'; +COMMENT ON COLUMN grant_records.grantee_id IS 'id of the grantee'; +COMMENT ON COLUMN grant_records.privilege_code IS 'privilege code'; + +CREATE TABLE IF NOT EXISTS principal_authentication_data ( + realm_id TEXT NOT NULL, + principal_id BIGINT NOT NULL, + principal_client_id VARCHAR(255) NOT NULL, + main_secret_hash VARCHAR(255) NOT NULL, + secondary_secret_hash VARCHAR(255) NOT NULL, + secret_salt VARCHAR(255) NOT NULL, + PRIMARY KEY (realm_id, principal_client_id) +); + +COMMENT ON TABLE principal_authentication_data IS 'authentication data for client'; + +CREATE TABLE IF NOT EXISTS policy_mapping_record ( + realm_id TEXT NOT NULL, + target_catalog_id BIGINT NOT NULL, + target_id BIGINT NOT NULL, + policy_type_code INT4 NOT NULL, + policy_catalog_id BIGINT NOT NULL, + policy_id BIGINT NOT NULL, + parameters JSONB NOT NULL DEFAULT '{}'::JSONB, + PRIMARY KEY (realm_id, target_catalog_id, target_id, policy_type_code, policy_catalog_id, policy_id) +); + +CREATE INDEX IF NOT EXISTS idx_policy_mapping_record ON policy_mapping_record (realm_id, policy_type_code, policy_catalog_id, policy_id, target_catalog_id, target_id); + +CREATE TABLE IF NOT EXISTS events ( + realm_id TEXT NOT NULL, + catalog_id TEXT NOT NULL, + event_id TEXT NOT NULL, + request_id TEXT, + event_type TEXT NOT NULL, + timestamp_ms BIGINT NOT NULL, + principal_name TEXT, + resource_type TEXT NOT NULL, + resource_identifier TEXT NOT NULL, + additional_properties JSONB NOT NULL DEFAULT '{}'::JSONB, + PRIMARY KEY (event_id) +); + +-- INT4 type used directly in table definitions for CockroachDB JDBC compatibility Review Comment: It is probably preferable to move this comment to the top of this file for visibility. WDYT? ########## persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/DatabaseType.java: ########## @@ -36,28 +45,128 @@ public String getDisplayName() { return displayName; } + /** + * Returns the latest schema version available for this database type. This is used as the default + * schema version for new installations. + */ + public int getLatestSchemaVersion() { + return switch (this) { + case POSTGRES -> 3; // PostgreSQL has schemas v1, v2, v3 + case COCKROACHDB -> 1; // CockroachDB currently has only schema v1 + case H2 -> 3; // H2 uses same schemas as PostgreSQL Review Comment: Related to my comment on the schema file: I believe all "latest" JDBC-based schemas should have the same version and evolve in sync. This is because from the caller side (Polaris JDBC persistence code), the "interface" of all JDBC schemas should be the same (column names and type compatibility). Maintaining different column names and non-trivial type conversions separately for all JDBC backends is going to be too much overhead and will complicate Polaris code unnecessarily. If the schemas can be assumed to be compatible from the client's (Polaris) perspective, using the same version number will only be natural. WDYT? ########## persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java: ########## @@ -137,6 +139,14 @@ public void writeEntities( @Nonnull PolarisCallContext callCtx, @Nonnull List<PolarisBaseEntity> entities, List<PolarisBaseEntity> originalEntities) { + Set<Long> seenEntityIds = new HashSet<>(); + for (PolarisBaseEntity entity : entities) { + if (!seenEntityIds.add(entity.getId())) { + throw new RetryOnConcurrencyException( Review Comment: Why a "retry" exception as opposed to an `IllegalArgumentException`? I believe the behaviour of this method with multiple objects having the same ID is not well-defined and such situations should be avoided on the caller side. ########## runtime/test-common/src/main/java/org/apache/polaris/test/commons/CockroachRelationalJdbcLifeCycleManagement.java: ########## @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.test.commons; + +import io.quarkus.test.common.DevServicesContext; +import io.quarkus.test.common.QuarkusTestResourceLifecycleManager; +import java.util.Map; +import org.testcontainers.cockroachdb.CockroachContainer; + +public class CockroachRelationalJdbcLifeCycleManagement + implements QuarkusTestResourceLifecycleManager, DevServicesContext.ContextAware { + public static final String INIT_SCRIPT = "init-script"; + + private CockroachContainer cockroach; + private String initScript; + private DevServicesContext context; + + @Override + public void init(Map<String, String> initArgs) { + initScript = initArgs.get(INIT_SCRIPT); + } + + @Override + public Map<String, String> start() { + // CockroachDB testcontainers uses PostgreSQL wire protocol compatibility + cockroach = new CockroachContainer("cockroachdb/cockroach:v24.3.0"); Review Comment: Please use `ContainerSpecHelper` to enable managing docker image versions via Renovate. Cf. https://github.com/apache/polaris/blob/5d77feb2e6699cacf229b56a45c615a247b5c43f/runtime/test-common/src/main/java/org/apache/polaris/test/commons/PostgresRelationalJdbcLifeCycleManagement.java#L46 ########## persistence/relational-jdbc/src/main/resources/cockroachdb/schema-v1.sql: ########## Review Comment: Since CockroachDB uses mostly the same Polaris code a PostgreSQL, I believe the schema versions are best kept in sync. I do not think we have to support old schemas in CockroachDB, but the current one should probably be `v3` as our PostgreSQL schema... WDYT? ########## persistence/relational-jdbc/src/main/java/org/apache/polaris/persistence/relational/jdbc/JdbcBasePersistenceImpl.java: ########## @@ -137,6 +139,14 @@ public void writeEntities( @Nonnull PolarisCallContext callCtx, @Nonnull List<PolarisBaseEntity> entities, List<PolarisBaseEntity> originalEntities) { + Set<Long> seenEntityIds = new HashSet<>(); + for (PolarisBaseEntity entity : entities) { + if (!seenEntityIds.add(entity.getId())) { + throw new RetryOnConcurrencyException( + "Multiple updates to entity id '%s' in the same transaction", entity.getId()); + } + } Review Comment: Why / when would this code be invoked with multiple entities having the same ID? 🤔 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
