kgyrtkirk commented on a change in pull request #2756:
URL: https://github.com/apache/hive/pull/2756#discussion_r748218177
##########
File path:
standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
##########
@@ -1101,6 +1118,7 @@ struct CommitTxnRequest {
5: optional CommitTxnKeyValue keyValue,
6: optional bool exclWriteEnabled = true,
7: optional TxnType txn_type,
+ 8: optional set<AffectedRowCount> rowsAffected,
Review comment:
if an older client connect - may it really leave this info out; and
everything will work correctly?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
##########
@@ -235,6 +223,46 @@ private void updateStats(StatsAggregator statsAggregator,
Map<String, String> pa
}
+ private static class TransactionalStatsProcessor {
+ private final HiveTxnManager txnManager;
+ private final Partish partish;
+
+ private TransactionalStatsProcessor(HiveTxnManager txnManager, Partish
partish) {
+ this.txnManager = txnManager;
+ this.partish = partish;
+ }
+
+ private long toLong(String value) {
+ if (value == null || value.isEmpty()) {
+ return 0;
+ }
+
+ return Long.parseLong(value);
+ }
+
+ public void process(StatsAggregator statsAggregator) throws HiveException,
MetaException {
+ if (statsAggregator == null) {
+ return;
+ }
+
+ if (partish.isTransactionalTable()) {
+ String prefix = getAggregationPrefix(partish.getTable(),
partish.getPartition());
+ long insertCount = toLong(statsAggregator.aggregateStats(prefix,
INSERT_COUNT));
+ long updateCount = toLong(statsAggregator.aggregateStats(prefix,
UPDATE_COUNT));
+ long deleteCount = toLong(statsAggregator.aggregateStats(prefix,
DELETE_COUNT));
+
+ if (insertCount > 0 || updateCount > 0 || deleteCount > 0) {
+ AffectedRowCount affectedRowCount = new AffectedRowCount();
+ affectedRowCount.setTableId(partish.getTable().getTTable().getId());
+ affectedRowCount.setInsertCount(insertCount);
+ affectedRowCount.setUpdatedCount(updateCount);
+ affectedRowCount.setDeletedCount(deleteCount);
+
+ txnManager.addAffectedRowCount(affectedRowCount);
Review comment:
I don't really understand why are we dragging this thru the the
txnManager - right here in this class we know that for table `X` we made N,M,K
inserts/updates/deletes - didn't we lose the context ; will it be harder to
track it down?
we already making some metastore calls from this class - wouldn't using
those would be simpler? (I'm still looking around in this patch so I might be
wrong)
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
##########
@@ -12193,13 +12194,14 @@ private void walkASTMarkTABREF(TableMask tableMask,
ASTNode ast, Set<String> cte
if (table.isMaterializedView()) {
// When we are querying a materialized view directly, we check
whether the source tables
// do not apply any policies.
- for (String qName : table.getCreationMetadata().getTablesUsed()) {
+ for (SourceTable sourceTable :
table.getCreationMetadata().getTablesUsed()) {
+ String qualifiedTableName =
TableName.getDbTable(sourceTable.getDbName(), sourceTable.getTableName());
Review comment:
can't we use TableName objects instead of going back to strings?
##########
File path:
standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
##########
@@ -457,11 +457,21 @@ struct StorageDescriptor {
12: optional bool storedAsSubDirectories // stored as subdirectories
or not
}
+struct SourceTable {
+ 1: required string dbName,
+ 2: required string tableName,
+ 3: required i64 tableId,
+ 4: required bool insertOnly,
+ 5: required i64 insertedCount,
+ 6: required i64 updatedCount,
+ 7: required i64 deletedCount
+}
+
struct CreationMetadata {
1: required string catName
2: required string dbName,
3: required string tblName,
- 4: required set<string> tablesUsed,
+ 4: required set<SourceTable> tablesUsed,
Review comment:
this is an incompatible change of the thrift api - if an older client
will ask for an rpc call with this data it will surely get into trouble....
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java
##########
@@ -2537,6 +2544,20 @@ private CreationMetadata
convertToCreationMetadata(MCreationMetadata s) {
return r;
}
+ private SourceTable convertToSourceTable(MMVSource mmvSource) {
+ SourceTable sourceTable = new SourceTable();
+ MTable mTable = mmvSource.getTable();
+ sourceTable.setTableId(mTable.getId());
+ sourceTable.setDbName(mTable.getDatabase().getName());
+ sourceTable.setTableName(mTable.getTableName());
+ String transactionalProp =
mTable.getParameters().get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES);
+
sourceTable.setInsertOnly("insert_only".equalsIgnoreCase(transactionalProp));
Review comment:
this `equalIgnoreCase` is a bit out of scope here: maybe there is a
method which could be used for this?
##########
File path: ql/src/java/org/apache/hadoop/hive/ql/stats/BasicStatsTask.java
##########
@@ -278,6 +306,9 @@ private int aggregateStats(Hive db) {
}
db.alterTable(tableFullName, res, environmentContext, true);
+ TransactionalStatsProcessor transactionalStatsProcessor = new
TransactionalStatsProcessor(txnManager, p);
+ transactionalStatsProcessor.process(statsAggregator);
Review comment:
this call is after the metastore call; meanwhile for partiioned the
order is different
##########
File path:
standalone-metastore/metastore-server/src/main/sql/derby/upgrade-3.2.0-to-4.0.0.derby.sql
##########
@@ -176,6 +176,10 @@ ALTER TABLE "APP"."DC_PRIVS" ADD CONSTRAINT "DC_PRIVS_PK"
PRIMARY KEY ("DC_GRANT
ALTER TABLE "APP"."DC_PRIVS" ADD CONSTRAINT "DC_PRIVS_FK1" FOREIGN KEY
("NAME") REFERENCES "APP"."DATACONNECTORS" ("NAME") ON DELETE NO ACTION ON
UPDATE NO ACTION;
CREATE UNIQUE INDEX "APP"."DCPRIVILEGEINDEX" ON "APP"."DC_PRIVS"
("AUTHORIZER", "NAME", "PRINCIPAL_NAME", "PRINCIPAL_TYPE", "DC_PRIV",
"GRANTOR", "GRANTOR_TYPE");
+-- HIVE-25656
+ALTER TABLE "APP"."MV_TABLES_USED" ADD COLUMN "INSERTED_COUNT" BIGINT NOT NULL
DEFAULT 0;
+ALTER TABLE "APP"."MV_TABLES_USED" ADD COLUMN "UPDATED_COUNT" BIGINT NOT NULL
DEFAULT 0;
+ALTER TABLE "APP"."MV_TABLES_USED" ADD COLUMN "DELETED_COUNT" BIGINT NOT NULL
DEFAULT 0;
Review comment:
primary key seems to be missing
##########
File path:
standalone-metastore/metastore-server/src/main/sql/mssql/hive-schema-4.0.0.mssql.sql
##########
@@ -424,7 +424,10 @@ CREATE INDEX MV_UNIQUE_TABLE ON MV_CREATION_METADATA
(TBL_NAME,DB_NAME);
CREATE TABLE MV_TABLES_USED
(
MV_CREATION_METADATA_ID bigint NOT NULL,
- TBL_ID bigint NOT NULL
+ TBL_ID bigint NOT NULL,
+ INSERTED_COUNT bigint NOT NULL DEFAULT 0,
+ UPDATED_COUNT bigint NOT NULL DEFAULT 0,
+ DELETED_COUNT bigint NOT NULL DEFAULT 0
Review comment:
primary key seems to be missing
##########
File path: standalone-metastore/metastore-server/src/main/resources/package.jdo
##########
@@ -253,18 +253,33 @@
<field name="tblName">
<column name="TBL_NAME" length="256" jdbc-type="VARCHAR"/>
</field>
- <field name="tables" table="MV_TABLES_USED">
- <collection element-type="MTable"/>
- <join>
- <column name="MV_CREATION_METADATA_ID"/>
- </join>
- <element column="TBL_ID"/>
+ <field name="tables" mapped-by="creationMetadata"
dependent-element="true">
+ <collection element-type="MMVSource" dependent-element="true" />
+ <foreign-key name="MV_TABLES_USED_FK1" delete-action="cascade"/>
</field>
<field name="txnList">
<column name="TXN_LIST" length="32672" jdbc-type="VARCHAR"
allows-null="true"/>
</field>
</class>
+ <class name="MMVSource" identity-type="application" table="MV_TABLES_USED"
detachable="true" objectid-class="MMVSource$PK">
+ <field name="creationMetadata" primary-key="true">
+ <column name="MV_CREATION_METADATA_ID"/>
+ </field>
+ <field name="table" primary-key="true">
+ <column name="TBL_ID"/>
+ </field>
Review comment:
I'm wondering about the order in which JDO will create the primary key;
I guess it will be `METADATA_ID,TBL_ID` or the other way around?
but in any case we should declare it in our upgrade sqls
##########
File path:
standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql
##########
@@ -201,6 +201,11 @@ ALTER TABLE COMPLETED_COMPACTIONS ADD CC_INITIATOR_ID
varchar(128);
ALTER TABLE COMPLETED_COMPACTIONS ADD CC_INITIATOR_VERSION varchar(128);
ALTER TABLE COMPLETED_COMPACTIONS ADD CC_WORKER_VERSION varchar(128);
+-- HIVE-25656
+ALTER TABLE TABLES_USED ADD INSERTED_COUNT NUMBER DEFAULT 0 NOT NULL;
+ALTER TABLE TABLES_USED ADD UPDATED_COUNT NUMBER DEFAULT 0 NOT NULL;
+ALTER TABLE TABLES_USED ADD DELETED_COUNT NUMBER DEFAULT 0 NOT NULl;
Review comment:
primary key seems to be missing
##########
File path:
standalone-metastore/metastore-server/src/main/sql/mssql/upgrade-3.2.0-to-4.0.0.mssql.sql
##########
@@ -227,6 +227,11 @@ ALTER TABLE DC_PRIVS ADD CONSTRAINT DC_PRIVS_FK1 FOREIGN
KEY (NAME) REFERENCES D
CREATE UNIQUE INDEX DCPRIVILEGEINDEX ON DC_PRIVS
(AUTHORIZER,NAME,PRINCIPAL_NAME,PRINCIPAL_TYPE,DC_PRIV,GRANTOR,GRANTOR_TYPE);
CREATE INDEX DC_PRIVS_N49 ON DC_PRIVS (NAME);
+-- HIVE-25656
+ALTER TABLE "MV_TABLES_USED" ADD "INSERTED_COUNT" BIGINT NOT NULL DEFAULT 0;
+ALTER TABLE "MV_TABLES_USED" ADD "UPDATED_COUNT" BIGINT NOT NULL DEFAULT 0;
+ALTER TABLE "MV_TABLES_USED" ADD "DELETED_COUNT" BIGINT NOT NULL DEFAULT 0;
Review comment:
primary key seems to be missing
##########
File path:
standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift
##########
@@ -2454,7 +2472,7 @@ service ThriftHiveMetastore extends fb303.FacebookService
GetTableResult get_table_req(1:GetTableRequest req) throws (1:MetaException
o1, 2:NoSuchObjectException o2)
GetTablesResult get_table_objects_by_name_req(1:GetTablesRequest req)
throws (1:MetaException o1,
2:InvalidOperationException o2, 3:UnknownDBException o3)
- Materialization get_materialization_invalidation_info(1:CreationMetadata
creation_metadata, 2:string validTxnList)
+ Materialization get_materialization_invalidation_info(1:CreationMetadata
creation_metadata)
Review comment:
one more incompatible change...not sure what we want to do with this
##########
File path:
standalone-metastore/metastore-server/src/main/sql/oracle/upgrade-3.2.0-to-4.0.0.oracle.sql
##########
@@ -201,6 +201,11 @@ ALTER TABLE COMPLETED_COMPACTIONS ADD CC_INITIATOR_ID
varchar(128);
ALTER TABLE COMPLETED_COMPACTIONS ADD CC_INITIATOR_VERSION varchar(128);
ALTER TABLE COMPLETED_COMPACTIONS ADD CC_WORKER_VERSION varchar(128);
+-- HIVE-25656
+ALTER TABLE TABLES_USED ADD INSERTED_COUNT NUMBER DEFAULT 0 NOT NULL;
+ALTER TABLE TABLES_USED ADD UPDATED_COUNT NUMBER DEFAULT 0 NOT NULL;
+ALTER TABLE TABLES_USED ADD DELETED_COUNT NUMBER DEFAULT 0 NOT NULl;
Review comment:
have you tested oracle? unfortunately its not covered in precommit at all
##########
File path:
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MCreationMetadata.java
##########
@@ -47,12 +75,13 @@ public MCreationMetadata(String catName, String dbName,
String tblName,
this.materializationTime = materializationTime;
}
- public Set<MTable> getTables() {
+ public Set<MMVSource> getTables() {
return tables;
}
- public void setTables(Set<MTable> tables) {
- this.tables = tables;
+ public void setTables(Set<MMVSource> tables) {
+ this.tables.clear();
Review comment:
this change aims to make this class independent from the passed
collection; I guess there were issues arising from it; but the constructor is
still keeping the externally passed reference - if it caused problem maybe an
`ImmutableSet` would be better|?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]