dengzhhu653 commented on code in PR #5523:
URL: https://github.com/apache/hive/pull/5523#discussion_r1857704711


##########
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/SQLGenerator.java:
##########
@@ -183,55 +181,6 @@ public PreparedStatement 
prepareStmtWithParameters(Connection dbConn, String sql
     return pst;
   }
 
-
-
-  /**
-   * Oracle SQL query that creates or replaces view HMS_SUMMARY.
-   */
-  private static final String CREATE_METADATASUMMARY_ORACLE = "CREATE OR 
REPLACE VIEW METADATASUMMARYALL AS SELECT a.TBL_ID, a.TBL_NAME, a.OWNER as 
\"CTLG\", a.TBL_TYPE, a.CREATE_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT, 
c.IS_COMPRESSED, c.LOCATION, c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB, 
TO_CHAR(e.PARAM_VALUE) as \"PARAM_VAL\", count(j.COLUMN_NAME) as 
\"TOTAL_COLUMN_COUNT\", jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT, 
jj.MAP_COLUMN_COUNT, k.PARTITION_KEY_NAME as \"PARTITION_COLUMN\", 
m.PARTITION_CNT, CAST(CAST(q.NUM_FILES AS VARCHAR2(200)) AS NUMBER) as 
\"num_files\", CAST(q.TOTAL_SIZE AS NUMBER) as \"total_size\", CAST(q.NUM_ROWS 
AS NUMBER) as \"num_rows\", q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES 
FROM TBLS a LEFT JOIN DBS b on a.DB_ID = b.DB_ID LEFT JOIN SDS c on a.SD_ID = 
c.SD_ID LEFT JOIN SERDES d on c.SERDE_ID = d.SERDE_ID LEFT JOIN (select 
SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY = 
'field.delim') e on c.SERDE_ID = e.SERDE_ID LEFT JOIN CO
 LUMNS_V2 j on c.CD_ID = j.CD_ID LEFT JOIN (SELECT CD_ID, sum(CASE WHEN 
TYPE_NAME like 'array%' THEN 1 ELSE 0 END) AS \"ARRAY_COLUMN_COUNT\", sum(CASE 
WHEN TYPE_NAME like 'struct%' THEN 1 ELSE 0 END) AS \"STRUCT_COLUMN_COUNT\", 
sum(CASE WHEN TYPE_NAME like 'map%' THEN 1 ELSE 0 END) AS \"MAP_COLUMN_COUNT\" 
from COLUMNS_V2 group by CD_ID) jj on jj.CD_ID=c.CD_ID LEFT JOIN (select 
TBL_ID, LISTAGG(PKEY_NAME, ',') as PARTITION_KEY_NAME from PARTITION_KEYS group 
by TBL_ID) k on a.TBL_ID = k.TBL_ID LEFT JOIN (select 
SERDE_ID,PARAM_KEY,PARAM_VALUE from SERDE_PARAMS where PARAM_KEY = 
'serialization.format') f on c.SERDE_ID = f.SERDE_ID LEFT JOIN (select 
TBL_ID,PARAM_KEY,PARAM_VALUE from TABLE_PARAMS where PARAM_KEY = 'comment') g 
on a.TBL_ID = g.TBL_ID LEFT JOIN (select TBL_ID, PARAM_KEY,PARAM_VALUE from 
TABLE_PARAMS where PARAM_KEY = 'transient_lastDdlTime') h on a.TBL_ID = 
h.TBL_ID LEFT JOIN (select TBL_ID,COUNT(PART_ID) as PARTITION_CNT from 
PARTITIONS group by TBL_ID) m on a.TBL_ID = m.TBL
 _ID LEFT JOIN (SELECT aa.TBL_ID, aa.NUM_FILES + case when bb.NUM_FILES is not 
null then bb.NUM_FILES else 0 end AS \"NUM_FILES\", aa.NUM_ROWS + case when 
bb.NUM_ROWS is not null then bb.NUM_ROWS else 0 end AS \"NUM_ROWS\", 
aa.TOTAL_SIZE + case when bb.TOTAL_SIZE is not null then bb.TOTAL_SIZE else 0 
end AS \"TOTAL_SIZE\", aa.WRITE_FORMAT_DEFAULT, aa.TRANSACTIONAL_PROPERTIES 
from (select u.TBL_ID, NUM_FILES, NUM_ROWS, TOTAL_SIZE, WRITE_FORMAT_DEFAULT, 
TRANSACTIONAL_PROPERTIES from (select TBL_ID, max(CASE PARAM_KEY WHEN 
'numFiles' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) 
AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN 'numRows' THEN CAST(CAST(PARAM_VALUE 
AS VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_ROWS\", max(CASE PARAM_KEY 
WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS VARCHAR2(200)) AS NUMBER) ELSE 0 
END) AS \"TOTAL_SIZE\" from TABLE_PARAMS group by TBL_ID) u left join (select 
TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as \"WRITE_FORMAT_DEFAULT\" from TAB
 LE_PARAMS where PARAM_KEY = 'write.format.default') v on u.TBL_ID = v.TBL_ID 
left join (select TBL_ID, CAST(PARAM_VALUE AS VARCHAR2(200)) as 
\"TRANSACTIONAL_PROPERTIES\" from TABLE_PARAMS where PARAM_KEY = 
'transactional_properties') w on u.TBL_ID = w.TBL_ID) aa left join (SELECT 
y.TBL_ID, SUM(x.NUM_FILES) AS \"NUM_FILES\", SUM(x.NUM_ROWS) AS \"NUM_ROWS\", 
SUM(x.TOTAL_SIZE) AS \"TOTAL_SIZE\" FROM PARTITIONS y left join (SELECT 
PART_ID, max(CASE PARAM_KEY WHEN 'numFiles' THEN CAST(CAST(PARAM_VALUE AS 
VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"NUM_FILES\", max(CASE PARAM_KEY WHEN 
'numRows' THEN CAST(CAST(PARAM_VALUE AS CHAR(200)) AS NUMBER) ELSE 0 END) AS 
\"NUM_ROWS\", max(CASE PARAM_KEY WHEN 'totalSize' THEN CAST(CAST(PARAM_VALUE AS 
VARCHAR2(200)) AS NUMBER) ELSE 0 END) AS \"TOTAL_SIZE\" FROM PARTITION_PARAMS 
group by PART_ID) x ON y.PART_ID = x.PART_ID group by y.TBL_ID) bb on aa.TBL_ID 
= bb.TBL_ID) q on a.TBL_ID = q.TBL_ID group by a.TBL_ID, a.TBL_NAME, a.OWNER, 
a.TBL_TYPE, a.CREAT
 E_TIME, a.DB_ID, a.SD_ID, b.NAME, c.INPUT_FORMAT, c.IS_COMPRESSED, c.LOCATION, 
c.OUTPUT_FORMAT,c.SERDE_ID, d.SLIB, TO_CHAR(e.PARAM_VALUE), 
jj.ARRAY_COLUMN_COUNT, jj.STRUCT_COLUMN_COUNT,jj.MAP_COLUMN_COUNT, 
k.PARTITION_KEY_NAME, m.PARTITION_CNT,q.NUM_FILES, q.TOTAL_SIZE, q.NUM_ROWS, 
q.WRITE_FORMAT_DEFAULT, q.TRANSACTIONAL_PROPERTIES;";

Review Comment:
   Tested on an env with millions of the partition and ten thousands of the 
table, the performance of collecting summary doesn't degrade afterwards.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to