This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 52f3943049884412b1f1fb0376dd29f5a861400b Author: Fang-Yu Rao <[email protected]> AuthorDate: Fri Feb 20 11:09:51 2026 -0800 IMPALA-14768: Add the operation type to the lineage graph This patch makes Impala produce the operation type of the completed query in the corresponding lineage event so that it is easier for data lineage tools like Apache Atlas to derive the operation type of a given query. Note that currently Apache Atlas determines the operation type of a given Impala query by matching the 'queryText' field of the lineage event against predefined regular expressions. Refer to https://github.com/apache/atlas/blob/2957ff2/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java#L49-L77 for more details. However, such an approach is not robust. Recall that the string in 'queryText' is produced by the Impala server, which replaces each newline in the original query string with a space and then redacts the result. Thus, 'queryText' may no longer be a valid SQL statement. string stmt = replace_all_copy(query_ctx->client_request.stmt, "\n", " "); Redact(&stmt); // 'redacted_stmt' will be the string Impala uses to populate // 'queryText' of the lineage event. query_ctx->client_request.__set_redacted_stmt((const string) stmt); For instance, when the original query string contains a one-line SQL comment, it can be difficult to decide where that comment ends once every newline in the original query string has been replaced with a space. After this patch, it will be much easier for data lineage tools to determine the operation type, since it is provided directly in the lineage log. In addition to the 'operationType_' field, this patch also makes PlannerTest#testLineage() check the 'queryStr_' field of ColumnLineageGraph when testLineage() compares the actual lineage graph with the expected one in lineage.test run in the frontend test. 
Testing: - Added a new test case to lineage.test run in end-to-end test to show that Impala could produce a lineage event for INSERT OVERWRITE. - Updated lineage.test run in end-to-end and frontend tests to make sure each lineage event comes with its respective operation type. Change-Id: Icb94120a9bb1b994d4e681ea98521035bcc6510e Reviewed-on: http://gerrit.cloudera.org:8080/24018 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/util/lineage-util.h | 2 + common/thrift/LineageGraph.thrift | 3 + .../org/apache/impala/analysis/AlterViewStmt.java | 2 +- .../apache/impala/analysis/ColumnLineageGraph.java | 75 ++++- .../impala/analysis/CreateOrAlterViewStmtBase.java | 5 +- .../apache/impala/analysis/CreateTableStmt.java | 7 +- .../org/apache/impala/analysis/CreateViewStmt.java | 2 +- .../org/apache/impala/analysis/InsertStmt.java | 2 + .../apache/impala/analysis/ParsedStatement.java | 5 + .../impala/analysis/ParsedStatementImpl.java | 6 + .../java/org/apache/impala/planner/Planner.java | 3 +- .../calcite/service/CalciteParsedStatement.java | 7 + .../queries/PlannerTest/lineage.test | 48 ++++ .../queries/QueryTest/lineage.test | 305 +++++++++++++++++++++ 14 files changed, 458 insertions(+), 14 deletions(-) diff --git a/be/src/util/lineage-util.h b/be/src/util/lineage-util.h index 80a77af5f..15b74a358 100644 --- a/be/src/util/lineage-util.h +++ b/be/src/util/lineage-util.h @@ -90,6 +90,8 @@ class LineageUtil { writer.StartObject(); writer.String("queryText"); writer.String(lineage.query_text); + writer.String("operationType"); + writer.String(lineage.operation_type); writer.String("queryId"); writer.String(PrintId(lineage.query_id)); writer.String("hash"); diff --git a/common/thrift/LineageGraph.thrift b/common/thrift/LineageGraph.thrift index 5d0f48202..ac780e285 100644 --- a/common/thrift/LineageGraph.thrift +++ b/common/thrift/LineageGraph.thrift @@ -86,4 +86,7 @@ struct TLineageGraph { // Set only for 
external tables to establish // lineage between the table and it's location. 9: optional string table_location + + // Operation type of the query. + 10: required string operation_type } diff --git a/fe/src/main/java/org/apache/impala/analysis/AlterViewStmt.java b/fe/src/main/java/org/apache/impala/analysis/AlterViewStmt.java index fd6668a7d..5c3366f0e 100755 --- a/fe/src/main/java/org/apache/impala/analysis/AlterViewStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/AlterViewStmt.java @@ -74,7 +74,7 @@ public class AlterViewStmt extends CreateOrAlterViewStmtBase { createColumnAndViewDefs(analyzer); if (BackendConfig.INSTANCE.getComputeLineage() || RuntimeEnv.INSTANCE.isTestEnv()) { - computeLineageGraph(analyzer); + computeLineageGraph(analyzer, this); } } diff --git a/fe/src/main/java/org/apache/impala/analysis/ColumnLineageGraph.java b/fe/src/main/java/org/apache/impala/analysis/ColumnLineageGraph.java index 1a5dec544..7e5e73ba6 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ColumnLineageGraph.java +++ b/fe/src/main/java/org/apache/impala/analysis/ColumnLineageGraph.java @@ -99,6 +99,30 @@ public class ColumnLineageGraph { public static final String VIEW = "view"; public static final String VIRTUAL = "virtual"; public static final String EXTERNAL_DATASOURCE = "external-datasource"; + + public enum OperationType { + CREATEVIEW("CREATEVIEW"), + CREATETABLE_AS_SELECT("CREATETABLE_AS_SELECT"), + ALTERVIEW_AS("ALTERVIEW_AS"), + INSERT_OVERWRITE("INSERT_OVERWRITE"), + UPSERT("UPSERT"), + INSERT("INSERT"), + SELECT("SELECT"), + CREATETABLE("CREATETABLE"), + UNKNOWN("UNKNOWN"); + + private final String operationType_; + + OperationType(String operationType) { + operationType_ = operationType; + } + + @Override + public String toString() { + return operationType_; + } + } + /** * Represents a vertex in the column lineage graph. 
A Vertex may correspond to a base * table column, a column in the destination table (for the case of INSERT or CTAS @@ -452,6 +476,8 @@ public class ColumnLineageGraph { // Query statement private String queryStr_; + private String operationType_; + private TUniqueId queryId_; // Name of the user that issued this query @@ -487,12 +513,14 @@ public class ColumnLineageGraph { /** * Private c'tor, used only for testing. */ - private ColumnLineageGraph(String stmt, TUniqueId queryId, String user, long timestamp) + private ColumnLineageGraph(String stmt, TUniqueId queryId, String user, long timestamp, + String operationType) { queryStr_ = stmt; queryId_ = queryId; user_ = user; timestamp_ = timestamp; + operationType_ = operationType; } private void setVertices(Set<Vertex> vertices) { @@ -573,8 +601,9 @@ public class ColumnLineageGraph { * Computes the column lineage graph of a query from the list of query result exprs. * 'rootAnalyzer' is the Analyzer that was used for the analysis of the query. */ - public void computeLineageGraph(List<Expr> resultExprs, Analyzer rootAnalyzer) { - init(rootAnalyzer); + public void computeLineageGraph(List<Expr> resultExprs, Analyzer rootAnalyzer, + OperationType operationType) { + init(rootAnalyzer, operationType); // Compute the dependencies only if result expressions are available. if (resultExprs != null && !resultExprs.isEmpty()) { computeProjectionDependencies(resultExprs, rootAnalyzer); @@ -585,7 +614,7 @@ public class ColumnLineageGraph { /** * Initialize the ColumnLineageGraph from the root analyzer of a query. 
*/ - private void init(Analyzer analyzer) { + private void init(Analyzer analyzer, OperationType operationType) { Preconditions.checkNotNull(analyzer); Preconditions.checkState(analyzer.isRootAnalyzer()); TQueryCtx queryCtx = analyzer.getQueryCtx(); @@ -599,8 +628,11 @@ public class ColumnLineageGraph { descTbl_ = analyzer.getDescTbl(); user_ = analyzer.getUser().getName(); queryId_ = queryCtx.query_id; + operationType_ = operationType.toString(); } + + private void computeProjectionDependencies(List<Expr> resultExprs, Analyzer analyzer) { Preconditions.checkNotNull(resultExprs); Preconditions.checkState(!resultExprs.isEmpty()); @@ -731,6 +763,7 @@ public class ColumnLineageGraph { if (Strings.isNullOrEmpty(queryStr_)) return ""; Map<String, Object> obj = new LinkedHashMap<>(); obj.put("queryText", queryStr_); + obj.put("operationType", operationType_); obj.put("queryId", TUniqueIdUtil.PrintId(queryId_)); obj.put("hash", getQueryHash(queryStr_)); obj.put("user", user_); @@ -758,6 +791,7 @@ public class ColumnLineageGraph { TLineageGraph graph = new TLineageGraph(); if (Strings.isNullOrEmpty(queryStr_)) return graph; graph.setQuery_text(queryStr_); + graph.setOperation_type(operationType_); graph.setQuery_id(queryId_); graph.setHash(getQueryHash(queryStr_)); graph.setUser(user_); @@ -783,7 +817,8 @@ public class ColumnLineageGraph { */ public static ColumnLineageGraph fromThrift(TLineageGraph obj) { ColumnLineageGraph lineage = - new ColumnLineageGraph(obj.query_text, obj.query_id, obj.user, obj.started); + new ColumnLineageGraph(obj.query_text, obj.query_id, obj.user, obj.started, + obj.operation_type); Map<TVertex, Vertex> vertexMap = new HashMap<>(); TreeSet<Vertex> vertices = Sets.newTreeSet(); for (TVertex vertex: obj.vertices) { @@ -821,10 +856,12 @@ public class ColumnLineageGraph { if (!(obj instanceof JSONObject)) return null; JSONObject jsonObj = (JSONObject) obj; String stmt = (String) jsonObj.get("queryText"); + String operationType = (String) 
jsonObj.get("operationType"); TUniqueId queryId = TUniqueIdUtil.ParseId((String) jsonObj.get("queryId")); String user = (String) jsonObj.get("user"); long timestamp = (Long) jsonObj.get("timestamp"); - ColumnLineageGraph graph = new ColumnLineageGraph(stmt, queryId, user, timestamp); + ColumnLineageGraph graph = new ColumnLineageGraph(stmt, queryId, user, timestamp, + operationType); JSONArray serializedVertices = (JSONArray) jsonObj.get("vertices"); Set<Vertex> vertices = new HashSet<>(); for (int i = 0; i < serializedVertices.size(); ++i) { @@ -870,6 +907,8 @@ public class ColumnLineageGraph { if (obj == null) return false; if (obj.getClass() != this.getClass()) return false; ColumnLineageGraph g = (ColumnLineageGraph) obj; + if (!this.queryStr_.equals(g.queryStr_)) return false; + if (!this.operationType_.equals(g.operationType_)) return false; if (!mapEqualsForTests(this.vertices_, g.vertices_) || !listEqualsForTests(this.edges_, g.edges_)) { return false; @@ -946,4 +985,28 @@ public class ColumnLineageGraph { getTableType(dstTable))); } } + + public static OperationType computeOperationType(StatementBase stmt) { + if (stmt instanceof CreateViewStmt) { + return OperationType.CREATEVIEW; + } else if (stmt instanceof CreateTableAsSelectStmt) { + return OperationType.CREATETABLE_AS_SELECT; + } else if (stmt instanceof AlterViewStmt) { + return OperationType.ALTERVIEW_AS; + } else if (stmt instanceof InsertStmt) { + if (((InsertStmt) stmt).isOverwrite()) { + return OperationType.INSERT_OVERWRITE; + } else if (((InsertStmt) stmt).isUpsert()) { + return OperationType.UPSERT; + } else { + return OperationType.INSERT; + } + } else if (stmt instanceof SelectStmt) { + return OperationType.SELECT; + } else if (stmt instanceof CreateTableStmt) { + return OperationType.CREATETABLE; + } else { + return OperationType.UNKNOWN; + } + } } diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateOrAlterViewStmtBase.java 
b/fe/src/main/java/org/apache/impala/analysis/CreateOrAlterViewStmtBase.java index 3f2fa4d61..dbe6488a0 100755 --- a/fe/src/main/java/org/apache/impala/analysis/CreateOrAlterViewStmtBase.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateOrAlterViewStmtBase.java @@ -179,7 +179,7 @@ public abstract class CreateOrAlterViewStmtBase extends StatementBase { /** * Computes the column lineage graph for a create/alter view statement. */ - protected void computeLineageGraph(Analyzer analyzer) { + protected void computeLineageGraph(Analyzer analyzer, StatementBase stmt) { ColumnLineageGraph graph = analyzer.getColumnLineageGraph(); List<ColumnLabel> colDefs = new ArrayList<>(); for (ColumnDef colDef: finalColDefs_) { @@ -187,7 +187,8 @@ public abstract class CreateOrAlterViewStmtBase extends StatementBase { ColumnLineageGraph.VIEW)); } graph.addTargetColumnLabels(colDefs); - graph.computeLineageGraph(viewDefStmt_.getResultExprs(), analyzer); + graph.computeLineageGraph(viewDefStmt_.getResultExprs(), analyzer, + ColumnLineageGraph.computeOperationType(stmt)); if (LOG.isTraceEnabled()) LOG.trace("lineage: " + graph.debugString()); } diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index a67302df8..fb2687051 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -328,7 +328,7 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt { // If lineage logging is enabled, compute minimal lineage graph. if (BackendConfig.INSTANCE.getComputeLineage() || RuntimeEnv.INSTANCE.isTestEnv()) { - computeLineageGraph(analyzer); + computeLineageGraph(analyzer, this); } analyzeSerializationEncoding(); @@ -339,9 +339,10 @@ public class CreateTableStmt extends StatementBase implements SingleTableStmt { * populate a few fields of the graph including query text. 
If this is a CTAS, * the graph is enhanced during the "insert" phase of CTAS. */ - protected void computeLineageGraph(Analyzer analyzer) { + protected void computeLineageGraph(Analyzer analyzer, StatementBase stmt) { ColumnLineageGraph graph = analyzer.getColumnLineageGraph(); - graph.computeLineageGraph(new ArrayList(), analyzer); + graph.computeLineageGraph(new ArrayList(), analyzer, + ColumnLineageGraph.computeOperationType(stmt)); } /** diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java index a133e53c0..016f8d58a 100755 --- a/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateViewStmt.java @@ -66,7 +66,7 @@ public class CreateViewStmt extends CreateOrAlterViewStmtBase { createColumnAndViewDefs(analyzer); if (BackendConfig.INSTANCE.getComputeLineage() || RuntimeEnv.INSTANCE.isTestEnv()) { - computeLineageGraph(analyzer); + computeLineageGraph(analyzer, this); } } diff --git a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java index ab565fce9..832d71d46 100644 --- a/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/InsertStmt.java @@ -1146,6 +1146,8 @@ public class InsertStmt extends DmlStatementBase { public boolean isTargetTableKuduTable() { return (table_ instanceof FeKuduTable); } public void setWriteId(long writeId) { this.writeId_ = writeId; } public boolean isOverwrite() { return overwrite_; } + public boolean isUpsert() { return isUpsert_; } + @Override public TSortingOrder getSortingOrder() { return sortingOrder_; } diff --git a/fe/src/main/java/org/apache/impala/analysis/ParsedStatement.java b/fe/src/main/java/org/apache/impala/analysis/ParsedStatement.java index 185c77cb2..6d85eaa89 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ParsedStatement.java +++ 
b/fe/src/main/java/org/apache/impala/analysis/ParsedStatement.java @@ -20,6 +20,7 @@ package org.apache.impala.analysis; import java.util.Set; import org.apache.impala.analysis.AnalysisContext.AnalysisResult; +import org.apache.impala.analysis.ColumnLineageGraph.OperationType; /** * ParsedStatement interface that holds the parsed query statement. @@ -47,4 +48,8 @@ public interface ParsedStatement { // Could be overridden to handle an AuthorizationException thrown for a registered // masked privilege request. public void handleAuthorizationException(AnalysisResult analysisResult); + + // Returns the type of the operation that will be used to produce the column lineage + // graph when applicable. + public OperationType getOperationType(); } diff --git a/fe/src/main/java/org/apache/impala/analysis/ParsedStatementImpl.java b/fe/src/main/java/org/apache/impala/analysis/ParsedStatementImpl.java index d3826ca94..060028cbb 100644 --- a/fe/src/main/java/org/apache/impala/analysis/ParsedStatementImpl.java +++ b/fe/src/main/java/org/apache/impala/analysis/ParsedStatementImpl.java @@ -19,6 +19,7 @@ package org.apache.impala.analysis; import java.util.Set; +import org.apache.impala.analysis.ColumnLineageGraph.OperationType; import org.apache.impala.common.ImpalaException; import org.apache.impala.thrift.TQueryCtx; import org.apache.impala.thrift.TQueryOptions; @@ -82,4 +83,9 @@ public class ParsedStatementImpl implements ParsedStatement { AnalysisContext.AnalysisResult analysisResult) { stmt_.handleAuthorizationException(analysisResult); } + + @Override + public OperationType getOperationType() { + return ColumnLineageGraph.computeOperationType(stmt_); + } } diff --git a/fe/src/main/java/org/apache/impala/planner/Planner.java b/fe/src/main/java/org/apache/impala/planner/Planner.java index 5da40d760..760767c33 100644 --- a/fe/src/main/java/org/apache/impala/planner/Planner.java +++ b/fe/src/main/java/org/apache/impala/planner/Planner.java @@ -279,7 +279,8 @@ public class Planner 
{ } List<Expr> outputExprs = new ArrayList<>(); rootFragment.getSink().collectExprs(outputExprs); - graph.computeLineageGraph(outputExprs, ctx_.getRootAnalyzer()); + graph.computeLineageGraph(outputExprs, ctx_.getRootAnalyzer(), + ctx_.getAnalysisResult().getParsedStmt().getOperationType()); if (LOG.isTraceEnabled()) LOG.trace("lineage: " + graph.debugString()); ctx_.getTimeline().markEvent("Lineage info computed"); } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteParsedStatement.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteParsedStatement.java index bd2e75894..51ae5bcb5 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteParsedStatement.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteParsedStatement.java @@ -26,6 +26,7 @@ import org.apache.impala.analysis.AnalysisDriver; import org.apache.impala.analysis.ParsedStatement; import org.apache.impala.analysis.TableName; import org.apache.impala.analysis.StmtMetadataLoader; +import org.apache.impala.analysis.ColumnLineageGraph.OperationType; import org.apache.impala.catalog.FeCatalog; import org.apache.impala.common.ImpalaException; import org.apache.impala.thrift.TQueryCtx; @@ -89,6 +90,12 @@ public class CalciteParsedStatement implements ParsedStatement { // Do nothing. } + @Override + public OperationType getOperationType() { + // Currently, we only support SELECT queries when Calcite is the planner. + return OperationType.SELECT; + } + public SqlNode getParsedSqlNode() { // Return a clone of 'parsedNode_' to maintain the immutability of 'parsedNode_'. 
// This prevents other instances, e.g., diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test b/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test index aa7a8de8c..47acc1dc6 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/lineage.test @@ -7,6 +7,7 @@ select * from ( ---- LINEAGE { "queryText":"select * from (\n select tinyint_col + int_col x from functional.alltypes\n union all\n select sum(bigint_col) y from (select bigint_col from functional.alltypes) v1) v2", + "operationType":"SELECT", "queryId":"0:0", "hash":"1a88e455569d619b268157867d97489d", "user":"dummy_user", @@ -76,6 +77,7 @@ order by b.bigint_col limit 10 ---- LINEAGE { "queryText":"select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\nfrom functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\nwhere a.year = 2010 and b.float_col > 0\ngroup by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\nhaving count(a.int_col) > 10\norder by b.bigint_col limit 10", + "operationType":"SELECT", "queryId":"0:0", "hash":"ea11605d73c3e77c40d2905de9a1f008", "user":"dummy_user", @@ -263,6 +265,7 @@ create table lineage_test_tbl as select int_col, tinyint_col from functional.all ---- LINEAGE { "queryText":"create table lineage_test_tbl as select int_col, tinyint_col from functional.alltypes", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"2f673a20092048a5035f4a01edd612fd", "user":"dummy_user", @@ -338,6 +341,7 @@ where a.year = 2009 and b.month = 2 ---- LINEAGE { "queryText":"create table lineage_test_tbl as\nselect distinct a.int_col, a.string_col from functional.alltypes a\ninner join functional.alltypessmall b on (a.id = b.id)\nwhere a.year = 2009 and b.month = 2", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", 
"hash":"2c547ba840dfc511483fd1a5909da15f", "user":"dummy_user", @@ -466,6 +470,7 @@ select * from ---- LINEAGE { "queryText":"create table lineage_test_tbl as\nselect * from\n (select * from\n (select int_col from functional.alltypestiny limit 1) v1 ) v2", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"d34ab281d4fcf088efe7b558a5786a00", "user":"dummy_user", @@ -510,6 +515,7 @@ create table lineage_test_tblm as select * from functional_hbase.alltypes limit ---- LINEAGE { "queryText":"create table lineage_test_tblm as select * from functional_hbase.alltypes limit 5", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"b69259e85dac890c7df82ca94dcf8c54", "user":"dummy_user", @@ -905,6 +911,7 @@ functional_hbase.alltypes ---- LINEAGE { "queryText":"insert into\nfunctional_hbase.alltypes\n values (1, 1, true, \"1999-12-01\", 2.0, 1.0, 1, 12, 2, \"abs\",\n cast(now() as timestamp), 1, 1999)", + "operationType": "INSERT", "queryId":"0:0", "hash":"7eacea08a1e442fae1edb6734cd15551", "user":"dummy_user", @@ -1155,6 +1162,7 @@ from functional.alltypes ---- LINEAGE { "queryText":"insert into table functional.alltypesnopart (id, bool_col, timestamp_col)\nselect id, bool_col, timestamp_col\nfrom functional.alltypes", + "operationType": "INSERT", "queryId":"0:0", "hash":"24d103f36450208bfb8cccfa2db687c4", "user":"dummy_user", @@ -1404,6 +1412,7 @@ where year=2009 and month=05 ---- LINEAGE { "queryText":"insert into table functional.alltypessmall (smallint_col, int_col)\npartition (year=2009, month=04)\nselect smallint_col, int_col\nfrom functional.alltypes\nwhere year=2009 and month=05", + "operationType": "INSERT", "queryId":"0:0", "hash":"1d7f2ed12efeb2876353734d7f1fca33", "user":"dummy_user", @@ -1720,6 +1729,7 @@ where year=2009 and month>10 ---- LINEAGE { "queryText":"insert into table functional.alltypessmall (id, string_col, int_col)\npartition (year, month)\nselect id, string_col, int_col, year, month\nfrom 
functional_seq_snap.alltypes\nwhere year=2009 and month>10", + "operationType": "INSERT", "queryId":"0:0", "hash":"adbda5f55e1de85c20b63ef02baa49b1", "user":"dummy_user", @@ -2053,6 +2063,7 @@ having min(id) > 10 ---- LINEAGE { "queryText":"insert into table functional.alltypessmall\npartition (year=2009, month)\nselect min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col),\nmin(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col),\nmin(timestamp_col), month\nfrom functional.alltypes\nwhere year=2009 and month>10\ngroup by month\nhaving min(id) > 10", + "operationType": "INSERT", "queryId":"0:0", "hash":"7059dbbaa22bdf0c2d4c5b08baf11ef2", "user":"dummy_user", @@ -2470,6 +2481,7 @@ group by int_col, tinyint_col ---- LINEAGE { "queryText":"select\nmax(tinyint_col) over(partition by int_col)\nfrom functional.alltypes\ngroup by int_col, tinyint_col", + "operationType":"SELECT", "queryId":"0:0", "hash":"4de56e1b4b44d31bf3851fb46deed110", "user":"dummy_user", @@ -2527,6 +2539,7 @@ select int_col, rank() over(order by int_col) from functional.alltypesagg ---- LINEAGE { "queryText":"select int_col, rank() over(order by int_col) from functional.alltypesagg", + "operationType":"SELECT", "queryId":"0:0", "hash":"6c646de10e62d1155b5216ae8589ff6d", "user":"dummy_user", @@ -2590,6 +2603,7 @@ order by a.tinyint_col, a.int_col ---- LINEAGE { "queryText":"select a.tinyint_col, a.int_col, count(a.double_col)\n over(partition by a.tinyint_col order by a.int_col desc rows between 1 preceding and 1 following)\nfrom functional.alltypes a inner join functional.alltypessmall b on a.id = b.id\norder by a.tinyint_col, a.int_col", + "operationType":"SELECT", "queryId":"0:0", "hash":"57958e2fb9f6dd0277f91593b59ed66e", "user":"dummy_user", @@ -2726,6 +2740,7 @@ order by 2, 3, 4 ---- LINEAGE { "queryText":"with v2 as\n (select\n double_col,\n count(int_col) over() a,\n sum(int_col + bigint_col) over(partition by bool_col) b\n from\n (select * 
from functional.alltypes) v1)\nselect double_col, a, b, a + b, double_col + a from v2\norder by 2, 3, 4", + "operationType":"SELECT", "queryId":"0:0", "hash":"94cd8f86dbdf00a40b9d935bccffcbaa", "user":"dummy_user", @@ -2878,6 +2893,7 @@ order by 2, 3, 4 ---- LINEAGE { "queryText":"select double_col, a, b, a + b, double_col + a from\n (select\n double_col,\n count(int_col) over() a,\n sum(int_col + bigint_col) over(partition by bool_col) b\n from\n (select * from functional.alltypes) v1) v2\norder by 2, 3, 4", + "operationType":"SELECT", "queryId":"0:0", "hash":"4c67b7dddb64d9f584c1e6bd9e182fc6", "user":"dummy_user", @@ -3032,6 +3048,7 @@ where b.month = 1 ---- LINEAGE { "queryText":"select a.month, a.year, b.int_col, b.month\nfrom\n (select year, month from functional.alltypes\n union all\n select year, month from functional.alltypes) a\n inner join\n functional.alltypessmall b\n on (a.month = b.month)\nwhere b.month = 1", + "operationType":"SELECT", "queryId":"0:0", "hash":"6f38ebd361a51366f80a9f889f302424", "user":"dummy_user", @@ -3160,6 +3177,7 @@ where month = 1 ---- LINEAGE { "queryText":"select t1.int_col, t2.month, t2.int_col + 1\nfrom (\n select int_col, count(*)\n from functional.alltypessmall\n where month = 1\n group by int_col\n having count(*) > 1\n order by count(*) desc limit 5\n ) t1\njoin functional.alltypes t2 on (t1.int_col = t2.int_col)\nwhere month = 1", + "operationType":"SELECT", "queryId":"0:0", "hash":"90ceff377b6984b0c3eb7306c3711b38", "user":"dummy_user", @@ -3282,6 +3300,7 @@ and x.int_col + x.float_col + cast(c.string_col as float) < 1000 ---- LINEAGE { "queryText":"select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\nfrom functional.alltypessmall c\njoin (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on 
(a.smallint_col = b.id)\n ) x on (x.tinyint_col = c.id)\nwhere x.day=1\nand x.int_col [...] + "operationType":"SELECT", "queryId":"0:0", "hash":"154323930680f2bff235d4abcf5afa62", "user":"dummy_user", @@ -3499,6 +3518,7 @@ from ---- LINEAGE { "queryText":"select c1, c2, c3\nfrom\n (select c1, c2, c3\n from\n (select int_col c1, sum(float_col) c2, min(float_col) c3\n from functional_hbase.alltypessmall\n group by 1) x\n order by 2,3 desc\n limit 5\n) y", + "operationType":"SELECT", "queryId":"0:0", "hash":"412ef8a4c54e66f19a0065c3fecc5c76", "user":"dummy_user", @@ -3603,6 +3623,7 @@ limit 0 ---- LINEAGE { "queryText":"select c1, x2\nfrom (\n select c1, min(c2) x2\n from (\n select c1, c2, c3\n from (\n select int_col c1, tinyint_col c2, min(float_col) c3\n from functional_hbase.alltypessmall\n group by 1, 2\n order by 1,2\n limit 1\n ) x\n ) x2\n group by c1\n) y\norder by 2,1 desc\nlimit 0", + "operationType":"SELECT", "queryId":"0:0", "hash":"44626c1f89757d07604a3f054e98d67d", "user":"dummy_user", @@ -3677,6 +3698,7 @@ select int_col, string_col from functional.view_view ---- LINEAGE { "queryText":"select int_col, string_col from functional.view_view", + "operationType":"SELECT", "queryId":"0:0", "hash":"05d4986bc7d30b32d8780d787af559d9", "user":"dummy_user", @@ -3740,6 +3762,7 @@ where t.id < 10 ---- LINEAGE { "queryText":"select t.id from (select id from functional.alltypes_view) t\nwhere t.id < 10", + "operationType":"SELECT", "queryId":"0:0", "hash":"0c505dc14adb7f037eea4b3c77c39bff", "user":"dummy_user", @@ -3791,6 +3814,7 @@ where id in ---- LINEAGE { "queryText":"select string_col, float_col, bool_col\nfrom functional.alltypes\nwhere id in\n (select id from functional.alltypesagg)", + "operationType":"SELECT", "queryId":"0:0", "hash":"ae4a1b83a50d2ce727117d0387c8e098", "user":"dummy_user", @@ -3916,6 +3940,7 @@ and tinyint_col < 10 ---- LINEAGE { "queryText":"select 1\nfrom functional.alltypesagg a\nwhere exists\n (select id, count(int_col) over (partition 
by bool_col)\n from functional.alltypestiny b\n where a.tinyint_col = b.tinyint_col\n group by id, int_col, bool_col)\nand tinyint_col < 10", + "operationType":"SELECT", "queryId":"0:0", "hash":"ce3e78ae349dd67613458acb0dac572f", "user":"dummy_user", @@ -3977,6 +4002,7 @@ and a.bigint_col > 10 ---- LINEAGE { "queryText":"select int_col + 1, tinyint_col - 1\nfrom functional.alltypes a\nwhere a.int_col <\n (select max(int_col) from functional.alltypesagg g where g.bool_col = true)\nand a.bigint_col > 10", + "operationType":"SELECT", "queryId":"0:0", "hash":"2301fac73ccbb692c19615733c977c14", "user":"dummy_user", @@ -4083,6 +4109,7 @@ with t as (select int_col x, bigint_col y from functional.alltypes) select x, y ---- LINEAGE { "queryText":"with t as (select int_col x, bigint_col y from functional.alltypes) select x, y from t", + "operationType":"SELECT", "queryId":"0:0", "hash":"ff421b2c2b249764028350613be74fb8", "user":"dummy_user", @@ -4147,6 +4174,7 @@ select id, int_col, string_col, year, month from t1 ---- LINEAGE { "queryText":"with t1 as (select * from functional.alltypestiny)\ninsert into functional.alltypesinsert (id, int_col, string_col) partition(year, month)\nselect id, int_col, string_col, year, month from t1", + "operationType": "INSERT", "queryId":"0:0", "hash":"0fa8fc72001bde1a8745956bccc77820", "user":"dummy_user", @@ -4456,6 +4484,7 @@ from ---- LINEAGE { "queryText":"select lead(a) over (partition by b order by c)\nfrom\n (select lead(id) over (partition by int_col order by bigint_col) as a,\n max(id) over (partition by tinyint_col order by int_col) as b,\n min(int_col) over (partition by string_col order by bool_col) as c\n from functional.alltypes) v", + "operationType":"SELECT", "queryId":"0:0", "hash":"6891082bb252b6fce39d846bf89c497d", "user":"dummy_user", @@ -4559,6 +4588,7 @@ create view test_view_lineage as select id from functional.alltypestiny ---- LINEAGE { "queryText":"create view test_view_lineage as select id from 
functional.alltypestiny", + "operationType": "CREATEVIEW", "queryId":"0:0", "hash":"fd85383983eafa8a57731e87fba7b7b6", "user":"dummy_user", @@ -4619,6 +4649,7 @@ limit 0 ---- LINEAGE { "queryText":"create view test_view_lineage (a, b) as select c1, x2\nfrom (\n select c1, min(c2) x2\n from (\n select c1, c2, c3\n from (\n select int_col c1, tinyint_col c2, min(float_col) c3\n from functional_hbase.alltypessmall\n group by 1, 2\n order by 1,2\n limit 1\n ) x\n ) x2\n group by c1\n) y\norder by 2,1 desc\nlimit 0", + "operationType": "CREATEVIEW", "queryId":"0:0", "hash":"a3cdac0fcdaef861d783d0ef91ad1919", "user":"dummy_user", @@ -4716,6 +4747,7 @@ create view test_view_lineage (a1, a2, a3, a4, a5, a6, a7) as ---- LINEAGE { "queryText":"create view test_view_lineage (a1, a2, a3, a4, a5, a6, a7) as\n select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col\n from functional.alltypessmall c\n join (\n select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,\n a.int_col int_col, a.month month, b.float_col float_col, b.id id\n from ( select * from functional.alltypesagg a where month=1 ) a\n join functional.alltypessmall b on (a.smallin [...] 
+ "operationType": "CREATEVIEW", "queryId":"0:0", "hash":"687637d408bf427f7851f9dfafb4d637", "user":"dummy_user", @@ -4974,6 +5006,7 @@ create view test_view_lineage as ---- LINEAGE { "queryText":"create view test_view_lineage as\n select * from (\n select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id),\n count(b.string_col), b.timestamp_col\n from functional.alltypes a join functional.alltypessmall b on (a.id = b.id)\n where a.year = 2010 and b.float_col > 0\n group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col\n having count(a.int_col) > 10\n order by b.bigint_col limit 10) t", + "operationType": "CREATEVIEW", "queryId":"0:0", "hash":"663b053e02a33d9bbd58ddc23ccbc101", "user":"dummy_user", @@ -5176,6 +5209,7 @@ alter view functional.alltypes_view as select id from functional.alltypestiny ---- LINEAGE { "queryText":"alter view functional.alltypes_view as select id from functional.alltypestiny", + "operationType": "ALTERVIEW_AS", "queryId":"0:0", "hash":"f12ecfd6e5357c92cbd14f52b2637e1a", "user":"dummy_user", @@ -5227,6 +5261,7 @@ select * from ( ---- LINEAGE { "queryText":"select * from (\n select int_struct_col.f1 + int_struct_col.f2 x from functional.allcomplextypes\n where year = 2000\n order by nested_struct_col.f2.f12.f21 limit 10\n union all\n select sum(f1) y from\n (select complex_struct_col.f1 f1 from functional.allcomplextypes\n group by 1) v1) v2", + "operationType":"SELECT", "queryId":"0:0", "hash":"c32c80f6a02e5f11b920d658daa4a5c3", "user":"dummy_user", @@ -5320,6 +5355,7 @@ select * from functional.allcomplextypes.int_array_col a inner join ---- LINEAGE { "queryText":"select * from functional.allcomplextypes.int_array_col a inner join\n functional.allcomplextypes.struct_map_col m on (a.item = m.f1)", + "operationType":"SELECT", "queryId":"0:0", "hash":"f0e1c9c0c94732cfb0e44792a1339d3e", "user":"dummy_user", @@ -5446,6 +5482,7 @@ select * from functional.allcomplextypes t, t.int_array_col a, 
t.struct_map_col ---- LINEAGE { "queryText":"select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col m\n where a.item = m.f1", + "operationType":"SELECT", "queryId":"0:0", "hash":"58bf06d90d336cd80b24a2ea6404aa32", "user":"dummy_user", @@ -5676,6 +5713,7 @@ select a + b as ab, c, d, e from functional.allcomplextypes t, ---- LINEAGE { "queryText":"select a + b as ab, c, d, e from functional.allcomplextypes t,\n (select sum(item) a from t.int_array_col\n where item < 10) v1,\n (select count(f1) b from t.struct_map_col\n group by key) v2,\n (select avg(value) over(partition by key) c from t.map_map_col.value) v3,\n (select item d from t.int_array_col\n union all\n select value from t.int_map_col) v4,\n (select f21 e from t.complex_nested_struct_col.f2.f12 order by key limit 10) v5", + "operationType":"SELECT", "queryId":"0:0", "hash":"17f8a4d5d7a82a254e377d2dd9f3ca66", "user":"dummy_user", @@ -5854,6 +5892,7 @@ where not exists (select 1 from functional.alltypes a where v.id = a.id) ---- LINEAGE { "queryText":"create view test_view_lineage as\nselect id from functional.alltypes_view v\nwhere not exists (select 1 from functional.alltypes a where v.id = a.id)", + "operationType": "CREATEVIEW", "queryId":"0:0", "hash":"41a074680a8eedce05db55fde68669e0", "user":"dummy_user", @@ -5908,6 +5947,7 @@ where k.int_col < 10 ---- LINEAGE { "queryText":"select count(*) from functional_kudu.alltypes k join functional.alltypes h on k.id = h.id\nwhere k.int_col < 10", + "operationType":"SELECT", "queryId":"0:0", "hash":"0528557b137722769f5df029d4816c7a", "user":"dummy_user", @@ -5978,6 +6018,7 @@ functional.alltypes a where a.id < 100 ---- LINEAGE { "queryText":"insert into functional_kudu.testtbl select id, string_col as name, int_col as zip from\nfunctional.alltypes a where a.id < 100", + "operationType": "INSERT", "queryId":"0:0", "hash":"dd40a5127a7a1abe4708ac58811680ae", "user":"dummy_user", @@ -6092,6 +6133,7 @@ functional.alltypes where id < 10 ---- 
LINEAGE { "queryText":"insert into functional_kudu.testtbl (name, id) select string_col as name, id from\nfunctional.alltypes where id < 10", + "operationType": "INSERT", "queryId":"0:0", "hash":"111bc2413ac3bc54cdca8c0f0fc3cb8c", "user":"dummy_user", @@ -6176,6 +6218,7 @@ functional.alltypes where id < 10 ---- LINEAGE { "queryText":"upsert into functional_kudu.testtbl (name, id) select string_col as name, id from\nfunctional.alltypes where id < 10", + "operationType": "UPSERT", "queryId":"0:0", "hash":"848add05dd236efd79bffd369a0bdb69", "user":"dummy_user", @@ -6262,6 +6305,7 @@ from functional.alltypestiny ---- LINEAGE { "queryText":"create table kudu_ctas primary key (id) partition by hash (id) partitions 3\nstored as kudu as select id, bool_col, tinyint_col, smallint_col, int_col,\nbigint_col, float_col, double_col, date_string_col, string_col\nfrom functional.alltypestiny", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"1baf83a160e93356a7b12a393f147355", "user":"dummy_user", @@ -6570,6 +6614,7 @@ from functional_hbase.alltypestiny ---- LINEAGE { "queryText":"create table ice_ctas_from_hbase partitioned by spec (bucket(7,id))\nstored as iceberg as select id, bool_col, int_col,\nbigint_col, float_col, double_col, date_string_col, string_col\nfrom functional_hbase.alltypestiny", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"fc2031078e9801a8965cd81a415955d0", "user":"dummy_user", @@ -6820,6 +6865,7 @@ from functional_kudu.alltypestiny ---- LINEAGE { "queryText":"create table ice_ctas_from_kudu partitioned by spec (truncate(100, id))\nstored as iceberg as select id, bool_col, int_col,\nbigint_col, float_col, double_col, date_string_col, string_col\nfrom functional_kudu.alltypestiny", + "operationType": "CREATETABLE_AS_SELECT", "queryId":"0:0", "hash":"5f8edff98099449082049dda676428b0", "user":"dummy_user", @@ -7069,6 +7115,7 @@ string_col from functional_kudu.alltypestiny ---- LINEAGE { "queryText":"INSERT INTO 
functional_parquet.iceberg_alltypes_part SELECT id, bool_col, int_col,\nbigint_col, float_col, double_col, 3.14, cast(date_string_col as date format 'MM/DD/YYYY'),\nstring_col from functional_kudu.alltypestiny", + "operationType": "INSERT", "queryId":"0:0", "hash":"58cca98fbd61b04a38cce3c6d33db56d", "user":"dummy_user", @@ -7335,6 +7382,7 @@ from functional_parquet.iceberg_partitioned a where a.id < 100 ---- LINEAGE { "queryText":"insert into functional_kudu.testtbl select id, user as name, cast(id*2 as INT) as zip\nfrom functional_parquet.iceberg_partitioned a where a.id < 100", + "operationType": "INSERT", "queryId":"0:0", "hash":"e1c26e3c7ec88ea6ffe339402631655b", "user":"dummy_user", diff --git a/testdata/workloads/functional-query/queries/QueryTest/lineage.test b/testdata/workloads/functional-query/queries/QueryTest/lineage.test index ba808dd9a..73035233e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/lineage.test +++ b/testdata/workloads/functional-query/queries/QueryTest/lineage.test @@ -29,6 +29,7 @@ create table lineage_test_db.iceberg_partitioned like functional_parquet.iceberg "queryId": "7547f4882e03cdde:941c4bad00000000", "user": "anurag", "queryText": "create table lineage_test_db.foo (id int)", + "operationType": "CREATETABLE", "endTime": 1687533491 } ---- QUERY @@ -44,6 +45,7 @@ create table lineage_test_db.foo (id int) "queryId": "554879b21cb9ae72:5e564c9200000000", "user": "anurag", "queryText": "create external table lineage_test_db.ext_tbl_loc (id int) location '/test-warehouse/lineage_test_db.db/'", + "operationType": "CREATETABLE", "endTime": 1687533496, "tableLocation": "hdfs://localhost:20500/test-warehouse/lineage_test_db.db" } @@ -61,6 +63,7 @@ create external table lineage_test_db.ext_tbl_loc (id int) location "queryId": "1948dbf64fdfe0c2:2cbbe10700000000", "user": "anurag", "queryText": "create external table lineage_test_db.ext_tbl (id int)", + "operationType": "CREATETABLE", "endTime": 1687533501, "tableLocation": 
"hdfs://localhost:20500/test-warehouse/lineage_test_db.db/ext_tbl" } @@ -456,6 +459,7 @@ create external table lineage_test_db.ext_tbl (id int) "queryId": "c0450321c0d12cbe:30419c8200000000", "user": "anurag", "queryText": "create external table lineage_test_db.ext_ctas as select * from lineage_test_db.alltypes", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687533508, "tableLocation": "hdfs://localhost:20500/test-warehouse/lineage_test_db.db/ext_ctas" } @@ -523,6 +527,7 @@ create view lineage_test_db.alltypes_view as select * from lineage_test_db.allty "queryId": "8e4d008c58bff1cc:1eb556a200000000", "user": "anurag", "queryText": "select * from ( select tinyint_col + int_col x from functional.alltypes union all select sum(bigint_col) y from (select bigint_col from functional.alltypes) v1) v2", + "operationType": "SELECT", "endTime": 1687533513 } ---- QUERY @@ -716,6 +721,7 @@ select * from ( "queryId": "374d7d4cfcda6f00:11963b1000000000", "user": "anurag", "queryText": "select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id), count(b.string_col), b.timestamp_col from functional.alltypes a join functional.alltypessmall b on (a.id = b.id) where a.year = 2010 and b.float_col > 0 group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col having count(a.int_col) > 10 order by b.bigint_col limit 10", + "operationType": "SELECT", "endTime": 1687533519 } ---- QUERY @@ -799,6 +805,7 @@ order by b.bigint_col limit 10 "queryId": "5341037bc766b0c5:5b9b5c6a00000000", "user": "anurag", "queryText": "create table lineage_test_db.lineage_test_tbl1 as select int_col, tinyint_col from functional.alltypes", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687533524 } ---- QUERY @@ -927,6 +934,7 @@ create table lineage_test_db.lineage_test_tbl1 as select int_col, tinyint_col fr "queryId": "2545984dd298803d:532b0b6100000000", "user": "anurag", "queryText": "create table lineage_test_db.lineage_test_tbl2 as select 
distinct a.int_col, a.string_col from functional.alltypes a inner join functional.alltypessmall b on (a.id = b.id) where a.year = 2009 and b.month = 2", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687533530 } ---- QUERY @@ -975,6 +983,7 @@ where a.year = 2009 and b.month = 2 "queryId": "e741f71c0e666916:abefa06a00000000", "user": "anurag", "queryText": "create table lineage_test_db.lineage_test_tbl3 as select * from (select * from (select int_col from functional.alltypestiny limit 1) v1 ) v2", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687533535 } ---- QUERY @@ -1371,6 +1380,7 @@ select * from "queryId": "a940f90c620a7259:2137273d00000000", "user": "anurag", "queryText": "create table lineage_test_db.lineage_test_tbl4 as select * from functional_hbase.alltypes limit 5", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687533540 } ---- QUERY @@ -1609,6 +1619,7 @@ create table lineage_test_db.lineage_test_tbl4 as select * from functional_hbase "queryId": "664e64cfb1d3fd09:ef8dd84600000000", "user": "anurag", "queryText": "insert into lineage_test_db.alltypes values (1, 1, true, \"1999-12-01\", 2.0, 1.0, 1, 12, 2, \"abs\", cast(now() as timestamp), 1, 1999)", + "operationType": "INSERT", "endTime": 1687533546 } ---- QUERY @@ -1852,6 +1863,7 @@ lineage_test_db.alltypes "queryId": "784e59087868d497:418a666800000000", "user": "anurag", "queryText": "insert into table lineage_test_db.alltypesnopart (id, bool_col, timestamp_col) select id, bool_col, timestamp_col from functional.alltypes", + "operationType": "INSERT", "endTime": 1687533551 } ---- QUERY @@ -2157,6 +2169,7 @@ from functional.alltypes "queryId": "50459cec26bebcb8:6d71e38a00000000", "user": "anurag", "queryText": "insert into table lineage_test_db.alltypessmall (smallint_col, int_col) partition (year=2009, month=04) select smallint_col, int_col from functional.alltypes where year=2009 and month=05", + "operationType": "INSERT", "endTime": 1687533556 } ---- QUERY @@ -2480,6 
+2493,7 @@ where year=2009 and month=05 "queryId": "ca4e2ac78776624d:6e44f11c00000000", "user": "anurag", "queryText": "insert into table lineage_test_db.alltypessmall (id, string_col, int_col) partition (year, month) select id, string_col, int_col, year, month from functional_seq_snap.alltypes where year=2009 and month>10", + "operationType": "INSERT", "endTime": 1687533562 } ---- QUERY @@ -2898,6 +2912,7 @@ where year=2009 and month>10 "queryId": "e64b728b24bbd5e9:d50607e600000000", "user": "anurag", "queryText": "insert into table lineage_test_db.alltypessmall partition (year=2009, month) select min(id), min(bool_col), min(tinyint_col), min(smallint_col), min(int_col), min(bigint_col), min(float_col), min(double_col), min(date_string_col), min(string_col), min(timestamp_col), month from functional.alltypes where year=2009 and month>10 group by month having min(id) > 10", + "operationType": "INSERT", "endTime": 1687533567 } ---- QUERY @@ -2965,6 +2980,7 @@ having min(id) > 10 "queryId": "b4403b6c2b81f788:b8af2f7700000000", "user": "anurag", "queryText": "select max(tinyint_col) over(partition by int_col) from functional.alltypes group by int_col, tinyint_col", + "operationType": "SELECT", "endTime": 1687533573 } ---- QUERY @@ -3030,6 +3046,7 @@ group by int_col, tinyint_col "queryId": "fb4794254fb75b20:d1daf37500000000", "user": "anurag", "queryText": "select int_col, rank() over(order by int_col) from functional.alltypesagg", + "operationType": "SELECT", "endTime": 1687533578 } ---- QUERY @@ -3160,6 +3177,7 @@ select int_col, rank() over(order by int_col) from functional.alltypesagg "queryId": "8d4ddc6897bb8c9f:2f295b1700000000", "user": "anurag", "queryText": "select a.tinyint_col, a.int_col, count(a.double_col) over(partition by a.tinyint_col order by a.int_col desc rows between 1 preceding and 1 following) from functional.alltypes a inner join functional.alltypessmall b on a.id = b.id order by a.tinyint_col, a.int_col", + "operationType": "SELECT", "endTime": 
1687533583 } ---- QUERY @@ -3310,6 +3328,7 @@ order by a.tinyint_col, a.int_col "queryId": "c14107654ce903b7:9bd3363200000000", "user": "anurag", "queryText": "with v2 as (select double_col, count(int_col) over() a, sum(int_col + bigint_col) over(partition by bool_col) b from (select * from functional.alltypes) v1) select double_col, a, b, a + b, double_col + a from v2 order by 2, 3, 4", + "operationType": "SELECT", "endTime": 1687533589 } ---- QUERY @@ -3465,6 +3484,7 @@ order by 2, 3, 4 "queryId": "1744e089e7cc73a9:ed50bff000000000", "user": "anurag", "queryText": "select double_col, a, b, a + b, double_col + a from (select double_col, count(int_col) over() a, sum(int_col + bigint_col) over(partition by bool_col) b from (select * from functional.alltypes) v1) v2 order by 2, 3, 4", + "operationType": "SELECT", "endTime": 1687533594 } ---- QUERY @@ -3591,6 +3611,7 @@ order by 2, 3, 4 "queryId": "4740735360ac58fe:543457eb00000000", "user": "anurag", "queryText": "select a.month, a.year, b.int_col, b.month from (select year, month from functional.alltypes union all select year, month from functional.alltypes) a inner join functional.alltypessmall b on (a.month = b.month) where b.month = 1", + "operationType": "SELECT", "endTime": 1687533599 } ---- QUERY @@ -3712,6 +3733,7 @@ where b.month = 1 "queryId": "704c9e102ed9744a:97d7530700000000", "user": "anurag", "queryText": "select t1.int_col, t2.month, t2.int_col + 1 from ( select int_col, count(*) from functional.alltypessmall where month = 1 group by int_col having count(*) > 1 order by count(*) desc limit 5 ) t1 join functional.alltypes t2 on (t1.int_col = t2.int_col) where month = 1", + "operationType": "SELECT", "endTime": 1687533604 } ---- QUERY @@ -3933,6 +3955,7 @@ where month = 1 "queryId": "d5413148c4203ab5:24a6dc2f00000000", "user": "anurag", "queryText": "select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col from functional.alltypessmall c join ( select a.smallint_col 
smallint_col, a.tinyint_col tinyint_col, a.day day, a.int_col int_col, a.month month, b.float_col float_col, b.id id from ( select * from functional.alltypesagg a where month=1 ) a join functional.alltypessmall b on (a.smallint_col = b.id) ) x on (x.tinyint_col = c.id) where x.day=1 and x.int_col > 899 and [...] + "operationType": "SELECT", "endTime": 1687533610 } ---- QUERY @@ -4035,6 +4058,7 @@ and x.int_col + x.float_col + cast(c.string_col as float) < 1000 "queryId": "d94c43f20a44f454:87b6d0d300000000", "user": "anurag", "queryText": "select c1, c2, c3 from (select c1, c2, c3 from (select int_col c1, sum(float_col) c2, min(float_col) c3 from functional_hbase.alltypessmall group by 1) x order by 2,3 desc limit 5 ) y", + "operationType": "SELECT", "endTime": 1687533616 } ---- QUERY @@ -4119,6 +4143,7 @@ from "queryId": "674320549904c2a7:e85c13d400000000", "user": "anurag", "queryText": "select c1, x2 from ( select c1, min(c2) x2 from ( select c1, c2, c3 from ( select int_col c1, tinyint_col c2, min(float_col) c3 from functional_hbase.alltypessmall group by 1, 2 order by 1,2 limit 1 ) x ) x2 group by c1 ) y order by 2,1 desc limit 0", + "operationType": "SELECT", "endTime": 1687533621 } ---- QUERY @@ -4175,6 +4200,7 @@ limit 0 "queryId": "1f45c1b8f43f6b96:a19ff31600000000", "user": "anurag", "queryText": "select id from functional.view_view", + "operationType": "SELECT", "endTime": 1687533626 } ---- QUERY @@ -4225,6 +4251,7 @@ select id from functional.view_view "queryId": "b7476b57060dceb7:b59a484600000000", "user": "anurag", "queryText": "select t.id from (select id from functional.alltypes_view) t where t.id < 10", + "operationType": "SELECT", "endTime": 1687533631 } ---- QUERY @@ -4346,6 +4373,7 @@ where t.id < 10 "queryId": "09489cc02e60feec:c8ddfb0b00000000", "user": "anurag", "queryText": "select string_col, float_col, bool_col from functional.alltypes where id in (select id from functional.alltypesagg)", + "operationType": "SELECT", "endTime": 1687533637 } 
---- QUERY @@ -4408,6 +4436,7 @@ where id in "queryId": "684366081fd04903:ce9b652700000000", "user": "anurag", "queryText": "select 1 from functional.alltypesagg a where exists (select id, count(int_col) over (partition by bool_col) from functional.alltypestiny b where a.tinyint_col = b.tinyint_col group by id, int_col, bool_col) and tinyint_col < 10", + "operationType": "SELECT", "endTime": 1687533642 } ---- QUERY @@ -4522,6 +4551,7 @@ and tinyint_col < 10 "queryId": "bc4aa55c595aa11f:b69f692600000000", "user": "anurag", "queryText": "select int_col + 1, tinyint_col - 1 from functional.alltypes a where a.int_col < (select max(int_col) from functional.alltypesagg g where g.bool_col = true) and a.bigint_col > 10", + "operationType": "SELECT", "endTime": 1687533647 } ---- QUERY @@ -4590,6 +4620,7 @@ and a.bigint_col > 10 "queryId": "a54406d43e4870fb:cf7baf2100000000", "user": "anurag", "queryText": "with t as (select int_col x, bigint_col y from functional.alltypes) select x, y from t", + "operationType": "SELECT", "endTime": 1687533652 } ---- QUERY @@ -4888,6 +4919,7 @@ with t as (select int_col x, bigint_col y from functional.alltypes) select x, y "queryId": "7d4e988e98626451:c0be63a200000000", "user": "anurag", "queryText": "with t1 as (select * from functional.alltypestiny) insert into lineage_test_db.alltypesinsert (id, int_col, string_col) partition(year, month) select id, int_col, string_col, year, month from t1", + "operationType": "INSERT", "endTime": 1687533658 } ---- QUERY @@ -4994,6 +5026,7 @@ select id, int_col, string_col, year, month from t1 "queryId": "cf494d06aa9e201a:7047aec600000000", "user": "anurag", "queryText": "select lead(a) over (partition by b order by c) from (select lead(id) over (partition by int_col order by bigint_col) as a, max(id) over (partition by tinyint_col order by int_col) as b, min(int_col) over (partition by string_col order by bool_col) as c from functional.alltypes) v", + "operationType": "SELECT", "endTime": 1687533663 } 
---- QUERY @@ -5045,6 +5078,7 @@ from "queryId": "574dce023537d84c:5de5f32b00000000", "user": "anurag", "queryText": "create view lineage_test_db.lineage_test_view1 as select id from functional.alltypestiny", + "operationType": "CREATEVIEW", "endTime": 1687533668 } ---- QUERY @@ -5131,6 +5165,7 @@ create view lineage_test_db.lineage_test_view1 as select id from functional.allt "queryId": "b54cc29caca2cf8a:99ff97ae00000000", "user": "anurag", "queryText": "create view lineage_test_db.lineage_test_view2 (a, b) as select c1, x2 from ( select c1, min(c2) x2 from ( select c1, c2, c3 from ( select int_col c1, tinyint_col c2, min(float_col) c3 from functional_hbase.alltypessmall group by 1, 2 order by 1,2 limit 1 ) x ) x2 group by c1 ) y order by 2,1 desc limit 0", + "operationType": "CREATEVIEW", "endTime": 1687533673 } ---- QUERY @@ -5399,6 +5434,7 @@ limit 0 "queryId": "2a43cfbaea01dda0:c08ec40600000000", "user": "anurag", "queryText": "create view lineage_test_db.lineage_test_view3 (a1, a2, a3, a4, a5, a6, a7) as select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col from functional.alltypessmall c join ( select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day, a.int_col int_col, a.month month, b.float_col float_col, b.id id from ( select * from functional.alltypesagg a where month=1 ) a join functional.alltypessmall b on ( [...] 
+ "operationType": "CREATEVIEW", "endTime": 1687533679 } ---- QUERY @@ -5616,6 +5652,7 @@ create view lineage_test_db.lineage_test_view3 (a1, a2, a3, a4, a5, a6, a7) as "queryId": "d448e01e4832881e:59ee2b4a00000000", "user": "anurag", "queryText": "create view lineage_test_db.lineage_test_view4 as select * from ( select sum(a.tinyint_col) over (partition by a.smallint_col order by a.id), count(b.string_col), b.timestamp_col from functional.alltypes a join functional.alltypessmall b on (a.id = b.id) where a.year = 2010 and b.float_col > 0 group by a.tinyint_col, a.smallint_col, a.id, b.string_col, b.timestamp_col, b.bigint_col having count(a.int_col) > 10 order by b.bigint_col limit 10) t", + "operationType": "CREATEVIEW", "endTime": 1687533684 } ---- QUERY @@ -5670,6 +5707,7 @@ create view lineage_test_db.lineage_test_view4 as "queryId": "b54e1dc4a678c946:eb23e54700000000", "user": "anurag", "queryText": "alter view lineage_test_db.alltypes_view as select id from functional.alltypestiny", + "operationType": "ALTERVIEW_AS", "endTime": 1687533689 } ---- QUERY @@ -5763,6 +5801,7 @@ alter view lineage_test_db.alltypes_view as select id from functional.alltypesti "queryId": "ee4f63ccf2181e85:2b38e8dd00000000", "user": "anurag", "queryText": "select * from ( select int_struct_col.f1 + int_struct_col.f2 x from functional.allcomplextypes where year = 2000 order by nested_struct_col.f2.f12.f21 limit 10 union all select sum(f1) y from (select complex_struct_col.f1 f1 from functional.allcomplextypes group by 1) v1) v2", + "operationType": "SELECT", "endTime": 1687533694 } ---- QUERY @@ -5896,6 +5935,7 @@ select * from ( "queryId": "394a9bd88d04af98:9452981700000000", "user": "anurag", "queryText": "select * from functional.allcomplextypes.int_array_col a inner join functional.allcomplextypes.struct_map_col m on (a.item = m.f1)", + "operationType": "SELECT", "endTime": 1687533700 } ---- QUERY @@ -6121,6 +6161,7 @@ select * from functional.allcomplextypes.int_array_col a inner 
join "queryId": "b043e925908e15a2:1a5b1afe00000000", "user": "anurag", "queryText": "select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col m where a.item = m.f1", + "operationType": "SELECT", "endTime": 1687533705 } ---- QUERY @@ -6301,6 +6342,7 @@ select * from functional.allcomplextypes t, t.int_array_col a, t.struct_map_col "queryId": "04414cb7de8797d4:4caad29500000000", "user": "anurag", "queryText": "select a + b as ab, c, d, e from functional.allcomplextypes t, (select sum(item) a from t.int_array_col where item < 10) v1, (select count(f1) b from t.struct_map_col group by key) v2, (select avg(value) over(partition by key) c from t.map_map_col.value) v3, (select item d from t.int_array_col union all select value from t.int_map_col) v4, (select f21 e from t.complex_nested_struct_col.f2.f12 order by key limit 10) v5", + "operationType": "SELECT", "endTime": 1687533710 } ---- QUERY @@ -6365,6 +6407,7 @@ select a + b as ab, c, d, e from functional.allcomplextypes t, "queryId": "764a4986b0d631c2:a52f087b00000000", "user": "anurag", "queryText": "create view lineage_test_db.lineage_test_view5 as select id from functional.alltypes_view v where not exists (select 1 from functional.alltypes a where v.id = a.id)", + "operationType": "CREATEVIEW", "endTime": 1687533715 } ---- QUERY @@ -6471,6 +6514,7 @@ where not exists (select 1 from functional.alltypes a where v.id = a.id) "queryId": "db47d0dcf9aa4267:6cfbbef700000000", "user": "boroknagyz", "queryText": "create table lineage_test_db.ice_ctas partitioned by spec (truncate(1, string_col)) stored by iceberg as select id, string_col, timestamp_col from functional_parquet.alltypes", + "operationType": "CREATETABLE_AS_SELECT", "endTime": 1687615427 } ---- QUERY @@ -6605,6 +6649,7 @@ select id, string_col, timestamp_col from functional_parquet.alltypes "queryId": "1942b41abdcfba11:1d8e573d00000000", "user": "boroknagyz", "queryText": "insert into lineage_test_db.iceberg_partitioned select * from 
functional_parquet.iceberg_partitioned", + "operationType": "INSERT", "endTime": 1687615432 } ---- QUERY @@ -6738,6 +6783,7 @@ select * from functional_parquet.iceberg_partitioned "queryId": "5e4febca0b4e3922:96171f9700000000", "user": "boroknagyz", "queryText": "insert into lineage_test_db.iceberg_partitioned select * from functional_parquet.iceberg_partitioned", + "operationType": "INSERT", "endTime": 1687615438 } ---- QUERY @@ -6784,9 +6830,268 @@ select * from functional_parquet.iceberg_partitioned "queryId": "46450d3ee417fa15:9c00c0ce00000000", "user": "boroknagyz", "queryText": "insert into lineage_test_db.foo select id from lineage_test_db.iceberg_partitioned", + "operationType": "INSERT", "endTime": 1687615443 } ---- QUERY insert into lineage_test_db.foo select id from lineage_test_db.iceberg_partitioned ==== +---- LINEAGE +{ + "queryText": "insert overwrite table lineage_test_db.alltypesnopart (id, bool_col, timestamp_col) select id, bool_col, timestamp_col from functional.alltypes", + "operationType": "INSERT_OVERWRITE", + "queryId": "44426aacddcb94ec:1d961ec800000000", + "hash": "b28dcb636dce5104778696cde4ed4802", + "user": "fangyurao", + "timestamp": 1772141117, + "endTime": 1772141117, + "edges": [ + { + "sources": [ + 1 + ], + "targets": [ + 0 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [ + 3 + ], + "targets": [ + 2 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 4 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 5 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 6 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 7 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 8 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 9 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 10 + ], + "edgeType": "PROJECTION" + }, + { + "sources": [], + "targets": [ + 11 + ], + "edgeType": 
"PROJECTION" + }, + { + "sources": [ + 13 + ], + "targets": [ + 12 + ], + "edgeType": "PROJECTION" + } + ], + "vertices": [ + { + "id": 0, + "vertexType": "COLUMN", + "vertexId": "id", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 1, + "vertexType": "COLUMN", + "vertexId": "functional.alltypes.id", + "metadata": { + "tableName": "functional.alltypes", + "tableType": "hive", + "tableCreateTime": 1766691526 + } + }, + { + "id": 2, + "vertexType": "COLUMN", + "vertexId": "bool_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 3, + "vertexType": "COLUMN", + "vertexId": "functional.alltypes.bool_col", + "metadata": { + "tableName": "functional.alltypes", + "tableType": "hive", + "tableCreateTime": 1766691526 + } + }, + { + "id": 4, + "vertexType": "COLUMN", + "vertexId": "tinyint_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 5, + "vertexType": "COLUMN", + "vertexId": "smallint_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 6, + "vertexType": "COLUMN", + "vertexId": "int_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 7, + "vertexType": "COLUMN", + "vertexId": "bigint_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 8, + "vertexType": "COLUMN", + "vertexId": "float_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 9, + "vertexType": "COLUMN", + "vertexId": "double_col", + "metadata": { + 
"tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 10, + "vertexType": "COLUMN", + "vertexId": "date_string_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 11, + "vertexType": "COLUMN", + "vertexId": "string_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 12, + "vertexType": "COLUMN", + "vertexId": "timestamp_col", + "metadata": { + "tableName": "lineage_test_db.alltypesnopart", + "tableType": "hive", + "tableCreateTime": 1772140870 + } + }, + { + "id": 13, + "vertexType": "COLUMN", + "vertexId": "functional.alltypes.timestamp_col", + "metadata": { + "tableName": "functional.alltypes", + "tableType": "hive", + "tableCreateTime": 1766691526 + } + } + ] +} +---- QUERY +insert overwrite table lineage_test_db.alltypesnopart (id, bool_col, timestamp_col) +select id, bool_col, timestamp_col +from functional.alltypes +==== +---- LINEAGE +{ + "queryText": "create table lineage_test_db.like_alltypestiny_file like parquet '/test-warehouse/schemas/alltypestiny.parquet'", + "operationType": "CREATETABLE", + "queryId": "1d48cc246ea3d03d:e73a74ad00000000", + "hash": "85b96955243dd853bb653f379d433f13", + "user": "fangyurao", + "timestamp": 1772480467, + "endTime": 1772480467, + "edges": [], + "vertices": [] +} +---- QUERY +create table lineage_test_db.like_alltypestiny_file like parquet +'$FILESYSTEM_PREFIX/test-warehouse/schemas/alltypestiny.parquet' +====
