kfaraz commented on code in PR #14391:
URL: https://github.com/apache/druid/pull/14391#discussion_r1225000091
##########
docs/querying/sql-translation.md:
##########
@@ -67,7 +67,12 @@ be translated to native.
EXPLAIN PLAN statements return:
- a `PLAN` column that contains a JSON array of native queries that Druid will
run
- a `RESOURCES` column that describes the resources used in the query
-- a `ATTRIBUTES` column that describes the attributes of a query, such as the
statement type and target data source
+- a `ATTRIBUTES` column that describes the attributes of a query. Attributes
include:
Review Comment:
```suggestion
- an `ATTRIBUTES` column that describes the attributes of the query,
including:
```
##########
docs/querying/sql-translation.md:
##########
@@ -221,6 +226,224 @@ The EXPLAIN PLAN statement returns the following result
with plan, resources, an
]
```
+Now consider the following EXPLAIN PLAN for a `REPLACE` query that replaces
all the data in the `wikipedia` datasource:
+```sql
+EXPLAIN PLAN FOR
+REPLACE INTO wikipedia
+OVERWRITE ALL
+SELECT
+ TIME_PARSE("timestamp") AS __time,
+ namespace,
+ cityName,
+ countryName,
+ regionIsoCode,
+ metroCode,
+ countryIsoCode,
+ regionName
+FROM TABLE(
+ EXTERN(
+
'{"type":"http","uris":["https://druid.apache.org/data/wikipedia.json.gz"]}',
+ '{"type":"json"}',
+
'[{"name":"timestamp","type":"string"},{"name":"namespace","type":"string"},{"name":"cityName","type":"string"},{"name":"countryName","type":"string"},{"name":"regionIsoCode","type":"string"},{"name":"metroCode","type":"long"},{"name":"countryIsoCode","type":"string"},{"name":"regionName","type":"string"}]'
+ )
+ )
+PARTITIONED BY HOUR
+CLUSTERED BY cityName
+```
+
+The EXPLAIN PLAN returns the following result:
Review Comment:
```suggestion
The above EXPLAIN PLAN returns the following result:
```
##########
sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java:
##########
@@ -34,12 +36,27 @@
@Nullable
private final SqlNode targetDataSource;
+ @Nullable
+ private final Granularity partitionedBy;
+
+ @Nullable
+ private final SqlNodeList clusteredBy;
+
+ @Nullable
+ private final SqlNode replaceTimeChunks;
+
public ExplainAttributes(
@JsonProperty("statementType") final String statementType,
- @JsonProperty("targetDataSource") @Nullable final SqlNode
targetDataSource)
+ @JsonProperty("targetDataSource") @Nullable final SqlNode
targetDataSource,
+ @JsonProperty("partitionedBy") @Nullable Granularity partitionedBy,
+ @JsonProperty("clusteredBy") @Nullable SqlNodeList clusteredBy,
+ @JsonProperty("replaceTimeChunks") @Nullable SqlNode replaceTimeChunks)
Review Comment:
Nit:
```suggestion
@JsonProperty("replaceTimeChunks") @Nullable SqlNode replaceTimeChunks
)
```
##########
sql/src/test/java/org/apache/druid/sql/avatica/DruidAvaticaHandlerTest.java:
##########
@@ -471,7 +471,7 @@ public void testExplainSelectCount() throws SQLException
"RESOURCES",
"[{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]",
"ATTRIBUTES",
- "{\"statementType\":\"SELECT\",\"targetDataSource\":null}"
+
"{\"statementType\":\"SELECT\",\"targetDataSource\":null,\"partitionedBy\":null,\"clusteredBy\":null,\"replaceTimeChunks\":null}"
Review Comment:
Can we make it so that the output contains only the non-null fields?
##########
docs/querying/sql-translation.md:
##########
@@ -221,6 +226,224 @@ The EXPLAIN PLAN statement returns the following result
with plan, resources, an
]
```
+Now consider the following EXPLAIN PLAN for a `REPLACE` query that replaces
all the data in the `wikipedia` datasource:
Review Comment:
```suggestion
Example 2: EXPLAIN PLAN for a `REPLACE` query that replaces all the data in
the `wikipedia` datasource:
```
Should we do something more formal here, like numbering the examples
(e.g. "Example 1", "Example 2")?
##########
sql/src/test/java/org/apache/druid/sql/calcite/CalciteReplaceDmlTest.java:
##########
@@ -683,6 +683,88 @@ public void testExplainReplaceFromExternal() throws
IOException
didTest = true;
}
+ @Test
+ public void testExplainReplaceTimeChunksWithPartitioningAndClustering()
throws IOException
+ {
+ // Skip vectorization since otherwise the "context" will change for each
subtest.
+ skipVectorize();
+
+ ObjectMapper queryJsonMapper = queryFramework().queryJsonMapper();
+ final ScanQuery expectedQuery = newScanQueryBuilder()
+ .dataSource("foo")
+ .intervals(querySegmentSpec(Filtration.eternity()))
+ .columns("__time", "cnt", "dim1", "dim2", "dim3", "m1", "m2",
"unique_dim1")
+ .orderBy(
+ ImmutableList.of(
+ new ScanQuery.OrderBy("dim1", ScanQuery.Order.ASCENDING)
+ )
+ )
+ .context(
+ queryJsonMapper.readValue(
+
"{\"sqlInsertSegmentGranularity\":\"\\\"DAY\\\"\",\"sqlQueryId\":\"dummy\",\"sqlReplaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"}",
+ JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
+ )
+ )
+ .build();
+
+ final String legacyExplanation =
+ "DruidQueryRel(query=["
+ + queryJsonMapper.writeValueAsString(expectedQuery)
+ + "], signature=[{__time:LONG, dim1:STRING, dim2:STRING, dim3:STRING,
cnt:LONG, m1:FLOAT, m2:DOUBLE, unique_dim1:COMPLEX<hyperUnique>}])\n";
+
+ final String explanation =
"[{\"query\":{\"queryType\":\"scan\",\"dataSource\":{\"type\":\"table\",\"name\":\"foo\"},\"intervals\":{\"type\":\"intervals\",\"intervals\":[\"-146136543-09-08T08:23:32.096Z/146140482-04-24T15:36:27.903Z\"]},\"resultFormat\":\"compactedList\",\"orderBy\":[{\"columnName\":\"dim1\",\"order\":\"ascending\"}],\"columns\":[\"__time\",\"cnt\",\"dim1\",\"dim2\",\"dim3\",\"m1\",\"m2\",\"unique_dim1\"],\"legacy\":false,\"context\":{\"sqlInsertSegmentGranularity\":\"\\\"DAY\\\"\",\"sqlQueryId\":\"dummy\",\"sqlReplaceTimeChunks\":\"2000-01-01T00:00:00.000Z/2000-01-02T00:00:00.000Z\",\"vectorize\":\"false\",\"vectorizeVirtualColumns\":\"false\"},\"granularity\":{\"type\":\"all\"}},\"signature\":[{\"name\":\"__time\",\"type\":\"LONG\"},{\"name\":\"dim1\",\"type\":\"STRING\"},{\"name\":\"dim2\",\"type\":\"STRING\"},{\"name\":\"dim3\",\"type\":\"STRING\"},{\"name\":\"cnt\",\"type\":\"LONG\"},{\"name\":\"m1\",\"type\":\"FLOAT\"},{\"name\":\"m2\",\"type\":\"DOUBLE\"}
,{\"name\":\"unique_dim1\",\"type\":\"COMPLEX<hyperUnique>\"}],\"columnMappings\":[{\"queryColumn\":\"__time\",\"outputColumn\":\"__time\"},{\"queryColumn\":\"dim1\",\"outputColumn\":\"dim1\"},{\"queryColumn\":\"dim2\",\"outputColumn\":\"dim2\"},{\"queryColumn\":\"dim3\",\"outputColumn\":\"dim3\"},{\"queryColumn\":\"cnt\",\"outputColumn\":\"cnt\"},{\"queryColumn\":\"m1\",\"outputColumn\":\"m1\"},{\"queryColumn\":\"m2\",\"outputColumn\":\"m2\"},{\"queryColumn\":\"unique_dim1\",\"outputColumn\":\"unique_dim1\"}]}]";
+ final String resources =
"[{\"name\":\"dst\",\"type\":\"DATASOURCE\"},{\"name\":\"foo\",\"type\":\"DATASOURCE\"}]";
+ final String attributes =
"{\"statementType\":\"REPLACE\",\"targetDataSource\":\"dst\",\"partitionedBy\":\"DAY\",\"clusteredBy\":\"`dim1`\",\"replaceTimeChunks\":\"`__time`
>= TIMESTAMP '2000-01-01 00:00:00' AND `__time` < TIMESTAMP '2000-01-02
00:00:00'\"}";
+
+ final String sql = "EXPLAIN PLAN FOR"
+ + " REPLACE INTO dst"
+ + " OVERWRITE WHERE __time >= TIMESTAMP '2000-01-01
00:00:00' AND __time < TIMESTAMP '2000-01-02 00:00:00' "
+ + "SELECT * FROM foo PARTITIONED BY DAY CLUSTERED BY
dim1";
+ // Use testQuery for EXPLAIN (not testIngestionQuery).
+ testQuery(
+ PLANNER_CONFIG_LEGACY_QUERY_EXPLAIN,
+ ImmutableMap.of("sqlQueryId", "dummy"),
+ Collections.emptyList(),
+ sql,
+ CalciteTests.SUPER_USER_AUTH_RESULT,
+ ImmutableList.of(),
+ new DefaultResultsVerifier(
+ ImmutableList.of(
+ new Object[]{
+ legacyExplanation,
+ resources,
+ attributes
+ }
+ ),
Review Comment:
Nit: We can probably keep this on a single line, as breaking it up doesn't
seem to improve readability.
##########
sql/src/main/java/org/apache/druid/sql/calcite/planner/ExplainAttributes.java:
##########
@@ -62,12 +79,48 @@ public String getTargetDataSource()
return targetDataSource == null ? null : targetDataSource.toString();
}
+ /**
+ * @return the time-based partitioning granularity specified in the
<code>PARTITIONED BY</code> clause
+ * for an INSERT or REPLACE statement. Returns null for SELECT/non-DML
statements.
Review Comment:
Afaik, SELECT is the only non-DML we use right now, so we can remove the
ambiguity.
```suggestion
* for an INSERT or REPLACE statement. Returns null for SELECT statements.
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]