codope commented on code in PR #12420:
URL: https://github.com/apache/hudi/pull/12420#discussion_r1874239940


##########
website/docs/sql_ddl.md:
##########
@@ -342,65 +377,65 @@ CREATE TABLE hudi_table_func_index (
 ) USING HUDI
 tblproperties (primaryKey = 'uuid')
 PARTITIONED BY (city)
-location 'file:///tmp/hudi_table_func_index';
+location 'file:///tmp/hudi_table_expr_index';
 
 -- disable small file handling so the each insert creates new file --
 set hoodie.parquet.small.file.limit=0;
 
-INSERT INTO hudi_table_func_index VALUES ('2023-09-20 
03:58:59','334e26e9-8355-45cc-97c6-c31daf0df330','rider-A','driver-K',19.10,'san_francisco');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-19 
08:46:34','e96c4396-3fad-413a-a942-4cb36106d721','rider-C','driver-M',27.70 
,'san_francisco');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-18 
17:45:31','9909a8b1-2d15-4d3d-8ec9-efc48c536a00','rider-D','driver-L',33.90 
,'san_francisco');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-22 
13:12:56','1dced545-862b-4ceb-8b43-d2a568f6616b','rider-E','driver-O',93.50,'san_francisco');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-24 
06:15:45','e3cf430c-889d-4015-bc98-59bdce1e530c','rider-F','driver-P',34.15,'sao_paulo');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-22 
15:21:36','7a84095f-737f-40bc-b62f-6b69664712d2','rider-G','driver-Q',43.40 
,'sao_paulo');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-20 
12:35:45','3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04','rider-I','driver-S',41.06 
,'chennai');
-INSERT INTO hudi_table_func_index VALUES ('2023-09-19 
05:34:56','c8abbe79-8d89-47ea-b4ce-4d224bae5bfa','rider-J','driver-T',17.85,'chennai');
-
--- Query with hour function filter but no idex yet --
-spark-sql> SELECT city, fare, rider, driver FROM  hudi_table_func_index WHERE  
city NOT IN ('chennai') AND hour(ts) > 12;
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-20 
03:58:59','334e26e9-8355-45cc-97c6-c31daf0df330','rider-A','driver-K',19.10,'san_francisco');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-19 
08:46:34','e96c4396-3fad-413a-a942-4cb36106d721','rider-C','driver-M',27.70 
,'san_francisco');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-18 
17:45:31','9909a8b1-2d15-4d3d-8ec9-efc48c536a00','rider-D','driver-L',33.90 
,'san_francisco');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-22 
13:12:56','1dced545-862b-4ceb-8b43-d2a568f6616b','rider-E','driver-O',93.50,'san_francisco');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-24 
06:15:45','e3cf430c-889d-4015-bc98-59bdce1e530c','rider-F','driver-P',34.15,'sao_paulo');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-22 
15:21:36','7a84095f-737f-40bc-b62f-6b69664712d2','rider-G','driver-Q',43.40 
,'sao_paulo');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-20 
12:35:45','3eeb61f7-c2b0-4636-99bd-5d7a5a1d2c04','rider-I','driver-S',41.06 
,'chennai');
+INSERT INTO hudi_table_expr_index VALUES ('2023-09-19 
05:34:56','c8abbe79-8d89-47ea-b4ce-4d224bae5bfa','rider-J','driver-T',17.85,'chennai');
+
+-- Query with hour function filter but no index yet --
+spark-sql> SELECT city, fare, rider, driver FROM  hudi_table_expr_index WHERE  
city NOT IN ('chennai') AND hour(ts) > 12;
 san_francisco  93.5    rider-E driver-O
 san_francisco  33.9    rider-D driver-L
 sao_paulo      43.4    rider-G driver-Q
 Time taken: 0.208 seconds, Fetched 3 row(s)
 
-spark-sql> EXPLAIN COST SELECT city, fare, rider, driver FROM  
hudi_table_func_index WHERE  city NOT IN ('chennai') AND hour(ts) > 12;
+spark-sql> EXPLAIN COST SELECT city, fare, rider, driver FROM  
hudi_table_expr_index WHERE  city NOT IN ('chennai') AND hour(ts) > 12;
 == Optimized Logical Plan ==
 Project [city#3465, fare#3464, rider#3462, driver#3463], 
Statistics(sizeInBytes=899.5 KiB)
 +- Filter ((isnotnull(city#3465) AND isnotnull(ts#3460)) AND (NOT (city#3465 = 
chennai) AND (hour(cast(ts#3460 as timestamp), Some(Asia/Kolkata)) > 12))), 
Statistics(sizeInBytes=2.5 MiB)
-   +- Relation 
default.hudi_table_func_index[_hoodie_commit_time#3455,_hoodie_commit_seqno#3456,_hoodie_record_key#3457,_hoodie_partition_path#3458,_hoodie_file_name#3459,ts#3460,uuid#3461,rider#3462,driver#3463,fare#3464,city#3465]
 parquet, Statistics(sizeInBytes=2.5 MiB)
+   +- Relation 
default.hudi_table_expr_index[_hoodie_commit_time#3455,_hoodie_commit_seqno#3456,_hoodie_record_key#3457,_hoodie_partition_path#3458,_hoodie_file_name#3459,ts#3460,uuid#3461,rider#3462,driver#3463,fare#3464,city#3465]
 parquet, Statistics(sizeInBytes=2.5 MiB)
 
 == Physical Plan ==
 *(1) Project [city#3465, fare#3464, rider#3462, driver#3463]
 +- *(1) Filter (isnotnull(ts#3460) AND (hour(cast(ts#3460 as timestamp), 
Some(Asia/Kolkata)) > 12))
    +- *(1) ColumnarToRow
-      +- FileScan parquet 
default.hudi_table_func_index[ts#3460,rider#3462,driver#3463,fare#3464,city#3465]
 Batched: true, DataFilters: [isnotnull(ts#3460), (hour(cast(ts#3460 as 
timestamp), Some(Asia/Kolkata)) > 12)], Format: Parquet, Location: 
HoodieFileIndex(1 paths)[file:/tmp/hudi_table_func_index], PartitionFilters: 
[isnotnull(city#3465), NOT (city#3465 = chennai)], PushedFilters: 
[IsNotNull(ts)], ReadSchema: 
struct<ts:string,rider:string,driver:string,fare:double>
+      +- FileScan parquet 
default.hudi_table_expr_index[ts#3460,rider#3462,driver#3463,fare#3464,city#3465]
 Batched: true, DataFilters: [isnotnull(ts#3460), (hour(cast(ts#3460 as 
timestamp), Some(Asia/Kolkata)) > 12)], Format: Parquet, Location: 
HoodieFileIndex(1 paths)[file:/tmp/hudi_table_expr_index], PartitionFilters: 
[isnotnull(city#3465), NOT (city#3465 = chennai)], PushedFilters: 
[IsNotNull(ts)], ReadSchema: 
struct<ts:string,rider:string,driver:string,fare:double>
       
      
--- create the functional index --
-CREATE INDEX ts_hour ON hudi_table_func_index USING column_stats(ts) 
options(func='hour');
+-- create the expression index --
+CREATE INDEX ts_hour ON hudi_table_expr_index USING column_stats(ts) 
options(expr='hour');
 
 -- query after creating the index --
-spark-sql> SELECT city, fare, rider, driver FROM  hudi_table_func_index WHERE  
city NOT IN ('chennai') AND hour(ts) > 12;
+spark-sql> SELECT city, fare, rider, driver FROM  hudi_table_expr_index WHERE  
city NOT IN ('chennai') AND hour(ts) > 12;
 san_francisco  93.5    rider-E driver-O
 san_francisco  33.9    rider-D driver-L
 sao_paulo      43.4    rider-G driver-Q
 Time taken: 0.202 seconds, Fetched 3 row(s)
-spark-sql> EXPLAIN COST SELECT city, fare, rider, driver FROM  
hudi_table_func_index WHERE  city NOT IN ('chennai') AND hour(ts) > 12;
+spark-sql> EXPLAIN COST SELECT city, fare, rider, driver FROM  
hudi_table_expr_index WHERE  city NOT IN ('chennai') AND hour(ts) > 12;
 == Optimized Logical Plan ==
 Project [city#2970, fare#2969, rider#2967, driver#2968], 
Statistics(sizeInBytes=449.8 KiB)
 +- Filter ((isnotnull(city#2970) AND isnotnull(ts#2965)) AND (NOT (city#2970 = 
chennai) AND (hour(cast(ts#2965 as timestamp), Some(Asia/Kolkata)) > 12))), 
Statistics(sizeInBytes=1278.3 KiB)
-   +- Relation 
default.hudi_table_func_index[_hoodie_commit_time#2960,_hoodie_commit_seqno#2961,_hoodie_record_key#2962,_hoodie_partition_path#2963,_hoodie_file_name#2964,ts#2965,uuid#2966,rider#2967,driver#2968,fare#2969,city#2970]
 parquet, Statistics(sizeInBytes=1278.3 KiB)
+   +- Relation 
default.hudi_table_expr_index[_hoodie_commit_time#2960,_hoodie_commit_seqno#2961,_hoodie_record_key#2962,_hoodie_partition_path#2963,_hoodie_file_name#2964,ts#2965,uuid#2966,rider#2967,driver#2968,fare#2969,city#2970]
 parquet, Statistics(sizeInBytes=1278.3 KiB)
 
 == Physical Plan ==
 *(1) Project [city#2970, fare#2969, rider#2967, driver#2968]
 +- *(1) Filter (isnotnull(ts#2965) AND (hour(cast(ts#2965 as timestamp), 
Some(Asia/Kolkata)) > 12))
    +- *(1) ColumnarToRow
-      +- FileScan parquet 
default.hudi_table_func_index[ts#2965,rider#2967,driver#2968,fare#2969,city#2970]
 Batched: true, DataFilters: [isnotnull(ts#2965), (hour(cast(ts#2965 as 
timestamp), Some(Asia/Kolkata)) > 12)], Format: Parquet, Location: 
HoodieFileIndex(1 paths)[file:/tmp/hudi_table_func_index], PartitionFilters: 
[isnotnull(city#2970), NOT (city#2970 = chennai)], PushedFilters: 
[IsNotNull(ts)], ReadSchema: 
struct<ts:string,rider:string,driver:string,fare:double>
+      +- FileScan parquet 
default.hudi_table_expr_index[ts#2965,rider#2967,driver#2968,fare#2969,city#2970]
 Batched: true, DataFilters: [isnotnull(ts#2965), (hour(cast(ts#2965 as 
timestamp), Some(Asia/Kolkata)) > 12)], Format: Parquet, Location: 
HoodieFileIndex(1 paths)[file:/tmp/hudi_table_expr_index], PartitionFilters: 
[isnotnull(city#2970), NOT (city#2970 = chennai)], PushedFilters: 
[IsNotNull(ts)], ReadSchema: 
struct<ts:string,rider:string,driver:string,fare:double>
       
 ```
 </details>
 
-#### Create Partition Stats and Secondary Index (Experimental)
+#### Create Partition Stats and Secondary Index

Review Comment:
   You're right. Now those two need not be set, as they are true by default.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to