[ 
https://issues.apache.org/jira/browse/ARROW-16980?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Richard Tia updated ARROW-16980:
--------------------------------
    Priority: Major  (was: Minor)

> [Python] Results of running a substrait plan against a tpch data table 
> written into parquet are all null
> --------------------------------------------------------------------------------------------------------
>
>                 Key: ARROW-16980
>                 URL: https://issues.apache.org/jira/browse/ARROW-16980
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>            Reporter: Richard Tia
>            Priority: Major
>         Attachments: lineitem.json
>
>
> SQL
> {code:java}
> SELECT l_tax, l_discount FROM lineitem{code}
>  
> Substrait Plan:
> {code:java}
> """
> {
>   "extensionUris": [],
>   "extensions": [],
>   "relations": [{
>     "root": {
>       "input": {
>         "project": {
>           "common": {
>           },
>           "input": {
>             "read": {
>               "common": {
>                 "direct": {
>                 }
>               },
>               "baseSchema": {
>                 "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", 
> "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", 
> "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", 
> "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"],
>                 "struct": {
>                   "types": [{
>                     "i64": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "i64": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "i64": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "i32": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "decimal": {
>                       "scale": 0,
>                       "precision": 19,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "decimal": {
>                       "scale": 0,
>                       "precision": 19,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "decimal": {
>                       "scale": 0,
>                       "precision": 19,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "decimal": {
>                       "scale": 0,
>                       "precision": 19,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "fixedChar": {
>                       "length": 1,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "fixedChar": {
>                       "length": 1,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "date": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "date": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "date": {
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "fixedChar": {
>                       "length": 25,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "fixedChar": {
>                       "length": 10,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }, {
>                     "varchar": {
>                       "length": 44,
>                       "typeVariationReference": 0,
>                       "nullability": "NULLABILITY_NULLABLE"
>                     }
>                   }],
>                   "typeVariationReference": 0,
>                   "nullability": "NULLABILITY_REQUIRED"
>                 }
>               },
>              "local_files": {
>                  "items": [
>                  {
>                      "uri_file": "file://FILENAME_PLACEHOLDER",
>                      "format": "FILE_FORMAT_PARQUET"
>                  }
>                  ]
>              }
>             }
>           },
>           "expressions": [{
>             "selection": {
>               "directReference": {
>                 "structField": {
>                   "field": 7
>                 }
>               },
>               "rootReference": {
>               }
>             }
>           }, {
>             "selection": {
>               "directReference": {
>                 "structField": {
>                   "field": 6
>                 }
>               },
>               "rootReference": {
>               }
>             }
>           }]
>         }
>       },
>       "names": ["L_TAX", "L_DISCOUNT"]
>     }
>   }],
>   "expectedTypeUrls": []
> }
> """ {code}
>  
>  
> Result:
> {code:java}
> pyarrow.Table
> L_TAX: decimal128(19, 0)
> L_DISCOUNT: decimal128(19, 0)
> ----
> L_TAX: [[null,null,null,null,null,null,null,null,null,null]]
> L_DISCOUNT: [[null,null,null,null,null,null,null,null,null,null]] {code}
>  
> Reproduction Steps:
> {code:java}
> import os
> import pyarrow as pa
> import pyarrow.parquet as pq
> import pyarrow.substrait as substrait
> from pyarrow import json as pyarrow_json
> from pyarrow.lib import tobytes
> substrait_query = <string from above plan>
> json_file_path = os.path.join(<path>, 'lineitem.json')
> arrow_data_path_parquet = os.path.join(str(tmpdir), 'substrait_data.parquet') 
> substrait_query = tobytes(substrait_query.replace("FILENAME_PLACEHOLDER", 
> arrow_data_path_parquet))
> # Save lineitem.json as a Parquet file
> table = pyarrow_json.read_json(json_file_path)
> pq.write_table(table, arrow_data_path_parquet)
> # Run the substrait query plan
> buf = pa._substrait._parse_json_plan(substrait_query)
> reader = substrait.run_query(buf)
> result = reader.read_all()
> print(result)
> {code}
> lineitem.json is attached
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to