[
https://issues.apache.org/jira/browse/HIVE-26551?focusedWorklogId=811081&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-811081
]
ASF GitHub Bot logged work on HIVE-26551:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 22/Sep/22 07:50
Start Date: 22/Sep/22 07:50
Worklog Time Spent: 10m
Work Description: zhangbutao commented on code in PR #3611:
URL: https://github.com/apache/hive/pull/3611#discussion_r977315747
##########
ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java:
##########
@@ -117,4 +130,87 @@ public ObjectInspector getObjectInspector() throws
SerDeException {
return inspector;
}
+ @Override
+ public List<FieldSchema> readSchema(Configuration conf, String file) throws
SerDeException {
+ List<String> fieldNames;
+ List<TypeDescription> fieldTypes;
+ try (Reader reader = OrcFile.createReader(new Path(file),
OrcFile.readerOptions(conf))) {
+ fieldNames = reader.getSchema().getFieldNames();
+ fieldTypes = reader.getSchema().getChildren();
+ } catch (Exception e) {
+ throw new SerDeException(ErrorMsg.ORC_FOOTER_ERROR.getErrorCodedMsg(),
e);
+ }
+
+ List<FieldSchema> schema = new ArrayList<>();
+ for (int i = 0; i < fieldNames.size(); i++) {
+ FieldSchema fieldSchema = convertOrcTypeToFieldSchema(fieldNames.get(i),
fieldTypes.get(i));
+ schema.add(fieldSchema);
+ LOG.debug("Inferred field schema {}", fieldSchema);
+ }
+ return schema;
+ }
+
+ private FieldSchema convertOrcTypeToFieldSchema(String fieldName,
TypeDescription fieldType) {
+ String typeName = convertOrcTypeToFieldType(fieldType);
+ return new FieldSchema(fieldName, typeName, "Inferred from Orc file.");
+ }
+
+ private String convertOrcTypeToFieldType(TypeDescription fieldType) {
+ if (fieldType.getCategory().isPrimitive()) {
+ return convertPrimitiveType(fieldType);
+ }
+ return convertComplexType(fieldType);
+ }
+
+ private String convertPrimitiveType(TypeDescription fieldType) {
+ if (fieldType.getCategory().getName().equals("timestamp with local time
zone")) {
+ throw new IllegalArgumentException("Unhandled ORC type " +
fieldType.getCategory().getName());
+ }
+ return fieldType.toString();
+ }
+
+ private String convertComplexType(TypeDescription fieldType) {
+ StringBuilder buffer = new StringBuilder();
+ buffer.append(fieldType.getCategory().getName());
+ switch (fieldType.getCategory()) {
+ case LIST:
+ case MAP:
+ case UNION:
+ buffer.append('<');
+ for (int i = 0; i < fieldType.getChildren().size(); i++) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+
buffer.append(convertOrcTypeToFieldType(fieldType.getChildren().get(i)));
+ }
+ buffer.append('>');
+ break;
+ case STRUCT:
+ buffer.append('<');
+ for(int i=0; i < fieldType.getChildren().size(); ++i) {
+ if (i != 0) {
+ buffer.append(',');
+ }
+ getStructFieldName(buffer, fieldType.getFieldNames().get(i));
+ buffer.append(':');
+
buffer.append(convertOrcTypeToFieldType(fieldType.getChildren().get(i)));
+ }
+ buffer.append('>');
+ break;
+ default:
+ throw new IllegalArgumentException("ORC doesn't handle " +
+ fieldType.getCategory());
+ }
+ return buffer.toString();
+ }
+
+ static void getStructFieldName(StringBuilder buffer, String name) {
+ if (UNQUOTED_NAMES.matcher(name).matches()) {
+ buffer.append(name);
+ } else {
+ buffer.append('`');
+ buffer.append(name.replace("`", "``"));
+ buffer.append('`');
Review Comment:
Done. Thx.
Issue Time Tracking
-------------------
Worklog Id: (was: 811081)
Time Spent: 1.5h (was: 1h 20m)
> Support CREATE TABLE LIKE FILE for ORC
> --------------------------------------
>
> Key: HIVE-26551
> URL: https://issues.apache.org/jira/browse/HIVE-26551
> Project: Hive
> Issue Type: New Feature
> Components: HiveServer2
> Affects Versions: 4.0.0-alpha-2
> Reporter: zhangbutao
> Assignee: zhangbutao
> Priority: Major
> Labels: pull-request-available
> Time Spent: 1.5h
> Remaining Estimate: 0h
>
> https://issues.apache.org/jira/browse/HIVE-26395 added the ability to create
> a table based on existing Parquet files. We can follow up by supporting the
> creation of a table based on existing ORC files.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)