SteNicholas commented on code in PR #970: URL: https://github.com/apache/incubator-paimon/pull/970#discussion_r1173330513
########## paimon-hive/paimon-hive-connector-common/src/main/java/org/apache/paimon/hive/HiveSchema.java: ########## @@ -81,27 +98,46 @@ public static HiveSchema extract(@Nullable Configuration configuration, Properti Options options = PaimonJobConf.extractCatalogConfig(configuration); options.set(CoreOptions.PATH, path.toUri().toString()); CatalogContext catalogContext = CatalogContext.create(options, configuration); - TableSchema tableSchema = FileStoreTableFactory.create(catalogContext).schema(); - - if (properties.containsKey(serdeConstants.LIST_COLUMNS) - && properties.containsKey(serdeConstants.LIST_COLUMN_TYPES)) { - String columnNames = properties.getProperty(serdeConstants.LIST_COLUMNS); - String columnNameDelimiter = - properties.getProperty( - // serdeConstants.COLUMN_NAME_DELIMITER is not defined in earlier Hive - // versions, so we use a constant string instead - "column.name.delimite", String.valueOf(SerDeUtils.COMMA)); - List<String> names = Arrays.asList(columnNames.split(columnNameDelimiter)); - - String columnTypes = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); - List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes); - - if (names.size() > 0 && typeInfos.size() > 0) { - checkSchemaMatched(names, typeInfos, tableSchema); + Optional<TableSchema> tableSchema = TableFactory.schema(catalogContext); + + String columnProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); + // Create hive external table with empty ddl + if (StringUtils.isEmpty(columnProperty)) { + if (!tableSchema.isPresent()) { + throw new IllegalArgumentException( + "Schema file not found in location " + + location + + ". 
Please create table first."); } + // Paimon external table can read schema from the specified location + return new HiveSchema(tableSchema.get()); } - return new HiveSchema(tableSchema); + // Create hive external table with ddl + String columnNameDelimiter = + properties.getProperty( + // serdeConstants.COLUMN_NAME_DELIMITER is not defined in earlier Hive + // versions, so we use a constant string instead + "column.name.delimite", String.valueOf(SerDeUtils.COMMA)); + List<String> columnNames = Arrays.asList(columnProperty.split(columnNameDelimiter)); + String columnTypes = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); + List<TypeInfo> typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes); + List<String> comments = + Lists.newArrayList( + Splitter.on('\0').split(properties.getProperty("columns.comments"))); + // Both Paimon table schema and Hive table schema exist + if (tableSchema.isPresent() && columnNames.size() > 0 && typeInfos.size() > 0) { + LOG.debug( + "Extract schema with exists DDL and exists paimon table, table location:[{}].", + location); + checkSchemaMatched(columnNames, typeInfos, tableSchema.get()); + comments = schemaComments(tableSchema.get()); + } + List<DataType> dataTypes = + typeInfos.stream() + .map(HiveTypeUtils::typeInfoToLogicalType) + .collect(Collectors.toList()); + return new HiveSchema(columnNames, dataTypes, comments); Review Comment: Could this generate a `TableSchema` object and then create the `HiveSchema` from that `TableSchema`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@paimon.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org