rdblue commented on a change in pull request #1481: URL: https://github.com/apache/iceberg/pull/1481#discussion_r493970566
########## File path: mr/src/main/java/org/apache/iceberg/mr/Catalogs.java ########## @@ -77,6 +102,77 @@ private static Table loadTable(Configuration conf, String tableIdentifier, Strin return new HadoopTables(conf).load(tableLocation); } + /** + * Creates an Iceberg table using the catalog specified by the configuration. + * The properties should contain the following values: + * <p><ul> + * <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required + * <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required + * <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. Table will be unpartitioned if + * not provided + * </ul><p> + * Other properties will be handled over to the Table creation. The controlling properties above will not be + * propagated. + * @param conf a Hadoop conf + * @param props the controlling properties + * @return the created Iceberg table + */ + public static Table createTable(Configuration conf, Properties props) { + String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA); + Preconditions.checkNotNull(schemaString, "Table schema not set"); + Schema schema = SchemaParser.fromJson(props.getProperty(InputFormatConfig.TABLE_SCHEMA)); + + String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC); + PartitionSpec spec = PartitionSpec.unpartitioned(); + if (specString != null) { + spec = PartitionSpecParser.fromJson(schema, specString); + } + + String location = props.getProperty(LOCATION); + + // Create a table property map without the controlling properties + Map<String, String> map = new HashMap<>(props.size()); + for (Object key : props.keySet()) { + if (!PROPERTIES_TO_REMOVE.contains(key)) { + map.put(key.toString(), props.get(key).toString()); + } + } + + Optional<Catalog> catalog = loadCatalog(conf); Review comment: Somewhat out of scope: We might want to build a `Catalog` for this logic so that this class can avoid loading and 
checking the catalog in every method. The catalog would be created with the configuration and would handle this delegation internally.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org