This is an automated email from the ASF dual-hosted git repository.

timbrown pushed a commit to branch remaining-renames
in repository https://gitbox.apache.org/repos/asf/incubator-xtable.git
commit abf7d25709ad6b1e1eb18267114b3d17a812cfed
Author: Timothy Brown <t...@onehouse.ai>
AuthorDate: Wed Apr 3 15:28:30 2024 -0500

    Rename all instances of OneTable to xtable where possible
---
 README.md                                          |  4 +--
 .../org/apache/xtable/model/TableSyncMetadata.java |  8 ++---
 .../apache/xtable/spi/sync/ConversionTarget.java   |  2 +-
 .../apache/xtable/constants/OneTableConstants.java | 35 ----------------------
 .../apache/xtable/delta/DeltaActionsConverter.java |  2 +-
 .../apache/xtable/delta/DeltaConversionSource.java |  3 +-
 .../apache/xtable/delta/DeltaConversionTarget.java |  2 +-
 .../xtable/delta/DeltaDataFileExtractor.java       |  2 +-
 .../xtable/delta/DeltaPartitionExtractor.java      |  2 +-
 .../apache/xtable/delta/DeltaSchemaExtractor.java  |  2 +-
 .../xtable/hudi/BaseFileUpdatesExtractor.java      |  4 +--
 .../apache/xtable/hudi/HudiFileStatsExtractor.java |  2 +-
 .../apache/xtable/hudi/HudiSchemaExtractor.java    |  2 +-
 .../iceberg/IcebergColumnStatsConverter.java       |  2 +-
 .../xtable/iceberg/IcebergConversionSource.java    |  2 +-
 .../xtable/iceberg/IcebergConversionTarget.java    |  4 +--
 .../iceberg/IcebergPartitionValueConverter.java    |  2 +-
 .../xtable/iceberg/IcebergSchemaExtractor.java     |  6 ++--
 .../apache/xtable/iceberg/IcebergSchemaSync.java   |  2 +-
 .../org/apache/xtable/ITConversionController.java  | 26 +++++++---------
 .../conversion/TestConversionController.java       | 14 ++++-----
 .../org/apache/xtable/delta/TestDeltaSync.java     | 14 ++++-----
 .../java/org/apache/xtable/hudi/HudiTestUtil.java  |  4 +--
 .../xtable/hudi/ITHudiConversionSourceSource.java  | 18 +++++------
 .../iceberg/TestIcebergConversionTargetSource.java |  2 +-
 .../iceberg/TestIcebergPartitionSpecExtractor.java |  2 +-
 .../TestIcebergPartitionValueConverter.java        | 23 +++++++-------
 .../org/apache/xtable/iceberg/TestIcebergSync.java | 22 +++++++-------
 demo/README.md                                     |  4 +--
 demo/notebook/demo.ipynb                           |  8 ++---
 hudi-support/extensions/README.md                  |  6 ++--
 .../apache/xtable/hudi/sync/XTableSyncConfig.java  |  4 +--
 .../java/org/apache/xtable/utilities/RunSync.java  |  6 ++--
 ...faults.yaml => xtable-conversion-defaults.yaml} |  0
 ...oop-defaults.xml => xtable-hadoop-defaults.xml} |  0
 website/docs/biglake-metastore.md                  | 12 ++++----
 website/docs/bigquery.md                           |  2 +-
 website/docs/glue-catalog.md                       | 16 +++++-----
 website/docs/how-to.md                             |  2 +-
 website/docs/redshift.md                           | 10 +++----
 website/docs/unity-catalog.md                      |  8 ++---
 41 files changed, 126 insertions(+), 165 deletions(-)

diff --git a/README.md b/README.md
index 08bb33a5..aec462b9 100644
--- a/README.md
+++ b/README.md
@@ -94,7 +94,7 @@ catalogOptions: # all other options are passed through in a map
 ```
 5. run with `java -jar utilities/target/utilities-0.1.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml [--hadoopConfig hdfs-site.xml] [--convertersConfig converters.yaml] [--icebergCatalogConfig catalog.yaml]`
 The bundled jar includes hadoop dependencies for AWS, Azure, and GCP. Sample hadoop configurations for configuring the converters
-can be found in the [onetable-hadoop-defaults.xml](https://github.com/apache/incubator-xtable/blob/main/utilities/src/main/resources/onetable-hadoop-defaults.xml) file.
+can be found in the [xtable-hadoop-defaults.xml](https://github.com/apache/incubator-xtable/blob/main/utilities/src/main/resources/xtable-hadoop-defaults.xml) file.
 The custom hadoop configurations can be passed in with the `--hadoopConfig [custom-hadoop-config-file]` option.
 The config in custom hadoop config file will override the default hadoop configurations.
 For an example of a custom hadoop config file, see [hadoop.xml](https://xtable.apache.org/docs/fabric#step-2-translate-source-table-to-delta-lake-format-using-apache-xtable-incubating).
@@ -107,7 +107,7 @@ For setting up the repo on IntelliJ, open the project and change the java versio
 You have found a bug, or have a cool idea you that want to contribute to the project ? Please file a GitHub issue [here](https://github.com/apache/incubator-xtable/issues)
 ## Adding a new target format
-Adding a new target format requires a developer implement [ConversionTarget](./api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java). Once you have implemented that interface, you can integrate it into the [OneTableClient](./core/src/main/java/org/apache/xtable/client/OneTableClient.java). If you think others may find that target useful, please raise a Pull Request to add it to the project.
+Adding a new target format requires a developer implement [ConversionTarget](./api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java). Once you have implemented that interface, you can integrate it into the [ConversionController](./core/src/main/java/org/apache/xtable/conversion/ConversionController.java). If you think others may find that target useful, please raise a Pull Request to add it to the project.
 ## Overview of the sync process
diff --git a/api/src/main/java/org/apache/xtable/model/TableSyncMetadata.java b/api/src/main/java/org/apache/xtable/model/TableSyncMetadata.java
index 42878b0d..7c0b8c1a 100644
--- a/api/src/main/java/org/apache/xtable/model/TableSyncMetadata.java
+++ b/api/src/main/java/org/apache/xtable/model/TableSyncMetadata.java
@@ -41,7 +41,7 @@ public class TableSyncMetadata {
    * Property name for the lastInstantSynced field from SyncResult, used for persisting
    * lastInstantSynced in the table metadata/properties
    */
-  public static final String ONETABLE_LAST_INSTANT_SYNCED_PROP = "ONETABLE_LAST_INSTANT_SYNCED";
+  public static final String XTABLE_LAST_INSTANT_SYNCED_PROP = "XTABLE_LAST_INSTANT_SYNCED";
   /**
    * Property name for the list of instants to consider during the next sync. This list may include
    * out-of-order instants that could be missed without explicit tracking.
@@ -54,7 +54,7 @@ public class TableSyncMetadata { public Map<String, String> asMap() { Map<String, String> map = new HashMap<>(); - map.put(ONETABLE_LAST_INSTANT_SYNCED_PROP, lastInstantSynced.toString()); + map.put(XTABLE_LAST_INSTANT_SYNCED_PROP, lastInstantSynced.toString()); map.put( INFLIGHT_COMMITS_TO_CONSIDER_FOR_NEXT_SYNC_PROP, convertInstantsToConsiderForNextSyncToString()); @@ -65,8 +65,8 @@ public class TableSyncMetadata { if (properties != null) { Instant lastInstantSynced = null; List<Instant> instantsToConsiderForNextSync = null; - if (properties.containsKey(ONETABLE_LAST_INSTANT_SYNCED_PROP)) { - lastInstantSynced = Instant.parse(properties.get(ONETABLE_LAST_INSTANT_SYNCED_PROP)); + if (properties.containsKey(XTABLE_LAST_INSTANT_SYNCED_PROP)) { + lastInstantSynced = Instant.parse(properties.get(XTABLE_LAST_INSTANT_SYNCED_PROP)); } if (properties.containsKey(INFLIGHT_COMMITS_TO_CONSIDER_FOR_NEXT_SYNC_PROP)) { instantsToConsiderForNextSync = diff --git a/api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java b/api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java index 37eb25f1..2ff6fb27 100644 --- a/api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java +++ b/api/src/main/java/org/apache/xtable/spi/sync/ConversionTarget.java @@ -82,7 +82,7 @@ public interface ConversionTarget { /** Completes the sync and performs any cleanup required. */ void completeSync(); - /** Returns the onetable metadata persisted in the target */ + /** Returns the xtable metadata persisted in the target */ Optional<TableSyncMetadata> getTableMetadata(); /** Returns the TableFormat name the client syncs to */ diff --git a/core/src/main/java/org/apache/xtable/constants/OneTableConstants.java b/core/src/main/java/org/apache/xtable/constants/OneTableConstants.java deleted file mode 100644 index 1f763dc3..00000000 --- a/core/src/main/java/org/apache/xtable/constants/OneTableConstants.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.xtable.constants; - -import lombok.Builder; -import lombok.Value; - -@Builder -@Value -public class OneTableConstants { - /** - * Maximum number of syncs that are persisted in the archive file, after that least recent sync is - * evicted. - */ - public static final Integer NUM_ARCHIVED_SYNCS_RESULTS = 10; - - /** InternalTable meta directory inside table base path to store sync info. 
*/ - public static final String ONETABLE_META_DIR = ".onetable"; -} diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java b/core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java index c588bef4..fbee89f4 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaActionsConverter.java @@ -89,7 +89,7 @@ public class DeltaActionsConverter { .build(); } - public FileFormat convertToOneTableFileFormat(String provider) { + public FileFormat convertToFileFormat(String provider) { if (provider.equals("parquet")) { return FileFormat.APACHE_PARQUET; } else if (provider.equals("orc")) { diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java b/core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java index 91095d30..e113d269 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaConversionSource.java @@ -112,8 +112,7 @@ public class DeltaConversionSource implements ConversionSource<Long> { List<Action> actionsForVersion = getChangesState().getActionsForVersion(versionNumber); Snapshot snapshotAtVersion = deltaLog.getSnapshotAt(versionNumber, Option.empty()); FileFormat fileFormat = - actionsConverter.convertToOneTableFileFormat( - snapshotAtVersion.metadata().format().provider()); + actionsConverter.convertToFileFormat(snapshotAtVersion.metadata().format().provider()); Set<InternalDataFile> addedFiles = new HashSet<>(); Set<InternalDataFile> removedFiles = new HashSet<>(); for (Action action : actionsForVersion) { diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java b/core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java index 798c3907..910fe1d7 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaConversionTarget.java @@ -261,7 +261,7 @@ public class DeltaConversionTarget implements ConversionTarget { transaction.updateMetadata(metadata, false); transaction.commit( actions, - new DeltaOperations.Update(Option.apply(Literal.fromObject("onetable-delta-sync")))); + new DeltaOperations.Update(Option.apply(Literal.fromObject("xtable-delta-sync")))); } private Map<String, String> getConfigurationsForDeltaSync() { diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java b/core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java index ae052b3b..cffbcbaa 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaDataFileExtractor.java @@ -63,7 +63,7 @@ public class DeltaDataFileExtractor { private DeltaDataFileIterator( Snapshot snapshot, InternalSchema schema, boolean includeColumnStats) { this.fileFormat = - actionsConverter.convertToOneTableFileFormat(snapshot.metadata().format().provider()); + actionsConverter.convertToFileFormat(snapshot.metadata().format().provider()); this.fields = schema.getFields(); this.partitionFields = partitionExtractor.convertFromDeltaPartitionFormat( diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java b/core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java index c87e41e3..3492857f 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaPartitionExtractor.java @@ 
-77,7 +77,7 @@ public class DeltaPartitionExtractor { private static final String DATE_FORMAT_FOR_YEAR = "yyyy"; // For timestamp partition fields, actual partition column names in delta format will be of type // generated & and with a name like `delta_partition_col_{transform_type}_{source_field_name}`. - private static final String DELTA_PARTITION_COL_NAME_FORMAT = "onetable_partition_col_%s_%s"; + private static final String DELTA_PARTITION_COL_NAME_FORMAT = "xtable_partition_col_%s_%s"; static final String DELTA_GENERATION_EXPRESSION = "delta.generationExpression"; private static final List<ParsedGeneratedExpr.GeneratedExprType> GRANULARITIES = Arrays.asList( diff --git a/core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java b/core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java index 26418c2d..fa425ef2 100644 --- a/core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java +++ b/core/src/main/java/org/apache/xtable/delta/DeltaSchemaExtractor.java @@ -49,7 +49,7 @@ import org.apache.xtable.schema.SchemaUtils; * * <ul> * <li>Delta schemas are represented as Spark StructTypes which do not have enums so the enum - * types are lost when converting from Onetable to Delta Lake representations + * types are lost when converting from XTable to Delta Lake representations * <li>Delta does not have a fixed length byte array option so {@link InternalType#FIXED} is * simply translated to a {@link org.apache.spark.sql.types.BinaryType} * <li>Similarly, {@link InternalType#TIMESTAMP_NTZ} is translated to a long in Delta Lake diff --git a/core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java b/core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java index 797662ae..bef00013 100644 --- a/core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java +++ b/core/src/main/java/org/apache/xtable/hudi/BaseFileUpdatesExtractor.java @@ -18,7 +18,7 @@ package org.apache.xtable.hudi; -import static org.apache.xtable.hudi.HudiSchemaExtractor.convertFromOneTablePath; +import static org.apache.xtable.hudi.HudiSchemaExtractor.convertFromXTablePath; import java.util.ArrayList; import java.util.Collections; @@ -244,7 +244,7 @@ public class BaseFileUpdatesExtractor { columnStat -> HoodieColumnRangeMetadata.<Comparable>create( fileName, - convertFromOneTablePath(columnStat.getField().getPath()), + convertFromXTablePath(columnStat.getField().getPath()), (Comparable) columnStat.getRange().getMinValue(), (Comparable) columnStat.getRange().getMaxValue(), columnStat.getNumNulls(), diff --git a/core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java b/core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java index 3c4d9597..2ffbad04 100644 --- a/core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java +++ b/core/src/main/java/org/apache/xtable/hudi/HudiFileStatsExtractor.java @@ -274,7 +274,7 @@ public class HudiFileStatsExtractor { } private String getFieldNameForStats(InternalField field, boolean isReadFromMetadataTable) { - String convertedDotPath = HudiSchemaExtractor.convertFromOneTablePath(field.getPath()); + String convertedDotPath = HudiSchemaExtractor.convertFromXTablePath(field.getPath()); // the array field naming is different for metadata table if (isReadFromMetadataTable) { return convertedDotPath.replace(ARRAY_DOT_FIELD, PARQUET_ELMENT_DOT_FIELD); diff --git a/core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java 
b/core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java index a9a89f7f..a7a1b8c4 100644 --- a/core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java +++ b/core/src/main/java/org/apache/xtable/hudi/HudiSchemaExtractor.java @@ -55,7 +55,7 @@ public class HudiSchemaExtractor implements SchemaExtractor<Schema> { return converter.toInternalSchema(schema); } - static String convertFromOneTablePath(String path) { + static String convertFromXTablePath(String path) { return path.replace(InternalField.Constants.MAP_KEY_FIELD_NAME, MAP_KEY_FIELD_NAME) .replace(InternalField.Constants.MAP_VALUE_FIELD_NAME, MAP_VALUE_FIELD_NAME) .replace(InternalField.Constants.ARRAY_ELEMENT_FIELD_NAME, LIST_ELEMENT_FIELD_NAME); diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java index 539d2c69..60d4a453 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergColumnStatsConverter.java @@ -61,7 +61,7 @@ public class IcebergColumnStatsConverter { columnStats -> { InternalField field = columnStats.getField(); Types.NestedField icebergField = - schema.findField(IcebergSchemaExtractor.convertFromOneTablePath(field.getPath())); + schema.findField(IcebergSchemaExtractor.convertFromXTablePath(field.getPath())); int fieldId = icebergField.fieldId(); columnSizes.put(fieldId, columnStats.getTotalSize()); valueCounts.put(fieldId, columnStats.getNumValues()); diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java index 76775b49..2308b839 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionSource.java @@ -164,7 +164,7 @@ public class IcebergConversionSource implements ConversionSource<Snapshot> { private InternalDataFile fromIceberg( DataFile file, PartitionSpec partitionSpec, InternalTable internalTable) { List<PartitionValue> partitionValues = - partitionConverter.toOneTable(internalTable, file.partition(), partitionSpec); + partitionConverter.toXTable(internalTable, file.partition(), partitionSpec); return dataFileExtractor.fromIceberg(file, partitionValues, internalTable.getReadSchema()); } diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java index f04b3419..cb9cfeae 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergConversionTarget.java @@ -19,7 +19,7 @@ package org.apache.xtable.iceberg; import static org.apache.xtable.model.TableSyncMetadata.INFLIGHT_COMMITS_TO_CONSIDER_FOR_NEXT_SYNC_PROP; -import static org.apache.xtable.model.TableSyncMetadata.ONETABLE_LAST_INSTANT_SYNCED_PROP; +import static org.apache.xtable.model.TableSyncMetadata.XTABLE_LAST_INSTANT_SYNCED_PROP; import java.time.Instant; import java.time.temporal.ChronoUnit; @@ -275,7 +275,7 @@ public class IcebergConversionTarget implements ConversionTarget { Transaction transaction = table.newTransaction(); transaction .updateProperties() - .remove(ONETABLE_LAST_INSTANT_SYNCED_PROP) + .remove(XTABLE_LAST_INSTANT_SYNCED_PROP) .remove(INFLIGHT_COMMITS_TO_CONSIDER_FOR_NEXT_SYNC_PROP) .commit(); transaction.commitTransaction(); 
diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java index bbda45a7..a6abd2a9 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergPartitionValueConverter.java @@ -71,7 +71,7 @@ public class IcebergPartitionValueConverter { return INSTANCE; } - public List<PartitionValue> toOneTable( + public List<PartitionValue> toXTable( InternalTable internalTable, StructLike structLike, PartitionSpec partitionSpec) { if (!partitionSpec.isPartitioned()) { return Collections.emptyList(); diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java index bfef00cb..3acd7856 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaExtractor.java @@ -82,7 +82,7 @@ public class IcebergSchemaExtractor { Schema partialSchema = new Schema(nestedFields); Set<Integer> recordKeyIds = recordKeyFields.stream() - .map(keyField -> partialSchema.findField(convertFromOneTablePath(keyField.getPath()))) + .map(keyField -> partialSchema.findField(convertFromXTablePath(keyField.getPath()))) .filter(Objects::nonNull) .map(Types.NestedField::fieldId) .collect(Collectors.toSet()); @@ -90,7 +90,7 @@ public class IcebergSchemaExtractor { List<String> missingFieldPaths = recordKeyFields.stream() .map(InternalField::getPath) - .filter(path -> partialSchema.findField(convertFromOneTablePath(path)) == null) + .filter(path -> partialSchema.findField(convertFromXTablePath(path)) == null) .collect(CustomCollectors.toList(recordKeyFields.size())); log.error("Missing field IDs for record key field paths: " + missingFieldPaths); throw new SchemaExtractorException( @@ -134,7 +134,7 @@ public class IcebergSchemaExtractor { .collect(CustomCollectors.toList(iceFields.size())); } - static String convertFromOneTablePath(String path) { + static String convertFromXTablePath(String path) { return path.replace(InternalField.Constants.MAP_KEY_FIELD_NAME, MAP_KEY_FIELD_NAME) .replace(InternalField.Constants.MAP_VALUE_FIELD_NAME, MAP_VALUE_FIELD_NAME) .replace(InternalField.Constants.ARRAY_ELEMENT_FIELD_NAME, LIST_ELEMENT_FIELD_NAME); diff --git a/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java b/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java index 713f7498..05e89469 100644 --- a/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java +++ b/core/src/main/java/org/apache/xtable/iceberg/IcebergSchemaSync.java @@ -62,7 +62,7 @@ public class IcebergSchemaSync { * updates to happen in the same order as the source system. 
* * @param current The current schema in the iceberg table - * @param latest The latest schema provided by Onetable (reflects source schema's state) + * @param latest The latest schema provided by XTable (reflects source schema's state) * @param updateSchema An updateSchema object for the current transaction * @param parentPath dot separated path for nested field's parent * @return map of fieldId to action diff --git a/core/src/test/java/org/apache/xtable/ITConversionController.java b/core/src/test/java/org/apache/xtable/ITConversionController.java index fda52d9c..44338494 100644 --- a/core/src/test/java/org/apache/xtable/ITConversionController.java +++ b/core/src/test/java/org/apache/xtable/ITConversionController.java @@ -181,9 +181,9 @@ public class ITConversionController { String tableName = getTableName(); ConversionController conversionController = new ConversionController(jsc.hadoopConfiguration()); List<String> targetTableFormats = getOtherFormats(sourceTableFormat); - String oneTablePartitionConfig = null; + String partitionConfig = null; if (isPartitioned) { - oneTablePartitionConfig = "level:VALUE"; + partitionConfig = "level:VALUE"; } ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(sourceTableFormat); @@ -200,9 +200,7 @@ public class ITConversionController { .tableBasePath(table.getBasePath()) .tableDataPath(table.getDataPath()) .hudiSourceConfig( - HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(oneTablePartitionConfig) - .build()) + HudiSourceConfigImpl.builder().partitionFieldSpecConfig(partitionConfig).build()) .syncMode(syncMode) .build(); conversionController.sync(perTableConfig, conversionSourceProvider); @@ -231,9 +229,7 @@ public class ITConversionController { .tableBasePath(tableWithUpdatedSchema.getBasePath()) .tableDataPath(tableWithUpdatedSchema.getDataPath()) .hudiSourceConfig( - HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(oneTablePartitionConfig) - .build()) + HudiSourceConfigImpl.builder().partitionFieldSpecConfig(partitionConfig).build()) .syncMode(syncMode) .build(); List<Row> insertsAfterSchemaUpdate = tableWithUpdatedSchema.insertRows(100); @@ -290,7 +286,7 @@ public class ITConversionController { .tableBasePath(table.getBasePath()) .hudiSourceConfig( HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(partitionConfig.getOneTableConfig()) + .partitionFieldSpecConfig(partitionConfig.getXTableConfig()) .build()) .syncMode(syncMode) .build(); @@ -325,7 +321,7 @@ public class ITConversionController { .tableBasePath(table.getBasePath()) .hudiSourceConfig( HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(partitionConfig.getOneTableConfig()) + .partitionFieldSpecConfig(partitionConfig.getXTableConfig()) .build()) .syncMode(syncMode) .build(); @@ -475,7 +471,7 @@ public class ITConversionController { String sourceTableFormat = tableFormatPartitionDataHolder.getSourceTableFormat(); List<String> targetTableFormats = tableFormatPartitionDataHolder.getTargetTableFormats(); Optional<String> hudiPartitionConfig = tableFormatPartitionDataHolder.getHudiSourceConfig(); - String oneTablePartitionConfig = tableFormatPartitionDataHolder.getOneTablePartitionConfig(); + String xTablePartitionConfig = tableFormatPartitionDataHolder.getXTablePartitionConfig(); String filter = tableFormatPartitionDataHolder.getFilter(); ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(sourceTableFormat); @@ -497,7 +493,7 @@ public class ITConversionController { 
.tableDataPath(tableToClose.getDataPath()) .hudiSourceConfig( HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(oneTablePartitionConfig) + .partitionFieldSpecConfig(xTablePartitionConfig) .build()) .syncMode(SyncMode.INCREMENTAL) .build(); @@ -843,13 +839,13 @@ public class ITConversionController { String sourceFormat, List<String> targetFormats, String hudiPartitionConfig, - String oneTablePartitionConfig, + String xTablePartitionConfig, String filter) { return TableFormatPartitionDataHolder.builder() .sourceTableFormat(sourceFormat) .targetTableFormats(targetFormats) .hudiSourceConfig(Optional.ofNullable(hudiPartitionConfig)) - .oneTablePartitionConfig(oneTablePartitionConfig) + .xTablePartitionConfig(xTablePartitionConfig) .filter(filter) .build(); } @@ -859,7 +855,7 @@ public class ITConversionController { private static class TableFormatPartitionDataHolder { String sourceTableFormat; List<String> targetTableFormats; - String oneTablePartitionConfig; + String xTablePartitionConfig; Optional<String> hudiSourceConfig; String filter; } diff --git a/core/src/test/java/org/apache/xtable/conversion/TestConversionController.java b/core/src/test/java/org/apache/xtable/conversion/TestConversionController.java index c30d0878..4fe7c365 100644 --- a/core/src/test/java/org/apache/xtable/conversion/TestConversionController.java +++ b/core/src/test/java/org/apache/xtable/conversion/TestConversionController.java @@ -72,7 +72,7 @@ public class TestConversionController { @Test void testAllSnapshotSyncAsPerConfig() { SyncMode syncMode = SyncMode.FULL; - InternalTable internalTable = getOneTable(); + InternalTable internalTable = getInternalTable(); InternalSnapshot internalSnapshot = buildSnapshot(internalTable, "v1"); Instant instantBeforeHour = Instant.now().minus(Duration.ofHours(1)); SyncResult syncResult = buildSyncResult(syncMode, instantBeforeHour); @@ -185,7 +185,7 @@ public class TestConversionController { @Test void testIncrementalSyncFallBackToSnapshotForAllFormats() { SyncMode syncMode = SyncMode.INCREMENTAL; - InternalTable internalTable = getOneTable(); + InternalTable internalTable = getInternalTable(); Instant instantBeforeHour = Instant.now().minus(Duration.ofHours(1)); InternalSnapshot internalSnapshot = buildSnapshot(internalTable, "v1"); SyncResult syncResult = buildSyncResult(syncMode, instantBeforeHour); @@ -274,7 +274,7 @@ public class TestConversionController { when(mockConversionSource.getTableChangeForCommit(instant)).thenReturn(tableChange); } // Iceberg needs to sync by snapshot since instant15 is affected by table clean-up. 
- InternalTable internalTable = getOneTable(); + InternalTable internalTable = getInternalTable(); Instant instantBeforeHour = Instant.now().minus(Duration.ofHours(1)); InternalSnapshot internalSnapshot = buildSnapshot(internalTable, "v1"); SyncResult syncResult = buildSyncResult(syncMode, instantBeforeHour); @@ -370,7 +370,7 @@ public class TestConversionController { } private TableChange getTableChange(Instant instant) { - return TableChange.builder().tableAsOfChange(getOneTable(instant)).build(); + return TableChange.builder().tableAsOfChange(getInternalTable(instant)).build(); } private SyncResult buildSyncResult(SyncMode syncMode, Instant lastSyncedInstant) { @@ -385,11 +385,11 @@ public class TestConversionController { return InternalSnapshot.builder().table(internalTable).version(version).build(); } - private InternalTable getOneTable() { - return getOneTable(Instant.now()); + private InternalTable getInternalTable() { + return getInternalTable(Instant.now()); } - private InternalTable getOneTable(Instant instant) { + private InternalTable getInternalTable(Instant instant) { return InternalTable.builder().name("some_table").latestCommitTime(instant).build(); } diff --git a/core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java b/core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java index 514cc394..135ce995 100644 --- a/core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java +++ b/core/src/test/java/org/apache/xtable/delta/TestDeltaSync.java @@ -150,8 +150,8 @@ public class TestDeltaSync { .build()) .build()); InternalSchema schema2 = getInternalSchema().toBuilder().fields(fields2).build(); - InternalTable table1 = getOneTable(tableName, basePath, schema1, null, LAST_COMMIT_TIME); - InternalTable table2 = getOneTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME); + InternalTable table1 = getInternalTable(tableName, basePath, schema1, null, LAST_COMMIT_TIME); + InternalTable table2 = getInternalTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME); InternalDataFile dataFile1 = getDataFile(1, Collections.emptyList(), basePath); InternalDataFile dataFile2 = getDataFile(2, Collections.emptyList(), basePath); @@ -186,7 +186,7 @@ public class TestDeltaSync { .transformType(PartitionTransformType.VALUE) .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, schema, @@ -245,7 +245,7 @@ public class TestDeltaSync { .transformType(PartitionTransformType.VALUE) .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, schema, @@ -312,7 +312,7 @@ public class TestDeltaSync { .transformType(transformType) .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, schema, @@ -362,7 +362,7 @@ public class TestDeltaSync { JavaConverters.seqAsJavaList(translatedExpression) .get(0) .toString() - .contains(String.format("onetable_partition_col_%s_timestamp_field", transformType))); + .contains(String.format("xtable_partition_col_%s_timestamp_field", transformType))); } private static Stream<Arguments> timestampPartitionTestingArgs() { @@ -413,7 +413,7 @@ public class TestDeltaSync { .build(); } - private InternalTable getOneTable( + private InternalTable getInternalTable( String tableName, Path basePath, InternalSchema schema, diff --git a/core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java b/core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java index 617f7ce3..d21d1d18 100644 --- a/core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java +++ 
b/core/src/test/java/org/apache/xtable/hudi/HudiTestUtil.java @@ -117,7 +117,7 @@ public class HudiTestUtil { public static SparkConf getSparkConf(Path tempDir) { return new SparkConf() - .setAppName("onetable-testing") + .setAppName("xtable-testing") .set("spark.serializer", KryoSerializer.class.getName()) .set("spark.sql.catalog.default_iceberg", "org.apache.iceberg.spark.SparkCatalog") .set("spark.sql.catalog.default_iceberg.type", "hadoop") @@ -140,6 +140,6 @@ public class HudiTestUtil { @AllArgsConstructor(staticName = "of") public static class PartitionConfig { String hudiConfig; - String oneTableConfig; + String xTableConfig; } } diff --git a/core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java b/core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java index 241a33a7..408e4373 100644 --- a/core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java +++ b/core/src/test/java/org/apache/xtable/hudi/ITHudiConversionSourceSource.java @@ -135,7 +135,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // Get the current snapshot InternalSnapshot internalSnapshot = hudiClient.getCurrentSnapshot(); ValidationTestHelper.validateSnapshot( @@ -183,7 +183,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // Get the current snapshot InternalSnapshot internalSnapshot = hudiClient.getCurrentSnapshot(); ValidationTestHelper.validateSnapshot( @@ -225,7 +225,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // commitInstant1 is not safe for incremental sync as cleaner has run after and touched // related files. 
assertFalse( @@ -390,7 +390,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // Get the current snapshot InternalSnapshot internalSnapshot = hudiClient.getCurrentSnapshot(); ValidationTestHelper.validateSnapshot( @@ -445,7 +445,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // Get the current snapshot InternalSnapshot internalSnapshot = hudiClient.getCurrentSnapshot(); ValidationTestHelper.validateSnapshot( @@ -511,7 +511,7 @@ public class ITHudiConversionSourceSource { HudiConversionSource hudiClient = getHudiSourceClient( - CONFIGURATION, table.getBasePath(), partitionConfig.getOneTableConfig()); + CONFIGURATION, table.getBasePath(), partitionConfig.getXTableConfig()); // Get the current snapshot InternalSnapshot internalSnapshot = hudiClient.getCurrentSnapshot(); ValidationTestHelper.validateSnapshot(internalSnapshot, baseFilesAfterCommit4); @@ -560,7 +560,7 @@ public class ITHudiConversionSourceSource { } private HudiConversionSource getHudiSourceClient( - Configuration conf, String basePath, String onetablePartitionConfig) { + Configuration conf, String basePath, String xTablePartitionConfig) { HoodieTableMetaClient hoodieTableMetaClient = HoodieTableMetaClient.builder() .setConf(conf) @@ -569,9 +569,7 @@ public class ITHudiConversionSourceSource { .build(); HudiSourcePartitionSpecExtractor partitionSpecExtractor = new ConfigurationBasedPartitionSpecExtractor( - HudiSourceConfigImpl.builder() - .partitionFieldSpecConfig(onetablePartitionConfig) - .build()); + HudiSourceConfigImpl.builder().partitionFieldSpecConfig(xTablePartitionConfig).build()); return new HudiConversionSource(hoodieTableMetaClient, partitionSpecExtractor); } diff --git a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java index 010a524b..dc3276d6 100644 --- a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java +++ b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergConversionTargetSource.java @@ -174,7 +174,7 @@ class TestIcebergConversionTargetSource { verify(spyConversionSource, times(1)).getTable(iceCurrentSnapshot); verify(spyConversionSource, times(1)) .getSchemaCatalog(internalSnapshot.getTable(), iceCurrentSnapshot); - verify(spyPartitionConverter, times(5)).toOneTable(any(), any(), any()); + verify(spyPartitionConverter, times(5)).toXTable(any(), any(), any()); verify(spyDataFileExtractor, times(5)).fromIceberg(any(), any(), any()); Assertions.assertNotNull(internalSnapshot.getPartitionedDataFiles()); diff --git a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java index 775a62fe..31fbb6f5 100644 --- a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java +++ b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionSpecExtractor.java @@ -38,7 +38,7 @@ import org.apache.xtable.model.schema.InternalType; import org.apache.xtable.model.schema.PartitionTransformType; public class 
TestIcebergPartitionSpecExtractor { - // TODO assert error cases and add wrap errors in Onetable exceptions + // TODO assert error cases and add wrap errors in XTable exceptions private static final Schema TEST_SCHEMA = new Schema( Types.NestedField.required(0, "timestamp_hour", Types.TimestampType.withZone()), diff --git a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java index 52e4f113..7ff331ec 100644 --- a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java +++ b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergPartitionValueConverter.java @@ -65,15 +65,16 @@ public class TestIcebergPartitionValueConverter { IcebergSchemaExtractor.getInstance().fromIceberg(SCHEMA); @Test - public void testToOneTableNotPartitioned() { + public void testToXTableNotPartitioned() { PartitionSpec partitionSpec = PartitionSpec.unpartitioned(); List<PartitionValue> partitionValues = - partitionValueConverter.toOneTable(buildOnetable(false), STRUCT_LIKE_RECORD, partitionSpec); + partitionValueConverter.toXTable( + buildInternalTable(false), STRUCT_LIKE_RECORD, partitionSpec); assertTrue(partitionValues.isEmpty()); } @Test - public void testToOneTableValuePartitioned() { + public void testToXTableValuePartitioned() { List<PartitionValue> expectedPartitionValues = Collections.singletonList( PartitionValue.builder() @@ -82,8 +83,8 @@ public class TestIcebergPartitionValueConverter { .build()); PartitionSpec partitionSpec = PartitionSpec.builderFor(SCHEMA).identity("name").build(); List<PartitionValue> partitionValues = - partitionValueConverter.toOneTable( - buildOnetable(true, "name", PartitionTransformType.VALUE), + partitionValueConverter.toXTable( + buildInternalTable(true, "name", PartitionTransformType.VALUE), STRUCT_LIKE_RECORD, partitionSpec); assertEquals(1, partitionValues.size()); @@ -91,7 +92,7 @@ public class TestIcebergPartitionValueConverter { } @Test - public void testToOneTableYearPartitioned() { + public void testToXTableYearPartitioned() { List<PartitionValue> expectedPartitionValues = Collections.singletonList( PartitionValue.builder() @@ -100,19 +101,19 @@ public class TestIcebergPartitionValueConverter { .build()); PartitionSpec partitionSpec = PartitionSpec.builderFor(SCHEMA).year("birthDate").build(); List<PartitionValue> partitionValues = - partitionValueConverter.toOneTable( - buildOnetable(true, "birthDate", PartitionTransformType.YEAR), + partitionValueConverter.toXTable( + buildInternalTable(true, "birthDate", PartitionTransformType.YEAR), STRUCT_LIKE_RECORD, partitionSpec); assertEquals(1, partitionValues.size()); assertEquals(expectedPartitionValues, partitionValues); } - private InternalTable buildOnetable(boolean isPartitioned) { - return buildOnetable(isPartitioned, null, null); + private InternalTable buildInternalTable(boolean isPartitioned) { + return buildInternalTable(isPartitioned, null, null); } - private InternalTable buildOnetable( + private InternalTable buildInternalTable( boolean isPartitioned, String sourceField, PartitionTransformType transformType) { return InternalTable.builder() .readSchema(IcebergSchemaExtractor.getInstance().fromIceberg(SCHEMA)) diff --git a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java index f676f8a9..3d479b4e 100644 --- a/core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java +++ 
b/core/src/test/java/org/apache/xtable/iceberg/TestIcebergSync.java @@ -221,8 +221,9 @@ public class TestIcebergSync { List<Types.NestedField> fields = new ArrayList<>(icebergSchema.columns()); fields.add(Types.NestedField.of(6, false, "long_field", Types.LongType.get())); Schema icebergSchema2 = new Schema(fields); - InternalTable table1 = getOneTable(tableName, basePath, internalSchema, null, LAST_COMMIT_TIME); - InternalTable table2 = getOneTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME); + InternalTable table1 = + getInternalTable(tableName, basePath, internalSchema, null, LAST_COMMIT_TIME); + InternalTable table2 = getInternalTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME); Map<SchemaVersion, InternalSchema> schemas = new HashMap<>(); SchemaVersion schemaVersion1 = new SchemaVersion(1, ""); schemas.put(schemaVersion1, internalSchema); @@ -318,9 +319,10 @@ public class TestIcebergSync { List<Types.NestedField> fields = new ArrayList<>(icebergSchema.columns()); fields.add(Types.NestedField.of(6, false, "long_field", Types.LongType.get())); Schema icebergSchema2 = new Schema(fields); - InternalTable table1 = getOneTable(tableName, basePath, internalSchema, null, LAST_COMMIT_TIME); + InternalTable table1 = + getInternalTable(tableName, basePath, internalSchema, null, LAST_COMMIT_TIME); InternalTable table2 = - getOneTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME.plusMillis(100000L)); + getInternalTable(tableName, basePath, schema2, null, LAST_COMMIT_TIME.plusMillis(100000L)); Map<SchemaVersion, InternalSchema> schemas = new HashMap<>(); SchemaVersion schemaVersion1 = new SchemaVersion(1, ""); schemas.put(schemaVersion1, internalSchema); @@ -385,7 +387,7 @@ public class TestIcebergSync { .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, internalSchema, @@ -451,7 +453,7 @@ public class TestIcebergSync { .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, internalSchema, @@ -514,7 +516,7 @@ public class TestIcebergSync { .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, internalSchema, @@ -577,7 +579,7 @@ public class TestIcebergSync { .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, internalSchema, @@ -655,7 +657,7 @@ public class TestIcebergSync { .build(); InternalTable table = - getOneTable( + getInternalTable( tableName, basePath, internalSchema, @@ -730,7 +732,7 @@ public class TestIcebergSync { .build(); } - private InternalTable getOneTable( + private InternalTable getInternalTable( String tableName, Path basePath, InternalSchema schema, diff --git a/demo/README.md b/demo/README.md index 6325172f..db0bfe8f 100644 --- a/demo/README.md +++ b/demo/README.md @@ -1,7 +1,7 @@ # Running a Local Demo -This demo was created for the 2023 Open Source Data Summit. It shows how OneTable can be used with two existing datasets. +This demo was created for the 2023 Open Source Data Summit. It shows how XTable can be used with two existing datasets. -Use `./start_demo.sh` to spin up a local notebook with a scala interpreter, Hive Metastore, Presto and Trino in docker containers. The script will first build the OneTable jars required for the demo and then start the containers. +Use `./start_demo.sh` to spin up a local notebook with a scala interpreter, Hive Metastore, Presto and Trino in docker containers. The script will first build the XTable jars required for the demo and then start the containers. 
## Accessing Services ### Jupyter Notebook diff --git a/demo/notebook/demo.ipynb b/demo/notebook/demo.ipynb index 244c52b4..d242fc43 100644 --- a/demo/notebook/demo.ipynb +++ b/demo/notebook/demo.ipynb @@ -79,7 +79,7 @@ "id": "620912ec-ac97-456e-9e6f-11559a729c7a", "metadata": {}, "source": [ - "## What happens in a OneTable Sync?" + "## What happens in a XTable Sync?" ] }, { @@ -101,7 +101,7 @@ "metadata": {}, "source": [ "# Demo\n", - "We start out our demo with 2 datasets. One in Hudi (hudi_dimCustomer) and one in Delta Lake (delta_dimGeography). There is a team that is using tools that only support Iceberg so we'll start by using OneTable to expose the Hudi table as an Iceberg table synced with our Hive Metastore that is connected to a locally running Trino container." + "We start out our demo with 2 datasets. One in Hudi (hudi_dimCustomer) and one in Delta Lake (delta_dimGeography). There is a team that is using tools that only support Iceberg so we'll start by using XTable to expose the Hudi table as an Iceberg table synced with our Hive Metastore that is connected to a locally running Trino container." ] }, { @@ -140,7 +140,7 @@ "metadata": {}, "source": [ "## Incremental Updates\n", - "OneTable tracks metadata in the target table format that tracks what was last synced. This allows us to only sync commits that have happened since the last commit. \n", + "XTable tracks metadata in the target table format that tracks what was last synced. This allows us to only sync commits that have happened since the last commit. \n", "We'll insert more records into the Hudi table to create a new commit. The file(s) added will then be synced to existing Iceberg metadata along with any updates to the table schema." ] }, @@ -283,7 +283,7 @@ "source": [ "## Querying from Presto\n", "\n", - "In the same organization, there is another team who are only authorized to use Presto. They access `presto-cli` for running their analytics workloads. And they want the ability to query all three formats at the same time; and they also would like the ability to modify the table (run `INSERT` & `DELETE` SQL statements). So we'll have to leave tables in Iceberg format which supports writes from Presto. The good thing is, its just a simple translation now using OneTable." + "In the same organization, there is another team who are only authorized to use Presto. They access `presto-cli` for running their analytics workloads. And they want the ability to query all three formats at the same time; and they also would like the ability to modify the table (run `INSERT` & `DELETE` SQL statements). So we'll have to leave tables in Iceberg format which supports writes from Presto. The good thing is, its just a simple translation now using XTable." ] }, { diff --git a/hudi-support/extensions/README.md b/hudi-support/extensions/README.md index 26782ed1..aabd31c9 100644 --- a/hudi-support/extensions/README.md +++ b/hudi-support/extensions/README.md @@ -13,10 +13,10 @@ For example, if you're using the Hudi [quick-start guide](https://hudi.apache.or ## Hudi Streamer Extensions ### When should you use them? -If you want to use OneTable with Hudi [streaming ingestion](https://hudi.apache.org/docs/hoodie_streaming_ingestion) to sync each commit into other table formats. +If you want to use XTable with Hudi [streaming ingestion](https://hudi.apache.org/docs/hoodie_streaming_ingestion) to sync each commit into other table formats. ### How do you use them? 1. 
Add the extensions jar (`hudi-extensions-0.1.0-SNAPSHOT-bundled.jar`) to your class path. 2. Add `org.apache.xtable.hudi.sync.XTableSyncTool` to your list of sync classes 3. Set the following configurations based on your preferences: - `hoodie.onetable.formats: "ICEBERG,DELTA"` (or simply use one format) - `hoodie.onetable.target.metadata.retention.hr: 168` (default retention for target format metadata is 168 hours) \ No newline at end of file + `hoodie.xtable.formats.to.sync: "ICEBERG,DELTA"` (or simply use one format) + `hoodie.xtable.target.metadata.retention.hr: 168` (default retention for target format metadata is 168 hours) \ No newline at end of file diff --git a/hudi-support/extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java b/hudi-support/extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java index c4e6a600..67b00417 100644 --- a/hudi-support/extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java +++ b/hudi-support/extensions/src/main/java/org/apache/xtable/hudi/sync/XTableSyncConfig.java @@ -27,12 +27,12 @@ import org.apache.hudi.sync.common.HoodieSyncConfig; public class XTableSyncConfig extends HoodieSyncConfig implements Serializable { public static final ConfigProperty<String> ONE_TABLE_FORMATS = - ConfigProperty.key("hoodie.onetable.formats.to.sync") + ConfigProperty.key("hoodie.xtable.formats.to.sync") .defaultValue("DELTA,ICEBERG") .withDocumentation("Comma separated list of formats to sync."); public static final ConfigProperty<Integer> ONE_TABLE_TARGET_METADATA_RETENTION_HOURS = - ConfigProperty.key("hoodie.onetable.target.metadata.retention.hr") + ConfigProperty.key("hoodie.xtable.target.metadata.retention.hr") .defaultValue(24 * 7) .withDocumentation("Retention in hours for metadata in target table."); diff --git a/utilities/src/main/java/org/apache/xtable/utilities/RunSync.java b/utilities/src/main/java/org/apache/xtable/utilities/RunSync.java index 09ce2c2b..7fb65c42 100644 --- a/utilities/src/main/java/org/apache/xtable/utilities/RunSync.java +++ b/utilities/src/main/java/org/apache/xtable/utilities/RunSync.java @@ -104,7 +104,7 @@ public class RunSync { try { cmd = parser.parse(OPTIONS, args); } catch (ParseException e) { - new HelpFormatter().printHelp("onetable.jar", OPTIONS, true); + new HelpFormatter().printHelp("xtable.jar", OPTIONS, true); return; } @@ -184,7 +184,7 @@ public class RunSync { @VisibleForTesting static Configuration loadHadoopConf(byte[] customConfig) { Configuration conf = new Configuration(); - conf.addResource("onetable-hadoop-defaults.xml"); + conf.addResource("xtable-hadoop-defaults.xml"); if (customConfig != null) { conf.addResource(new ByteArrayInputStream(customConfig), "customConfigStream"); } @@ -203,7 +203,7 @@ public class RunSync { throws IOException { // get resource stream from default converter config yaml file try (InputStream inputStream = - RunSync.class.getClassLoader().getResourceAsStream("onetable-conversion-defaults.yaml")) { + RunSync.class.getClassLoader().getResourceAsStream("xtable-conversion-defaults.yaml")) { TableFormatConverters converters = YAML_MAPPER.readValue(inputStream, TableFormatConverters.class); if (customConfigs != null) { diff --git a/utilities/src/main/resources/onetable-conversion-defaults.yaml b/utilities/src/main/resources/xtable-conversion-defaults.yaml similarity index 100% rename from utilities/src/main/resources/onetable-conversion-defaults.yaml rename to utilities/src/main/resources/xtable-conversion-defaults.yaml diff --git 
a/utilities/src/main/resources/onetable-hadoop-defaults.xml b/utilities/src/main/resources/xtable-hadoop-defaults.xml similarity index 100% rename from utilities/src/main/resources/onetable-hadoop-defaults.xml rename to utilities/src/main/resources/xtable-hadoop-defaults.xml diff --git a/website/docs/biglake-metastore.md b/website/docs/biglake-metastore.md index cb07e933..aaf63e1c 100644 --- a/website/docs/biglake-metastore.md +++ b/website/docs/biglake-metastore.md @@ -48,7 +48,7 @@ In this tutorial we'll use `us-west1` region. projects/<yourProjectName>/locations/us-west1/catalogs ``` ```rest md title="catalogId" -onetable +xtable ``` ### Create BigLake Database @@ -56,10 +56,10 @@ Use the `Try this method` on Google's REST reference docs for [`projects.locations.catalogs.databases.create`](https://cloud.google.com/bigquery/docs/reference/biglake/rest/v1/projects.locations.catalogs/create) method to create a database. ```rest md title="parent" -projects/<yourProjectName>/locations/us-west1/catalogs/onetable/databases +projects/<yourProjectName>/locations/us-west1/catalogs/xtable/databases ``` ```rest md title="databaseId" -onetable_synced_db +xtable_synced_db ``` ### Running sync @@ -96,7 +96,7 @@ datasets: - tableBasePath: gs://path/to/source/data tableName: table_name - namespace: onetable_synced_db + namespace: xtable_synced_db ``` </TabItem> @@ -107,7 +107,7 @@ An example `catalog.yaml` file to sync with BigLake Metastore: ```yaml md title="yaml" catalogImpl: org.apache.iceberg.gcp.biglake.BigLakeCatalog -catalogName: onetable +catalogName: xtable catalogOptions: gcp_project: <yourProjectName> gcp_location: us-west1 @@ -132,7 +132,7 @@ We can use `Try this method` option on Google's REST reference docs for [`projects.locations.catalogs.databases.tables.get`](https://cloud.google.com/bigquery/docs/reference/biglake/rest/v1/projects.locations.catalogs.databases.tables/get) method to view the created table. ```rest md title="name" -projects/<yourProjectName>/locations/us-west1/catalogs/onetable/databases/onetable_synced_db/tables/table_name +projects/<yourProjectName>/locations/us-west1/catalogs/xtable/databases/xtable_synced_db/tables/table_name ``` ## Conclusion diff --git a/website/docs/bigquery.md b/website/docs/bigquery.md index 14265afa..aa9036ea 100644 --- a/website/docs/bigquery.md +++ b/website/docs/bigquery.md @@ -14,7 +14,7 @@ Apache XTable™ (Incubating) outputs metadata files for Iceberg target format s to read the BigLake tables. ```sql md title="sql" -CREATE EXTERNAL TABLE onetable_synced_iceberg_table +CREATE EXTERNAL TABLE xtable_synced_iceberg_table WITH CONNECTION `myproject.mylocation.myconnection` OPTIONS ( format = 'ICEBERG', diff --git a/website/docs/glue-catalog.md b/website/docs/glue-catalog.md index 1d8f970e..14bb4c38 100644 --- a/website/docs/glue-catalog.md +++ b/website/docs/glue-catalog.md @@ -81,7 +81,7 @@ datasets: Replace with appropriate values for `sourceFormat`, `tableBasePath` and `tableName` fields. ::: -From your terminal under the cloned onetable directory, run the sync process using the below command. +From your terminal under the cloned xtable directory, run the sync process using the below command. ```shell md title="shell" java -jar utilities/target/utilities-0.1.0-SNAPSHOT-bundled.jar --datasetConfig my_config.yaml @@ -96,7 +96,7 @@ with metadata files which contains the information that helps query engines inte From your terminal, create a glue database. 
```shell md title="shell" - aws glue create-database --database-input "{\"Name\":\"onetable_synced_db\"}" + aws glue create-database --database-input "{\"Name\":\"xtable_synced_db\"}" ``` From your terminal, create a glue crawler. Modify the `<yourAccountId>`, `<yourRoleName>` @@ -121,21 +121,21 @@ values={[ <TabItem value="hudi"> ```shell md title="shell" -aws glue create-crawler --name onetable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database onetable_synced_db --targets "{\"HudiTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" +aws glue create-crawler --name xtable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database xtable_synced_db --targets "{\"HudiTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" ``` </TabItem> <TabItem value="delta"> ```shell md title="shell" -aws glue create-crawler --name onetable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database onetable_synced_db --targets "{\"DeltaTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" +aws glue create-crawler --name xtable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database xtable_synced_db --targets "{\"DeltaTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" ``` </TabItem> <TabItem value="iceberg"> ```shell md title="shell" -aws glue create-crawler --name onetable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database onetable_synced_db --targets "{\"IcebergTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" +aws glue create-crawler --name xtable_crawler --role arn:aws:iam::${accountId}:role/service-role/${roleName} --database xtable_synced_db --targets "{\"IcebergTargets\":[{\"Paths\":[\"${s3DataPath}\"]}]}" ``` </TabItem> @@ -144,7 +144,7 @@ aws glue create-crawler --name onetable_crawler --role arn:aws:iam::${accountId} From your terminal, run the glue crawler. ```shell md title="shell" - aws glue start-crawler --name onetable_crawler + aws glue start-crawler --name xtable_crawler ``` Once the crawler succeeds, you’ll be able to query this Iceberg table from Athena, EMR and/or Redshift query engines. @@ -174,7 +174,7 @@ After the crawler runs successfully, you can inspect the catalogued tables in Gl and also query the table in Amazon Athena like below: ```sql -SELECT * FROM onetable_synced_db.<table_name>; +SELECT * FROM xtable_synced_db.<table_name>; ``` </TabItem> @@ -185,7 +185,7 @@ After the crawler runs successfully, you can inspect the catalogued tables in Gl and also query the table in Amazon Athena like below: ```sql -SELECT * FROM onetable_synced_db.<table_name>; +SELECT * FROM xtable_synced_db.<table_name>; ``` </TabItem> diff --git a/website/docs/how-to.md b/website/docs/how-to.md index fcfc4784..8dea72f7 100644 --- a/website/docs/how-to.md +++ b/website/docs/how-to.md @@ -219,7 +219,7 @@ df = spark.createDataFrame(records, schema) ### Running sync -Create `my_config.yaml` in the cloned onetable directory. +Create `my_config.yaml` in the cloned xtable directory. <Tabs groupId="table-format" diff --git a/website/docs/redshift.md b/website/docs/redshift.md index ed62ba79..473dc03c 100644 --- a/website/docs/redshift.md +++ b/website/docs/redshift.md @@ -11,10 +11,10 @@ For more information on creating external schemas, refer to [Redshift docs](https://docs.aws.amazon.com/redshift/latest/dg/c-spectrum-external-schemas.html#c-spectrum-external-catalogs). 
### Hudi and Iceberg tables -The following query creates an external schema `onetable_synced_schema` using the Glue database `glue_database_name` +The following query creates an external schema `xtable_synced_schema` using the Glue database `glue_database_name` ```sql md title="sql" -CREATE EXTERNAL SCHEMA onetable_synced_schema +CREATE EXTERNAL SCHEMA xtable_synced_schema FROM DATA CATALOG DATABASE <glue_database_name> IAM_ROLE 'arn:aws:iam::<accountId>:role/<roleName>' @@ -30,7 +30,7 @@ Redshift can infer the tables present in the Glue database automatically. You ca ```sql md title="sql" SELECT * -FROM onetable_synced_schema.<table_name>; +FROM xtable_synced_schema.<table_name>; ``` ### Delta Lake table @@ -48,7 +48,7 @@ You have two options to create and query Delta tables in Redshift Spectrum: You can then use a similar approach to query the Hudi and Iceberg tables mentioned above. ```sql md title="sql" -CREATE EXTERNAL SCHEMA onetable_synced_schema_delta +CREATE EXTERNAL SCHEMA xtable_synced_schema_delta FROM DATA CATALOG DATABASE <delta_glue_database_name> IAM_ROLE 'arn:aws:iam::<accountId>:role/<roleName>' @@ -57,5 +57,5 @@ CREATE EXTERNAL DATABASE IF NOT EXISTS; ```sql md title="sql" SELECT * -FROM onetable_synced_schema_delta.<table_name>; +FROM xtable_synced_schema_delta.<table_name>; ``` diff --git a/website/docs/unity-catalog.md b/website/docs/unity-catalog.md index 0b8819ff..331fdb3f 100644 --- a/website/docs/unity-catalog.md +++ b/website/docs/unity-catalog.md @@ -54,11 +54,11 @@ At this point, if you check your bucket path, you will be able to see `_delta_lo In your Databricks workspace, under SQL editor, run the following queries. ```sql md title="SQL" -CREATE CATALOG onetable; +CREATE CATALOG xtable; -CREATE SCHEMA onetable.synced_delta_schema; +CREATE SCHEMA xtable.synced_delta_schema; -CREATE TABLE onetable.synced_delta_schema.<table_name> +CREATE TABLE xtable.synced_delta_schema.<table_name> USING DELTA LOCATION 's3://path/to/source/data'; ``` @@ -72,7 +72,7 @@ You can now see the created delta table in **Unity Catalog** under **Catalog** a `synced_delta_schema` and also query the table in the SQL editor: ```sql -SELECT * FROM onetable.synced_delta_schema.<table_name>; +SELECT * FROM xtable.synced_delta_schema.<table_name>; ``` ## Conclusion