alexeykudinkin commented on code in PR #6016:
URL: https://github.com/apache/hudi/pull/6016#discussion_r931715112
##########
hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java:
##########
@@ -244,12 +255,7 @@ private Map<PartitionPath, FileStatus[]>
loadPartitionPathFiles() {
);
fetchedPartitionToFiles =
- FSUtils.getFilesInPartitions(
- engineContext,
- metadataConfig,
- basePath,
- fullPartitionPathsMapToFetch.keySet().toArray(new String[0]),
- fileSystemStorageConfig.getSpillableDir())
Review Comment:
Good call
##########
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestUtils.java:
##########
@@ -124,28 +125,28 @@ public static HoodieTableMetaClient init(Configuration
hadoopConf, String basePa
}
public static HoodieTableMetaClient init(Configuration hadoopConf, String
basePath, HoodieTableType tableType,
- Properties properties)
- throws IOException {
- properties = HoodieTableMetaClient.withPropertyBuilder()
- .setTableName(RAW_TRIPS_TEST_NAME)
- .setTableType(tableType)
- .setPayloadClass(HoodieAvroPayload.class)
- .fromProperties(properties)
- .build();
- return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf,
basePath, properties);
+ Properties properties) throws
IOException {
+ return init(hadoopConf, basePath, tableType, properties, null);
}
public static HoodieTableMetaClient init(Configuration hadoopConf, String
basePath, HoodieTableType tableType,
Properties properties, String
databaseName)
throws IOException {
- properties = HoodieTableMetaClient.withPropertyBuilder()
- .setDatabaseName(databaseName)
- .setTableName(RAW_TRIPS_TEST_NAME)
- .setTableType(tableType)
- .setPayloadClass(HoodieAvroPayload.class)
- .fromProperties(properties)
- .build();
- return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf,
basePath, properties);
+ HoodieTableMetaClient.PropertyBuilder builder =
+ HoodieTableMetaClient.withPropertyBuilder()
+ .setDatabaseName(databaseName)
+ .setTableName(RAW_TRIPS_TEST_NAME)
+ .setTableType(tableType)
+ .setPayloadClass(HoodieAvroPayload.class);
+
+ String keyGen =
properties.getProperty("hoodie.datasource.write.keygenerator.class");
+ if (!Objects.equals(keyGen,
"org.apache.hudi.keygen.NonpartitionedKeyGenerator")) {
+ builder.setPartitionFields("some_nonexistent_field");
Review Comment:
I don't think we should actually standardize on this one; it's just to stop
the bleeding in misconfigured tests
##########
hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java:
##########
@@ -324,6 +335,26 @@ private void doRefresh() {
LOG.info(String.format("Refresh table %s, spent: %d ms",
metaClient.getTableConfig().getTableName(), duration));
}
+ private Map<String, FileStatus[]>
getAllFilesInPartitionsUnchecked(Collection<String>
fullPartitionPathsMapToFetch) {
+ try {
+ return tableMetadata.getAllFilesInPartitions(new
ArrayList<>(fullPartitionPathsMapToFetch));
+ } catch (IOException e) {
+ throw new HoodieIOException("Failed to list partition paths for a
table", e);
+ }
+ }
+
+ private List<String> getAllPartitionPathsUnchecked() {
+ try {
+ if (partitionColumns.length == 0) {
+ return Collections.singletonList("");
Review Comment:
A non-partitioned table has exactly one partition, which we designate with ""
##########
hudi-common/src/main/java/org/apache/hudi/hadoop/CachingPath.java:
##########
@@ -32,11 +33,12 @@
* NOTE: This class is thread-safe
*/
@ThreadSafe
-public class CachingPath extends Path implements Serializable {
Review Comment:
Yep
##########
hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java:
##########
@@ -138,13 +144,17 @@ public FileStatus[] getAllFilesInPartition(Path
partitionPath)
}
}
- return new FileSystemBackedTableMetadata(getEngineContext(), hadoopConf,
dataBasePath, metadataConfig.shouldAssumeDatePartitioning())
+ return new FileSystemBackedTableMetadata(getEngineContext(), hadoopConf,
dataBasePath.toString(), metadataConfig.shouldAssumeDatePartitioning())
.getAllFilesInPartition(partitionPath);
}
@Override
public Map<String, FileStatus[]> getAllFilesInPartitions(List<String>
partitions)
throws IOException {
+ if (partitions.isEmpty()) {
+ return Collections.emptyMap();
Review Comment:
Agree — this is somewhat dissonant, but that's just the way things are: for
non-partitioned tables it's assumed that the only partition present is
identified by ""
##########
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java:
##########
@@ -398,7 +398,7 @@ public HoodieArchivedTimeline getArchivedTimeline(String
startTs) {
public void validateTableProperties(Properties properties) {
// Once meta fields are disabled, it cant be re-enabled for a given table.
if (!getTableConfig().populateMetaFields()
- && Boolean.parseBoolean((String)
properties.getOrDefault(HoodieTableConfig.POPULATE_META_FIELDS.key(),
HoodieTableConfig.POPULATE_META_FIELDS.defaultValue()))) {
+ && Boolean.parseBoolean((String)
properties.getOrDefault(HoodieTableConfig.POPULATE_META_FIELDS.key(),
HoodieTableConfig.POPULATE_META_FIELDS.defaultValue().toString()))) {
Review Comment:
It's not
##########
hudi-common/src/main/java/org/apache/hudi/common/table/log/block/HoodieHFileDataBlock.java:
##########
@@ -167,9 +164,8 @@ protected ClosableIterator<IndexedRecord>
deserializeRecords(byte[] content) thr
// Get schema from the header
Schema writerSchema = new
Schema.Parser().parse(super.getLogBlockHeader().get(HeaderMetadataType.SCHEMA));
- FileSystem fs = FSUtils.getFs(pathForReader.toString(), new
Configuration());
// Read the content
- HoodieHFileReader<IndexedRecord> reader = new HoodieHFileReader<>(fs,
pathForReader, content, Option.of(writerSchema));
+ HoodieHFileReader<IndexedRecord> reader = new HoodieHFileReader<>(null,
pathForReader, content, Option.of(writerSchema));
Review Comment:
Yeah, I checked it and it actually doesn't use `fs` at all
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala:
##########
@@ -341,4 +340,9 @@ object SparkHoodieTableFileIndex {
override def invalidate(): Unit = cache.invalidateAll()
}
}
+
+ private def shouldValidatePartitionColumns(spark: SparkSession): Boolean = {
+ // NOTE: We can't use helper, method nor the config-entry to stay
compatible w/ Spark 2.4
+
spark.sessionState.conf.getConfString("spark.sql.sources.validatePartitionColumns",
"true").toBoolean
Review Comment:
We don't need to
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]