jonvex commented on code in PR #11130:
URL: https://github.com/apache/hudi/pull/11130#discussion_r1586941487
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieConversionUtils.scala:
##########
@@ -30,9 +31,7 @@ object HoodieConversionUtils {
* a mutable one)
*/
def mapAsScalaImmutableMap[K, V](map: ju.Map[K, V]): Map[K, V] = {
- // NOTE: We have to use deprecated [[JavaConversions]] to stay compatible
w/ Scala 2.11
Review Comment:
Is this not compatible with Scala 2.11 anymore?
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieDatasetBulkInsertHelper.scala:
##########
@@ -241,17 +241,16 @@ object HoodieDatasetBulkInsertHelper
}
}
- private def getPartitionPathFields(config: HoodieWriteConfig): Seq[String] =
{
+ private def getPartitionPathFields(config: HoodieWriteConfig):
mutable.Seq[String] = {
Review Comment:
what is the purpose of this change?
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ValidateMetadataTableFilesProcedure.scala:
##########
@@ -115,10 +115,10 @@ class ValidateMetadataTableFilesProcedure() extends
BaseProcedure with Procedure
rows.add(Row(partition, file, doesFsFileExists,
doesMetadataFileExists, fsFileLength, metadataFileLength))
}
}
- if (metadataPathInfoList.length != pathInfoList.length) {
- logError(" FS and metadata files count not matching for " + partition
+ ". FS files count " + pathInfoList.length + ", metadata base files count " +
metadataPathInfoList.length)
+ if (metadataPathInfoList.size() != pathInfoList.size()) {
Review Comment:
Are `size` and `length` the same here? Usually one is the capacity and the other is
the usage.
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/command/procedures/ExportInstantsProcedure.scala:
##########
@@ -176,12 +176,12 @@ class ExportInstantsProcedure extends BaseProcedure with
ProcedureBuilder with L
@throws[Exception]
private def copyNonArchivedInstants(metaClient: HoodieTableMetaClient,
instants: util.List[HoodieInstant], limit: Int, localFolder: String): Int = {
- import scala.collection.JavaConversions._
+ import scala.collection.JavaConverters._
var copyCount = 0
- if (instants.nonEmpty) {
+ if (!instants.isEmpty) {
Review Comment:
They got rid of nonEmpty!!!?
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/analysis/TestHoodiePruneFileSourcePartitions.scala:
##########
@@ -107,12 +107,12 @@ class TestHoodiePruneFileSourcePartitions extends
HoodieClientTestBase with Scal
case "eager" =>
// NOTE: In case of partitioned table 3 files will be created,
while in case of non-partitioned just 1
if (partitioned) {
- assertEquals(1275, f.stats.sizeInBytes.longValue() / 1024)
- assertEquals(1275, lr.stats.sizeInBytes.longValue() / 1024)
+ assertEquals(1275, f.stats.sizeInBytes.longValue / 1024)
Review Comment:
Do they now require omitting the empty parentheses `()` on these calls?
##########
hudi-spark-datasource/hudi-spark3.5.x/src/test/java/org/apache/hudi/spark3/internal/TestReflectUtil.java:
##########
@@ -42,7 +44,7 @@ public void testDataSourceWriterExtraCommitMetadata() throws
Exception {
InsertIntoStatement newStatment = ReflectUtil.createInsertInto(
statement.table(),
statement.partitionSpec(),
- scala.collection.immutable.List.empty(),
+ ((scala.collection.immutable.Seq<String>)
scala.collection.immutable.Seq$.MODULE$.empty()).toSeq(),
Review Comment:
?. I guess disabled because it doesn't work?
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/HoodieInternalRowUtils.scala:
##########
@@ -18,11 +18,12 @@
package org.apache.spark.sql
-import org.apache.avro.Schema
Review Comment:
Not sure what is going on with this file?
##########
.github/workflows/bot.yml:
##########
@@ -454,17 +486,21 @@ jobs:
env:
FLINK_PROFILE: ${{ matrix.flinkProfile }}
SPARK_PROFILE: ${{ matrix.sparkProfile }}
- SCALA_PROFILE: 'scala-2.12'
+ SCALA_PROFILE: ${{ matrix.scalaProfile }}
run: |
- mvn clean package -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE"
-DdeployArtifacts=true -DskipTests=true $MVN_ARGS
+ if [ "$SCALA_PROFILE" == "scala-2.13" ]; then
+ mvn clean package -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE"
-DdeployArtifacts=true -DskipTests=true $MVN_ARGS -pl
packaging/hudi-hadoop-mr-bundle,packaging/hudi-kafka-connect-bundle,packaging/hudi-spark-bundle,packaging/hudi-utilities-bundle,packaging/hudi-utilities-slim-bundle,packaging/hudi-metaserver-server-bundle
-am
+ else
+ mvn clean package -T 2 -D"$SCALA_PROFILE" -D"$SPARK_PROFILE"
-DdeployArtifacts=true -DskipTests=true $MVN_ARGS
Review Comment:
I think maybe this line was supposed to be deleted?
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/AvroConversionUtils.scala:
##########
@@ -18,20 +18,20 @@
package org.apache.hudi
-import org.apache.avro.Schema.Type
-import org.apache.avro.generic.GenericRecord
-import org.apache.avro.{JsonProperties, Schema}
import org.apache.hudi.HoodieSparkUtils.sparkAdapter
import org.apache.hudi.avro.AvroSchemaUtils
import org.apache.hudi.exception.SchemaCompatibilityException
import org.apache.hudi.internal.schema.HoodieSchemaException
+
+import org.apache.avro.Schema.Type
+import org.apache.avro.generic.GenericRecord
+import org.apache.avro.{JsonProperties, Schema}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
import org.apache.spark.sql.{Dataset, Row, SparkSession}
-import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
Review Comment:
Is there a way we can move to just the hudi javascala converters?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala:
##########
@@ -308,7 +309,7 @@ class ColumnStatsIndexSupport(spark: SparkSession,
}
}
- Row(coalescedRowValuesSeq:_*)
+ Row(coalescedRowValuesSeq.toSeq: _*)
Review Comment:
Is this name still correct if we need to call .toSeq on it?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -263,10 +262,10 @@ object DefaultSource {
Option(schema)
}
- val useNewParquetFileFormat =
parameters.getOrDefault(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(),
+ val useNewParquetFileFormat =
parameters.asJava.getOrDefault(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(),
HoodieReaderConfig.FILE_GROUP_READER_ENABLED.defaultValue().toString).toBoolean
&&
!metaClient.isMetadataTable && (globPaths == null ||
globPaths.isEmpty) &&
- !parameters.getOrDefault(SCHEMA_EVOLUTION_ENABLED.key(),
SCHEMA_EVOLUTION_ENABLED.defaultValue().toString).toBoolean &&
+ !parameters.asJava.getOrDefault(SCHEMA_EVOLUTION_ENABLED.key(),
SCHEMA_EVOLUTION_ENABLED.defaultValue().toString).toBoolean &&
Review Comment:
`parameters` is a `Map[String, String]`, so why do we need to convert it to a Java map?
Did they really get rid of that in Scala 2.13?!?
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala:
##########
@@ -226,7 +226,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase
with ScalaAssertionSup
.save(basePath)
partitionPaths = FSUtils.getAllPartitionPaths(new
HoodieSparkEngineContext(jsc), HoodieMetadataConfig.newBuilder().build(),
basePath)
- assertEquals(partitionPaths.length, 1)
+ assertEquals(partitionPaths.size(), 1)
Review Comment:
same as other comment
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala:
##########
@@ -959,7 +959,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase
with ScalaAssertionSup
assertEquals(insert1Cnt, hoodieROViewDF1.count())
val commitInstantTime1 = HoodieDataSourceHelpers.latestCommit(storage,
basePath)
- val records2 = recordsToStrings(inserts2Dup ++ inserts2New).toList
+ val records2 = recordsToStrings((inserts2Dup.asScala ++
inserts2New.asScala).asJava).asScala.toList
Review Comment:
😬
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/dml/TestHoodieTableValuedFunction.scala:
##########
@@ -689,6 +690,6 @@ class TestHoodieTableValuedFunction extends
HoodieSparkSqlTestBase {
}
}
}
- spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key)
+ spark.sessionState.conf.unsetConf(SPARK_SQL_INSERT_INTO_OPERATION.key)*/
Review Comment:
Are we going to uncomment?
##########
hudi-spark-datasource/hudi-spark3-common/src/main/java/org/apache/hudi/spark3/internal/ReflectUtil.java:
##########
@@ -23,7 +23,7 @@
import org.apache.spark.sql.catalyst.util.DateFormatter;
import scala.Option;
-import scala.collection.Seq;
+import scala.collection.immutable.Seq;
import scala.collection.immutable.Map;
Review Comment:
combine into scala.collection.immutable.{Seq, Map} ?
##########
hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/procedure/TestHdfsParquetImportProcedure.scala:
##########
@@ -112,7 +112,7 @@ class TestHdfsParquetImportProcedure extends
HoodieSparkProcedureTestBase {
@throws[ParseException]
@throws[IOException]
def createInsertRecords(srcFolder: Path): util.List[GenericRecord] = {
- import scala.collection.JavaConversions._
+ import scala.collection.JavaConverters._
Review Comment:
Should we just do the import at the top of the file instead of in each individual method?
##########
packaging/bundle-validation/base/Dockerfile:
##########
@@ -51,9 +52,16 @@ RUN wget
https://archive.apache.org/dist/flink/flink-$FLINK_VERSION/flink-$FLINK
&& rm $WORKDIR/flink-$FLINK_VERSION-bin-scala_2.12.tgz
ENV FLINK_HOME=$WORKDIR/flink-$FLINK_VERSION
-RUN wget
https://archive.apache.org/dist/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_VERSION.tgz
-P "$WORKDIR" \
- && tar -xf
$WORKDIR/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_VERSION.tgz -C $WORKDIR/ \
- && rm $WORKDIR/spark-$SPARK_VERSION-bin-hadoop$SPARK_HADOOP_VERSION.tgz
+RUN if [ "$SCALA_VERSION" = "2.13" ]; then \
Review Comment:
Spark version is set to 3.1.3 so do we need this?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -263,10 +262,10 @@ object DefaultSource {
Option(schema)
}
- val useNewParquetFileFormat =
parameters.getOrDefault(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(),
+ val useNewParquetFileFormat =
parameters.asJava.getOrDefault(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(),
HoodieReaderConfig.FILE_GROUP_READER_ENABLED.defaultValue().toString).toBoolean
&&
!metaClient.isMetadataTable && (globPaths == null ||
globPaths.isEmpty) &&
- !parameters.getOrDefault(SCHEMA_EVOLUTION_ENABLED.key(),
SCHEMA_EVOLUTION_ENABLED.defaultValue().toString).toBoolean &&
+ !parameters.asJava.getOrDefault(SCHEMA_EVOLUTION_ENABLED.key(),
SCHEMA_EVOLUTION_ENABLED.defaultValue().toString).toBoolean &&
Review Comment:
I see you used getOrElse elsewhere right?
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/util/JavaScalaConverters.scala:
##########
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hudi.util
+
+import scala.collection.JavaConverters._
+
+/**
+ * Utils that do conversion between Java and Scala collections.
+ */
+object JavaScalaConverters {
Review Comment:
If we only are going to have these 4 conversions then the naming is fine.
But if we add more, we should make "JavaToScalaConverters" and
"ScalaToJavaConverters" so the methods can just be "convertList",
"convertIterator", etc
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]