pvillard31 commented on code in PR #10996: URL: https://github.com/apache/nifi/pull/10996#discussion_r2923714133
########## nifi-extension-bundles/nifi-iceberg-bundle/nifi-iceberg-processors/src/main/java/org/apache/nifi/processors/iceberg/record/RecordConverter.java: ########## @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.processors.iceberg.record; + +import org.apache.nifi.serialization.record.DataType; +import org.apache.nifi.serialization.record.MapRecord; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordField; +import org.apache.nifi.serialization.record.RecordFieldType; +import org.apache.nifi.serialization.record.RecordSchema; + +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Record Converter handles translating field values to types compatible with Apache Iceberg Records + */ +class RecordConverter { Review Comment: We do not support nested records. The PartitionKeyRecord does handle nested structs via its STRUCT converter, but not here. Might be an acceptable limitation for now but wanted to point it out. 
########## nifi-extension-bundles/nifi-iceberg-bundle/nifi-iceberg-parquet-writer/src/main/java/org/apache/nifi/services/iceberg/parquet/io/PartitionKeyRecord.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.services.iceberg.parquet.io; + +import org.apache.iceberg.StructLike; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; + +import java.lang.reflect.Array; +import java.nio.ByteBuffer; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.function.Function; + +/** + * Partition Key Record Wrapper based on Apache Iceberg InternalRecordWrapper to avoid iceberg-data dependency + */ +class PartitionKeyRecord implements StructLike { + private final Function<Object, Object>[] converters; + + private StructLike wrapped = null; + + @SuppressWarnings("unchecked") Review Comment: Do we want to keep those? 
########## nifi-extension-bundles/nifi-iceberg-bundle/nifi-iceberg-parquet-writer/src/main/java/org/apache/nifi/services/iceberg/parquet/io/PartitionKeyRecord.java: ########## @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.nifi.services.iceberg.parquet.io; + +import org.apache.iceberg.StructLike; +import org.apache.iceberg.types.Type; +import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.DateTimeUtil; + +import java.lang.reflect.Array; +import java.nio.ByteBuffer; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.OffsetDateTime; +import java.util.List; +import java.util.function.Function; + +/** + * Partition Key Record Wrapper based on Apache Iceberg InternalRecordWrapper to avoid iceberg-data dependency + */ +class PartitionKeyRecord implements StructLike { + private final Function<Object, Object>[] converters; + + private StructLike wrapped = null; + + @SuppressWarnings("unchecked") + PartitionKeyRecord(final Types.StructType structType) { + final List<Types.NestedField> fields = structType.fields(); + + converters = fields.stream() + .map(Types.NestedField::type) + 
.map(PartitionKeyRecord::getConverter) + .toArray(length -> (Function<Object, Object>[]) Array.newInstance(Function.class, length)); + } + + @Override + public int size() { + return wrapped.size(); + } + + @Override + public <T> T get(final int position, final Class<T> javaClass) { + final T processed; + + final Function<Object, Object> converter = converters[position]; + if (converter == null) { + processed = wrapped.get(position, javaClass); + } else { + final Object value = wrapped.get(position, Object.class); + if (value == null) { + processed = null; + } else { + final Object converted = converter.apply(value); + processed = javaClass.cast(converted); + } + } + + return processed; + } + + @Override + public <T> void set(final int position, final T value) { + throw new UnsupportedOperationException("Set method not supported"); + } + + PartitionKeyRecord wrap(final StructLike wrapped) { + this.wrapped = wrapped; + return this; + } + + private static Function<Object, Object> getConverter(final Type fieldType) { + final Type.TypeID typeId = fieldType.typeId(); + + final Function<Object, Object> converter; + + if (Type.TypeID.TIMESTAMP_NANO == typeId) { + final Types.TimestampNanoType timestampNanoType = (Types.TimestampNanoType) fieldType; + if (timestampNanoType.shouldAdjustToUTC()) { + converter = dateTime -> DateTimeUtil.nanosFromTimestamptz((OffsetDateTime) dateTime); + } else { + converter = dateTime -> DateTimeUtil.nanosFromTimestamp((LocalDateTime) dateTime); + } + } else if (Type.TypeID.TIMESTAMP == typeId) { + final Types.TimestampType timestampType = (Types.TimestampType) fieldType; + if (timestampType.shouldAdjustToUTC()) { + converter = dateTime -> DateTimeUtil.nanosFromTimestamptz((OffsetDateTime) dateTime); + } else { + converter = dateTime -> DateTimeUtil.nanosFromTimestamp((LocalDateTime) dateTime); + } Review Comment: Copy/paste I guess? 
```suggestion final Types.TimestampType timestampType = (Types.TimestampType) fieldType; if (timestampType.shouldAdjustToUTC()) { converter = dateTime -> DateTimeUtil.microsFromTimestamptz((OffsetDateTime) dateTime); } else { converter = dateTime -> DateTimeUtil.microsFromTimestamp((LocalDateTime) dateTime); } ``` The existing test `testWriteDataFilesPartitionedTimestamp` does not catch this because it only verifies that `dataFiles` has length 1 and a `recordCount()` of 1; it never asserts on the actual partition values of the output DataFile. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
