a2l007 commented on code in PR #14329: URL: https://github.com/apache/druid/pull/14329#discussion_r1225897215
########## extensions-contrib/druid-iceberg-extensions/pom.xml: ########## @@ -0,0 +1,314 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + ~ Licensed to the Apache Software Foundation (ASF) under one + ~ or more contributor license agreements. See the NOTICE file + ~ distributed with this work for additional information + ~ regarding copyright ownership. The ASF licenses this file + ~ to you under the Apache License, Version 2.0 (the + ~ "License"); you may not use this file except in compliance + ~ with the License. You may obtain a copy of the License at + ~ + ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ + ~ Unless required by applicable law or agreed to in writing, + ~ software distributed under the License is distributed on an + ~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ~ KIND, either express or implied. See the License for the + ~ specific language governing permissions and limitations + ~ under the License. + --> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + + <groupId>org.apache.druid.extensions</groupId> + <artifactId>druid-iceberg-extensions</artifactId> + <name>druid-iceberg-extensions</name> + <description>druid-iceberg-extensions</description> + + + <parent> + <artifactId>druid</artifactId> + <groupId>org.apache.druid</groupId> + <version>27.0.0-SNAPSHOT</version> + <relativePath>../../pom.xml</relativePath> + </parent> + <modelVersion>4.0.0</modelVersion> + + <properties> + </properties> + <dependencies> + <dependency> Review Comment: In the limitations of the extensions, it is specified that hadoop 2.x support is not tested. Do we still need a hadoop2 profile? 
########## processing/src/main/java/org/apache/druid/data/input/AbstractInputSourceAdapter.java: ########## @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.data.input; + +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import org.apache.druid.data.input.impl.LocalInputSourceAdapter; +import org.apache.druid.data.input.impl.SplittableInputSource; +import org.apache.druid.java.util.common.CloseableIterators; +import org.apache.druid.java.util.common.ISE; +import org.apache.druid.java.util.common.parsers.CloseableIterator; + +import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +/** + * A wrapper on top of {@link SplittableInputSource} that handles input source creation. Review Comment: Added more details here. ########## extensions-contrib/druid-iceberg-extensions/src/main/java/org/apache/druid/iceberg/input/IcebergInputSource.java: ########## @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.iceberg.input; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import org.apache.druid.data.input.AbstractInputSourceAdapter; +import org.apache.druid.data.input.InputFormat; +import org.apache.druid.data.input.InputRowSchema; +import org.apache.druid.data.input.InputSource; +import org.apache.druid.data.input.InputSourceReader; +import org.apache.druid.data.input.InputSplit; +import org.apache.druid.data.input.SplitHintSpec; +import org.apache.druid.data.input.impl.SplittableInputSource; +import org.apache.druid.iceberg.filter.IcebergFilter; + +import javax.annotation.Nullable; +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.stream.Stream; + +/** + * Inputsource to ingest data managed by the Iceberg table format. + * This inputsource talks to the configured catalog, executes any configured filters and retrieves the data file paths upto the latest snapshot associated with the iceberg table. + * The data file paths are then provided to a native {@link SplittableInputSource} implementation depending on the warehouse source defined. 
+ */ +public class IcebergInputSource implements SplittableInputSource<List<String>> +{ + public static final String TYPE_KEY = "iceberg"; + + @JsonProperty + private final String tableName; + + @JsonProperty + private final String namespace; + + @JsonProperty + private IcebergCatalog icebergCatalog; + + @JsonProperty + private IcebergFilter icebergFilter; + + @JsonProperty + private AbstractInputSourceAdapter warehouseSource; + + private boolean isLoaded = false; + + @JsonCreator + public IcebergInputSource( + @JsonProperty("tableName") String tableName, + @JsonProperty("namespace") String namespace, + @JsonProperty("icebergFilter") @Nullable IcebergFilter icebergFilter, + @JsonProperty("icebergCatalog") IcebergCatalog icebergCatalog, + @JsonProperty("warehouseSource") AbstractInputSourceAdapter warehouseSource Review Comment: This can be any of the three available `AbstractInputSourceAdapter` implementations: `local`, `s3` or `hdfs`. If any other identifier is provided here, the deserialization will fail if it isn't available in the class registries. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
