Reamer commented on code in PR #5032: URL: https://github.com/apache/zeppelin/pull/5032#discussion_r2293143196
########## flink/flink1.20-shims/src/main/java/org/apache/zeppelin/flink/Flink120Shims.java: ########## @@ -0,0 +1,408 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.flink; + +import java.io.File; +import java.io.IOException; +import java.lang.reflect.Method; +import java.net.InetAddress; +import java.net.URL; +import java.time.ZoneId; +import java.util.List; +import java.util.Properties; +import org.apache.commons.cli.CommandLine; +import org.apache.commons.compress.utils.Lists; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.flink.api.common.RuntimeExecutionMode; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.client.cli.CliFrontend; +import org.apache.flink.client.cli.CustomCommandLine; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.configuration.ExecutionOptions; +import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironmentFactory; +import 
org.apache.flink.table.api.*; +import org.apache.flink.table.api.bridge.java.internal.StreamTableEnvironmentImpl; +import org.apache.flink.table.api.config.TableConfigOptions; +import org.apache.flink.table.catalog.*; +import org.apache.flink.table.client.resource.ClientResourceManager; +import org.apache.flink.table.client.util.ClientClassloaderUtil; +import org.apache.flink.table.client.util.ClientWrapperClassLoader; +import org.apache.flink.table.delegation.Executor; +import org.apache.flink.table.delegation.ExecutorFactory; +import org.apache.flink.table.delegation.Planner; +import org.apache.flink.table.factories.CatalogStoreFactory; +import org.apache.flink.table.factories.FactoryUtil; +import org.apache.flink.table.factories.PlannerFactoryUtil; +import org.apache.flink.table.factories.TableFactoryUtil; +import org.apache.flink.table.functions.AggregateFunction; +import org.apache.flink.table.functions.ScalarFunction; +import org.apache.flink.table.functions.TableAggregateFunction; +import org.apache.flink.table.functions.TableFunction; +import org.apache.flink.table.module.ModuleManager; +import org.apache.flink.table.resource.ResourceManager; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; +import org.apache.flink.types.Row; +import org.apache.flink.types.RowKind; +import org.apache.flink.util.FlinkException; +import org.apache.zeppelin.flink.shims120.CollectStreamTableSink; +import org.apache.zeppelin.interpreter.InterpreterContext; +import org.apache.zeppelin.interpreter.InterpreterResult; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Shims for flink 1.20 + */ +public class Flink120Shims extends FlinkShims { + + private static final Logger LOGGER = LoggerFactory.getLogger(Flink120Shims.class); + + private Flink120SqlInterpreter batchSqlInterpreter; + private Flink120SqlInterpreter streamSqlInterpreter; + + public Flink120Shims(FlinkVersion flinkVersion, Properties 
properties) { + super(flinkVersion, properties); + } + + public void initInnerBatchSqlInterpreter(FlinkSqlContext flinkSqlContext) { + this.batchSqlInterpreter = new Flink120SqlInterpreter(flinkSqlContext, true); + } + + public void initInnerStreamSqlInterpreter(FlinkSqlContext flinkSqlContext) { + this.streamSqlInterpreter = new Flink120SqlInterpreter(flinkSqlContext, false); + } + + @Override + public Object createResourceManager(List<URL> jars, Object tableConfig) { + Configuration configuration = ((TableConfig) tableConfig).getConfiguration().clone(); + ClientWrapperClassLoader userClassLoader = + new ClientWrapperClassLoader( + ClientClassloaderUtil.buildUserClassLoader( + jars, + Thread.currentThread().getContextClassLoader(), + new Configuration(configuration)), + configuration); + return new ClientResourceManager(configuration, userClassLoader); + } + + @Override + public Object createFunctionCatalog(Object tableConfig, Object catalogManager, Object moduleManager, List<URL> jars) { + ResourceManager resourceManager = (ResourceManager) createResourceManager(jars, tableConfig); + return new FunctionCatalog((TableConfig) tableConfig, resourceManager, (CatalogManager) catalogManager, (ModuleManager) moduleManager); + } + + @Override + public void disableSysoutLogging(Object batchConfig, Object streamConfig) { + // do nothing + } + + @Override + public Object createScalaBlinkStreamTableEnvironment(Object environmentSettingsObj, + Object senvObj, + Object tableConfigObj, + Object moduleManagerObj, + Object functionCatalogObj, + Object catalogManagerObj, + List<URL> jars, + ClassLoader classLoader) { + EnvironmentSettings environmentSettings = (EnvironmentSettings) environmentSettingsObj; + StreamExecutionEnvironment senv = (StreamExecutionEnvironment) senvObj; + TableConfig tableConfig = (TableConfig) tableConfigObj; + ModuleManager moduleManager = (ModuleManager) moduleManagerObj; + FunctionCatalog functionCatalog = (FunctionCatalog) functionCatalogObj; + 
CatalogManager catalogManager = (CatalogManager) catalogManagerObj; + ImmutablePair<Object, Object> pair = createPlannerAndExecutor( + classLoader, environmentSettings, senv, + tableConfig, moduleManager, functionCatalog, catalogManager); + Planner planner = (Planner) pair.left; + Executor executor = (Executor) pair.right; + + ResourceManager resourceManager = (ResourceManager) createResourceManager(jars, tableConfig); + + return new org.apache.flink.table.api.bridge.scala.internal.StreamTableEnvironmentImpl(catalogManager, + moduleManager, resourceManager, + functionCatalog, tableConfig, new org.apache.flink.streaming.api.scala.StreamExecutionEnvironment(senv), + planner, executor, environmentSettings.isStreamingMode()); + } + + @Override + public Object createJavaBlinkStreamTableEnvironment(Object environmentSettingsObj, + Object senvObj, + Object tableConfigObj, + Object moduleManagerObj, + Object functionCatalogObj, + Object catalogManagerObj, + List<URL> jars, + ClassLoader classLoader) { + EnvironmentSettings environmentSettings = (EnvironmentSettings) environmentSettingsObj; + StreamExecutionEnvironment senv = (StreamExecutionEnvironment) senvObj; + TableConfig tableConfig = (TableConfig) tableConfigObj; + ModuleManager moduleManager = (ModuleManager) moduleManagerObj; + FunctionCatalog functionCatalog = (FunctionCatalog) functionCatalogObj; + CatalogManager catalogManager = (CatalogManager) catalogManagerObj; + ImmutablePair<Object, Object> pair = createPlannerAndExecutor( + classLoader, environmentSettings, senv, + tableConfig, moduleManager, functionCatalog, catalogManager); + Planner planner = (Planner) pair.left; + Executor executor = (Executor) pair.right; + + ResourceManager resourceManager = (ResourceManager) createResourceManager(jars, tableConfig); + + return new StreamTableEnvironmentImpl(catalogManager, moduleManager, resourceManager, + functionCatalog, tableConfig, senv, planner, executor, environmentSettings.isStreamingMode()); + } + + 
@Override + public Object createStreamExecutionEnvironmentFactory(Object streamExecutionEnvironment) { + return new StreamExecutionEnvironmentFactory() { + @Override + public StreamExecutionEnvironment createExecutionEnvironment(Configuration configuration) { + return (StreamExecutionEnvironment) streamExecutionEnvironment; + } + }; + } + + @Override + public Object createCatalogManager(Object config) { + final TableConfig tableConfig = TableConfig.getDefault(); + ClassLoader userClassLoader=Thread.currentThread().getContextClassLoader(); + + final CatalogStoreFactory catalogStoreFactory = + TableFactoryUtil.findAndCreateCatalogStoreFactory( + tableConfig.getConfiguration(), userClassLoader); + final CatalogStore catalogStore = + catalogStoreFactory.createCatalogStore(); + + return CatalogManager.newBuilder() + .classLoader(Thread.currentThread().getContextClassLoader()) + .config((ReadableConfig) config) + .defaultCatalog( + "default_catalog", + new GenericInMemoryCatalog( + "default_catalog", + "default_database")) + . catalogStoreHolder( + CatalogStoreHolder.newBuilder() + .classloader(Thread.currentThread().getContextClassLoader()) + .config(tableConfig) + .catalogStore(catalogStore) + .factory(catalogStoreFactory) + .build()) + .build(); + } + + @Override + public String getPyFlinkPythonPath(Properties properties) throws IOException { + String mode = properties.getProperty("flink.execution.mode"); + if ("yarn-application".equalsIgnoreCase(mode)) { + // for yarn application mode, FLINK_HOME is container working directory + String flinkHome = new File(".").getAbsolutePath(); Review Comment: We should use the newer `java.nio.file` API (`Path`, `Paths`, `Files`) instead of `java.io.File` to work with files and directories. ########## flink/flink1.20-shims/src/main/java/org/apache/zeppelin/flink/shims120/CollectStreamTableSink.java: ########## @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zeppelin.flink.shims120; + +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.typeutils.TupleTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.datastream.DataStreamSink; +import org.apache.flink.streaming.experimental.CollectSink; +import org.apache.flink.table.sinks.RetractStreamTableSink; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.InetAddress; +import java.util.UUID; + +/** + * Table sink for collecting the results locally using sockets. 
+ */ +public class CollectStreamTableSink implements RetractStreamTableSink<Row> { + + private static final Logger LOGGER = LoggerFactory.getLogger(CollectStreamTableSink.class); + + private final InetAddress targetAddress; + private final int targetPort; + private final TypeSerializer<Tuple2<Boolean, Row>> serializer; + + private String[] fieldNames; + private TypeInformation<?>[] fieldTypes; + + public CollectStreamTableSink(InetAddress targetAddress, + int targetPort, + TypeSerializer<Tuple2<Boolean, Row>> serializer) { + LOGGER.info("Use address: " + targetAddress.getHostAddress() + ":" + targetPort); Review Comment: ```suggestion LOGGER.info("Use address: {}:{}", targetAddress.getHostAddress(), targetPort); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@zeppelin.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org