y0908105023 commented on PR #6650:
URL: https://github.com/apache/hudi/pull/6650#issuecomment-1243288837
Tested with the following code:
`package org.apache.flink.hudi;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.RestOptions;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Schema;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment;
import org.apache.flink.table.api.bridge.scala.internal.StreamTableEnvironmentImpl;
import org.apache.flink.table.catalog.Column;
import org.apache.flink.table.catalog.ResolvedSchema;
import org.apache.flink.table.runtime.typeutils.ExternalTypeInfo;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.util.Test;
import org.apache.hudi.org.apache.avro.SchemaBuilder;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.util.List;
public class HudiRead {
public static void main(String[] args) throws Exception {
Configuration config = new Configuration();
config.setInteger(RestOptions.PORT, 8082);
config.setString("metrics.system-resource", "true");
StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment();
EnvironmentSettings settings = EnvironmentSettings.newInstance()
.inStreamingMode()
.build();
StreamTableEnvironmentImpl tEnv =
Test.getStreamTableEnvironment(env, settings);
String hudiSql = "CREATE TABLE IF NOT EXISTS hudi (\n" +
" `aa` STRING, " +
" `bb` BIGINT," +
" `cc` DECIMAL(10,2)," +
" `dd` DATE," +
" `ff` TIMESTAMP(6), " +
// " `gg` as bb*10, " +
// " WATERMARK FOR ff AS ff - INTERVAL '5' SECOND," +
" PRIMARY KEY (bb) NOT ENFORCED\n" +
")\n" +
" PARTITIONED BY (`dd`)\n" +
"with (\n" +
" \n" +
" 'connector' = 'hudi',\n" +
" 'hive_sync.support_timestamp' = 'true',\n" +
" 'path' =
'hdfs://yangshuo7.local:9000/user/hudi//hjh8171/mor_fq_82572',\n" +
" 'read.streaming.enabled' = 'true',\n" +
" 'table.type' = 'MERGE_ON_READ',\n" +
" 'write.tasks' = '1',\n" +
" 'changelog.enable' = 'true',\n" +
" 'write.operation' = 'upsert',\n" +
" 'compaction.async.enabled' = 'false',\n" +
" 'hive_sync.enable' = 'true',\n" +
" 'hive_sync.mode' = 'hms',\n" +
" 'hive_sync.db' = 'hjh8171',\n" +
" 'hive_sync.table' = 'mor_fq_82572',\n" +
" 'hive_sync.metastore.uris' = 'thrift://localhost:9083'\n"
+
")";
tEnv.executeSql(hudiSql);
Table t = tEnv.from("hudi");
DataStream<Row> dataStream1 = Test.toStream(tEnv, t).javaStream();
dataStream1.map(new MapFunction<Row, Row>() {
@Override
public Row map(Row value) throws Exception {
return value;
}
}).print("");
Schema schema = getSchema(t);
DataStream<Row> dataStream = tEnv.toChangelogStream(t,
schema).javaStream();
TypeInformation typeInformation = dataStream.getType();
tEnv.toChangelogStream(t).javaStream().map(new MapFunction<Row,
Row>() {
@Override
public Row map(Row value) throws Exception {
return value;
}
}).print();
env.execute("hudi job");
}
private static Schema getSchema(Table t) {
Schema.Builder builder = Schema.newBuilder();
List<Column> columns = t.getResolvedSchema().getColumns();
t.getResolvedSchema().toPhysicalRowDataType();
for (Column c: columns) {
DataType dt = c.getDataType();
switch (dt.getLogicalType().getTypeRoot()) {
case TIMESTAMP_WITHOUT_TIME_ZONE:
builder.column(c.getName(),
dt.bridgedTo(Timestamp.class));
break;
case TIME_WITHOUT_TIME_ZONE:
builder.column(c.getName(), dt.bridgedTo(Time.class));
break;
case DATE:
builder.column(c.getName(), dt.bridgedTo(Date.class));
break;
default:
builder.column(c.getName(), dt);
}
}
return builder.build();
}
}
`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]