xyk0930 commented on issue #4790:
URL: https://github.com/apache/paimon/issues/4790#issuecomment-2563379141
1. Create a Paimon table
-- Test table with an ARRAY<STRING> column; 'id' is declared as the primary key via TBLPROPERTIES.
CREATE TABLE paimon_default.default_array_test (
    id   INT           COMMENT '唯一标识',
    name STRING        COMMENT '姓名',
    tags ARRAY<STRING> COMMENT '标签'
)
USING paimon
COMMENT '数组test'
TBLPROPERTIES (
    'bucket' = '-1',
    'changelog-producer' = 'none',
    'deletion-vectors.enabled' = 'false',
    'dynamic-bucket.initial-buckets' = '10',
    'dynamic-bucket.target-row-num' = '2000000',
    'file.compression' = 'zstd',
    'file.compression.zstd-level' = '1',
    'file.format' = 'parquet',
    'full-compaction.delta-commits' = '1',
    'ignore-delete' = 'false',
    'merge-engine' = 'deduplicate',
    'path' = 'hdfs://hadoop105:8020/paimon/warehouse/paimon_default.db/default_array_test',
    'primary-key' = 'id',
    'snapshot.expire.limit' = '10',
    'snapshot.num-retained.max' = '10',
    'snapshot.num-retained.min' = '3',
    'snapshot.time-retained' = '1 h',
    'tag.num-retained-max' = '7'
);
2. Use Spark to write data into the Paimon table
/**
 * Reproduction driver: builds a three-row DataFrame (id, name, tags), overwrites the
 * Paimon table stored at the HDFS path below, and then reads the table back through
 * the catalog. The third row carries an empty tags array.
 */
public class PaimonArrayTest {
    public static void main(String[] args) {
        // Initialize the SparkSession with the Paimon catalog and extensions, and enable Hive support.
        SparkSession spark = SparkSession.builder()
                .appName("Spark Paimon Example")
                .config("spark.sql.catalog.spark_catalog",
                        "org.apache.paimon.spark.SparkGenericCatalog")
                .config("spark.sql.extensions",
                        "org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions")
                .enableHiveSupport()
                .getOrCreate();
        try {
            // Define the schema; every field is declared non-nullable.
            StructType schema = new StructType(new StructField[]{
                    DataTypes.createStructField("id", DataTypes.IntegerType, false),
                    DataTypes.createStructField("name", DataTypes.StringType, false),
                    DataTypes.createStructField("tags",
                            DataTypes.createArrayType(DataTypes.StringType), false)
            });

            // Create some data rows.
            List<Row> data = Arrays.asList(
                    RowFactory.create(1, "Alice", Arrays.asList("Java", "Scala")),
                    RowFactory.create(2, "Bob", Arrays.asList("Python", "R")),
                    RowFactory.create(3, "Charlie", new ArrayList<>())
            );

            // Create the DataFrame.
            Dataset<Row> df = spark.createDataFrame(data, schema);

            // Show the DataFrame contents.
            df.show();

            // Write the DataFrame to the Paimon table by path, replacing existing data.
            df.write()
                    .mode(SaveMode.Overwrite)
                    .format("paimon")
                    .save("hdfs://hadoop105:8020/paimon/warehouse/paimon_default.db/default_array_test");

            // Replace with the actual database and table name.
            spark.table("paimon_default.default_array_test").show();
        } finally {
            // Stop the SparkSession even when the write or read above throws,
            // so a failed run does not leave the session unreleased.
            spark.stop();
        }
    }
}
3. Driver stdout

@Aiden-Dong this is a sample test
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]