stdzur820 opened a new issue, #8735:
URL: https://github.com/apache/hudi/issues/8735
Hello, I am using the Hudi Java client. The functionality I want to
implement is writing data to AWS S3, but my write speed is very slow. Is
there any solution, or is there something wrong with my configuration? My
code is below. It takes about 6 seconds to insert a single record.
> import lombok.SneakyThrows;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;
import org.apache.hudi.client.HoodieJavaWriteClient;
import org.apache.hudi.client.common.HoodieJavaEngineContext;
import org.apache.hudi.common.config.HoodieMetadataConfig;
import org.apache.hudi.common.model.*;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
import org.apache.hudi.config.HoodieCleanConfig;
import org.apache.hudi.config.HoodieCompactionConfig;
import org.apache.hudi.config.HoodieIndexConfig;
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.index.HoodieIndex;
import java.io.IOException;
import java.util.Collections;
public class HudiIncrementTest {
@SneakyThrows
public static void main(String[] args) {
HudiTest();
}
public static void HudiTest() throws IOException {
Configuration hadoopConf = new Configuration();
HoodieJavaEngineContext context = new HoodieJavaEngineContext(new
Configuration());
//fs.defaultFS的value值是bucket name
hadoopConf.set("fs.s3a.impl",
"org.apache.hadoop.fs.s3a.S3AFileSystem");
hadoopConf.set("fs.s3a.endpoint", "*");
hadoopConf.set("fs.s3a.access.key", "*");
hadoopConf.set("fs.s3a.secret.key", "*");
final String tablePath = "s3a://us-west2-hudi/hudi";
final String tableType = HoodieTableType.MERGE_ON_READ.name();
final String tableName = "table1";
HoodieTableMetaClient hoodieTableMetaClient =
HoodieTableMetaClient.withPropertyBuilder()
.setTableType(tableType)
.setTableName(tableName)
.setRecordKeyFields("ph")
.setPreCombineField("entryTs")
.setBaseFileFormat("PARQUET")
.setPayloadClassName(HoodieAvroPayload.class.getName())
.initTable(hadoopConf, tablePath);
final Schema schema = SchemaBuilder
.record("user").doc("user").namespace("example.avro")
.fields()
.name("name").doc("Nome").type().stringType().noDefault()
.name("favorite_number").doc("number").type().nullable().intType().noDefault()
.name("favorite_color").doc("color").type().stringType().noDefault()
.name("ph").doc("mobile").type().intType().noDefault()
.name("entryTs").doc("tiebreaker on
duplicates").type().intType().noDefault()
.endRecord();
final GenericRecord user1 = new GenericData.Record(schema);
user1.put("name", "myTest");
user1.put("favorite_number", 256);
user1.put("favorite_color", "blue1");
user1.put("ph", 1);
user1.put("entryTs", 101);
String recordKey = user1.get("ph").toString();
HoodieAvroPayload record = new HoodieAvroPayload(user1, 0);
final HoodieAvroRecord<HoodieAvroPayload>
hoodieAvroPayloadHoodieAvroRecord
= new HoodieAvroRecord<>(new HoodieKey(recordKey, "asin1"),
record);
HoodieWriteConfig cfg = HoodieWriteConfig.newBuilder()
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
.enable(false)
.build())
.withCompactionConfig(HoodieCompactionConfig.newBuilder()
.withMaxNumDeltaCommitsBeforeCompaction(1)
.withCompactionLogFileNumThreshold(2)
.build())
.withCleanConfig(HoodieCleanConfig.newBuilder()
.retainFileVersions(1)
.withMaxCommitsBeforeCleaning(1)
.withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS)
.build()
)
.withIndexConfig(HoodieIndexConfig.newBuilder()
.withIndexType(HoodieIndex.IndexType.BLOOM)
.build())
.withPath(tablePath)
.forTable(tableName)
.withSchema(schema.toString())
.withParallelism(2, 2)
.withDeleteParallelism(2)
.withAutoCommit(true)
.build();
HoodieJavaWriteClient<HoodieAvroPayload> hudiClient = new
HoodieJavaWriteClient<>(context, cfg);
String newCommitTime = HoodieActiveTimeline.createNewInstantTime();
hudiClient.startCommitWithTime(newCommitTime);
hudiClient.upsertPreppedRecords(Collections.singletonList(hoodieAvroPayloadHoodieAvroRecord),
newCommitTime);
hudiClient.close();
}
}
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]