jtao15 commented on a change in pull request #6419:
URL: https://github.com/apache/incubator-pinot/pull/6419#discussion_r557050072
##########
File path:
pinot-integration-tests/src/test/java/org/apache/pinot/compat/tests/StreamOp.java
##########
@@ -85,4 +116,118 @@ boolean runOp() {
/**
 * Sets the paths of the table config files whose tables this stream op will ingest into.
 *
 * @param tableConfigFileNames list of table config file paths (one per target table)
 */
public void setTableConfigFileNames(List<String> tableConfigFileNames) {
  _tableConfigFileNames = tableConfigFileNames;
}
+
/**
 * Returns the path of the CSV record-reader config file used when pushing rows to Kafka.
 *
 * @return the record reader config file path
 */
public String getRecordReaderConfigFileName() {
  return _recordReaderConfigFileName;
}
+
/**
 * Sets the path of the CSV record-reader config file used when pushing rows to Kafka.
 *
 * @param recordReaderConfigFileName the record reader config file path
 */
public void setRecordReaderConfigFileName(String recordReaderConfigFileName) {
  _recordReaderConfigFileName = recordReaderConfigFileName;
}
+
+ @Override
+ boolean runOp() {
+ try {
+ File csvFile = new File(_inputDataFileName);
+ Map<String, String> streamConfigMap = JsonUtils.fileToObject(new
File(_streamConfigFileName), HashMap.class);
+ final Map<String, Object> config = new HashMap<>();
+ config.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:" +
ClusterDescriptor.KAFKA_PORT);
+ config.put(AdminClientConfig.CLIENT_ID_CONFIG, "Kafka2AdminClient-" +
UUID.randomUUID().toString());
+ config.put(AdminClientConfig.REQUEST_TIMEOUT_MS_CONFIG, 15000);
+ AdminClient adminClient = KafkaAdminClient.create(config);
+
+ // create kafka topic
+ String topicName = streamConfigMap.get("stream.kafka.topic.name");
+ int partitions =
Integer.parseInt(streamConfigMap.get("stream.kafka.numPartitions"));
+ String partitionColumn =
streamConfigMap.get("stream.kafka.partitionColumn");
+ NewTopic newTopic = new NewTopic(topicName, partitions, (short) 1);
+ CreateTopicsResult createTopicsResult =
adminClient.createTopics(Arrays.asList(newTopic));
+ try {
+ createTopicsResult.all().get();
+ } catch (InterruptedException | ExecutionException e) {
+ LOGGER.warn("Failed to create Kafka topic: {}, Exception: {}",
newTopic.toString(), e);
+ }
+
+ List<Long> existingTotalDocs = new ArrayList<>();
+ List<String> tableNames = new ArrayList<>();
+
+ for (String tableConfigFileName : _tableConfigFileNames) {
+ // get table config
+ TableConfig tableConfig = JsonUtils.fileToObject(new
File(tableConfigFileName), TableConfig.class);
+
+ // get original rows
+ String tableName = tableConfig.getTableName();
+ tableNames.add(tableName);
+
existingTotalDocs.add(fetchExistingTotalDocs(tableConfig.getTableName()));
+ }
+
+ // push avro file to kafka
+ Schema avroSchema = StreamOpUtils.getAvroSchema(new
File(_avroSchemaFileName));
+ StreamOpUtils.pushCsvIntoKafka(
+ csvFile,
+ avroSchema,
+ null,
+ _numRows,
+ getCSVRecordReaderConfig(),
+ "localhost:" + KafkaStarterUtils.DEFAULT_KAFKA_PORT,
+ topicName,
+ 10000,
+ null,
+ partitionColumn);
+
+ for (int i = 0; i < tableNames.size(); i++) {
+ // verify number of rows increases as expected
+ String tableName = tableNames.get(i);
+ long targetTotalDocs = existingTotalDocs.get(i) + _numRows;
+ waitForDocsLoaded(tableName, targetTotalDocs, 60_000L);
+ LOGGER.info("Verified {} new rows in table: {}", _numRows, tableName);
+ }
+ } catch (Exception e) {
+ LOGGER.error("Failed to ingest stream data", e);
+ return false;
+ }
+ return true;
+ }
+
+ private RecordReaderConfig getCSVRecordReaderConfig() throws IOException {
+ CSVRecordReaderConfig recordReaderConfig = JsonUtils.fileToObject(new
File(_recordReaderConfigFileName), CSVRecordReaderConfig.class);
+ return recordReaderConfig;
+ }
+
+ private long fetchExistingTotalDocs(String tableName) throws Exception {
+ String query = "SELECT count(*) FROM " + tableName;
+ JsonNode response = ClusterTest.postQuery(query,
ClusterDescriptor.BROKER_URL, false, "sql");
+ if (response == null) {
+ String errorMsg = String.format("Failed to query Table: %s", tableName);
+ LOGGER.error(errorMsg);
+ throw new RuntimeException(errorMsg);
+ }
+ if (response.has("hasPartialResults") &&
response.get("hasPartialResults").asBoolean()) {
Review comment:
Actually, the `totalDocs` constant in `V1Constants` refers to the total docs in segment
metadata, and the broker response does not include a `hasPartialResults` field. I defined
my own constants instead.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]