This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new cc0e25eb02 Fixing RealtimeProvisioningHelperCommand to allow using
just -schemaFile along with -sampleCompletedSegmentDir (#13727)
cc0e25eb02 is described below
commit cc0e25eb0202dae573a85e0d1c348681f1e15ad6
Author: Xiang Fu <[email protected]>
AuthorDate: Fri Aug 2 13:00:51 2024 -0700
Fixing RealtimeProvisioningHelperCommand to allow using just -schemaFile
along with -sampleCompletedSegmentDir (#13727)
---
.../command/RealtimeProvisioningHelperCommand.java | 58 ++++++++++++++++------
1 file changed, 42 insertions(+), 16 deletions(-)
diff --git
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
index 4ae7fa3f97..330e30992c 100644
---
a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
+++
b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
@@ -59,6 +59,9 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
@CommandLine.Option(names = {"-tableConfigFile"}, required = true)
private String _tableConfigFile;
+ @CommandLine.Option(names = {"-schemaFile"}, required = false)
+ private String _schemaFile;
+
@CommandLine.Option(names = {"-numPartitions"}, required = true,
description = "number of stream partitions for the table")
private int _numPartitions;
@@ -108,6 +111,11 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
return this;
}
+ public RealtimeProvisioningHelperCommand setSchemaFile(String schemaFile) {
+ _schemaFile = schemaFile;
+ return this;
+ }
+
public RealtimeProvisioningHelperCommand setNumPartitions(int numPartitions)
{
_numPartitions = numPartitions;
return this;
@@ -155,8 +163,10 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
@Override
public String toString() {
- String segmentStr = _sampleCompletedSegmentDir != null ? "
-sampleCompletedSegmentDir " + _sampleCompletedSegmentDir
- : " -schemaWithMetadataFile " + _schemaWithMetadataFile + " -numRows "
+ _numRows;
+ String segmentStr =
+ _sampleCompletedSegmentDir != null
+ ? " -schemaFile " + _schemaFile + " -sampleCompletedSegmentDir " +
_sampleCompletedSegmentDir
+ : " -schemaWithMetadataFile " + _schemaWithMetadataFile + "
-numRows " + _numRows;
return "RealtimeProvisioningHelper -tableConfigFile " + _tableConfigFile +
" -numPartitions " + _numPartitions
+ " -pushFrequency " + _pushFrequency + " -numHosts " + _numHosts + "
-numHours " + _numHours + segmentStr
+ " -ingestionRate " + _ingestionRate + " -maxUsableHostMemory " +
_maxUsableHostMemory + " -retentionHours "
@@ -188,13 +198,13 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
StringBuilder builder = new StringBuilder();
builder.append("\n\nThis command allows you to estimate the capacity
needed for provisioning realtime hosts. ")
.append("It assumes that there is no upper limit to the amount of
memory you can mmap").append(
- "\nIf you have a hybrid table, then consult the push frequency setting
in your offline table specify it in "
- + "the -pushFrequency argument").append(
- "\nIf you have a realtime-only table, then the default behavior is to
assume that your queries need all "
- + "data in memory all the time").append(
- "\nHowever, if most of your queries are going to be for (say) the last
96 hours, then you can specify "
- + "that in -retentionHours").append(
- "\nDoing so will let this program assume that you are willing to take
a page hit when querying older data")
+ "\nIf you have a hybrid table, then consult the push frequency
setting in your offline table specify it in "
+ + "the -pushFrequency argument").append(
+ "\nIf you have a realtime-only table, then the default behavior is
to assume that your queries need all "
+ + "data in memory all the time").append(
+ "\nHowever, if most of your queries are going to be for (say) the
last 96 hours, then you can specify "
+ + "that in -retentionHours").append(
+ "\nDoing so will let this program assume that you are willing to
take a page hit when querying older data")
.append("\nand optimize memory and number of hosts accordingly.")
.append("\n See
https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime for
details");
System.out.println(builder);
@@ -206,13 +216,14 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
boolean segmentProvided = _sampleCompletedSegmentDir != null;
boolean characteristicsProvided = _schemaWithMetadataFile != null;
- Preconditions.checkState(segmentProvided ^ characteristicsProvided,
- "Either completed segment should be provided or schema with
characteristics file!");
+ boolean schemaProvided = _schemaFile != null;
+ Preconditions.checkState((schemaProvided & segmentProvided) ^
characteristicsProvided,
+ "Either schema with completed segment should be provided or schema
with characteristics file!");
- LOGGER.info("Executing command: {}", toString());
+ LOGGER.info("Executing command: {}", this);
TableConfig tableConfig;
- try (FileInputStream fis = new FileInputStream(new
File(_tableConfigFile))) {
+ try (FileInputStream fis = new FileInputStream(_tableConfigFile)) {
String tableConfigString = IOUtils.toString(fis);
tableConfig = JsonUtils.stringToObject(tableConfigString,
TableConfig.class);
} catch (IOException e) {
@@ -262,8 +273,7 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
long maxUsableHostMemBytes = DataSizeUtils.toBytes(_maxUsableHostMemory);
File workingDir = Files.createTempDir();
- File file = new File(_schemaWithMetadataFile);
- Schema schema = deserialize(file, Schema.class);
+ Schema schema = extractSchema();
MemoryEstimator memoryEstimator;
if (segmentProvided) {
// use the provided segment to estimate memory
@@ -276,7 +286,7 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
if (_numRows == 0) {
_numRows = DEFAULT_NUMBER_OF_ROWS;
}
- SchemaWithMetaData schemaWithMetaData = deserialize(file,
SchemaWithMetaData.class);
+ SchemaWithMetaData schemaWithMetaData = deserialize(new
File(_schemaWithMetadataFile), SchemaWithMetaData.class);
memoryEstimator =
new MemoryEstimator(tableConfig, schema, schemaWithMetaData,
_numRows, _ingestionRate, maxUsableHostMemBytes,
tableRetentionHours, workingDir);
@@ -299,6 +309,22 @@ public class RealtimeProvisioningHelperCommand extends
AbstractBaseAdminCommand
return true;
}
+ private Schema extractSchema() {
+ if (_schemaFile != null) {
+ try (FileInputStream fis = new FileInputStream(_schemaFile)) {
+ String schemaString = IOUtils.toString(fis);
+ return Schema.fromString(schemaString);
+ } catch (IOException e) {
+ throw new RuntimeException("Exception in reading schema from file " +
_schemaFile, e);
+ }
+ } else if (_schemaWithMetadataFile != null) {
+ File file = new File(_schemaWithMetadataFile);
+ return deserialize(file, Schema.class);
+ } else {
+ throw new IllegalArgumentException("Schema file is required");
+ }
+ }
+
private void displayOutputHeader(StringBuilder note) {
System.out.println("\n============================================================\n"
+ toString());
System.out.println(note.toString());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]