This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new cc0e25eb02 Fixing RealtimeProvisioningHelperCommand to allow using just -schemaFile along with -sampleCompletedSegmentDir (#13727)
cc0e25eb02 is described below

commit cc0e25eb0202dae573a85e0d1c348681f1e15ad6
Author: Xiang Fu <[email protected]>
AuthorDate: Fri Aug 2 13:00:51 2024 -0700

    Fixing RealtimeProvisioningHelperCommand to allow using just -schemaFile along with -sampleCompletedSegmentDir (#13727)
---
 .../command/RealtimeProvisioningHelperCommand.java | 58 ++++++++++++++++------
 1 file changed, 42 insertions(+), 16 deletions(-)

diff --git a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
index 4ae7fa3f97..330e30992c 100644
--- a/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
+++ b/pinot-tools/src/main/java/org/apache/pinot/tools/admin/command/RealtimeProvisioningHelperCommand.java
@@ -59,6 +59,9 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
   @CommandLine.Option(names = {"-tableConfigFile"}, required = true)
   private String _tableConfigFile;
 
+  @CommandLine.Option(names = {"-schemaFile"}, required = false)
+  private String _schemaFile;
+
   @CommandLine.Option(names = {"-numPartitions"}, required = true,
       description = "number of stream partitions for the table")
   private int _numPartitions;
@@ -108,6 +111,11 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
     return this;
   }
 
+  public RealtimeProvisioningHelperCommand setSchemaFile(String schemaFile) {
+    _schemaFile = schemaFile;
+    return this;
+  }
+
   public RealtimeProvisioningHelperCommand setNumPartitions(int numPartitions) 
{
     _numPartitions = numPartitions;
     return this;
@@ -155,8 +163,10 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
 
   @Override
   public String toString() {
-    String segmentStr = _sampleCompletedSegmentDir != null ? " 
-sampleCompletedSegmentDir " + _sampleCompletedSegmentDir
-        : " -schemaWithMetadataFile " + _schemaWithMetadataFile + " -numRows " 
+ _numRows;
+    String segmentStr =
+        _sampleCompletedSegmentDir != null
+            ? " -schemaFile " + _schemaFile + " -sampleCompletedSegmentDir " + 
_sampleCompletedSegmentDir
+            : " -schemaWithMetadataFile " + _schemaWithMetadataFile + " 
-numRows " + _numRows;
     return "RealtimeProvisioningHelper -tableConfigFile " + _tableConfigFile + 
" -numPartitions " + _numPartitions
         + " -pushFrequency " + _pushFrequency + " -numHosts " + _numHosts + " 
-numHours " + _numHours + segmentStr
         + " -ingestionRate " + _ingestionRate + " -maxUsableHostMemory " + 
_maxUsableHostMemory + " -retentionHours "
@@ -188,13 +198,13 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
     StringBuilder builder = new StringBuilder();
     builder.append("\n\nThis command allows you to estimate the capacity 
needed for provisioning realtime hosts. ")
         .append("It assumes that there is no upper limit to the amount of 
memory you can mmap").append(
-        "\nIf you have a hybrid table, then consult the push frequency setting 
in your offline table specify it in "
-            + "the -pushFrequency argument").append(
-        "\nIf you have a realtime-only table, then the default behavior is to 
assume that your queries need all "
-            + "data in memory all the time").append(
-        "\nHowever, if most of your queries are going to be for (say) the last 
96 hours, then you can specify "
-            + "that in -retentionHours").append(
-        "\nDoing so will let this program assume that you are willing to take 
a page hit when querying older data")
+            "\nIf you have a hybrid table, then consult the push frequency 
setting in your offline table specify it in "
+                + "the -pushFrequency argument").append(
+            "\nIf you have a realtime-only table, then the default behavior is 
to assume that your queries need all "
+                + "data in memory all the time").append(
+            "\nHowever, if most of your queries are going to be for (say) the 
last 96 hours, then you can specify "
+                + "that in -retentionHours").append(
+            "\nDoing so will let this program assume that you are willing to 
take a page hit when querying older data")
         .append("\nand optimize memory and number of hosts accordingly.")
         .append("\n See 
https://docs.pinot.apache.org/operators/operating-pinot/tuning/realtime for 
details");
     System.out.println(builder);
@@ -206,13 +216,14 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
 
     boolean segmentProvided = _sampleCompletedSegmentDir != null;
     boolean characteristicsProvided = _schemaWithMetadataFile != null;
-    Preconditions.checkState(segmentProvided ^ characteristicsProvided,
-        "Either completed segment should be provided or schema with 
characteristics file!");
+    boolean schemaProvided = _schemaFile != null;
+    Preconditions.checkState((schemaProvided & segmentProvided) ^ 
characteristicsProvided,
+        "Either schema with completed segment should be provided or schema 
with characteristics file!");
 
-    LOGGER.info("Executing command: {}", toString());
+    LOGGER.info("Executing command: {}", this);
 
     TableConfig tableConfig;
-    try (FileInputStream fis = new FileInputStream(new 
File(_tableConfigFile))) {
+    try (FileInputStream fis = new FileInputStream(_tableConfigFile)) {
       String tableConfigString = IOUtils.toString(fis);
       tableConfig = JsonUtils.stringToObject(tableConfigString, 
TableConfig.class);
     } catch (IOException e) {
@@ -262,8 +273,7 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
     long maxUsableHostMemBytes = DataSizeUtils.toBytes(_maxUsableHostMemory);
 
     File workingDir = Files.createTempDir();
-    File file = new File(_schemaWithMetadataFile);
-    Schema schema = deserialize(file, Schema.class);
+    Schema schema = extractSchema();
     MemoryEstimator memoryEstimator;
     if (segmentProvided) {
       // use the provided segment to estimate memory
@@ -276,7 +286,7 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
       if (_numRows == 0) {
         _numRows = DEFAULT_NUMBER_OF_ROWS;
       }
-      SchemaWithMetaData schemaWithMetaData = deserialize(file, 
SchemaWithMetaData.class);
+      SchemaWithMetaData schemaWithMetaData = deserialize(new 
File(_schemaWithMetadataFile), SchemaWithMetaData.class);
       memoryEstimator =
           new MemoryEstimator(tableConfig, schema, schemaWithMetaData, 
_numRows, _ingestionRate, maxUsableHostMemBytes,
               tableRetentionHours, workingDir);
@@ -299,6 +309,22 @@ public class RealtimeProvisioningHelperCommand extends 
AbstractBaseAdminCommand
     return true;
   }
 
+  private Schema extractSchema() {
+    if (_schemaFile != null) {
+      try (FileInputStream fis = new FileInputStream(_schemaFile)) {
+        String schemaString = IOUtils.toString(fis);
+        return Schema.fromString(schemaString);
+      } catch (IOException e) {
+        throw new RuntimeException("Exception in reading schema from file " + 
_schemaFile, e);
+      }
+    } else if (_schemaWithMetadataFile != null) {
+      File file = new File(_schemaWithMetadataFile);
+      return deserialize(file, Schema.class);
+    } else {
+      throw new IllegalArgumentException("Schema file is required");
+    }
+  }
+
   private void displayOutputHeader(StringBuilder note) {
     
System.out.println("\n============================================================\n"
 + toString());
     System.out.println(note.toString());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to