shanthoosh commented on a change in pull request #1079: SAMZA-2250: Support large job models in standalone.
URL: https://github.com/apache/samza/pull/1079#discussion_r295021274
 
 

 ##########
 File path: samza-core/src/main/java/org/apache/samza/job/model/JobModelUtil.java
 ##########
 @@ -57,6 +70,70 @@
     return taskToSSPs;
   }
 
+  /**
+   * Splits the {@link JobModel} into independent byte array segments of 1 MB size.
+   * @param jobModel the job model to split.
+   * @return the job model split into independent byte array chunks.
+   */
+  private static List<byte[]> chunkJobModel(JobModel jobModel) {
+    try {
+      String jobModelSerializedAsString = MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(jobModel);
+      List<byte[]> jobModelSegments = new ArrayList<>();
+      for (int index = 0; index < jobModelSerializedAsString.length(); index += JOB_MODEL_SEGMENT_SIZE_IN_BYTES) {
+        String jobModelSegment = jobModelSerializedAsString.substring(index, Math.min(index + JOB_MODEL_SEGMENT_SIZE_IN_BYTES, jobModelSerializedAsString.length()));
+        jobModelSegments.add(jobModelSegment.getBytes(Charset.forName(UTF_8)));
+      }
+      return jobModelSegments;
+    } catch (Exception e) {
+      throw new SamzaException(String.format("Exception occurred when splitting the JobModel: %s to small chunks.", jobModel), e);
+    }
+  }
+
+  /**
+   * Splits the larger {@link JobModel} into independent segments of 1 MB size and stores them in {@link MetadataStore}.
+   * @param jobModel the job model to store into {@link MetadataStore}.
+   * @param jobModelVersion the job model version.
+   * @param metadataStore the metadata store.
+   */
+  public static void writeJobModel(JobModel jobModel, String jobModelVersion, MetadataStore metadataStore) {
+    try {
+      List<byte[]> jobModelSegments = chunkJobModel(jobModel);
+      for (int jobModelSegmentIndex = 0; jobModelSegmentIndex < jobModelSegments.size(); jobModelSegmentIndex += 1) {
 
 Review comment:
   Done.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to