a1vin-tian commented on a change in pull request #7153: URL: https://github.com/apache/skywalking/pull/7153#discussion_r656070702
########## File path: oap-server/server-core/src/main/java/org/apache/skywalking/oap/server/core/storage/PersistenceTimer.java ########## @@ -63,98 +70,149 @@ public void start(ModuleManager moduleManager, CoreModuleConfig moduleConfig) { IBatchDAO batchDAO = moduleManager.find(StorageModule.NAME).provider().getService(IBatchDAO.class); MetricsCreator metricsCreator = moduleManager.find(TelemetryModule.NAME) - .provider() - .getService(MetricsCreator.class); + .provider() + .getService(MetricsCreator.class); errorCounter = metricsCreator.createCounter( - "persistence_timer_bulk_error_count", "Error execution of the prepare stage in persistence timer", - MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE + "persistence_timer_bulk_error_count", "Error execution of the prepare stage in persistence timer", + MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE ); prepareLatency = metricsCreator.createHistogramMetric( - "persistence_timer_bulk_prepare_latency", "Latency of the prepare stage in persistence timer", - MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE + "persistence_timer_bulk_prepare_latency", "Latency of the prepare stage in persistence timer", + MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE ); executeLatency = metricsCreator.createHistogramMetric( - "persistence_timer_bulk_execute_latency", "Latency of the execute stage in persistence timer", - MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE + "persistence_timer_bulk_execute_latency", "Latency of the execute stage in persistence timer", + MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE ); + allLatency = metricsCreator.createHistogramMetric( + "persistence_timer_bulk_all_latency", "Latency of the all stage in persistence timer", + MetricsTag.EMPTY_KEY, MetricsTag.EMPTY_VALUE + ); + syncOperationThreadsNum = moduleConfig.getSyncThreads(); maxSyncoperationNum = moduleConfig.getMaxSyncOperationNum(); + batchExecutorService = Executors.newSingleThreadExecutor(); executorService = Executors.newFixedThreadPool(syncOperationThreadsNum); + prepareExecutorService = Executors.newFixedThreadPool(moduleConfig.getPrepareThreads()); if (!isStarted) { Executors.newSingleThreadScheduledExecutor() - .scheduleWithFixedDelay( - new RunnableWithExceptionProtection(() -> extractDataAndSave(batchDAO), t -> log - .error("Extract data and save failure.", t)), 5, moduleConfig.getPersistentPeriod(), - TimeUnit.SECONDS - ); + .scheduleWithFixedDelay( + new RunnableWithExceptionProtection(() -> extractDataAndSave(batchDAO), t -> log + .error("Extract data and save failure.", t)), 5, moduleConfig.getPersistentPeriod(), + TimeUnit.SECONDS + ); this.isStarted = true; } } - private void extractDataAndSave(IBatchDAO batchDAO) { + @VisibleForTesting + void extractDataAndSave(IBatchDAO batchDAO) { if (log.isDebugEnabled()) { log.debug("Extract data and save"); } long startTime = System.currentTimeMillis(); + HistogramMetrics.Timer allTimer = allLatency.createTimer(); try { - HistogramMetrics.Timer timer = prepareLatency.createTimer(); - - try { - List<PersistenceWorker> persistenceWorkers = new ArrayList<>(); - persistenceWorkers.addAll(TopNStreamProcessor.getInstance().getPersistentWorkers()); - persistenceWorkers.addAll(MetricsStreamProcessor.getInstance().getPersistentWorkers()); - - persistenceWorkers.forEach(worker -> { - if (log.isDebugEnabled()) { - log.debug("extract {} worker data and save", worker.getClass().getName()); + List<PersistenceWorker> persistenceWorkers = new ArrayList<>(); + persistenceWorkers.addAll(TopNStreamProcessor.getInstance().getPersistentWorkers()); + persistenceWorkers.addAll(MetricsStreamProcessor.getInstance().getPersistentWorkers()); + CountDownLatch countDownLatch = new CountDownLatch(MetricsStreamProcessor.getInstance().getPersistentWorkers().size()); + + persistenceWorkers.forEach(worker -> { + prepareExecutorService.submit(() -> { + HistogramMetrics.Timer timer = prepareLatency.createTimer(); + try { + if (log.isDebugEnabled()) { + log.debug("extract {} worker data and save", worker.getClass().getName()); + } + List<PrepareRequest> innerPrepareRequests = new ArrayList<>(5000); + worker.buildBatchRequests(innerPrepareRequests); + synchronized (prepareRequests) { + prepareRequests.addAll(innerPrepareRequests); + if (prepareRequests.size() >= maxSyncoperationNum) { + prepareRequests.notify(); + } + } + worker.endOfRound(System.currentTimeMillis() - lastTime); + } finally { + timer.finish(); + countDownLatch.countDown(); } - - worker.buildBatchRequests(prepareRequests); - - worker.endOfRound(System.currentTimeMillis() - lastTime); }); + }); + + Future<?> batchFuture = batchExecutorService.submit(() -> { + List<Future<?>> results = new ArrayList<>(); + while (true) { + synchronized (prepareRequests) { + if (prepareDone && CollectionUtils.isEmpty(prepareRequests)) { + break; + } + } - if (debug) { - log.info("build batch persistence duration: {} ms", System.currentTimeMillis() - startTime); - } - } finally { - timer.finish(); - } + synchronized (prepareRequests) { + while (this.prepareRequests.size() < maxSyncoperationNum && !prepareDone) { + try { + this.prepareRequests.wait(1000); + } catch (InterruptedException e) { + } + } - HistogramMetrics.Timer executeLatencyTimer = executeLatency.createTimer(); - try { - List<List<PrepareRequest>> partitions = Lists.partition(prepareRequests, maxSyncoperationNum); - CountDownLatch countDownLatch = new CountDownLatch(partitions.size()); - for (final List<PrepareRequest> partition : partitions) { - executorService.submit(() -> { + if (CollectionUtils.isEmpty(prepareRequests)) { + continue; + } + } + + List<PrepareRequest> partition = null; + synchronized (prepareRequests) { + List<PrepareRequest> prepareRequestList = this.prepareRequests.subList(0, Math.min(maxSyncoperationNum, this.prepareRequests.size())); + partition = new ArrayList<>(prepareRequestList); + prepareRequestList.clear(); Review comment: If we did not clear the sublist cause always get the same range from `this.prepareRequests` https://github.com/apache/skywalking/pull/7153#discussion_r656046059 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org