andyvuong commented on a change in pull request #1195: SOLR-13101: Log accurate file counts for Push and Pull and CorePushPull URL: https://github.com/apache/lucene-solr/pull/1195#discussion_r370380158
########## File path: solr/core/src/java/org/apache/solr/store/blob/metadata/CorePushPull.java ########## @@ -419,45 +433,51 @@ protected String pushFileToBlobStore(CoreStorageClient blob, Directory dir, Stri /** * Logs soblb line for push or pull action - * TODO: This is for callers of this method. - * fileAffected and bytesTransferred represent correct values only in case of success - * In case of failure(partial processing) we are not accurate. - * Do we want to change that? If yes, then in case of pull is downloading of files locally to temp folder is considered - * transfer or moving from temp dir to final destination. One option could be to just make them -1 in case of failure. */ - private void logBlobAction(String action, long filesAffected, long bytesTransferred, long requestQueuedTimeMs, int attempt, long startTimeMs) throws Exception { + private void logBlobAction(String action, long expectedFilesAffected, long actualFilesAffected, long bytesTransferred, boolean isSuccessful, + long requestQueuedTimeMs, int attempt, long startTimeMs) throws Exception { long now = System.nanoTime(); long runTime = now - startTimeMs; long startLatency = now - requestQueuedTimeMs; String message = String.format(Locale.ROOT, "PushPullData=[%s] action=%s storageProvider=%s bucketRegion=%s bucketName=%s " - + "runTime=%s startLatency=%s bytesTransferred=%s attempt=%s filesAffected=%s localGeneration=%s blobGeneration=%s ", + + "runTime=%s startLatency=%s bytesTransferred=%s attempt=%s expectedFilesAffected=%s actualFilesAffected=%s isSuccessful=%s " + + "localGeneration=%s blobGeneration=%s ", pushPullData.toString(), action, coreStorageClient.getStorageProvider().name(), coreStorageClient.getBucketRegion(), - coreStorageClient.getBucketName(), runTime, startLatency, bytesTransferred, attempt, filesAffected, - solrServerMetadata.getGeneration(), blobMetadata.getGeneration()); + coreStorageClient.getBucketName(), runTime, startLatency, bytesTransferred, attempt, expectedFilesAffected, actualFilesAffected, + isSuccessful, solrServerMetadata.getGeneration(), blobMetadata.getGeneration()); log.info(message); } /** * Downloads files from the Blob store * @param destDir (temporary) directory into which files should be downloaded. * @param filesToDownload blob files to be downloaded + * @return number of files downloaded and number of bytes transferred successfully */ @VisibleForTesting - protected void downloadFilesFromBlob(Directory destDir, Collection<? extends BlobFile> filesToDownload) throws Exception { + protected Pair<Long, Long> downloadFilesFromBlob(Directory destDir, Collection<? extends BlobFile> filesToDownload) throws Exception { // Synchronously download all Blob blobs (remember we're running on an async thread, so no need to be async twice unless // we eventually want to parallelize downloads of multiple blobs, but for the PoC we don't :) + long filesDownloaded = 0; + long bytesTransferred = 0; for (BlobFile bf: filesToDownload) { log.info("About to create " + bf.getSolrFileName() + " for core " + pushPullData.getCoreName() + " from index on blob " + pushPullData.getSharedStoreName()); - IndexOutput io = destDir.createOutput(bf.getSolrFileName(), DirectoryFactory.IOCONTEXT_NO_CACHE); - try (OutputStream outStream = new IndexOutputStream(io); - InputStream bis = coreStorageClient.pullStream(bf.getBlobName())) { - IOUtils.copy(bis, outStream); - } + try (IndexOutput io = destDir.createOutput(bf.getSolrFileName(), DirectoryFactory.IOCONTEXT_NO_CACHE); Review comment: Looked at the code and `OutputStream outStream = new IndexOutputStream(io)` was already in the try-with block and will end up closing the IndexOutput you moved. But the change has no affect if close is called twice so no change needed here, just wanted to note that. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org