adarshsanjeev commented on code in PR #15953: URL: https://github.com/apache/druid/pull/15953#discussion_r1538634834
########## extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/querykit/results/ExportResultsFrameProcessorFactory.java: ########## @@ -88,7 +93,7 @@ public ExportStorageProvider getExportStorageProvider() } @Override - public ProcessorsAndChannels<Object, Long> makeProcessors( + public ProcessorsAndChannels<Object, Object> makeProcessors( Review Comment: Changing the type here would result in the arguments in the call to mergeResults also having List<String>, which would cause a cast exception to be thrown instead of a nicer one, which we would like to avoid, right? ########## extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportMetadataManager.java: ########## @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.druid.msq.exec; + +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConnector; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.util.List; + +/** + * Manages writing of metadata files during export queries. + */ +public class ExportMetadataManager +{ + public static final String SYMLINK_DIR = "_symlink_format_manifest"; + public static final String MANIFEST_FILE = SYMLINK_DIR + "/manifest"; + public static final String META_FILE = SYMLINK_DIR + "/druid_export_meta"; + public static final int MANIFEST_FILE_VERSION = 1; + private static final Logger log = new Logger(ExportMetadataManager.class); + private final ExportStorageProvider exportStorageProvider; + + public ExportMetadataManager(final ExportStorageProvider exportStorageProvider) + { + this.exportStorageProvider = exportStorageProvider; + } + + public void writeMetadata(List<String> exportedFiles) throws IOException + { + final StorageConnector storageConnector = exportStorageProvider.get(); + log.info("Writing manifest file at [%s]", exportStorageProvider.getBasePath()); + + if (storageConnector.pathExists(MANIFEST_FILE) || storageConnector.pathExists(META_FILE)) { + throw DruidException.defensive("Found existing manifest file already present at path."); Review Comment: Changed ########## extensions-core/multi-stage-query/src/main/java/org/apache/druid/msq/exec/ExportMetadataManager.java: ########## @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.msq.exec; + +import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.StringUtils; +import org.apache.druid.java.util.common.logger.Logger; +import org.apache.druid.storage.ExportStorageProvider; +import org.apache.druid.storage.StorageConnector; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.PrintWriter; +import java.nio.charset.StandardCharsets; +import java.util.List; + +/** + * Manages writing of metadata files during export queries. 
+ */ +public class ExportMetadataManager +{ + public static final String SYMLINK_DIR = "_symlink_format_manifest"; + public static final String MANIFEST_FILE = SYMLINK_DIR + "/manifest"; + public static final String META_FILE = SYMLINK_DIR + "/druid_export_meta"; + public static final int MANIFEST_FILE_VERSION = 1; + private static final Logger log = new Logger(ExportMetadataManager.class); + private final ExportStorageProvider exportStorageProvider; + + public ExportMetadataManager(final ExportStorageProvider exportStorageProvider) + { + this.exportStorageProvider = exportStorageProvider; + } + + public void writeMetadata(List<String> exportedFiles) throws IOException + { + final StorageConnector storageConnector = exportStorageProvider.get(); + log.info("Writing manifest file at [%s]", exportStorageProvider.getBasePath()); + + if (storageConnector.pathExists(MANIFEST_FILE) || storageConnector.pathExists(META_FILE)) { + throw DruidException.defensive("Found existing manifest file already present at path."); + } + + createManifestFile(storageConnector, exportedFiles); + createDruidMetadataFile(storageConnector); + } + + /** + * Creates a manifest file containing the list of files created by the export query. The manifest file consists of a + * new line separated list. Each line contains the absolute path to a file created by the export. + */ + public void createManifestFile(StorageConnector storageConnector, List<String> exportedFiles) throws IOException + { + try (PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(storageConnector.write(MANIFEST_FILE), StandardCharsets.UTF_8))) { + for (String exportedFile : exportedFiles) { + printWriter.println(exportStorageProvider.getFilePathForManifest(exportedFile)); + } + } + } + + /** + * Creates a druid metadata file at the export location. This file contains extra information about the export, which + * cannot be stored in the manifest directly, so that it can follow the symlink format. 
+ * <br> + * Currently, this only contains the manifest file version. + */ + private void createDruidMetadataFile(StorageConnector storageConnector) throws IOException Review Comment: Removed this documentation ########## docs/multi-stage-query/reference.md: ########## @@ -99,6 +99,17 @@ For more information, see [Read external data with EXTERN](concepts.md#read-exte This variation of EXTERN requires one argument, the details of the destination as specified below. This variation additionally requires an `AS` clause to specify the format of the exported rows. +While exporting data, some metadata files will also be created at the destination in addition to the data. These files will be created in a directory `_symlink_format_manifest`. +- `_symlink_format_manifest/manifest`: Lists the files which were created as part of the export. The file is in the symlink manifest format, and consists of a list of absolute paths to the files created. +```text +s3://export-bucket/export/query-6564a32f-2194-423a-912e-eead470a37c4-worker2-partition2.csv +s3://export-bucket/export/query-6564a32f-2194-423a-912e-eead470a37c4-worker1-partition1.csv +s3://export-bucket/export/query-6564a32f-2194-423a-912e-eead470a37c4-worker0-partition0.csv +... +s3://export-bucket/export/query-6564a32f-2194-423a-912e-eead470a37c4-worker0-partition24.csv +``` +- `_symlink_format_manifest/druid_export_meta`: Used to store additional information about the export metadata, such as the version of the manifest file format. Review Comment: Removed this part since it is not intended to be user facing. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
