thomasmueller commented on code in PR #587:
URL: https://github.com/apache/jackrabbit-oak/pull/587#discussion_r896739484
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreUtils.java:
##########
@@ -30,46 +32,67 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
-import java.util.zip.Deflater;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import static com.google.common.base.Charsets.UTF_8;
class FlatFileStoreUtils {
+ public static final String COMPRESSION_TYPE_LZ4 = "lz4";
+ public static final String COMPRESSION_TYPE_GZIP = "gz";
+ public static final String COMPRESSION_TYPE_NONE = "none";
+
+ /**
+ * A map of compression Type to ExternalSort CompressionType Enum
+ * @param compressionType string representation of the compression
algorithm, use "none" for disable compression,
+ * any unknown or unsupported value will be
considered as none.
+ * @return
+ */
+ public static ExternalSort.CompressionType
getExternalSortCompressionType(String compressionType) {
+ switch (compressionType) {
+ case COMPRESSION_TYPE_LZ4:
Review Comment:
I think it's not needed; it can be replaced with CompressionType.valueOf
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreUtils.java:
##########
@@ -30,46 +32,67 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
-import java.util.zip.Deflater;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import static com.google.common.base.Charsets.UTF_8;
class FlatFileStoreUtils {
+ public static final String COMPRESSION_TYPE_LZ4 = "lz4";
+ public static final String COMPRESSION_TYPE_GZIP = "gz";
Review Comment:
I would use "gzip" instead of "gz"... but even better, couldn't we use the
CompressionType enum in this class as well? So we don't need the String at
all...
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreUtils.java:
##########
@@ -30,46 +32,67 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
-import java.util.zip.Deflater;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import static com.google.common.base.Charsets.UTF_8;
class FlatFileStoreUtils {
+ public static final String COMPRESSION_TYPE_LZ4 = "lz4";
+ public static final String COMPRESSION_TYPE_GZIP = "gz";
+ public static final String COMPRESSION_TYPE_NONE = "none";
+
+ /**
+ * A map of compression Type to ExternalSort CompressionType Enum
+ * @param compressionType string representation of the compression
algorithm, use "none" for disable compression,
+ * any unknown or unsupported value will be
considered as none.
+ * @return
+ */
+ public static ExternalSort.CompressionType
getExternalSortCompressionType(String compressionType) {
+ switch (compressionType) {
+ case COMPRESSION_TYPE_LZ4:
+ return ExternalSort.CompressionType.LZ4;
+ case COMPRESSION_TYPE_GZIP:
+ return ExternalSort.CompressionType.GZIP;
+ case COMPRESSION_TYPE_NONE:
+ return ExternalSort.CompressionType.NONE;
+ default:
+ return ExternalSort.CompressionType.NONE;
+ }
+ }
+
public static BufferedReader createReader(File file, boolean
compressionEnabled) {
+ return createReader(file, compressionEnabled ? COMPRESSION_TYPE_GZIP :
COMPRESSION_TYPE_NONE);
+ }
+
+ public static BufferedReader createReader(File file, String
compressionType) {
try {
- BufferedReader br;
InputStream in = new FileInputStream(file);
- if (compressionEnabled) {
- br = new BufferedReader(new InputStreamReader(new
GZIPInputStream(in, 2048), UTF_8));
- } else {
- br = new BufferedReader(new InputStreamReader(in, UTF_8));
- }
- return br;
+ return new BufferedReader(new
InputStreamReader(getExternalSortCompressionType(compressionType).getInputStream(in)));
} catch (IOException e) {
throw new RuntimeException("Error opening file " + file, e);
}
}
public static BufferedWriter createWriter(File file, boolean
compressionEnabled) throws IOException {
+ return createWriter(file, compressionEnabled ? COMPRESSION_TYPE_GZIP :
COMPRESSION_TYPE_NONE);
+ }
+
+ public static BufferedWriter createWriter(File file, String
compressionType) throws IOException {
OutputStream out = new FileOutputStream(file);
- if (compressionEnabled) {
- out = new GZIPOutputStream(out, 2048) {
- {
- def.setLevel(Deflater.BEST_SPEED);
- }
- };
- }
- return new BufferedWriter(new OutputStreamWriter(out, UTF_8));
+ return new BufferedWriter(new
OutputStreamWriter(getExternalSortCompressionType(compressionType).getOutputStream(out)));
}
public static long sizeOf(List<File> sortedFiles) {
return sortedFiles.stream().mapToLong(File::length).sum();
}
- public static String getSortedStoreFileName(boolean compressionEnabled){
- return compressionEnabled ? "store-sorted.json.gz" :
"store-sorted.json";
+ public static String getSortedStoreFileName(boolean compressionEnabled) {
+ return getSortedStoreFileName(compressionEnabled ?
COMPRESSION_TYPE_GZIP : COMPRESSION_TYPE_NONE);
+ }
+
+ public static String getSortedStoreFileName(String compressionType) {
+ String name = "store-sorted.json";
+ if (compressionType.equals(COMPRESSION_TYPE_NONE)) {
+ return name;
+ }
+ return name + "." + compressionType;
Review Comment:
here, we could use CompressionType.toString().toLowerCase(Locale.ENGLISH) --
Locale.ENGLISH is important because in Turkey, sorry Türkiye, "GZIP" would be
converted to "gzıp". Notice here, the "i" is missing the dot (it's a dotless
i). Yes it's weird, it's the "Turkish Locale Problem".
We could add a method "getSuffix" to the CompressionType enum, and hardcode
the types there, that might be better. Then we can also use ".gzip" and ".lz4"
and "" (empty string for none).
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/MergeRunner.java:
##########
@@ -120,7 +121,7 @@
public class MergeRunner implements Runnable {
private static final Logger log =
LoggerFactory.getLogger(MergeRunner.class);
private final Charset charset = UTF_8;
- private final boolean compressionEnabled;
+ private final String compressionType;
Review Comment:
I would prefer CompressionType here.
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreUtils.java:
##########
@@ -30,46 +32,67 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
-import java.util.zip.Deflater;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import static com.google.common.base.Charsets.UTF_8;
class FlatFileStoreUtils {
+ public static final String COMPRESSION_TYPE_LZ4 = "lz4";
+ public static final String COMPRESSION_TYPE_GZIP = "gz";
+ public static final String COMPRESSION_TYPE_NONE = "none";
+
+ /**
+ * A map of compression Type to ExternalSort CompressionType Enum
+ * @param compressionType string representation of the compression
algorithm, use "none" for disable compression,
+ * any unknown or unsupported value will be
considered as none.
+ * @return
+ */
+ public static ExternalSort.CompressionType
getExternalSortCompressionType(String compressionType) {
+ switch (compressionType) {
+ case COMPRESSION_TYPE_LZ4:
+ return ExternalSort.CompressionType.LZ4;
+ case COMPRESSION_TYPE_GZIP:
+ return ExternalSort.CompressionType.GZIP;
+ case COMPRESSION_TYPE_NONE:
+ return ExternalSort.CompressionType.NONE;
+ default:
+ return ExternalSort.CompressionType.NONE;
+ }
+ }
+
public static BufferedReader createReader(File file, boolean
compressionEnabled) {
+ return createReader(file, compressionEnabled ? COMPRESSION_TYPE_GZIP :
COMPRESSION_TYPE_NONE);
+ }
+
+ public static BufferedReader createReader(File file, String
compressionType) {
Review Comment:
Could we use CompressionType in the method signature directly?
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/StoreAndSortStrategy.java:
##########
@@ -50,7 +51,7 @@ class StoreAndSortStrategy implements SortStrategy {
private final PathElementComparator comparator;
private final NodeStateEntryWriter entryWriter;
private final File storeDir;
- private final boolean compressionEnabled;
+ private final String compressionType;
Review Comment:
I would use CompressionType here
##########
oak-run-commons/src/main/java/org/apache/jackrabbit/oak/index/indexer/document/flatfile/FlatFileStoreUtils.java:
##########
@@ -30,46 +32,67 @@
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.List;
-import java.util.zip.Deflater;
-import java.util.zip.GZIPInputStream;
-import java.util.zip.GZIPOutputStream;
-
-import static com.google.common.base.Charsets.UTF_8;
class FlatFileStoreUtils {
+ public static final String COMPRESSION_TYPE_LZ4 = "lz4";
+ public static final String COMPRESSION_TYPE_GZIP = "gz";
+ public static final String COMPRESSION_TYPE_NONE = "none";
+
+ /**
+ * A map of compression Type to ExternalSort CompressionType Enum
+ * @param compressionType string representation of the compression
algorithm, use "none" for disable compression,
+ * any unknown or unsupported value will be
considered as none.
+ * @return
+ */
+ public static ExternalSort.CompressionType
getExternalSortCompressionType(String compressionType) {
+ switch (compressionType) {
+ case COMPRESSION_TYPE_LZ4:
+ return ExternalSort.CompressionType.LZ4;
+ case COMPRESSION_TYPE_GZIP:
+ return ExternalSort.CompressionType.GZIP;
+ case COMPRESSION_TYPE_NONE:
+ return ExternalSort.CompressionType.NONE;
+ default:
+ return ExternalSort.CompressionType.NONE;
+ }
+ }
+
public static BufferedReader createReader(File file, boolean
compressionEnabled) {
+ return createReader(file, compressionEnabled ? COMPRESSION_TYPE_GZIP :
COMPRESSION_TYPE_NONE);
+ }
+
+ public static BufferedReader createReader(File file, String
compressionType) {
try {
- BufferedReader br;
InputStream in = new FileInputStream(file);
- if (compressionEnabled) {
- br = new BufferedReader(new InputStreamReader(new
GZIPInputStream(in, 2048), UTF_8));
- } else {
- br = new BufferedReader(new InputStreamReader(in, UTF_8));
- }
- return br;
+ return new BufferedReader(new
InputStreamReader(getExternalSortCompressionType(compressionType).getInputStream(in)));
} catch (IOException e) {
throw new RuntimeException("Error opening file " + file, e);
}
}
public static BufferedWriter createWriter(File file, boolean
compressionEnabled) throws IOException {
+ return createWriter(file, compressionEnabled ? COMPRESSION_TYPE_GZIP :
COMPRESSION_TYPE_NONE);
Review Comment:
I would use CompressionType here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]