This is an automated email from the ASF dual-hosted git repository. atri pushed a commit to branch new_pdistbuffer in repository https://gitbox.apache.org/repos/asf/pinot.git
commit 646b590320ac420f499583247e0f3c75480455f6 Author: Atri Sharma <[email protected]> AuthorDate: Mon Jul 17 15:49:17 2023 +0530 Move Native Text Indices to ByteBuffer Instead of InputStreams Native text indices use immutable FSTs which have been using InputStreams which are inefficient. This commit moves the same to ByteBuffers. --- .../index/readers/text/NativeTextIndexReader.java | 4 +- .../pinot/segment/local/utils/nativefst/FST.java | 38 ++++++++--------- .../segment/local/utils/nativefst/FSTHeader.java | 10 ++--- .../local/utils/nativefst/ImmutableFST.java | 23 +++++----- .../utils/nativefst/NativeFSTIndexReader.java | 4 +- .../utils/nativefst/FSTRegexpWithWeirdTest.java | 4 +- .../local/utils/nativefst/FSTSanityTest.java | 4 +- .../local/utils/nativefst/FSTTraversalTest.java | 35 ++++++++++------ .../nativefst/ImmutableFSTDeserializedTest.java | 11 +++-- .../local/utils/nativefst/ImmutableFSTTest.java | 49 ++++++++++++++++------ .../local/utils/nativefst/SerializerTestBase.java | 6 +-- 11 files changed, 107 insertions(+), 81 deletions(-) diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java index 3650e3531f..97d704a5b0 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/readers/text/NativeTextIndexReader.java @@ -23,8 +23,6 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; -import java.util.Collections; -import org.apache.avro.util.ByteBufferInputStream; import org.apache.pinot.segment.local.segment.creator.impl.text.NativeTextIndexCreator; import org.apache.pinot.segment.local.segment.index.readers.BitmapInvertedIndexReader; import org.apache.pinot.segment.local.utils.nativefst.FST; @@ -89,7 +87,7 @@ public class NativeTextIndexReader implements TextIndexReader { long fstDataEndOffset = fstDataStartOffset + fstDataLength; ByteBuffer byteBuffer = _buffer.toDirectByteBuffer(fstDataStartOffset, fstDataLength); try { - _fst = FST.read(new ByteBufferInputStream(Collections.singletonList(byteBuffer)), ImmutableFST.class, true); + _fst = FST.read(byteBuffer, ImmutableFST.class, true); } catch (IOException e) { throw new RuntimeException(e); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FST.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FST.java index 3471b6ed80..202b0ab4f1 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FST.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FST.java @@ -21,7 +21,7 @@ package org.apache.pinot.segment.local.utils.nativefst; import java.io.ByteArrayOutputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; +import java.nio.BufferUnderflowException; import java.nio.ByteBuffer; import java.util.BitSet; import java.util.Collections; @@ -43,20 +43,18 @@ import org.apache.pinot.segment.spi.memory.PinotDataBufferMemoryManager; */ public abstract class FST implements Iterable<ByteBuffer> { /** - * @param in The input stream. + * @param byteBuffer The input bytebuffer * @param length Length of input to be read * @return Reads remaining bytes upto length from an input stream and returns * them as a byte array. Null if no data was read * @throws IOException Rethrown if an I/O exception occurs. */ - protected static byte[] readRemaining(InputStream in, int length) - throws IOException { + protected static byte[] readRemaining(ByteBuffer byteBuffer, int length) { byte[] buf = new byte[length]; - int readLen; - - readLen = in.read(buf, 0, length); - if (readLen == -1) { + try { + byteBuffer.get(buf, 0, length); + } catch (BufferUnderflowException e) { return null; } @@ -66,29 +64,28 @@ public abstract class FST implements Iterable<ByteBuffer> { /** * Wrapper for the main read function */ - public static FST read(InputStream stream) + public static FST read(ByteBuffer buffer) throws IOException { - return read(stream, false, new DirectMemoryManager(FST.class.getName())); + return read(buffer, false, new DirectMemoryManager(FST.class.getName())); } /** * A factory for reading automata in any of the supported versions. * - * @param stream - * The input stream to read automaton data from. The stream is not - * closed. + * @param buffer + * The input byte buffer. * @return Returns an instantiated automaton. Never null. * @throws IOException * If the input stream does not represent an automaton or is * otherwise invalid. */ - public static FST read(InputStream stream, boolean hasOutputSymbols, PinotDataBufferMemoryManager memoryManager) + public static FST read(ByteBuffer buffer, boolean hasOutputSymbols, PinotDataBufferMemoryManager memoryManager) throws IOException { - FSTHeader header = FSTHeader.read(stream); + FSTHeader header = FSTHeader.read(buffer); switch (header._version) { case ImmutableFST.VERSION: - return new ImmutableFST(stream, hasOutputSymbols, memoryManager); + return new ImmutableFST(buffer, hasOutputSymbols, memoryManager); default: throw new IOException( String.format(Locale.ROOT, "Unsupported automaton version: 0x%02x", header._version & 0xFF)); @@ -98,9 +95,8 @@ public abstract class FST implements Iterable<ByteBuffer> { /** * A factory for reading a specific FST subclass, including proper casting. * - * @param stream - * The input stream to read automaton data from. The stream is not - * closed. + * @param in + * The input bytebuffer. * @param clazz A subclass of {@link FST} to cast the read automaton to. * @param <T> A subclass of {@link FST} to cast the read automaton to. * @return Returns an instantiated automaton. Never null. @@ -109,9 +105,9 @@ public abstract class FST implements Iterable<ByteBuffer> { * invalid or the class of the automaton read from the input stream * is not assignable to <code>clazz</code>. */ - public static <T extends FST> T read(InputStream stream, Class<? extends T> clazz, boolean hasOutputSymbols) + public static <T extends FST> T read(ByteBuffer in, Class<? extends T> clazz, boolean hasOutputSymbols) throws IOException { - FST fst = read(stream, hasOutputSymbols, new DirectMemoryManager(FST.class.getName())); + FST fst = read(in, hasOutputSymbols, new DirectMemoryManager(FST.class.getName())); if (!clazz.isInstance(fst)) { throw new IOException( String.format(Locale.ROOT, "Expected FST type %s, but read an incompatible type %s.", clazz.getName(), diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java index 6f67fcb79a..ef38fcad08 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/FSTHeader.java @@ -19,8 +19,8 @@ package org.apache.pinot.segment.local.utils.nativefst; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; +import java.nio.ByteBuffer; /** @@ -47,14 +47,14 @@ public final class FSTHeader { * @return Returns a valid {@link FSTHeader} with version information. * @throws IOException If the stream ends prematurely or if it contains invalid data. */ - public static FSTHeader read(InputStream in) + public static FSTHeader read(ByteBuffer in) throws IOException { - if (in.read() != ((FST_MAGIC >>> 24)) || in.read() != ((FST_MAGIC >>> 16) & 0xff) || in.read() != ((FST_MAGIC >>> 8) - & 0xff) || in.read() != ((FST_MAGIC) & 0xff)) { + if (in.get() != ((FST_MAGIC >>> 24)) || in.get() != ((FST_MAGIC >>> 16) & 0xff) || in.get() != ((FST_MAGIC >>> 8) + & 0xff) || in.get() != ((FST_MAGIC) & 0xff)) { throw new IOException("Invalid file header, probably not an FST."); } - int version = in.read(); + int version = in.get(); if (version == -1) { throw new IOException("Truncated file, no version number."); } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFST.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFST.java index 7b36d2e3d3..5f4e87efba 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFST.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFST.java @@ -18,9 +18,7 @@ */ package org.apache.pinot.segment.local.utils.nativefst; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; +import java.nio.ByteBuffer; import java.util.Collections; import java.util.EnumSet; import java.util.Map; @@ -162,13 +160,11 @@ public final class ImmutableFST extends FST { /** * Read and wrap a binary automaton in FST version 5. */ - ImmutableFST(InputStream stream, boolean hasOutputSymbols, PinotDataBufferMemoryManager memoryManager) - throws IOException { - DataInputStream in = new DataInputStream(stream); + ImmutableFST(ByteBuffer in, boolean hasOutputSymbols, PinotDataBufferMemoryManager memoryManager) { - _filler = in.readByte(); - _annotation = in.readByte(); - final byte hgtl = in.readByte(); + _filler = in.get(); + _annotation = in.get(); + final byte hgtl = in.get(); _mutableBytesStore = new OffHeapMutableBytesStore(memoryManager, "ImmutableFST"); @@ -187,7 +183,7 @@ public final class ImmutableFST extends FST { _gotoLength = hgtl & 0x0f; if (hasOutputSymbols) { - final int outputSymbolsLength = in.readInt(); + final int outputSymbolsLength = in.getInt(); byte[] outputSymbolsBuffer = readRemaining(in, outputSymbolsLength); if (outputSymbolsBuffer.length > 0) { @@ -200,10 +196,11 @@ public final class ImmutableFST extends FST { readRemaining(in); } - private void readRemaining(InputStream in) - throws IOException { + private void readRemaining(ByteBuffer in) { byte[] buffer = new byte[PER_BUFFER_SIZE]; - while ((in.read(buffer)) >= 0) { + while (in.hasRemaining()) { + int dataSize = in.remaining() > PER_BUFFER_SIZE ? PER_BUFFER_SIZE : in.remaining(); + in.get(buffer, 0, dataSize); _mutableBytesStore.add(buffer); } } diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexReader.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexReader.java index 0a03389336..578f7963a7 100644 --- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexReader.java +++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/nativefst/NativeFSTIndexReader.java @@ -20,8 +20,6 @@ package org.apache.pinot.segment.local.utils.nativefst; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.Collections; -import org.apache.avro.util.ByteBufferInputStream; import org.apache.pinot.segment.local.utils.nativefst.utils.RegexpMatcher; import org.apache.pinot.segment.spi.index.reader.TextIndexReader; import org.apache.pinot.segment.spi.memory.PinotDataBuffer; @@ -49,7 +47,7 @@ public class NativeFSTIndexReader implements TextIndexReader { throws IOException { // TODO: Implement an InputStream directly on PinotDataBuffer ByteBuffer byteBuffer = dataBuffer.toDirectByteBuffer(0, (int) dataBuffer.size()); - _fst = FST.read(new ByteBufferInputStream(Collections.singletonList(byteBuffer)), ImmutableFST.class, true); + _fst = FST.read(byteBuffer, ImmutableFST.class, true); } @Override diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTRegexpWithWeirdTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTRegexpWithWeirdTest.java index c27c8e1050..02c6d54790 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTRegexpWithWeirdTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTRegexpWithWeirdTest.java @@ -18,9 +18,9 @@ */ package org.apache.pinot.segment.local.utils.nativefst; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.Arrays; import org.apache.pinot.segment.local.utils.nativefst.builder.FSTBuilder; @@ -66,7 +66,7 @@ public class FSTRegexpWithWeirdTest { FST fst = fstBuilder.complete(); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - _fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + _fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); } @Test diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTSanityTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTSanityTest.java index 96e7404320..479fc2437a 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTSanityTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTSanityTest.java @@ -19,10 +19,10 @@ package org.apache.pinot.segment.local.utils.nativefst; import java.io.BufferedReader; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.ByteBuffer; import java.util.List; import java.util.Objects; import java.util.SortedMap; @@ -60,7 +60,7 @@ public class FSTSanityTest { FST fst = FSTBuilder.buildFST(input); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - _nativeFST = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + _nativeFST = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); _fst = org.apache.pinot.segment.local.utils.fst.FSTBuilder.buildFST(input); } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTTraversalTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTTraversalTest.java index f22c7ab0aa..39b3a09ffc 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTTraversalTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/FSTTraversalTest.java @@ -19,14 +19,16 @@ package org.apache.pinot.segment.local.utils.nativefst; import com.google.common.collect.Sets; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; import java.util.Arrays; +import java.util.EnumSet; import java.util.SortedMap; import java.util.TreeMap; import org.apache.pinot.segment.local.io.writer.impl.DirectMemoryManager; @@ -61,8 +63,12 @@ public class FSTTraversalTest { @BeforeClass public void setUp() throws Exception { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("data/en_tst.dict")) { - _fst = FST.read(inputStream, false, new DirectMemoryManager(FSTTraversalTest.class.getName())); + try { + ByteBuffer buffer = + ByteBuffer.wrap(getClass().getClassLoader().getResourceAsStream("data/en_tst.dict").readAllBytes()); + _fst = FST.read(buffer, false, new DirectMemoryManager(FSTTraversalTest.class.getName())); + } catch (IOException e) { + throw e; } String regexTestInputString = @@ -86,7 +92,7 @@ public class FSTTraversalTest { Arrays.asList("a".getBytes(UTF_8), "ab".getBytes(UTF_8), "abc".getBytes(UTF_8), "ad".getBytes(UTF_8), "bcd".getBytes(UTF_8), "bce".getBytes(UTF_8)), new int[]{10, 11, 12, 13, 14, 15}); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); FSTTraversal fstTraversal = new FSTTraversal(fst); assertEquals(fstTraversal.match("a".getBytes(UTF_8))._kind, EXACT_MATCH); @@ -153,7 +159,10 @@ public class FSTTraversalTest { public void testMatch() throws IOException { File file = new File("./src/test/resources/data/abc.native.fst"); - FST fst = FST.read(new FileInputStream(file), false, new DirectMemoryManager(FSTTraversalTest.class.getName())); + FileChannel fileChannel = (FileChannel) Files.newByteChannel( + file.toPath(), EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + FST fst = FST.read(mappedByteBuffer, false, new DirectMemoryManager(FSTTraversalTest.class.getName())); FSTTraversal traversalHelper = new FSTTraversal(fst); MatchResult m = traversalHelper.match("ax".getBytes()); @@ -184,7 +193,7 @@ public class FSTTraversalTest { FST fst = builder.complete(); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); RoaringBitmapWriter<MutableRoaringBitmap> writer = RoaringBitmapWriter.bufferWriter().get(); RegexpMatcher.regexMatch("h.*", fst, writer::add); @@ -204,7 +213,7 @@ public class FSTTraversalTest { FST fst = builder.complete(); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); RoaringBitmapWriter<MutableRoaringBitmap> writer = RoaringBitmapWriter.bufferWriter().get(); RegexpMatcher.regexMatch(".*h", fst, writer::add); @@ -222,7 +231,7 @@ public class FSTTraversalTest { FST fst = FSTBuilder.buildFST(input); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); RoaringBitmapWriter<MutableRoaringBitmap> writer = RoaringBitmapWriter.bufferWriter().get(); RegexpMatcher.regexMatch(".*123", fst, writer::add); @@ -246,7 +255,7 @@ public class FSTTraversalTest { FST fst = FSTBuilder.buildFST(input); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); RoaringBitmapWriter<MutableRoaringBitmap> writer = RoaringBitmapWriter.bufferWriter().get(); RegexpMatcher.regexMatch("hello.*123", fst, writer::add); @@ -270,7 +279,7 @@ public class FSTTraversalTest { FST fst = FSTBuilder.buildFST(input); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); ImmutableFST.printToString(fst); } @@ -284,7 +293,7 @@ public class FSTTraversalTest { FST fst = FSTBuilder.buildFST(input); byte[] fstData = new FSTSerializerImpl().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = FST.read(new ByteArrayInputStream(fstData), ImmutableFST.class, true); + fst = FST.read(ByteBuffer.wrap(fstData), ImmutableFST.class, true); RoaringBitmapWriter<MutableRoaringBitmap> writer = RoaringBitmapWriter.bufferWriter().get(); RegexpMatcher.regexMatch("cars?", fst, writer::add); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTDeserializedTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTDeserializedTest.java index a5df1f860e..f0c4f58237 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTDeserializedTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTDeserializedTest.java @@ -18,7 +18,8 @@ */ package org.apache.pinot.segment.local.utils.nativefst; -import java.io.InputStream; +import java.nio.ByteBuffer; + import org.apache.pinot.segment.local.io.writer.impl.DirectMemoryManager; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -36,8 +37,12 @@ public class ImmutableFSTDeserializedTest { @BeforeClass public void setUp() throws Exception { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("data/serfst.txt")) { - _fst = FST.read(inputStream, true, new DirectMemoryManager(ImmutableFSTDeserializedTest.class.getName())); + try { + ByteBuffer buffer = + ByteBuffer.wrap(getClass().getClassLoader().getResourceAsStream("data/serfst.txt").readAllBytes()); + _fst = FST.read(buffer, true, new DirectMemoryManager(ImmutableFSTDeserializedTest.class.getName())); + } catch (Exception e) { + throw new RuntimeException(e.getMessage()); } } diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTTest.java index 78d4d161e5..78f97ff60f 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTTest.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/ImmutableFSTTest.java @@ -19,13 +19,16 @@ package org.apache.pinot.segment.local.utils.nativefst; import java.io.File; -import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; -import java.io.InputStream; import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Files; +import java.nio.file.StandardOpenOption; import java.util.ArrayList; import java.util.Arrays; +import java.util.EnumSet; import java.util.List; import org.apache.commons.io.FileUtils; import org.apache.pinot.segment.local.utils.nativefst.builder.FSTBuilder; @@ -77,20 +80,28 @@ public final class ImmutableFSTTest { @Test public void testVersion5() throws IOException { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("data/abc.native.fst")) { - FST fst = FST.read(inputStream); + try { + ByteBuffer buffer = + ByteBuffer.wrap(getClass().getClassLoader().getResourceAsStream("data/abc.native.fst").readAllBytes()); + FST fst = FST.read(buffer); assertFalse(fst.getFlags().contains(FSTFlags.NUMBERS)); verifyContent(fst, _expected); + } catch (IOException e) { + throw e; } } @Test public void testVersion5WithNumbers() throws IOException { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("data/abc-numbers.native.fst")) { - FST fst = FST.read(inputStream); + try { + ByteBuffer buffer = ByteBuffer.wrap( + getClass().getClassLoader().getResourceAsStream("data/abc-numbers.native.fst").readAllBytes()); + FST fst = FST.read(buffer); assertTrue(fst.getFlags().contains(FSTFlags.NUMBERS)); verifyContent(fst, _expected); + } catch (IOException e) { + throw e; } } @@ -98,11 +109,15 @@ public final class ImmutableFSTTest { public void testArcsAndNodes() throws IOException { for (String resourceName : new String[]{"data/abc.native.fst", "data/abc-numbers.native.fst"}) { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream(resourceName)) { - FST fst = FST.read(inputStream); + try { + ByteBuffer buffer = + ByteBuffer.wrap(getClass().getClassLoader().getResourceAsStream(resourceName).readAllBytes()); + FST fst = FST.read(buffer); FSTInfo fstInfo = new FSTInfo(fst); assertEquals(fstInfo._nodeCount, 4); assertEquals(fstInfo._arcsCount, 7); + } catch (IOException e) { + throw e; } } } @@ -110,8 +125,10 @@ public final class ImmutableFSTTest { @Test public void testNumbers() throws IOException { - try (InputStream inputStream = getClass().getClassLoader().getResourceAsStream("data/abc-numbers.native.fst")) { - FST fst = FST.read(inputStream); + try { + ByteBuffer in = ByteBuffer.wrap( + getClass().getClassLoader().getResourceAsStream("data/abc-numbers.native.fst").readAllBytes()); + FST fst = FST.read(in); assertTrue(fst.getFlags().contains(FSTFlags.NEXTBIT)); // Get all numbers for nodes. @@ -121,6 +138,8 @@ public final class ImmutableFSTTest { result.sort(null); assertEquals(result, Arrays.asList("0 c", "1 b", "2 ba", "3 a", "4 ac", "5 aba")); + } catch (IOException e) { + throw e; } } @@ -138,8 +157,14 @@ public final class ImmutableFSTTest { File fstFile = new File(FileUtils.getTempDirectory(), "test.native.fst"); fst.save(new FileOutputStream(fstFile)); - try (FileInputStream inputStream = new FileInputStream(fstFile)) { - verifyContent(FST.read(inputStream, ImmutableFST.class, true), inputList); + try { + FileChannel fileChannel = (FileChannel) Files.newByteChannel( + fstFile.toPath(), EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + + verifyContent(FST.read(mappedByteBuffer, ImmutableFST.class, true), inputList); + } catch (IOException e) { + throw e; } FileUtils.deleteQuietly(fstFile); diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/SerializerTestBase.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/SerializerTestBase.java index 82af070da8..043f303f14 100644 --- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/SerializerTestBase.java +++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/nativefst/SerializerTestBase.java @@ -18,7 +18,6 @@ */ package org.apache.pinot.segment.local.utils.nativefst; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.ByteBuffer; @@ -135,7 +134,7 @@ public abstract class SerializerTestBase { private void checkSerialization0(FSTSerializer serializer, final byte[][] in, FST root, boolean hasOutputSymbols) throws IOException { byte[] fstData = serializer.serialize(root, new ByteArrayOutputStream()).toByteArray(); - FST fst = FST.read(new ByteArrayInputStream(fstData), hasOutputSymbols, + FST fst = FST.read(ByteBuffer.wrap(fstData), hasOutputSymbols, new DirectMemoryManager(SerializerTestBase.class.getName())); checkCorrect(in, fst); } @@ -176,8 +175,7 @@ public abstract class SerializerTestBase { FST fst = FSTBuilder.build(input, new int[]{10, 11, 12, 13}); byte[] fstData = createSerializer().withNumbers().serialize(fst, new ByteArrayOutputStream()).toByteArray(); - fst = - FST.read(new ByteArrayInputStream(fstData), true, new DirectMemoryManager(SerializerTestBase.class.getName())); + fst = FST.read(ByteBuffer.wrap(fstData), true, new DirectMemoryManager(SerializerTestBase.class.getName())); // Ensure we have the NUMBERS flag set. assertTrue(fst.getFlags().contains(NUMBERS)); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
