adelapena commented on code in PR #2498: URL: https://github.com/apache/cassandra/pull/2498#discussion_r1269270063
########## src/java/org/apache/cassandra/index/sai/disk/v1/sortedterms/SortedTermsTrieSearcher.java: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.disk.v1.sortedterms; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.cassandra.index.sai.disk.v1.trie.TriePrefixSearcher; +import org.apache.cassandra.io.tries.Walker; +import org.apache.cassandra.io.util.Rebufferer; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; + +public class SortedTermsTrieSearcher implements AutoCloseable +{ + List<TrieSegment> segments; Review Comment: Nit: can be `private final` ########## src/java/org/apache/cassandra/index/sai/disk/v1/sortedterms/SortedTermsTrieSearcher.java: ########## @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.index.sai.disk.v1.sortedterms; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.cassandra.index.sai.disk.v1.trie.TriePrefixSearcher; +import org.apache.cassandra.io.tries.Walker; +import org.apache.cassandra.io.util.Rebufferer; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; + +public class SortedTermsTrieSearcher implements AutoCloseable +{ + List<TrieSegment> segments; + + public SortedTermsTrieSearcher(Rebufferer source, SortedTermsMeta meta) + { + this.segments = new ArrayList<>(meta.segments.size()); + for (SortedTermsMeta.SortedTermsSegmentMeta segmentMeta : meta.segments) + this.segments.add(new TrieSegment(source, segmentMeta)); + + } + + public long prefixSearch(ByteComparable term) + { + for (TrieSegment segment : segments) + { + if (segment.includesTerm(term)) + return segment.prefixSearch(term); + } + return Long.MAX_VALUE; + } + + @Override + public void close() throws Exception + { + segments.forEach(TrieSegment::close); + } + + private class TrieSegment implements AutoCloseable Review Comment: Nit: can be `static` ########## src/java/org/apache/cassandra/index/sai/disk/v1/sortedterms/SortedTermsMeta.java: ########## @@ -19,30 +19,87 @@ package org.apache.cassandra.index.sai.disk.v1.sortedterms; import java.io.IOException; +import 
java.util.ArrayList; +import java.util.List; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; /** * Metadata produced by {@link SortedTermsWriter}, needed by {@link SortedTermsReader}. */ public class SortedTermsMeta { - public final long trieFilePointer; public final long termCount; public final int maxTermLength; + public List<SortedTermsSegmentMeta> segments; public SortedTermsMeta(DataInput input) throws IOException { - this.trieFilePointer = input.readLong(); - this.termCount = input.readLong(); - this.maxTermLength = input.readInt(); + termCount = input.readLong(); + maxTermLength = input.readInt(); + int numberOfSegments = input.readInt(); + segments = new ArrayList<>(numberOfSegments); + for (int index = 0; index < numberOfSegments; index++) + segments.add(new SortedTermsSegmentMeta(input)); } - public static void write(IndexOutput output, long trieFilePointer, long termCount, int maxTermLength) throws IOException + public static void write(IndexOutput output, long termCount, int maxTermLength, List<SortedTermsSegmentMeta> segments) throws IOException { - output.writeLong(trieFilePointer); output.writeLong(termCount); output.writeInt(maxTermLength); + output.writeInt(segments.size()); + for (SortedTermsSegmentMeta segment : segments) + segment.write(output); + } + + public static class SortedTermsSegmentMeta Review Comment: Nit: being nested into `SortedTermsMeta`, it can be renamed to `Segment` or `SegmentMeta` ########## test/distributed/org/apache/cassandra/distributed/test/sai/IndexStreamingTest.java: ########## @@ -92,10 +92,10 @@ public void testIndexComponentStreaming() throws IOException )); int num_components = isLiteral ? 
sstableStreamingComponentsCount() + - V1OnDiskFormat.PER_SSTABLE_COMPONENTS.size() + + V1OnDiskFormat.SKINNY_PER_SSTABLE_COMPONENTS.size() + Review Comment: I think it would be interesting to use a wide table to see how all the possible components are streamed. Even better, we could parameterise whether the table is skinny or wide: https://github.com/adelapena/cassandra/commit/481c144105b886d1f8891011810c4b3e6cd9bf1b If the test turns out to be too slow or it OOMs, we can always split it. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]

