mad commented on code in PR #11900:
URL: https://github.com/apache/lucene/pull/11900#discussion_r2689830749


##########
lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java:
##########
@@ -46,7 +46,9 @@ public class FuzzySet implements Accountable {
 
   public static final int VERSION_SPI = 1; // HashFunction used to be loaded 
through a SPI
   public static final int VERSION_START = VERSION_SPI;
-  public static final int VERSION_CURRENT = 2;
+  public static final int VERSION_MURMUR2 = 2;
+  private static final int VERSION_MULTI_HASH = 3;
+  public static final int VERSION_CURRENT = VERSION_MULTI_HASH;

Review Comment:
   Thanks for the detailed answer!
   
   This error occurs without any code changes. The exact versions involved are 8.11.5 and 9.11.2.
   
   I made the byte-order changes shown below, and the index now opens. But since 
the hash function has also changed, the Bloom filter will most likely not work correctly.
   
   ```
   Subject: Bloom backward compatibility
   ---
   Index: lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java
   IDEA additional info:
   Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
   <+>UTF-8
   ===================================================================
   diff --git 
a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java 
b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java
   --- a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java    
(revision c389a33e4c42584f958e4a90cab756e8d9c00d24)
   +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/FuzzySet.java    
(date 1768319192474)
   @@ -17,6 +17,8 @@
    package org.apache.lucene.codecs.bloom;
    
    import java.io.IOException;
   +
   +import org.apache.lucene.codecs.CodecUtil;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.store.DataInput;
    import org.apache.lucene.store.DataOutput;
   @@ -177,30 +179,47 @@
       * </ul>
       *
       * @param out Data output stream
   +   * @param be
       * @throws IOException If there is a low-level I/O error
       */
   -  public void serialize(DataOutput out) throws IOException {
   -    out.writeVInt(hashCount);
   -    out.writeInt(bloomSize);
   +  public void serialize(DataOutput out, final boolean be) throws 
IOException {
   +    if (be) {
   +      CodecUtil.writeBEInt(out, hashCount);
   +    } else {
   +      out.writeVInt(hashCount);
   +    }
   +    if (be) {
   +      CodecUtil.writeBEInt(out, bloomSize);
   +    } else {
   +      out.writeInt(bloomSize);
   +    }
        long[] bits = filter.getBits();
   -    out.writeInt(bits.length);
   +    if (be) {
   +      CodecUtil.writeBEInt(out, bits.length);
   +    } else {
   +      out.writeInt(bits.length);
   +    }
        for (int i = 0; i < bits.length; i++) {
          // Can't used VLong encoding because cant cope with negative numbers
          // output by FixedBitSet
   -      out.writeLong(bits[i]);
   +      if (be) {
   +        CodecUtil.writeBELong(out, bits[i]);
   +      } else {
   +        out.writeLong(bits[i]);
   +      }
        }
      }
    
   -  public static FuzzySet deserialize(DataInput in) throws IOException {
   -    int hashCount = in.readVInt();
   -    int bloomSize = in.readInt();
   -    int numLongs = in.readInt();
   +  public static FuzzySet deserialize(DataInput in, final boolean be) throws 
IOException {
   +    int hashCount = be ? CodecUtil.readBEInt(in) : in.readVInt();
   +    int bloomSize = be ? CodecUtil.readBEInt(in) : in.readInt();
   +    int numLongs = be ? CodecUtil.readBEInt(in) : in.readInt();
        long[] longs = new long[numLongs];
        for (int i = 0; i < numLongs; i++) {
   -      longs[i] = in.readLong();
   +      longs[i] = be ? CodecUtil.readBELong(in) : in.readLong();
        }
        FixedBitSet bits = new FixedBitSet(longs, bloomSize + 1);
   -    return new FuzzySet(bits, bloomSize, hashCount);
   +    return new FuzzySet(bits, bloomSize, be ? 1 : hashCount);
      }
    
      private boolean mayContainValue(int aHash) {
   Index: 
lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
   IDEA additional info:
   Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
   <+>UTF-8
   ===================================================================
   diff --git 
a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
 
b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
   --- 
a/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
        (revision c389a33e4c42584f958e4a90cab756e8d9c00d24)
   +++ 
b/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
        (date 1768385603828)
   @@ -44,6 +44,7 @@
    import org.apache.lucene.store.IndexOutput;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.IOUtils;
   +import org.apache.lucene.util.Version;
    import org.apache.lucene.util.automaton.CompiledAutomaton;
    
    /**
   @@ -62,7 +63,7 @@
     *   <li>BloomFilter (.blm) --&gt; Header, DelegatePostingsFormatName, 
NumFilteredFields,
     *       Filter<sup>NumFilteredFields</sup>, Footer
     *   <li>Filter --&gt; FieldNumber, FuzzySet
   - *   <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput)}
   + *   <li>FuzzySet --&gt;See {@link FuzzySet#serialize(DataOutput, boolean)}
     *   <li>Header --&gt; {@link CodecUtil#writeIndexHeader IndexHeader}
     *   <li>DelegatePostingsFormatName --&gt; {@link 
DataOutput#writeString(String) String} The name of
     *       a ServiceProvider registered {@link PostingsFormat}
   @@ -166,10 +167,11 @@
            PostingsFormat delegatePostingsFormat = 
PostingsFormat.forName(bloomIn.readString());
    
            this.delegateFieldsProducer = 
delegatePostingsFormat.fieldsProducer(state);
   -        int numBlooms = bloomIn.readInt();
   +        boolean be = 
!state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_9_0_0);
   +        int numBlooms = be ? CodecUtil.readBEInt(bloomIn) : 
bloomIn.readInt();
            for (int i = 0; i < numBlooms; i++) {
   -          int fieldNum = bloomIn.readInt();
   -          FuzzySet bloom = FuzzySet.deserialize(bloomIn);
   +          int fieldNum = be ? CodecUtil.readBEInt(bloomIn) : 
bloomIn.readInt();
   +          FuzzySet bloom = FuzzySet.deserialize(bloomIn, be);
              FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
              bloomsByFieldName.put(fieldInfo.name, bloom);
            }
   @@ -476,13 +478,22 @@
            // remember the name of the postings format we will delegate to
            bloomOutput.writeString(delegatePostingsFormat.getName());
    
   +        boolean be = 
!state.segmentInfo.getVersion().onOrAfter(Version.LUCENE_9_0_0);
            // First field in the output file is the number of fields+blooms 
saved
   -        bloomOutput.writeInt(nonSaturatedBlooms.size());
   +        if (be) {
   +          CodecUtil.writeBEInt(bloomOutput,nonSaturatedBlooms.size());
   +        } else {
   +          bloomOutput.writeInt(nonSaturatedBlooms.size());
   +        }
            for (Entry<FieldInfo, FuzzySet> entry : nonSaturatedBlooms) {
              FieldInfo fieldInfo = entry.getKey();
              FuzzySet bloomFilter = entry.getValue();
   -          bloomOutput.writeInt(fieldInfo.number);
   -          saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, 
fieldInfo);
   +          if (be) {
   +            CodecUtil.writeBEInt(bloomOutput, fieldInfo.number);
   +          } else {
   +            bloomOutput.writeInt(fieldInfo.number);
   +          }
   +          saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, 
fieldInfo, be);
            }
            CodecUtil.writeFooter(bloomOutput);
          }
   @@ -491,13 +502,13 @@
        }
    
        private void saveAppropriatelySizedBloomFilter(
   -        IndexOutput bloomOutput, FuzzySet bloomFilter, FieldInfo fieldInfo) 
throws IOException {
   +        IndexOutput bloomOutput, FuzzySet bloomFilter, FieldInfo fieldInfo, 
boolean be) throws IOException {
    
          FuzzySet rightSizedSet = bloomFilterFactory.downsize(fieldInfo, 
bloomFilter);
          if (rightSizedSet == null) {
            rightSizedSet = bloomFilter;
          }
   -      rightSizedSet.serialize(bloomOutput);
   +      rightSizedSet.serialize(bloomOutput, be);
        }
      }
    ```
   
   I'll consider adding support for the older version.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to