eribeiro commented on a change in pull request #864: SOLR-13101 : Shared storage support in SolrCloud
URL: https://github.com/apache/lucene-solr/pull/864#discussion_r324026158
 
 

 ##########
 File path: solr/core/src/java/org/apache/solr/store/blob/client/BlobCoreMetadata.java
 ##########
 @@ -0,0 +1,284 @@
+package org.apache.solr.store.blob.client;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.UUID;
+
+/**
+ * Object defining metadata stored in blob store for a Shared Collection shard and its builders.
+ * This metadata includes all actual segment files as well as the segments_N file of the commit point.
+ * 
+ * This object is serialized to/from Json and stored in the blob store as a blob.
+ */
+public class BlobCoreMetadata {
+
+    /**
+     * Name of the shard index data that is shared by all replicas belonging to that shard. This
+     * name is to decouple the core name that Solr manages from the name of the core on blob store.
+     */
+    private final String sharedBlobName;
+
+    /**
+     * Unique identifier of this metadata, that changes on every update to the metadata (except generating a new corrupt metadata
+     * through {@link #getCorruptOf}).
+     */
+    private final String uniqueIdentifier;
+
+    /**
+     * Indicates that a Solr (search) server pulled this core and was then unable to open or use it. This flag is used as
+     * an indication to servers pushing blobs for that core into Blob Store to push a complete set of files if they have
+     * a locally working copy rather than just diffs (files missing on Blob Store).
+     */
+    private final boolean isCorrupt;
+
+    /**
+     * Indicates that this core has been deleted by the client. This flag is used as a marker to prevent other servers
+     * from pushing their version of this core to blob and to allow local copy cleanup.
+     */
+    private final boolean isDeleted;
+
+    /**
+     * The array of files that constitute the current commit point of the core (as known by the Blob store).
+     * This array is not ordered! There are no duplicate entries in it either (see how it's built in {@link BlobCoreMetadataBuilder}).
+     */
+    private final BlobFile[] blobFiles;
+
+    /**
+     * Files marked for delete but not yet removed from the Blob store. Each such file contains information indicating when
+     * it was marked for delete so we can actually remove the corresponding blob (and the entry from this array in the metadata)
+     * when it's safe to do so even if there are (unexpected) conflicting updates to the blob store by multiple solr servers...
+     * TODO: we might want to separate the metadata blob with the deletes as it's not required to always fetch the delete list when checking freshness of local core...
+     */
+    private final BlobFileToDelete[] blobFilesToDelete;
+
+    /**
+     * This is the constructor called by {@link BlobCoreMetadataBuilder}.
+     * It always builds non "isCorrupt" and non "isDeleted" metadata. 
+     * The only way to build an instance of "isCorrupt" metadata is to use {@link #getCorruptOf} and for "isDeleted" use {@link #getDeletedOf()}
+     */
+    BlobCoreMetadata(String sharedBlobName, BlobFile[] blobFiles, BlobFileToDelete[] blobFilesToDelete) {
+        this(sharedBlobName, blobFiles, blobFilesToDelete, UUID.randomUUID().toString(), false,
+                false);
+    }
+
+    private BlobCoreMetadata(String sharedBlobName, BlobFile[] blobFiles, BlobFileToDelete[] blobFilesToDelete,
+        String uniqueIdentifier, boolean isCorrupt, boolean isDeleted) {
+        this.sharedBlobName = sharedBlobName;
+        this.blobFiles = blobFiles;
+        this.blobFilesToDelete = blobFilesToDelete;
+        this.uniqueIdentifier = uniqueIdentifier;
+        this.isCorrupt = isCorrupt;
+        this.isDeleted = isDeleted;
+    }
+
+    /**
+     * Given a non corrupt {@link BlobCoreMetadata} instance, creates an equivalent one based on it but marked as corrupt.<p>
+     * The new instance keeps all the rest of the metadata unchanged, including the {@link #uniqueIdentifier}.
+     */
+    public BlobCoreMetadata getCorruptOf() {
+        assert !isCorrupt;
+        return new BlobCoreMetadata(sharedBlobName, blobFiles, blobFilesToDelete, uniqueIdentifier, true, isDeleted);
+    }
+
+    /**
+     * Given a {@link BlobCoreMetadata} instance, creates an equivalent one based on it but marked as deleted.
+     * <p>
+     * The new instance keeps all the rest of the metadata unchanged, including the {@link #uniqueIdentifier}.
+     */
+    public BlobCoreMetadata getDeletedOf() {
+        assert !isDeleted;
+        return new BlobCoreMetadata(sharedBlobName, blobFiles, blobFilesToDelete, uniqueIdentifier, isCorrupt, true);
+    }
+
+    /**
+     * Returns true if the Blob metadata was marked as deleted
+     */
+    public boolean getIsDeleted() {
+        return isDeleted;
+    }
+
+    /**
+     * Returns the core name corresponding to this metadata
+     */
+    public String getSharedBlobName() {
+        return sharedBlobName;
+    }
+
+    /**
+     * Returns true if the Blob metadata was marked as corrupt. In which case, the core should not be pulled from the Blob Store
+     * as it is useless.
+     */
+    public boolean getIsCorrupt() {
+        return isCorrupt;
+    }
+
+    /**
+     * Unique identifier of this blob core metadata. Allows quickly seeing that the core metadata has changed without comparing
+     * the whole content.<p>
+     * {@link #getCorruptOf()} is the only call allowing the creation of two instances of {@link BlobCoreMetadata} having
+     * the same unique identifier.
+     */
+    public String getUniqueIdentifier() {
+        return uniqueIdentifier;
+    }
+
+    public BlobFile[] getBlobFiles() {
+        return blobFiles;
+    }
+
+    public BlobFileToDelete[] getBlobFilesToDelete() {
+        return blobFilesToDelete;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        BlobCoreMetadata that = (BlobCoreMetadata) o;
+
+        if (this.isCorrupt != that.isCorrupt) return false;
+        if (this.isDeleted != that.isDeleted) return false;
+        if (!this.uniqueIdentifier.equals(that.uniqueIdentifier)) return false;
+        if (!this.sharedBlobName.equals(that.sharedBlobName)) return false;
+
+        // blobFiles array is not ordered so not using Arrays.equals here but rather Set comparison (we also know all elements are distinct in the array)
+        Set<BlobFile> thisFiles = new HashSet<>(Arrays.asList(this.blobFiles));
+        Set<BlobFile> thatFiles = new HashSet<>(Arrays.asList(that.blobFiles));
+        if (!thisFiles.equals(thatFiles)) return false;
+
+        // same for the files to delete
+        Set<BlobFileToDelete> thisFilesToDelete = new HashSet<>(Arrays.asList(this.blobFilesToDelete));
+        Set<BlobFileToDelete> thatFilesToDelete = new HashSet<>(Arrays.asList(that.blobFilesToDelete));
+        return thisFilesToDelete.equals(thatFilesToDelete);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = sharedBlobName.hashCode();
+        result = 31 * result + uniqueIdentifier.hashCode();
+        // The array of files is not ordered so need to compare as a set
+        result = 31 * result + new HashSet<>(Arrays.asList(this.blobFiles)).hashCode();
+        result = 31 * result + new HashSet<>(Arrays.asList(this.blobFilesToDelete)).hashCode();
+        result = 31 * result + (isCorrupt ? 1 : 0);
+        result = 31 * result + (isDeleted ? 1 : 0);
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return "sharedBlobName=" + sharedBlobName + " isCorrupt=" + isCorrupt 
+ " uniqueIdentifier=" + uniqueIdentifier;
+    }
+
+    /**
+     * A file (or blob) stored in the blob store.
+     */
+    public static class BlobFile {
+        /**
+         * Name the file should have on a Solr server retrieving it, not including the core specific part of the filename (i.e. the path)
+         */
+        private final String solrFileName;
+
+        /**
+         * Name of the blob representing the file on the blob store. This will initially be an absolute path on the Blob
+         * server (for compatibility with {@link org.apache.solr.store.blob.client.LocalStorageClient}) but eventually might not include
+         * the core name if cores are organized into per core S3 buckets.
+         */
+        private final String blobName;
+
+        // TODO add some checksum here to verify blob files are not corrupt
 
 Review comment:
   ```
   import java.util.zip.CRC32;
   (...)
   private long checksum;
   
   public static long calculateDigest(byte[] data) {
       CRC32 digest = new CRC32();
       digest.update(data);
       return digest.getValue();
   }
   
   public long getChecksum() {
       return checksum;
   }
   
   public void setChecksum(byte[] data) {
       checksum = calculateDigest(data);
   }
   
   // true when the stored checksum does not match the given digest
   public boolean isCorrupt(long digest) {
       return this.checksum != digest;
   }
   ```
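   
   Purely as a usage sketch (hypothetical; it assumes the checksum and helpers above end up on `BlobFile`, and that `data` holds the raw bytes of a file just pulled from the blob store):
   ```
   // Hypothetical pull-side check using the helpers sketched above.
   long pulledDigest = calculateDigest(data);
   if (blobFile.isCorrupt(pulledDigest)) {
       // e.g. mark the metadata corrupt via BlobCoreMetadata#getCorruptOf()
       // so pushers know to upload a complete set of files
   }
   ```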
   PS: another option is to use Adler32.
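   
   If Adler32 is preferred, only the digest helper changes; a minimal sketch (illustrative only; both `CRC32` and `Adler32` implement `java.util.zip.Checksum`, so they are drop-in replacements for each other here):
   ```
   import java.util.zip.Adler32;
   import java.util.zip.Checksum;
   
   // Adler32 variant of calculateDigest: faster than CRC32, but slightly
   // weaker at detecting corruption in small inputs.
   public static long calculateDigest(byte[] data) {
       Checksum digest = new Adler32();
       digest.update(data, 0, data.length);
       return digest.getValue();
   }
   ```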
