[ 
https://issues.apache.org/jira/browse/HIVE-27186?focusedWorklogId=858047&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-858047
 ]

ASF GitHub Bot logged work on HIVE-27186:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 19/Apr/23 21:55
            Start Date: 19/Apr/23 21:55
    Worklog Time Spent: 10m 
      Work Description: saihemanth-cloudera commented on code in PR #4194:
URL: https://github.com/apache/hive/pull/4194#discussion_r1171890976


##########
standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/properties/Digester.java:
##########
@@ -0,0 +1,440 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.properties;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectOutput;
+import java.io.ObjectOutputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.Date;
+import java.util.UUID;
+
+/**
+ * Helper class that creates a type 5 uuid.
+ * <p>This is computed from a set of updates using a SHA-1 message digest massaged into a UUID.
+ * see <a href="https://en.wikipedia.org/wiki/Universally_unique_identifier">...</a>
+ */
+public class Digester {
+    /** The Namespace uuid. */
+    private final UUID nsuid;
+    /** The digest used to compute the UUID. */
+    private final MessageDigest md;
+    /** Fallback namespace, derived from the time at which this class was initialized. */
+    private static final UUID TEMP_NS;
+    static {
+        // seed a throw-away digest with the current time and turn it into the namespace uuid
+        final MessageDigest seed = createDigest();
+        digest(seed, System.currentTimeMillis());
+        TEMP_NS = computeUUID(seed);
+    }
+
+    /**
+     * An output stream that forwards every byte written to it into a message digest.
+     */
+    private static class TagOutputStream extends OutputStream {
+        /** The digest receiving the written bytes. */
+        private final MessageDigest sink;
+
+        /**
+         * Sole ctor.
+         * @param digest the message digest to update
+         */
+        TagOutputStream(MessageDigest digest) {
+            this.sink = digest;
+        }
+
+        /**
+         * Feeds a single byte (the low 8 bits of the argument) to the digest.
+         * @param value the byte to write
+         */
+        @Override
+        public void write(int value) throws IOException {
+            sink.update((byte) value);
+        }
+
+        /**
+         * Feeds a slice of a byte array to the digest.
+         * @param data the source array
+         * @param start the offset of the first byte to feed
+         * @param count the number of bytes to feed
+         */
+        @Override
+        public void write(byte[] data, int start, int count) throws IOException {
+            sink.update(data, start, count);
+        }
+    }
+
+    /**
+     * Creates the message digest used to derive UUIDs.
+     * @return a new SHA-1 message digest
+     * @throws InternalError if the runtime does not provide a SHA-1 implementation
+     */
+    private static MessageDigest createDigest() {
+        try {
+            return MessageDigest.getInstance("SHA-1");
+        } catch (NoSuchAlgorithmException nsae) {
+            // chain the original exception so the root cause is not lost
+            throw new InternalError("SHA not supported", nsae);
+        }
+    }
+
+    /**
+     * Updates a digest with a uuid: its most significant bits first, then its least
+     * significant bits. A null uuid leaves the digest untouched.
+     * @param md the digest to update
+     * @param uid the uuid, may be null
+     * @return the digest passed as argument
+     */
+    private static MessageDigest digest(MessageDigest md, UUID uid) {
+        if (uid == null) {
+            return md;
+        }
+        digest(md, uid.getMostSignificantBits());
+        digest(md, uid.getLeastSignificantBits());
+        return md;
+    }
+
+    /**
+     * Updates a digest with an input stream.
+     * @param md the digest to update
+     * @param input the input to consume
+     * @throws IllegalStateException if an io exception occurs
+     */
+    private static void digest(MessageDigest md, InputStream input) {
+        try (OutputStream out = new TagOutputStream(md)) {
+            byte[] buffer = new byte[1024];
+            int read;
+            while ((read = input.read(buffer, 0, 1024)) >= 0) {
+                out.write(buffer, 0, read);
+            }
+        } catch(IOException xio) {
+            throw new IllegalStateException(xio);
+        }
+    }
+
+
+    /**
+     * Updates a digest with a long, fed as 8 bytes, least significant byte first.
+     * @param md the digest to update
+     * @param l8 the long
+     */
+    private static void digest(MessageDigest md, long l8) {
+        for (int shift = 0; shift < Long.SIZE; shift += Byte.SIZE) {
+            // the narrowing cast keeps only the low 8 bits of the shifted value
+            md.update((byte) (l8 >> shift));
+        }
+    }
+
+    /**
+     * Updates a digest with an object by writing it through an object output stream
+     * whose bytes are fed to the digest. A null object leaves the digest untouched.
+     * <p>Any {@link IOException} raised while creating the stream, writing the object or
+     * closing the stream is silently ignored; no attempt is made to undo a partial update.
+     * @param md the digest to update
+     * @param obj the object, may be null
+     */
+    private static void digest(MessageDigest md, Object obj) {
+        if (obj != null) {
+            try (ObjectOutput serializer = new ObjectOutputStream(new TagOutputStream(md))) {
+                serializer.writeObject(obj);
+            } catch (IOException ignored) {
+                // deliberately ignored: covers write failures as well as close failures
+            }
+        }
+    }
+
+    /**
+     * Computes the uuid.
+     * @param md the message digest used to compute the hash
+     * @return the eTag as a type 5 uuid
+     */
+    private static UUID computeUUID(MessageDigest md) {
+        byte[] sha1Bytes = md.digest();
+        sha1Bytes[6] &= 0x0f;  /* clear version        */
+        sha1Bytes[6] |= 0x50;  /* set to version 5     */
+        sha1Bytes[8] &= 0x3f;  /* clear variant        */
+        sha1Bytes[8] |= 0x80;  /* set to IETF variant  */
+
+        // SHA generates 160 bytes; truncate to 128
+        long msb = 0;
+        //assert data.length == 16 || data.length == 20;
+        for (int i = 0; i < 8; i++) {
+            msb = (msb << 8) | (sha1Bytes[i] & 0xff);
+        }
+        long lsb = 0;
+        for (int i = 8; i < 16; i++) {
+            lsb = (lsb << 8) | (sha1Bytes[i] & 0xff);
+        }
+        return new UUID(msb, lsb);
+    }
+
+    /**
+     * Implemented by objects that know how to feed their own content to a digester.
+     */
+    public interface Digestible {
+        /**
+         * Updates a digester with the content of this object.
+         * @param digester the digester to update
+         * @return true if this digestible actually contributed to the digest
+         */
+        boolean digest(Digester digester);
+    }
+
+    /**
+     * A type 5 uuid is namespace + sha1; namespace in our case is a uuid.
+     * Two instances of digesters built with the same namespace will produce the same UUIDs
+     * from the same inputs.
+     * @param namespace the uuid namespace; when null, the default namespace of this class is used
+     */
+    public Digester(UUID namespace) {
+        if (namespace != null) {
+            nsuid = namespace;
+        } else {
+            nsuid = TEMP_NS;
+        }
+        md = createDigest();
+        // the namespace is always the first thing fed to the digest
+        digest(md, nsuid);
+    }
+
+    /**
+     * Base of the copy ctor: adopts the given namespace and digest as they are,
+     * without feeding anything to the digest.
+     * @param lnsuid the namespace uid
+     * @param lmd the message digest
+     */
+    private Digester(UUID lnsuid, MessageDigest lmd)  {
+        md = lmd;
+        nsuid = lnsuid;
+    }
+
+    /**
+     * Default ctor.
+     * The created digester uses the class loading time as seed for its namespace; this means
+     * 2 instances of digester built in different JVM instances will *NOT* produce the same
+     * UUIDs for the same input. Typical use is in a non-persistent scenario, to verify an
+     * instance of an object has not been modified by checking its digested UUID remained
+     * the same.
+     * To get stable UUID computation across time and space, usable in a persistent scenario,
+     * you *NEED* to use a namespace-based digester created through {@link #Digester(UUID)};
+     * such a namespace uuid is easily created using {@link UUID#nameUUIDFromBytes(byte[])}
+     * from any name/uri you might desire.
+     */
+    public Digester() {
+        this((UUID) null);
+    }
+
+    /**
+     * Creates a digester sharing this one's namespace and starting from a clone of its
+     * current digest state.
+     * @return a clone of this instance, or null if the underlying digest cannot be cloned
+     */
+    public Digester copy() {
+        final MessageDigest duplicate;
+        try {
+            duplicate = (MessageDigest) md.clone();
+        } catch (CloneNotSupportedException ex) {
+            return null;
+        }
+        return new Digester(nsuid, duplicate);
+    }
+
+    /**
+     * Computes the version tag from this digester.
+     * <p>This consumes the current message digest state, then resets the digest and feeds
+     * it the namespace again so this digester can be reused.
+     * @return the type 5 uuid
+     */
+    public UUID getUUID() {
+        final UUID result = computeUUID(md);
+        md.reset();
+        // start over from the namespace
+        this.digest(nsuid);
+        return result;
+    }
+
+    /**
+     * Updates the digest with a boolean, fed as a single byte: 1 for true, 0 for false.
+     * @param b the boolean
+     * @return this digester
+     */
+    public Digester digest(boolean b) {
+        if (b) {
+            md.update((byte) 1);
+        } else {
+            md.update((byte) 0);
+        }
+        return this;
+    }
+
+    /**
+     * Updates the digest with a char, fed as 2 bytes, low byte first.
+     * @param c the char
+     * @return this digester
+     */
+    public Digester digest(char c) {
+        final byte low = (byte) (c & 0xff);
+        final byte high = (byte) (c >> 8);
+        md.update(low);
+        md.update(high);
+        return this;
+    }
+
+    /**
+     * Updates the digest with a bytes array; a null array leaves the digest untouched.
+     * @param bytes the bytes, may be null
+     * @return this digester
+     */
+    public Digester digest(byte[] bytes) {
+        if (bytes == null) {
+            return this;
+        }
+        md.update(bytes);
+        return this;
+    }
+
+    /**
+     * Updates the digest with an integer, fed as 4 bytes, least significant byte first.
+     * @param i4 the int
+     * @return this digester
+     */
+    public Digester digest(int i4) {
+        for (int shift = 0; shift < Integer.SIZE; shift += Byte.SIZE) {
+            // the narrowing cast keeps only the low 8 bits of the shifted value
+            md.update((byte) (i4 >> shift));
+        }
+        return this;
+    }
+
+    /**
+     * Updates the digest with a long, by delegating to the static long variant.
+     * @param l8 the long
+     * @return this digester
+     */
+    public Digester digest(long l8) {
+        digest(md, l8);
+        return this;
+    }
+
+    /**
+     * Updates the digest with a double, fed as the long returned by
+     * {@link Double#doubleToRawLongBits(double)}.
+     * @param f8 the double
+     * @return this digester
+     */
+    public Digester digest(double f8) {
+        final long rawBits = Double.doubleToRawLongBits(f8);
+        digest(md, rawBits);
+        return this;
+    }
+
+    /**
+     * Updates the digest with a date, fed as the long returned by {@link Date#getTime()}.
+     * A null date leaves the digest untouched.
+     * @param date the date, may be null
+     * @return this digester
+     */
+    public Digester digest(Date date) {
+        if (date == null) {
+            return this;
+        }
+        digest(md, date.getTime());
+        return this;
+    }
+
+    /**
+     * Updates the digest with a string, one char at a time in order.
+     * A null string leaves the digest untouched.
+     * @param str the string, may be null
+     * @return this digester
+     */
+    public Digester digest(String str) {
+        if (str == null) {
+            return this;
+        }
+        for (char c : str.toCharArray()) {
+            digest(c);
+        }
+        return this;
+    }
+
+    /**
+     * Updates the digest with a uuid, by delegating to the static uuid variant.
+     * @param uid the uuid
+     * @return this digester
+     */
+    public Digester digest(UUID uid) {
+        digest(md, uid);
+        return this;
+    }
+
+    /**
+     * Updates the digest with a uri, through its string form.
+     * <p>A null uri leaves the digest untouched, like the other nullable inputs of this class.
+     * NOTE(review): among the overloads visible here, the call below binds to the static
+     * (MessageDigest, Object) variant, so the string goes through object serialization
+     * rather than char by char as in {@link #digest(String)}. This is kept as is because
+     * changing it would change the UUIDs produced - confirm this is intended.
+     * @param uri the uri, may be null
+     * @return this digester
+     */
+    public Digester digest(URI uri) {
+        if (uri != null) {
+            digest(md, uri.toString());
+        }
+        return this;
+    }
+
+    /**
+     * Updates the digest with an object that describes how it digests, by handing it
+     * this digester. A null object leaves the digest untouched.
+     * @param digestible the object, may be null
+     * @return this digester
+     */
+    public Digester digest(Digestible digestible) {
+        if (digestible == null) {
+            return this;
+        }
+        // the boolean reported by the digestible is not used here
+        digestible.digest(this);
+        return this;
+    }
+
+    /**
+     * Updates the digest with the content of a stream.
+     * A null stream leaves the digest untouched.
+     * @param input the stream, may be null
+     * @return this digester
+     */
+    public Digester digest(InputStream input) {
+        if (input == null) {
+            return this;
+        }
+        digest(md, input);
+        return this;
+    }
+    /**
+     * Updates the digest with any (serializable) object.
+     * @param obj the object
+     * @return this digester
+     */
+    public Digester digest(Object obj) {
+        if (obj instanceof Digestible) {

Review Comment:
   Do you think this could be implemented more cleanly with a switch statement?





Issue Time Tracking
-------------------

    Worklog Id:     (was: 858047)
    Time Spent: 5h 40m  (was: 5.5h)

> A persistent property store 
> ----------------------------
>
>                 Key: HIVE-27186
>                 URL: https://issues.apache.org/jira/browse/HIVE-27186
>             Project: Hive
>          Issue Type: Improvement
>          Components: Metastore
>    Affects Versions: 4.0.0-alpha-2
>            Reporter: Henri Biestro
>            Assignee: Henri Biestro
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 5h 40m
>  Remaining Estimate: 0h
>
> WHAT
> A persistent property store usable as a support facility for any metadata 
> augmentation feature.
> WHY
> When adding new meta-data oriented features, we usually need to persist 
> information linking the feature data and the HiveMetaStore objects it applies 
> to. Any information related to a database, a table or the cluster - like 
> statistics, for example, or any operational state or data (think rolling 
> backup) - falls into this use case.
> Typically, accommodating such a feature requires modifying the Metastore 
> database schema by adding or altering a table. It also usually implies 
> modifying the thrift APIs to expose such meta-data to consumers.
> The proposed feature aims to solve persistence and query/transport for 
> these types of use cases by providing a 'key/(meta)value' store exposed as a 
> property system.
> HOW
> A property-value model is the simple and generic exposed API.
> To provision for several usage scenarios, the model entry point is a 
> 'namespace' that qualifies the feature-component property manager. For 
> example, 'stats' could be the namespace for all properties related to the 
> 'statistics' feature.
> The namespace identifies a manager that handles property-groups persisted as 
> property-maps. For instance, all statistics pertaining to a given table would 
> be collocated in the same property-group. As such, all properties (say number 
> of 'unique_values' per columns) for a given HMS table 'relation0' would all 
> be stored and persisted in the same property-map instance.
> Property-maps may be decorated by an (optional) schema that may declare the 
> name and value-type of allowed properties (and their optional default value). 
> Each property is addressed by a name, a path uniquely identifying the 
> property in a given property map.
> The manager also handles transforming property-map names to the property-map 
> keys used to persist them in the DB.
> The API supports inserting/updating properties in bulk, transactionally. It 
> also provides selection/projection to help reduce the volume of data exchanged 
> between client and server; selection can use (JEXL expression) predicates to 
> filter maps.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to