http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/pom.xml
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/pom.xml 
b/standalone-metastore/metastore-common/pom.xml
index d6df15f..b0d9eee 100644
--- a/standalone-metastore/metastore-common/pom.xml
+++ b/standalone-metastore/metastore-common/pom.xml
@@ -170,42 +170,17 @@
       <artifactId>libthrift</artifactId>
     </dependency>
     <dependency>
-      <groupId>org.datanucleus</groupId>
-      <artifactId>datanucleus-api-jdo</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.datanucleus</groupId>
-      <artifactId>datanucleus-core</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.datanucleus</groupId>
-      <artifactId>datanucleus-rdbms</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>org.datanucleus</groupId>
-      <artifactId>javax.jdo</artifactId>
-    </dependency>
-    <dependency>
       <groupId>org.skyscreamer</groupId>
       <artifactId>jsonassert</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>sqlline</groupId>
-      <artifactId>sqlline</artifactId>
-    </dependency>
-    <dependency>
       <groupId>commons-logging</groupId>
       <artifactId>commons-logging</artifactId>
     </dependency>
     <!-- test scope dependencies -->
 
     <dependency>
-      <groupId>com.microsoft.sqlserver</groupId>
-      <artifactId>mssql-jdbc</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>
@@ -215,18 +190,6 @@
       <artifactId>mockito-core</artifactId>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <!-- Note, this is LGPL.  But we're only using it in a test and not 
changing it, so I
-      believe we are fine. -->
-      <groupId>org.mariadb.jdbc</groupId>
-      <artifactId>mariadb-java-client</artifactId>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.postgresql</groupId>
-      <artifactId>postgresql</artifactId>
-      <scope>test</scope>
-    </dependency>
   </dependencies>
 
   <profiles>
@@ -360,48 +323,6 @@
         </plugins>
       </reporting>
     </profile>
-    <!--
-  <profile>
-    <id>checkin</id>
-    <build>
-      <plugins>
-        <plugin>
-          <groupId>org.apache.maven.plugins</groupId>
-          <artifactId>maven-surefire-plugin</artifactId>
-          <version>${maven.surefire.version}</version>
-          <configuration>
-            <includes>
-              <include>**/Test*</include>
-            </includes>
-            <redirectTestOutputToFile>true</redirectTestOutputToFile>
-            <reuseForks>false</reuseForks>
-            <forkCount>${test.forkcount}</forkCount>
-            <argLine>-Xmx2048m</argLine>
-            <failIfNoTests>false</failIfNoTests>
-            <systemPropertyVariables>
-              <build.dir>${project.build.directory}</build.dir>
-              
<datanucleus.schema.autoCreateAll>true</datanucleus.schema.autoCreateAll>
-              <derby.version>${derby.version}</derby.version>
-              
<derby.stream.error.file>${test.tmp.dir}/derby.log</derby.stream.error.file>
-              <log4j.debug>true</log4j.debug>
-              <java.io.tmpdir>${test.tmp.dir}</java.io.tmpdir>
-              
<javax.jdo.option.ConnectionURL>jdbc:derby:memory:${test.tmp.dir}/junit_metastore_db;create=true</javax.jdo.option.ConnectionURL>
-              
<metastore.schema.verification>false</metastore.schema.verification>
-              <test.tmp.dir>${test.tmp.dir}</test.tmp.dir>
-              
<metastore.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</metastore.warehouse.dir>
-            </systemPropertyVariables>
-            <additionalClasspathElements>
-              
<additionalClasspathElement>${log4j.conf.dir}</additionalClasspathElement>
-            </additionalClasspathElements>
-          </configuration>
-
-        </plugin>
-
-
-      </plugins>
-    </build>
-  </profile>
-  -->
   </profiles>
 
   <build>
@@ -490,14 +411,6 @@
             <goals>
               <goal>run</goal>
             </goals>
-            <configuration>
-              <target>
-                <mkdir dir="${test.tmp.dir}/scripts/metastore/upgrade" />
-                <copy todir="${test.tmp.dir}/scripts/metastore/upgrade">
-                  <fileset dir="${basedir}/src/main/sql/"/>
-                </copy>
-              </target>
-            </configuration>
           </execution>
         </executions>
       </plugin>
@@ -692,47 +605,6 @@
         </executions>
       </plugin>
       <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>exec-maven-plugin</artifactId>
-        <executions>
-          <execution>
-            <phase>prepare-package</phase>
-            <goals>
-              <goal>exec</goal>
-            </goals>
-            <configuration>
-              <executable>java</executable>
-              <arguments>
-                <argument>-classpath</argument>
-                <classpath/>
-                
<argument>org.apache.hadoop.hive.metastore.conf.ConfTemplatePrinter</argument>
-                
<argument>${project.build.directory}/generated-sources/conf/metastore-site.xml.template</argument>
-              </arguments>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.datanucleus</groupId>
-        <artifactId>datanucleus-maven-plugin</artifactId>
-        <version>4.0.5</version>
-        <configuration>
-          <api>JDO</api>
-          <verbose>false</verbose>
-          
<log4jConfiguration>${basedir}/src/main/resources/datanucleus-log4j.properties</log4jConfiguration>
-          <metadataIncludes>**/*.jdo</metadataIncludes>
-          <fork>false</fork>
-        </configuration>
-        <executions>
-          <execution>
-            <phase>process-classes</phase>
-            <goals>
-              <goal>enhance</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
         <groupId>org.antlr</groupId>
         <artifactId>antlr3-maven-plugin</artifactId>
         <version>${antlr.version}</version>

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/assembly/bin.xml
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-common/src/assembly/bin.xml 
b/standalone-metastore/metastore-common/src/assembly/bin.xml
index 81912d7..5992a48 100644
--- a/standalone-metastore/metastore-common/src/assembly/bin.xml
+++ b/standalone-metastore/metastore-common/src/assembly/bin.xml
@@ -77,27 +77,6 @@
     </fileSet>
 
     <fileSet>
-      <fileMode>755</fileMode>
-      <directory>${project.basedir}/src/main/scripts</directory>
-      <includes>
-        <include>base</include>
-        <include>schematool</include>
-        <include>start-metastore</include>
-        <include>metastore-config.sh</include>
-        <include>ext/**/*</include>
-      </includes>
-      <outputDirectory>bin</outputDirectory>
-    </fileSet>
-
-    <fileSet>
-      <directory>${project.basedir}/src/main/sql</directory>
-      <includes>
-        <include>**/*</include>
-      </includes>
-      <outputDirectory>scripts/metastore/upgrade</outputDirectory>
-    </fileSet>
-
-    <fileSet>
       <directory>${project.basedir}/src/gen/thrift/gen-php</directory>
       <includes>
         <include>**/*</include>
@@ -118,19 +97,12 @@
       <directory>${project.basedir}/src/main/resources/</directory>
       <fileMode>644</fileMode>
       <includes>
-        <include>metastore-site.xml</include>
         <include>metastore-log4j2.properties</include>
       </includes>
       <outputDirectory>conf</outputDirectory>
     </fileSet>
   </fileSets>
 
-  <files>
-    <file>
-      
<source>${project.build.directory}/generated-sources/conf/metastore-site.xml.template</source>
-      <outputDirectory>conf</outputDirectory>
-    </file>
-  </files>
 
 </assembly>
 

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
deleted file mode 100644
index a7ca05a..0000000
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ /dev/null
@@ -1,335 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
-
-import com.google.common.collect.ImmutableList;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
-import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.core.JsonGenerator;
-import com.fasterxml.jackson.core.JsonParser;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.DeserializationContext;
-import com.fasterxml.jackson.databind.JsonDeserializer;
-import com.fasterxml.jackson.databind.JsonSerializer;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.ObjectReader;
-import com.fasterxml.jackson.databind.ObjectWriter;
-import com.fasterxml.jackson.databind.SerializerProvider;
-import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
-import com.fasterxml.jackson.databind.annotation.JsonSerialize;
-
-
-/**
- * A class that defines the constant strings used by the statistics 
implementation.
- */
-
-public class StatsSetupConst {
-
-  protected static final Logger LOG = 
LoggerFactory.getLogger(StatsSetupConst.class.getName());
-
-  public enum StatDB {
-    fs {
-      @Override
-      public String getPublisher(Configuration conf) {
-        return "org.apache.hadoop.hive.ql.stats.fs.FSStatsPublisher";
-      }
-
-      @Override
-      public String getAggregator(Configuration conf) {
-        return "org.apache.hadoop.hive.ql.stats.fs.FSStatsAggregator";
-      }
-    },
-    custom {
-      @Override
-      public String getPublisher(Configuration conf) {
-        return MetastoreConf.getVar(conf, ConfVars.STATS_DEFAULT_PUBLISHER); }
-      @Override
-      public String getAggregator(Configuration conf) {
-        return MetastoreConf.getVar(conf,  ConfVars.STATS_DEFAULT_AGGREGATOR); 
}
-    };
-    public abstract String getPublisher(Configuration conf);
-    public abstract String getAggregator(Configuration conf);
-  }
-
-  // statistics stored in metastore
-  /**
-   * The name of the statistic Num Files to be published or gathered.
-   */
-  public static final String NUM_FILES = "numFiles";
-
-  /**
-   * The name of the statistic Num Partitions to be published or gathered.
-   */
-  public static final String NUM_PARTITIONS = "numPartitions";
-
-  /**
-   * The name of the statistic Total Size to be published or gathered.
-   */
-  public static final String TOTAL_SIZE = "totalSize";
-
-  /**
-   * The name of the statistic Row Count to be published or gathered.
-   */
-  public static final String ROW_COUNT = "numRows";
-
-  public static final String RUN_TIME_ROW_COUNT = "runTimeNumRows";
-
-  /**
-   * The name of the statistic Raw Data Size to be published or gathered.
-   */
-  public static final String RAW_DATA_SIZE = "rawDataSize";
-
-  /**
-   * The name of the statistic for Number of Erasure Coded Files - to be 
published or gathered.
-   */
-  public static final String NUM_ERASURE_CODED_FILES = "numFilesErasureCoded";
-
-  /**
-   * Temp dir for writing stats from tasks.
-   */
-  public static final String STATS_TMP_LOC = "hive.stats.tmp.loc";
-
-  public static final String STATS_FILE_PREFIX = "tmpstats-";
-  /**
-   * List of all supported statistics
-   */
-  public static final List<String> SUPPORTED_STATS = ImmutableList.of(
-      NUM_FILES, ROW_COUNT, TOTAL_SIZE, RAW_DATA_SIZE, 
NUM_ERASURE_CODED_FILES);
-
-  /**
-   * List of all statistics that need to be collected during query execution. 
These are
-   * statistics that inherently require a scan of the data.
-   */
-  public static final List<String> STATS_REQUIRE_COMPUTE = 
ImmutableList.of(ROW_COUNT, RAW_DATA_SIZE);
-
-  /**
-   * List of statistics that can be collected quickly without requiring a scan 
of the data.
-   */
-  public static final List<String> FAST_STATS = ImmutableList.of(
-      NUM_FILES, TOTAL_SIZE, NUM_ERASURE_CODED_FILES);
-
-  // This string constant is used to indicate to AlterHandler that
-  // alterPartition/alterTable is happening via statsTask or via user.
-  public static final String STATS_GENERATED = "STATS_GENERATED";
-
-  public static final String TASK = "TASK";
-
-  public static final String USER = "USER";
-
-  // This string constant is used by AlterHandler to figure out that it should 
not attempt to
-  // update stats. It is set by any client-side task which wishes to signal 
that no stats
-  // update should take place, such as with replication.
-  public static final String DO_NOT_UPDATE_STATS = "DO_NOT_UPDATE_STATS";
-
-  //This string constant will be persisted in metastore to indicate whether 
corresponding
-  //table or partition's statistics and table or partition's column statistics 
are accurate or not.
-  public static final String COLUMN_STATS_ACCURATE = "COLUMN_STATS_ACCURATE";
-
-  public static final String COLUMN_STATS = "COLUMN_STATS";
-
-  public static final String BASIC_STATS = "BASIC_STATS";
-
-  public static final String CASCADE = "CASCADE";
-
-  public static final String TRUE = "true";
-
-  public static final String FALSE = "false";
-
-  // The parameter keys for the table statistics. Those keys are excluded from 
'show create table' command output.
-  public static final List<String> TABLE_PARAMS_STATS_KEYS = ImmutableList.of(
-      COLUMN_STATS_ACCURATE, NUM_FILES, TOTAL_SIZE, ROW_COUNT, RAW_DATA_SIZE, 
NUM_PARTITIONS,
-      NUM_ERASURE_CODED_FILES);
-
-  private static class ColumnStatsAccurate {
-    private static ObjectReader objectReader;
-    private static ObjectWriter objectWriter;
-
-    static {
-      ObjectMapper objectMapper = new ObjectMapper();
-      objectReader = objectMapper.readerFor(ColumnStatsAccurate.class);
-      objectWriter = objectMapper.writerFor(ColumnStatsAccurate.class);
-    }
-
-    static class BooleanSerializer extends JsonSerializer<Boolean> {
-
-      @Override
-      public void serialize(Boolean value, JsonGenerator jsonGenerator,
-          SerializerProvider serializerProvider) throws IOException {
-        jsonGenerator.writeString(value.toString());
-      }
-    }
-
-    static class BooleanDeserializer extends JsonDeserializer<Boolean> {
-
-      public Boolean deserialize(JsonParser jsonParser,
-          DeserializationContext deserializationContext)
-              throws IOException {
-        return Boolean.valueOf(jsonParser.getValueAsString());
-      }
-    }
-
-    @JsonInclude(JsonInclude.Include.NON_DEFAULT)
-    @JsonSerialize(using = BooleanSerializer.class)
-    @JsonDeserialize(using = BooleanDeserializer.class)
-    @JsonProperty(BASIC_STATS)
-    boolean basicStats;
-
-    @JsonInclude(JsonInclude.Include.NON_EMPTY)
-    @JsonProperty(COLUMN_STATS)
-    @JsonSerialize(contentUsing = BooleanSerializer.class)
-    @JsonDeserialize(contentUsing = BooleanDeserializer.class)
-    TreeMap<String, Boolean> columnStats = new TreeMap<>();
-
-  }
-
-  public static boolean areBasicStatsUptoDate(Map<String, String> params) {
-    if (params == null) {
-      return false;
-    }
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    return stats.basicStats;
-  }
-
-  public static boolean areColumnStatsUptoDate(Map<String, String> params, 
String colName) {
-    if (params == null) {
-      return false;
-    }
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    return stats.columnStats.containsKey(colName);
-  }
-
-  // It will only throw JSONException when stats.put(BASIC_STATS, TRUE)
-  // has duplicate key, which is not possible
-  // note that set basic stats false will wipe out column stats too.
-  public static void setBasicStatsState(Map<String, String> params, String 
setting) {
-    if (setting.equals(FALSE)) {
-      if (params!=null && params.containsKey(COLUMN_STATS_ACCURATE)) {
-        params.remove(COLUMN_STATS_ACCURATE);
-      }
-      return;
-    }
-    if (params == null) {
-      throw new RuntimeException("params are null...cant set 
columnstatstate!");
-    }
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    stats.basicStats = true;
-    try {
-      params.put(COLUMN_STATS_ACCURATE, 
ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
-    } catch (JsonProcessingException e) {
-      throw new RuntimeException("can't serialize column stats", e);
-    }
-  }
-
-  public static void setColumnStatsState(Map<String, String> params, 
List<String> colNames) {
-    if (params == null) {
-      throw new RuntimeException("params are null...cant set 
columnstatstate!");
-    }
-    if (colNames == null) {
-      return;
-    }
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-
-    for (String colName : colNames) {
-      if (!stats.columnStats.containsKey(colName)) {
-        stats.columnStats.put(colName, true);
-      }
-    }
-    try {
-      params.put(COLUMN_STATS_ACCURATE, 
ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
-    } catch (JsonProcessingException e) {
-      LOG.trace(e.getMessage());
-    }
-  }
-
-  public static boolean canColumnStatsMerge(Map<String, String> params, String 
colName) {
-    if (params == null) {
-      return false;
-    }
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    return stats.columnStats.containsKey(colName);
-  }
-  
-  public static void clearColumnStatsState(Map<String, String> params) {
-    if (params == null) {
-      return;
-    }
-
-    ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-    stats.columnStats.clear();
-
-    try {
-      params.put(COLUMN_STATS_ACCURATE, 
ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
-    } catch (JsonProcessingException e) {
-      LOG.trace(e.getMessage());
-    }
-  }
-
-  public static void removeColumnStatsState(Map<String, String> params, 
List<String> colNames) {
-    if (params == null) {
-      return;
-    }
-    try {
-      ColumnStatsAccurate stats = 
parseStatsAcc(params.get(COLUMN_STATS_ACCURATE));
-      for (String string : colNames) {
-        stats.columnStats.remove(string);
-      }
-      params.put(COLUMN_STATS_ACCURATE, 
ColumnStatsAccurate.objectWriter.writeValueAsString(stats));
-    } catch (JsonProcessingException e) {
-      LOG.trace(e.getMessage());
-    }
-  }
-
-  public static void setStatsStateForCreateTable(Map<String, String> params,
-      List<String> cols, String setting) {
-    if (TRUE.equals(setting)) {
-      for (String stat : StatsSetupConst.SUPPORTED_STATS) {
-        params.put(stat, "0");
-      }
-    }
-    setBasicStatsState(params, setting);
-    if (TRUE.equals(setting)) {
-      setColumnStatsState(params, cols);
-    }
-  }
-  
-  private static ColumnStatsAccurate parseStatsAcc(String statsAcc) {
-    if (statsAcc == null) {
-      return new ColumnStatsAccurate();
-    }
-    try {
-      return ColumnStatsAccurate.objectReader.readValue(statsAcc);
-    } catch (Exception e) {
-      ColumnStatsAccurate ret = new ColumnStatsAccurate();
-      if (TRUE.equalsIgnoreCase(statsAcc)) {
-        ret.basicStats = true;
-      }
-      return ret;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java
deleted file mode 100644
index 668db10..0000000
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimator.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-
-public interface NumDistinctValueEstimator {
-
-  Logger LOG = 
LoggerFactory.getLogger(NumDistinctValueEstimator.class.getName());
-
-  void reset();
-
-  byte[] serialize();
-  
-  NumDistinctValueEstimator deserialize(byte[] buf);
-
-  void addToEstimator(long v);
-
-  void addToEstimator(double d);
-
-  void addToEstimator(String s);
-
-  void addToEstimator(HiveDecimal decimal);
-
-  void mergeEstimators(NumDistinctValueEstimator o);
-
-  long estimateNumDistinctValues();
-
-  int lengthFor(JavaDataModel model);
-
-  boolean canMerge(NumDistinctValueEstimator o);
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
deleted file mode 100644
index b630fa3..0000000
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.hadoop.hive.common.ndv;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
-import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
-import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
-
-public class NumDistinctValueEstimatorFactory {
-
-  private NumDistinctValueEstimatorFactory() {
-  }
-
-  private static boolean isFMSketch(byte[] buf) throws IOException {
-    byte[] magic = new byte[2];
-    magic[0] = (byte) buf[0];
-    magic[1] = (byte) buf[1];
-    return Arrays.equals(magic, FMSketchUtils.MAGIC);
-  }
-
-  public static NumDistinctValueEstimator getNumDistinctValueEstimator(byte[] 
buf) {
-    // Right now we assume only FM and HLL are available.
-    try {
-      if (isFMSketch(buf)) {
-        return FMSketchUtils.deserializeFM(buf);
-      } else {
-        return HyperLogLogUtils.deserializeHLL(buf);
-      }
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public static NumDistinctValueEstimator getEmptyNumDistinctValueEstimator(
-      NumDistinctValueEstimator n) {
-    if (n instanceof FMSketch) {
-      return new FMSketch(((FMSketch) n).getNumBitVectors());
-    } else {
-      return HyperLogLog.builder().setSizeOptimized().build();
-    }
-  }
-
-  public static NumDistinctValueEstimator 
getEmptyNumDistinctValueEstimator(String func,
-      int numBitVectors) {
-    if ("fm".equals(func.toLowerCase())) {
-      return new FMSketch(numBitVectors);
-    } else if ("hll".equals(func.toLowerCase())) {
-      return HyperLogLog.builder().setSizeOptimized().build();
-    } else {
-      throw new RuntimeException("Can not recognize " + func);
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java
deleted file mode 100644
index f6cdc4c..0000000
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketch.java
+++ /dev/null
@@ -1,359 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.fm;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Random;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.util.JavaDataModel;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javolution.util.FastBitSet;
-
-public class FMSketch implements NumDistinctValueEstimator {
-
-  static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
-
-  /* We want a,b,x to come from a finite field of size 0 to k, where k is a 
prime number.
-   * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to 
be 2^p -1.
-   * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k 
will not be pair wise
-   * independent. As a consequence, the hash values will not distribute 
uniformly from 0 to 2^p-1
-   * thus introducing errors in the estimates.
-   */
-  public static final int BIT_VECTOR_SIZE = 31;
-
-  // Refer to Flajolet-Martin'86 for the value of phi
-  private static final double PHI = 0.77351;
-
-  private final int[] a;
-  private final int[] b;
-  private final FastBitSet[] bitVector;
-
-  private final Random aValue;
-  private final Random bValue;
-  
-  private int numBitVectors;
-
-  /* Create a new distinctValueEstimator
-   */
-  public FMSketch(int numBitVectors) {
-    this.numBitVectors = numBitVectors;
-    bitVector = new FastBitSet[numBitVectors];
-    for (int i=0; i< numBitVectors; i++) {
-      bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
-    }
-
-    a = new int[numBitVectors];
-    b = new int[numBitVectors];
-
-    /* Use a large prime number as a seed to the random number generator.
-     * Java's random number generator uses the Linear Congruential Generator 
to generate random
-     * numbers using the following recurrence relation,
-     *
-     * X(n+1) = (a X(n) + c ) mod m
-     *
-     *  where X0 is the seed. Java implementation uses m = 2^48. This is 
problematic because 2^48
-     *  is not a prime number and hence the set of numbers from 0 to m don't 
form a finite field.
-     *  If these numbers don't come from a finite field any give X(n) and 
X(n+1) may not be pair
-     *  wise independent.
-     *
-     *  However, empirically passing in prime numbers as seeds seems to work 
better than when passing
-     *  composite numbers as seeds. Ideally Java's Random should pick m such 
that m is prime.
-     *
-     */
-    aValue = new Random(99397);
-    bValue = new Random(9876413);
-
-    for (int i = 0; i < numBitVectors; i++) {
-      int randVal;
-      /* a and b shouldn't be even; If a and b are even, then none of the 
values
-       * will set bit 0 thus introducing errors in the estimate. Both a and b 
can be even
-       * 25% of the times and as a result 25% of the bit vectors could be 
inaccurate. To avoid this
-       * always pick odd values for a and b.
-       */
-      do {
-        randVal = aValue.nextInt();
-      } while (randVal % 2 == 0);
-
-      a[i] = randVal;
-
-      do {
-        randVal = bValue.nextInt();
-      } while (randVal % 2 == 0);
-
-      b[i] = randVal;
-
-      if (a[i] < 0) {
-        a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1);
-      }
-
-      if (b[i] < 0) {
-        b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1);
-      }
-    }
-  }
-
-  /**
-   * Resets a distinctValueEstimator object to its original state.
-   */
-  public void reset() {
-    for (int i=0; i< numBitVectors; i++) {
-      bitVector[i].clear();
-    }
-  }
-
-  public FastBitSet getBitVector(int index) {
-    return bitVector[index];
-  }
-
-  public FastBitSet setBitVector(FastBitSet fastBitSet, int index) {
-    return bitVector[index] = fastBitSet;
-  }
-
-  public int getNumBitVectors() {
-    return numBitVectors;
-  }
-
-  public int getBitVectorSize() {
-    return BIT_VECTOR_SIZE;
-  }
-
-  public void printNumDistinctValueEstimator() {
-    String t = new String();
-
-    LOG.debug("NumDistinctValueEstimator");
-    LOG.debug("Number of Vectors: {}", numBitVectors);
-    LOG.debug("Vector Size: {}", BIT_VECTOR_SIZE);
-
-    for (int i=0; i < numBitVectors; i++) {
-      t = t + bitVector[i].toString();
-    }
-
-    LOG.debug("Serialized Vectors: ");
-    LOG.debug(t);
-  }
-
-  @Override
-  public byte[] serialize() {
-    ByteArrayOutputStream bos = new ByteArrayOutputStream();
-    // write bytes to bos ...
-    try {
-      FMSketchUtils.serializeFM(bos, this);
-      final byte[] result = bos.toByteArray();
-      bos.close();
-      return result;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  @Override
-  public NumDistinctValueEstimator deserialize(byte[] buf) {
-    InputStream is = new ByteArrayInputStream(buf);
-    try {
-      NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is);
-      is.close();
-      return n;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  private int generateHash(long v, int hashNum) {
-    int mod = (1<<BIT_VECTOR_SIZE) - 1;
-    long tempHash = a[hashNum] * v  + b[hashNum];
-    tempHash %= mod;
-    int hash = (int) tempHash;
-
-    /* Hash function should map the long value to 0...2^L-1.
-     * Hence hash value has to be non-negative.
-     */
-    if (hash < 0) {
-      hash = hash + mod;
-    }
-    return hash;
-  }
-
-  private int generateHashForPCSA(long v) {
-    int mod = 1 << (BIT_VECTOR_SIZE - 1) - 1;
-    long tempHash = a[0] * v + b[0];
-    tempHash %= mod;
-    int hash = (int) tempHash;
-
-    /* Hash function should map the long value to 0...2^L-1.
-     * Hence hash value has to be non-negative.
-     */
-    if (hash < 0) {
-      hash = hash + mod + 1;
-    }
-    return hash;
-  }
-
-  public void addToEstimator(long v) {
-    /* Update summary bitVector :
-     * Generate hash value of the long value and mod it by 2^bitVectorSize-1.
-     * In this implementation bitVectorSize is 31.
-     */
-
-    for (int i = 0; i<numBitVectors; i++) {
-      int hash = generateHash(v,i);
-      int index;
-
-      // Find the index of the least significant bit that is 1
-      for (index=0; index<BIT_VECTOR_SIZE; index++) {
-        if (hash % 2 != 0) {
-          break;
-        }
-        hash = hash >> 1;
-      }
-
-      // Set bitvector[index] := 1
-      bitVector[i].set(index);
-    }
-  }
-
-  public void addToEstimatorPCSA(long v) {
-    int hash = generateHashForPCSA(v);
-    int rho = hash/numBitVectors;
-    int index;
-
-    // Find the index of the least significant bit that is 1
-    for (index=0; index<BIT_VECTOR_SIZE; index++) {
-      if (rho % 2 != 0) {
-        break;
-      }
-      rho = rho >> 1;
-    }
-
-    // Set bitvector[index] := 1
-    bitVector[hash%numBitVectors].set(index);
-  }
-
-  public void addToEstimator(double d) {
-    int v = new Double(d).hashCode();
-    addToEstimator(v);
-  }
-
-  public void addToEstimatorPCSA(double d) {
-    int v = new Double(d).hashCode();
-    addToEstimatorPCSA(v);
-  }
-
-  public void addToEstimator(HiveDecimal decimal) {
-    int v = decimal.hashCode();
-    addToEstimator(v);
-  }
-
-  public void addToEstimatorPCSA(HiveDecimal decimal) {
-    int v = decimal.hashCode();
-    addToEstimatorPCSA(v);
-  }
-
-  public void mergeEstimators(FMSketch o) {
-    // Bitwise OR the bitvector with the bitvector in the agg buffer
-    for (int i=0; i<numBitVectors; i++) {
-      bitVector[i].or(o.getBitVector(i));
-    }
-  }
-
-  public long estimateNumDistinctValuesPCSA() {
-    double numDistinctValues = 0.0;
-    long S = 0;
-
-    for (int i=0; i < numBitVectors; i++) {
-      int index = 0;
-      while (bitVector[i].get(index) && index < BIT_VECTOR_SIZE) {
-        index = index + 1;
-      }
-      S = S + index;
-    }
-
-    numDistinctValues = ((numBitVectors/PHI) * Math.pow(2.0, S/numBitVectors));
-    return ((long)numDistinctValues);
-  }
-
-  /* We use the Flajolet-Martin estimator to estimate the number of distinct 
values.FM uses the
-   * location of the least significant zero as an estimate of log2(phi*ndvs).
-   */
-  public long estimateNumDistinctValues() {
-    int sumLeastSigZero = 0;
-    double avgLeastSigZero;
-    double numDistinctValues;
-
-    for (int i=0; i< numBitVectors; i++) {
-      int leastSigZero = bitVector[i].nextClearBit(0);
-      sumLeastSigZero += leastSigZero;
-    }
-
-    avgLeastSigZero =
-        sumLeastSigZero/(numBitVectors * 1.0) - (Math.log(PHI)/Math.log(2.0));
-    numDistinctValues = Math.pow(2.0, avgLeastSigZero);
-    return ((long)(numDistinctValues));
-  }
-
-  @InterfaceAudience.LimitedPrivate(value = {"Hive" })
-  static int lengthFor(JavaDataModel model, Integer numVector) {
-    int length = model.object();
-    length += model.primitive1() * 2;       // two int
-    length += model.primitive2();           // one double
-    length += model.lengthForRandom() * 2;  // two Random
-
-    if (numVector == null) {
-      numVector = 16; // HiveConf hive.stats.ndv.error default produces 16 
vectors
-    }
-
-    if (numVector > 0) {
-      length += model.array() * 3;                    // three array
-      length += model.primitive1() * numVector * 2;   // two int array
-      length += (model.object() + model.array() + model.primitive1() +
-          model.primitive2()) * numVector;   // bitset array
-    }
-    return length;
-  }
-
-  public int lengthFor(JavaDataModel model) {
-    return lengthFor(model, getNumBitVectors());
-  }
-
-  // the caller needs to gurrantee that they are the same type based on 
numBitVectors
-  @Override
-  public void mergeEstimators(NumDistinctValueEstimator o) {
-    // Bitwise OR the bitvector with the bitvector in the agg buffer
-    for (int i = 0; i < numBitVectors; i++) {
-      bitVector[i].or(((FMSketch) o).getBitVector(i));
-    }
-  }
-
-  @Override
-  public void addToEstimator(String s) {
-    int v = s.hashCode();
-    addToEstimator(v);
-  }
-
-  @Override
-  public boolean canMerge(NumDistinctValueEstimator o) {
-    return o instanceof FMSketch && this.numBitVectors == ((FMSketch) 
o).numBitVectors;
-  }
-}

http://git-wip-us.apache.org/repos/asf/hive/blob/081fa368/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
----------------------------------------------------------------------
diff --git 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
 
b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
deleted file mode 100644
index 02c64b8..0000000
--- 
a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.common.ndv.fm;
-
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javolution.util.FastBitSet;
-
-public class FMSketchUtils {
-
-  static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
-  public static final byte[] MAGIC = new byte[] { 'F', 'M' };
-
-  /*
-   * Serializes a distinctValueEstimator object to Text for transport.
-   *
-   * <b>4 byte header</b> is encoded like below 2 bytes - FM magic string to
-   * identify serialized stream 2 bytes - numbitvectors because
-   * BIT_VECTOR_SIZE=31, 4 bytes are enough to hold positions of 0-31
-   */
-  public static void serializeFM(OutputStream out, FMSketch fm) throws 
IOException {
-    out.write(MAGIC);
-
-    // max of numBitVectors = 1024, 2 bytes is enough.
-    byte[] nbv = new byte[2];
-    nbv[0] = (byte) fm.getNumBitVectors();
-    nbv[1] = (byte) (fm.getNumBitVectors() >>> 8);
-
-    out.write(nbv);
-
-    // original toString takes too much space
-    // we compress a fastbitset to 4 bytes
-    for (int i = 0; i < fm.getNumBitVectors(); i++) {
-      writeBitVector(out, fm.getBitVector(i));
-    }
-  }
-
-  // BIT_VECTOR_SIZE is 31, we can use 32 bits, i.e., 4 bytes to represent a
-  // FastBitSet, rather than using 31 integers.
-  private static void writeBitVector(OutputStream out, FastBitSet bit) throws 
IOException {
-    int num = 0;
-    for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) {
-      if (bit.get(pos)) {
-        num |= 1 << pos;
-      }
-    }
-    byte[] i = new byte[4];
-    for (int j = 0; j < 4; j++) {
-      i[j] = (byte) ((num >>> (8 * j)) & 0xff);
-    }
-    out.write(i);
-  }
-
-  /*
-   * Deserializes from string to FastBitSet; Creates a 
NumDistinctValueEstimator
-   * object and returns it.
-   */
-  public static FMSketch deserializeFM(byte[] buf) throws IOException {
-    InputStream is = new ByteArrayInputStream(buf);
-    try {
-      FMSketch sketch = deserializeFM(is);
-      is.close();
-      return sketch;
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  public static FMSketch deserializeFM(InputStream in) throws IOException {
-    checkMagicString(in);
-
-    byte[] nbv = new byte[2];
-    nbv[0] = (byte) in.read();
-    nbv[1] = (byte) in.read();
-
-    int numBitVectors = 0;
-    numBitVectors |= (nbv[0] & 0xff);
-    numBitVectors |= ((nbv[1] & 0xff) << 8);
-
-    FMSketch sketch = new FMSketch(numBitVectors);
-    for (int n = 0; n < numBitVectors; n++) {
-      sketch.setBitVector(readBitVector(in), n);
-    }
-    return sketch;
-  }
-
-  private static FastBitSet readBitVector(InputStream in) throws IOException {
-    FastBitSet fastBitSet = new FastBitSet();
-    fastBitSet.clear();
-    for (int i = 0; i < 4; i++) {
-      byte b = (byte) in.read();
-      for (int j = 0; j < 8; j++) {
-        if ((b & (1 << j)) != 0) {
-          fastBitSet.set(j + 8 * i);
-        }
-      }
-    }
-    return fastBitSet;
-  }
-
-  private static void checkMagicString(InputStream in) throws IOException {
-    byte[] magic = new byte[2];
-    magic[0] = (byte) in.read();
-    magic[1] = (byte) in.read();
-
-    if (!Arrays.equals(magic, MAGIC)) {
-      throw new IllegalArgumentException("The input stream is not a FMSketch 
stream.");
-    }
-  }
-}

Reply via email to