Please be more considerate with your commit messages... it's a lot of code to look through without having any context besides "N round of updates."
On Mon, Aug 29, 2016 at 9:57 PM, <[email protected]> wrote: > Third round of updates. > > > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/ea50630a > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ea50630a > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ea50630a > > Branch: refs/heads/master > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a > Parents: 0141656 > Author: Aaron McCurry <[email protected]> > Authored: Sat May 7 13:11:54 2016 -0400 > Committer: Aaron McCurry <[email protected]> > Committed: Sat May 7 13:11:54 2016 -0400 > > ---------------------------------------------------------------------- > blur-indexer/pom.xml | 58 +++ > blur-indexer/src/main/assemble/bin.xml | 45 ++ > .../mapreduce/lib/update/BlurIndexCounter.java | 17 + > .../mapreduce/lib/update/ClusterDriver.java | 362 ++++++++++++++ > .../blur/mapreduce/lib/update/FasterDriver.java | 486 +++++++++++++++++++ > .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++ > .../lib/update/InputSplitPruneUtil.java | 133 +++++ > .../lib/update/LookupBuilderMapper.java | 18 + > .../lib/update/LookupBuilderReducer.java | 165 +++++++ > .../lib/update/MapperForExistingDataMod.java | 46 ++ > .../MapperForExistingDataWithIndexLookup.java | 228 +++++++++ > .../lib/update/MapperForNewDataMod.java | 82 ++++ > .../lib/update/MergeSortRowIdMatcher.java | 372 ++++++++++++++ > .../lib/update/PrunedBlurInputFormat.java | 57 +++ > .../update/PrunedSequenceFileInputFormat.java | 59 +++ > .../src/main/resources/blur-site.properties | 1 + > .../src/main/resources/program-log4j.xml | 29 ++ > blur-indexer/src/main/resources/test-log4j.xml | 46 ++ > 18 files changed, 2319 insertions(+) > ---------------------------------------------------------------------- > > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/pom.xml > 
---------------------------------------------------------------------- > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml > new file mode 100644 > index 0000000..c7c1753 > --- /dev/null > +++ b/blur-indexer/pom.xml > @@ -0,0 +1,58 @@ > +<project xmlns="http://maven.apache.org/POM/4.0.0" > xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 > http://maven.apache.org/xsd/maven-4.0.0.xsd"> > + <modelVersion>4.0.0</modelVersion> > + <groupId>org.apache.blur</groupId> > + <artifactId>blur-indexer</artifactId> > + <version>0.2.8</version> > + <name>blur-indexer</name> > + <packaging>jar</packaging> > + > + <properties> > + > <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-SNAPSHOT</blur.version> > + </properties> > + <dependencies> > + <dependency> > + <groupId>org.apache.blur</groupId> > + <artifactId>blur-mapred</artifactId> > + <version>${blur.version}</version> > + </dependency> > + <dependency> > + <groupId>junit</groupId> > + <artifactId>junit</artifactId> > + <version>4.9</version> > + <scope>test</scope> > + </dependency> > + </dependencies> > + > + <build> > + <pluginManagement> > + <plugins> > + <plugin> > + > <groupId>org.apache.maven.plugins</groupId> > + > <artifactId>maven-compiler-plugin</artifactId> > + <configuration> > + <source>1.8</source> > + <target>1.8</target> > + </configuration> > + </plugin> > + </plugins> > + </pluginManagement> > + <plugins> > + <plugin> > + <artifactId>maven-assembly-plugin</artifactId> > + <configuration> > + > <descriptor>src/main/assemble/bin.xml</descriptor> > + > <finalName>blur-indexer-${project.version}</finalName> > + </configuration> > + <executions> > + <execution> > + <phase>package</phase> > + <goals> > + <goal>single</goal> > + </goals> > + </execution> > + </executions> > + </plugin> > + </plugins> > + </build> > +</project> > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/assemble/bin.xml > 
---------------------------------------------------------------------- > diff --git a/blur-indexer/src/main/assemble/bin.xml > b/blur-indexer/src/main/assemble/bin.xml > new file mode 100644 > index 0000000..5fddd56 > --- /dev/null > +++ b/blur-indexer/src/main/assemble/bin.xml > @@ -0,0 +1,45 @@ > +<assembly > xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2" > + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > + > xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 > http://maven.apache.org/xsd/assembly-1.1.2.xsd"> > + <formats> > + <format>tar.gz</format> > + </formats> > + <includeBaseDirectory>false</includeBaseDirectory> > + > + <dependencySets> > + <dependencySet> > + <useProjectArtifact>true</useProjectArtifact> > + <outputDirectory>blur-indexer-${project.version}/lib</outputDirectory> > + <unpack>false</unpack> > + <includes> > + <include>org.apache.blur:blur-indexer</include> > + <include>org.apache.blur:*</include> > + <include>org.apache.zookeeper:zookeeper</include> > + <include>org.slf4j:slf4j-api</include> > + <include>org.slf4j:slf4j-log4j12</include> > + <include>org.json:json</include> > + <include>log4j:log4j</include> > + <include>com.yammer.metrics:*</include> > + <include>com.google.guava:guava</include> > + <include>org.apache.httpcomponents:*</include> > + <include>org.apache.lucene:*</include> > + <include>com.spatial4j:spatial4j</include> > + <include>commons-cli:commons-cli</include> > + <include>org.eclipse.jetty:*</include> > + > <include>com.googlecode.concurrentlinkedhashmap:concurrentlinkedhashmap-lru</include> > + <include>jline:jline</include> > + <include>com.fasterxml.jackson.core:*</include> > + </includes> > + </dependencySet> > + </dependencySets> > + > + <fileSets> > + <fileSet> > + <directory>${project.build.scriptSourceDirectory}</directory> > + <outputDirectory>blur-indexer-${project.version}</outputDirectory> > + <excludes> > + 
<exclude>**/.empty</exclude> > + </excludes> > + </fileSet> > + </fileSets> > +</assembly> > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > ---------------------------------------------------------------------- > diff --git > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > new file mode 100644 > index 0000000..a9caabb > --- /dev/null > +++ > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > @@ -0,0 +1,17 @@ > +package org.apache.blur.mapreduce.lib.update; > + > +public enum BlurIndexCounter { > + > + NEW_RECORDS, > + ROW_IDS_FROM_INDEX, > + ROW_IDS_TO_UPDATE_FROM_NEW_DATA, > + ROW_IDS_FROM_NEW_DATA, > + > + INPUT_FORMAT_MAPPER, > + INPUT_FORMAT_EXISTING_RECORDS, > + > + LOOKUP_MAPPER, > + LOOKUP_MAPPER_EXISTING_RECORDS, > + LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT > + > +} > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java > ---------------------------------------------------------------------- > diff --git > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java > > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java > new file mode 100644 > index 0000000..d44adf1 > --- /dev/null > +++ > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java > @@ -0,0 +1,362 @@ > +package org.apache.blur.mapreduce.lib.update; > + > +import java.io.ByteArrayInputStream; > +import java.io.ByteArrayOutputStream; > +import java.io.IOException; > +import java.io.InputStream; > +import java.net.URL; > +import java.util.HashMap; > +import java.util.HashSet; > +import java.util.List; > +import java.util.Map; > +import 
java.util.Map.Entry; > +import java.util.Set; > +import java.util.UUID; > +import java.util.concurrent.Callable; > +import java.util.concurrent.ExecutionException; > +import java.util.concurrent.ExecutorService; > +import java.util.concurrent.Executors; > +import java.util.concurrent.Future; > +import java.util.concurrent.TimeUnit; > +import java.util.concurrent.atomic.AtomicBoolean; > + > +import org.apache.blur.log.Log; > +import org.apache.blur.log.LogFactory; > +import org.apache.blur.mapreduce.lib.BlurInputFormat; > +import org.apache.blur.thirdparty.thrift_0_9_0.TException; > +import org.apache.blur.thrift.BlurClient; > +import org.apache.blur.thrift.generated.Blur.Iface; > +import org.apache.blur.thrift.generated.BlurException; > +import org.apache.blur.thrift.generated.TableDescriptor; > +import org.apache.blur.thrift.generated.TableStats; > +import org.apache.blur.utils.BlurConstants; > +import org.apache.commons.io.IOUtils; > +import org.apache.hadoop.conf.Configuration; > +import org.apache.hadoop.conf.Configured; > +import org.apache.hadoop.fs.FSDataInputStream; > +import org.apache.hadoop.fs.FileStatus; > +import org.apache.hadoop.fs.FileSystem; > +import org.apache.hadoop.fs.Path; > +import org.apache.hadoop.fs.permission.FsAction; > +import org.apache.hadoop.mapreduce.Cluster; > +import org.apache.hadoop.mapreduce.Job; > +import org.apache.hadoop.mapreduce.JobID; > +import org.apache.hadoop.mapreduce.JobStatus; > +import org.apache.hadoop.util.Tool; > +import org.apache.hadoop.util.ToolRunner; > +import org.apache.hadoop.yarn.exceptions.YarnException; > +import org.apache.log4j.LogManager; > +import org.apache.log4j.xml.DOMConfigurator; > + > +public class ClusterDriver extends Configured implements Tool { > + > + private static final String BLUR_ENV = "blur.env"; > + private static final Log LOG = LogFactory.getLog(ClusterDriver.class); > + private static final String _SEP = "_"; > + private static final String IMPORT = "import"; > + > + public 
static void main(String[] args) throws Exception { > + String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE"); > + System.out.println("Log file path [" + logFilePath + "]"); > + System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath); > + URL url = ClusterDriver.class.getResource("/program-log4j.xml"); > + if (url != null) { > + LOG.info("Reseting log4j config from classpath resource [{0}]", url); > + LogManager.resetConfiguration(); > + DOMConfigurator.configure(url); > + } > + int res = ToolRunner.run(new Configuration(), new ClusterDriver(), args); Not sure what this thing does yet, but it seems we should validate those args since they're accessed blindly in run... --tim
