I apologize for the big commits without proper messaging. It was difficult to remember the changes, and the original commit messages were lost due to an offline git repo (which is no longer in use). I only had the diff between the original git repo and everything after the changes. Plus, the diff didn't apply cleanly, so that's why I broke it up into different sections.
I suppose I should have broken up the changes manually out of the diff and applied them separately and recreated all the commit messages but I didn't have the time to work through all of them. Sorry. Aaron On Tuesday, August 30, 2016, Tim Williams <[email protected]> wrote: > Please be more considerate with your > commit messages... it's a lot of code to look through without having > any context besides "N round of updates." > > > On Mon, Aug 29, 2016 at 9:57 PM, <[email protected] <javascript:;>> > wrote: > > Third round of updates. > > > > > > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo > > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/ > commit/ea50630a > > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ > ea50630a > > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ > ea50630a > > > > Branch: refs/heads/master > > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a > > Parents: 0141656 > > Author: Aaron McCurry <[email protected] <javascript:;>> > > Authored: Sat May 7 13:11:54 2016 -0400 > > Committer: Aaron McCurry <[email protected] <javascript:;>> > > Committed: Sat May 7 13:11:54 2016 -0400 > > > > ---------------------------------------------------------------------- > > blur-indexer/pom.xml | 58 +++ > > blur-indexer/src/main/assemble/bin.xml | 45 ++ > > .../mapreduce/lib/update/BlurIndexCounter.java | 17 + > > .../mapreduce/lib/update/ClusterDriver.java | 362 ++++++++++++++ > > .../blur/mapreduce/lib/update/FasterDriver.java | 486 > +++++++++++++++++++ > > .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++ > > .../lib/update/InputSplitPruneUtil.java | 133 +++++ > > .../lib/update/LookupBuilderMapper.java | 18 + > > .../lib/update/LookupBuilderReducer.java | 165 +++++++ > > .../lib/update/MapperForExistingDataMod.java | 46 ++ > > .../MapperForExistingDataWithIndexLookup.java | 228 +++++++++ > > .../lib/update/MapperForNewDataMod.java | 82 ++++ 
> > .../lib/update/MergeSortRowIdMatcher.java | 372 ++++++++++++++ > > .../lib/update/PrunedBlurInputFormat.java | 57 +++ > > .../update/PrunedSequenceFileInputFormat.java | 59 +++ > > .../src/main/resources/blur-site.properties | 1 + > > .../src/main/resources/program-log4j.xml | 29 ++ > > blur-indexer/src/main/resources/test-log4j.xml | 46 ++ > > 18 files changed, 2319 insertions(+) > > ---------------------------------------------------------------------- > > > > > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > ea50630a/blur-indexer/pom.xml > > ---------------------------------------------------------------------- > > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml > > new file mode 100644 > > index 0000000..c7c1753 > > --- /dev/null > > +++ b/blur-indexer/pom.xml > > @@ -0,0 +1,58 @@ > > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi=" > http://www.w3.org/2001/XMLSchema-instance" > > + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 > http://maven.apache.org/xsd/maven-4.0.0.xsd"> > > + <modelVersion>4.0.0</modelVersion> > > + <groupId>org.apache.blur</groupId> > > + <artifactId>blur-indexer</artifactId> > > + <version>0.2.8</version> > > + <name>blur-indexer</name> > > + <packaging>jar</packaging> > > + > > + <properties> > > + <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3- > SNAPSHOT</blur.version> > > + </properties> > > + <dependencies> > > + <dependency> > > + <groupId>org.apache.blur</groupId> > > + <artifactId>blur-mapred</artifactId> > > + <version>${blur.version}</version> > > + </dependency> > > + <dependency> > > + <groupId>junit</groupId> > > + <artifactId>junit</artifactId> > > + <version>4.9</version> > > + <scope>test</scope> > > + </dependency> > > + </dependencies> > > + > > + <build> > > + <pluginManagement> > > + <plugins> > > + <plugin> > > + <groupId>org.apache.maven. 
> plugins</groupId> > > + <artifactId>maven-compiler- > plugin</artifactId> > > + <configuration> > > + <source>1.8</source> > > + <target>1.8</target> > > + </configuration> > > + </plugin> > > + </plugins> > > + </pluginManagement> > > + <plugins> > > + <plugin> > > + <artifactId>maven-assembly- > plugin</artifactId> > > + <configuration> > > + <descriptor>src/main/assemble/ > bin.xml</descriptor> > > + <finalName>blur-indexer-${ > project.version}</finalName> > > + </configuration> > > + <executions> > > + <execution> > > + <phase>package</phase> > > + <goals> > > + > <goal>single</goal> > > + </goals> > > + </execution> > > + </executions> > > + </plugin> > > + </plugins> > > + </build> > > +</project> > > > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > ea50630a/blur-indexer/src/main/assemble/bin.xml > > ---------------------------------------------------------------------- > > diff --git a/blur-indexer/src/main/assemble/bin.xml > b/blur-indexer/src/main/assemble/bin.xml > > new file mode 100644 > > index 0000000..5fddd56 > > --- /dev/null > > +++ b/blur-indexer/src/main/assemble/bin.xml > > @@ -0,0 +1,45 @@ > > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly- > plugin/assembly/1.1.2" > > + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > > + xsi:schemaLocation="http://maven.apache.org/plugins/ > maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/ > assembly-1.1.2.xsd"> > > + <formats> > > + <format>tar.gz</format> > > + </formats> > > + <includeBaseDirectory>false</includeBaseDirectory> > > + > > + <dependencySets> > > + <dependencySet> > > + <useProjectArtifact>true</useProjectArtifact> > > + <outputDirectory>blur-indexer-${project.version}/lib</ > outputDirectory> > > + <unpack>false</unpack> > > + <includes> > > + <include>org.apache.blur:blur-indexer</include> > > + <include>org.apache.blur:*</include> > > + <include>org.apache.zookeeper:zookeeper</include> > > + <include>org.slf4j:slf4j-api</include> 
> > + <include>org.slf4j:slf4j-log4j12</include> > > + <include>org.json:json</include> > > + <include>log4j:log4j</include> > > + <include>com.yammer.metrics:*</include> > > + <include>com.google.guava:guava</include> > > + <include>org.apache.httpcomponents:*</include> > > + <include>org.apache.lucene:*</include> > > + <include>com.spatial4j:spatial4j</include> > > + <include>commons-cli:commons-cli</include> > > + <include>org.eclipse.jetty:*</include> > > + <include>com.googlecode.concurrentlinkedhashmap: > concurrentlinkedhashmap-lru</include> > > + <include>jline:jline</include> > > + <include>com.fasterxml.jackson.core:*</include> > > + </includes> > > + </dependencySet> > > + </dependencySets> > > + > > + <fileSets> > > + <fileSet> > > + <directory>${project.build.scriptSourceDirectory}</directory> > > + <outputDirectory>blur-indexer-${project.version}</ > outputDirectory> > > + <excludes> > > + <exclude>**/.empty</exclude> > > + </excludes> > > + </fileSet> > > + </fileSets> > > +</assembly> > > > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ > BlurIndexCounter.java > > ---------------------------------------------------------------------- > > diff --git > > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > update/BlurIndexCounter.java > > new file mode 100644 > > index 0000000..a9caabb > > --- /dev/null > > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > update/BlurIndexCounter.java > > @@ -0,0 +1,17 @@ > > +package org.apache.blur.mapreduce.lib.update; > > + > > +public enum BlurIndexCounter { > > + > > + NEW_RECORDS, > > + ROW_IDS_FROM_INDEX, > > + ROW_IDS_TO_UPDATE_FROM_NEW_DATA, > > + ROW_IDS_FROM_NEW_DATA, > > + > > + INPUT_FORMAT_MAPPER, > > + INPUT_FORMAT_EXISTING_RECORDS, > > + > > + LOOKUP_MAPPER, > > + LOOKUP_MAPPER_EXISTING_RECORDS, > > 
+ LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT > > + > > +} > > > > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ > ClusterDriver.java > > ---------------------------------------------------------------------- > > diff --git > > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java > b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > update/ClusterDriver.java > > new file mode 100644 > > index 0000000..d44adf1 > > --- /dev/null > > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > update/ClusterDriver.java > > @@ -0,0 +1,362 @@ > > +package org.apache.blur.mapreduce.lib.update; > > + > > +import java.io.ByteArrayInputStream; > > +import java.io.ByteArrayOutputStream; > > +import java.io.IOException; > > +import java.io.InputStream; > > +import java.net.URL; > > +import java.util.HashMap; > > +import java.util.HashSet; > > +import java.util.List; > > +import java.util.Map; > > +import java.util.Map.Entry; > > +import java.util.Set; > > +import java.util.UUID; > > +import java.util.concurrent.Callable; > > +import java.util.concurrent.ExecutionException; > > +import java.util.concurrent.ExecutorService; > > +import java.util.concurrent.Executors; > > +import java.util.concurrent.Future; > > +import java.util.concurrent.TimeUnit; > > +import java.util.concurrent.atomic.AtomicBoolean; > > + > > +import org.apache.blur.log.Log; > > +import org.apache.blur.log.LogFactory; > > +import org.apache.blur.mapreduce.lib.BlurInputFormat; > > +import org.apache.blur.thirdparty.thrift_0_9_0.TException; > > +import org.apache.blur.thrift.BlurClient; > > +import org.apache.blur.thrift.generated.Blur.Iface; > > +import org.apache.blur.thrift.generated.BlurException; > > +import org.apache.blur.thrift.generated.TableDescriptor; > > +import org.apache.blur.thrift.generated.TableStats; > > +import org.apache.blur.utils.BlurConstants; > > +import 
org.apache.commons.io.IOUtils; > > +import org.apache.hadoop.conf.Configuration; > > +import org.apache.hadoop.conf.Configured; > > +import org.apache.hadoop.fs.FSDataInputStream; > > +import org.apache.hadoop.fs.FileStatus; > > +import org.apache.hadoop.fs.FileSystem; > > +import org.apache.hadoop.fs.Path; > > +import org.apache.hadoop.fs.permission.FsAction; > > +import org.apache.hadoop.mapreduce.Cluster; > > +import org.apache.hadoop.mapreduce.Job; > > +import org.apache.hadoop.mapreduce.JobID; > > +import org.apache.hadoop.mapreduce.JobStatus; > > +import org.apache.hadoop.util.Tool; > > +import org.apache.hadoop.util.ToolRunner; > > +import org.apache.hadoop.yarn.exceptions.YarnException; > > +import org.apache.log4j.LogManager; > > +import org.apache.log4j.xml.DOMConfigurator; > > + > > +public class ClusterDriver extends Configured implements Tool { > > + > > + private static final String BLUR_ENV = "blur.env"; > > + private static final Log LOG = LogFactory.getLog( > ClusterDriver.class); > > + private static final String _SEP = "_"; > > + private static final String IMPORT = "import"; > > + > > + public static void main(String[] args) throws Exception { > > + String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE"); > > + System.out.println("Log file path [" + logFilePath + "]"); > > + System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath); > > + URL url = ClusterDriver.class.getResource("/program-log4j.xml"); > > + if (url != null) { > > + LOG.info("Reseting log4j config from classpath resource [{0}]", > url); > > + LogManager.resetConfiguration(); > > + DOMConfigurator.configure(url); > > + } > > + int res = ToolRunner.run(new Configuration(), new ClusterDriver(), > args); > > Not sure what this thing does yet but it seems we should validate > those args since they're accessed blindly in run... > > --tim >
