Will do. :-) On Tue, Aug 30, 2016 at 9:10 AM, Tim Williams <[email protected]> wrote:
> No worries, just a friendly reminder:) If you get time, I think it'd > be helpful to add a couple sentences about any new stuff/big changes... > seems like there's a new project for example... > > Thanks, > --tim > > > On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <[email protected]> wrote: > > I apologize for the big commits without proper messaging. It was > difficult > > to remember the changes and the original commit messages were lost due to > an > > offline git repo (which is no longer in use). I only had the diff > between > > the original git repo and everything after the changes. Plus the diff > > didn't apply cleanly so that's why I broke it up into different > sections. > > > > I suppose I should have broken up the changes manually out of the diff and > > applied them separately and recreated all the commit messages but I > didn't > > have the time to work through all of them. Sorry. > > > > Aaron > > > > > > On Tuesday, August 30, 2016, Tim Williams <[email protected]> wrote: > > > >> Please be more considerate with your > >> commit messages... it's a lot of code to look through without having > >> any context besides "N round of updates." > >> > >> > >> On Mon, Aug 29, 2016 at 9:57 PM, <[email protected] <javascript:;>> > >> wrote: > >> > Third round of updates. 
> >> > > >> > > >> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo > >> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/ > >> commit/ea50630a > >> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ > >> ea50630a > >> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ > >> ea50630a > >> > > >> > Branch: refs/heads/master > >> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a > >> > Parents: 0141656 > >> > Author: Aaron McCurry <[email protected] <javascript:;>> > >> > Authored: Sat May 7 13:11:54 2016 -0400 > >> > Committer: Aaron McCurry <[email protected] <javascript:;>> > >> > Committed: Sat May 7 13:11:54 2016 -0400 > >> > > >> > ------------------------------------------------------------ > ---------- > >> > blur-indexer/pom.xml | 58 +++ > >> > blur-indexer/src/main/assemble/bin.xml | 45 ++ > >> > .../mapreduce/lib/update/BlurIndexCounter.java | 17 + > >> > .../mapreduce/lib/update/ClusterDriver.java | 362 ++++++++++++++ > >> > .../blur/mapreduce/lib/update/FasterDriver.java | 486 > >> +++++++++++++++++++ > >> > .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++ > >> > .../lib/update/InputSplitPruneUtil.java | 133 +++++ > >> > .../lib/update/LookupBuilderMapper.java | 18 + > >> > .../lib/update/LookupBuilderReducer.java | 165 +++++++ > >> > .../lib/update/MapperForExistingDataMod.java | 46 ++ > >> > .../MapperForExistingDataWithIndexLookup.java | 228 +++++++++ > >> > .../lib/update/MapperForNewDataMod.java | 82 ++++ > >> > .../lib/update/MergeSortRowIdMatcher.java | 372 ++++++++++++++ > >> > .../lib/update/PrunedBlurInputFormat.java | 57 +++ > >> > .../update/PrunedSequenceFileInputFormat.java | 59 +++ > >> > .../src/main/resources/blur-site.properties | 1 + > >> > .../src/main/resources/program-log4j.xml | 29 ++ > >> > blur-indexer/src/main/resources/test-log4j.xml | 46 ++ > >> > 18 files changed, 2319 insertions(+) > >> > 
------------------------------------------------------------ > ---------- > >> > > >> > > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > >> ea50630a/blur-indexer/pom.xml > >> > ------------------------------------------------------------ > ---------- > >> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml > >> > new file mode 100644 > >> > index 0000000..c7c1753 > >> > --- /dev/null > >> > +++ b/blur-indexer/pom.xml > >> > @@ -0,0 +1,58 @@ > >> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi=" > >> http://www.w3.org/2001/XMLSchema-instance" > >> > + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 > >> http://maven.apache.org/xsd/maven-4.0.0.xsd"> > >> > + <modelVersion>4.0.0</modelVersion> > >> > + <groupId>org.apache.blur</groupId> > >> > + <artifactId>blur-indexer</artifactId> > >> > + <version>0.2.8</version> > >> > + <name>blur-indexer</name> > >> > + <packaging>jar</packaging> > >> > + > >> > + <properties> > >> > + <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3- > >> SNAPSHOT</blur.version> > >> > + </properties> > >> > + <dependencies> > >> > + <dependency> > >> > + <groupId>org.apache.blur</groupId> > >> > + <artifactId>blur-mapred</artifactId> > >> > + <version>${blur.version}</version> > >> > + </dependency> > >> > + <dependency> > >> > + <groupId>junit</groupId> > >> > + <artifactId>junit</artifactId> > >> > + <version>4.9</version> > >> > + <scope>test</scope> > >> > + </dependency> > >> > + </dependencies> > >> > + > >> > + <build> > >> > + <pluginManagement> > >> > + <plugins> > >> > + <plugin> > >> > + <groupId>org.apache.maven. 
> >> plugins</groupId> > >> > + <artifactId>maven-compiler- > >> plugin</artifactId> > >> > + <configuration> > >> > + <source>1.8</source> > >> > + <target>1.8</target> > >> > + </configuration> > >> > + </plugin> > >> > + </plugins> > >> > + </pluginManagement> > >> > + <plugins> > >> > + <plugin> > >> > + <artifactId>maven-assembly- > >> plugin</artifactId> > >> > + <configuration> > >> > + <descriptor>src/main/assemble/ > >> bin.xml</descriptor> > >> > + <finalName>blur-indexer-${ > >> project.version}</finalName> > >> > + </configuration> > >> > + <executions> > >> > + <execution> > >> > + <phase>package</phase> > >> > + <goals> > >> > + > >> <goal>single</goal> > >> > + </goals> > >> > + </execution> > >> > + </executions> > >> > + </plugin> > >> > + </plugins> > >> > + </build> > >> > +</project> > >> > > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > >> ea50630a/blur-indexer/src/main/assemble/bin.xml > >> > ------------------------------------------------------------ > ---------- > >> > diff --git a/blur-indexer/src/main/assemble/bin.xml > >> b/blur-indexer/src/main/assemble/bin.xml > >> > new file mode 100644 > >> > index 0000000..5fddd56 > >> > --- /dev/null > >> > +++ b/blur-indexer/src/main/assemble/bin.xml > >> > @@ -0,0 +1,45 @@ > >> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly- > >> plugin/assembly/1.1.2" > >> > + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > >> > + xsi:schemaLocation="http://maven.apache.org/plugins/ > >> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/ > >> assembly-1.1.2.xsd"> > >> > + <formats> > >> > + <format>tar.gz</format> > >> > + </formats> > >> > + <includeBaseDirectory>false</includeBaseDirectory> > >> > + > >> > + <dependencySets> > >> > + <dependencySet> > >> > + <useProjectArtifact>true</useProjectArtifact> > >> > + <outputDirectory>blur-indexer-${project.version}/lib</ > >> outputDirectory> > >> > + <unpack>false</unpack> > >> > + <includes> > >> > + 
<include>org.apache.blur:blur-indexer</include> > >> > + <include>org.apache.blur:*</include> > >> > + <include>org.apache.zookeeper:zookeeper</include> > >> > + <include>org.slf4j:slf4j-api</include> > >> > + <include>org.slf4j:slf4j-log4j12</include> > >> > + <include>org.json:json</include> > >> > + <include>log4j:log4j</include> > >> > + <include>com.yammer.metrics:*</include> > >> > + <include>com.google.guava:guava</include> > >> > + <include>org.apache.httpcomponents:*</include> > >> > + <include>org.apache.lucene:*</include> > >> > + <include>com.spatial4j:spatial4j</include> > >> > + <include>commons-cli:commons-cli</include> > >> > + <include>org.eclipse.jetty:*</include> > >> > + <include>com.googlecode.concurrentlinkedhashmap: > >> concurrentlinkedhashmap-lru</include> > >> > + <include>jline:jline</include> > >> > + <include>com.fasterxml.jackson.core:*</include> > >> > + </includes> > >> > + </dependencySet> > >> > + </dependencySets> > >> > + > >> > + <fileSets> > >> > + <fileSet> > >> > + <directory>${project.build.scriptSourceDirectory}</directory> > >> > + <outputDirectory>blur-indexer-${project.version}</ > >> outputDirectory> > >> > + <excludes> > >> > + <exclude>**/.empty</exclude> > >> > + </excludes> > >> > + </fileSet> > >> > + </fileSets> > >> > +</assembly> > >> > > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > >> ea50630a/blur-indexer/src/main/java/org/apache/blur/ > mapreduce/lib/update/ > >> BlurIndexCounter.java > >> > ------------------------------------------------------------ > ---------- > >> > diff --git a/blur-indexer/src/main/java/ > org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java > >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > >> update/BlurIndexCounter.java > >> > new file mode 100644 > >> > index 0000000..a9caabb > >> > --- /dev/null > >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > >> update/BlurIndexCounter.java > >> > @@ -0,0 +1,17 @@ > >> > +package 
org.apache.blur.mapreduce.lib.update; > >> > + > >> > +public enum BlurIndexCounter { > >> > + > >> > + NEW_RECORDS, > >> > + ROW_IDS_FROM_INDEX, > >> > + ROW_IDS_TO_UPDATE_FROM_NEW_DATA, > >> > + ROW_IDS_FROM_NEW_DATA, > >> > + > >> > + INPUT_FORMAT_MAPPER, > >> > + INPUT_FORMAT_EXISTING_RECORDS, > >> > + > >> > + LOOKUP_MAPPER, > >> > + LOOKUP_MAPPER_EXISTING_RECORDS, > >> > + LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT > >> > + > >> > +} > >> > > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ > >> ea50630a/blur-indexer/src/main/java/org/apache/blur/ > mapreduce/lib/update/ > >> ClusterDriver.java > >> > ------------------------------------------------------------ > ---------- > >> > diff --git a/blur-indexer/src/main/java/ > org/apache/blur/mapreduce/lib/update/ClusterDriver.java > >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > >> update/ClusterDriver.java > >> > new file mode 100644 > >> > index 0000000..d44adf1 > >> > --- /dev/null > >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ > >> update/ClusterDriver.java > >> > @@ -0,0 +1,362 @@ > >> > +package org.apache.blur.mapreduce.lib.update; > >> > + > >> > +import java.io.ByteArrayInputStream; > >> > +import java.io.ByteArrayOutputStream; > >> > +import java.io.IOException; > >> > +import java.io.InputStream; > >> > +import java.net.URL; > >> > +import java.util.HashMap; > >> > +import java.util.HashSet; > >> > +import java.util.List; > >> > +import java.util.Map; > >> > +import java.util.Map.Entry; > >> > +import java.util.Set; > >> > +import java.util.UUID; > >> > +import java.util.concurrent.Callable; > >> > +import java.util.concurrent.ExecutionException; > >> > +import java.util.concurrent.ExecutorService; > >> > +import java.util.concurrent.Executors; > >> > +import java.util.concurrent.Future; > >> > +import java.util.concurrent.TimeUnit; > >> > +import java.util.concurrent.atomic.AtomicBoolean; > >> > + > >> > +import org.apache.blur.log.Log; > >> > +import 
org.apache.blur.log.LogFactory; > >> > +import org.apache.blur.mapreduce.lib.BlurInputFormat; > >> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException; > >> > +import org.apache.blur.thrift.BlurClient; > >> > +import org.apache.blur.thrift.generated.Blur.Iface; > >> > +import org.apache.blur.thrift.generated.BlurException; > >> > +import org.apache.blur.thrift.generated.TableDescriptor; > >> > +import org.apache.blur.thrift.generated.TableStats; > >> > +import org.apache.blur.utils.BlurConstants; > >> > +import org.apache.commons.io.IOUtils; > >> > +import org.apache.hadoop.conf.Configuration; > >> > +import org.apache.hadoop.conf.Configured; > >> > +import org.apache.hadoop.fs.FSDataInputStream; > >> > +import org.apache.hadoop.fs.FileStatus; > >> > +import org.apache.hadoop.fs.FileSystem; > >> > +import org.apache.hadoop.fs.Path; > >> > +import org.apache.hadoop.fs.permission.FsAction; > >> > +import org.apache.hadoop.mapreduce.Cluster; > >> > +import org.apache.hadoop.mapreduce.Job; > >> > +import org.apache.hadoop.mapreduce.JobID; > >> > +import org.apache.hadoop.mapreduce.JobStatus; > >> > +import org.apache.hadoop.util.Tool; > >> > +import org.apache.hadoop.util.ToolRunner; > >> > +import org.apache.hadoop.yarn.exceptions.YarnException; > >> > +import org.apache.log4j.LogManager; > >> > +import org.apache.log4j.xml.DOMConfigurator; > >> > + > >> > +public class ClusterDriver extends Configured implements Tool { > >> > + > >> > + private static final String BLUR_ENV = "blur.env"; > >> > + private static final Log LOG = LogFactory.getLog( > >> ClusterDriver.class); > >> > + private static final String _SEP = "_"; > >> > + private static final String IMPORT = "import"; > >> > + > >> > + public static void main(String[] args) throws Exception { > >> > + String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE"); > >> > + System.out.println("Log file path [" + logFilePath + "]"); > >> > + System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath); > >> > + 
URL url = ClusterDriver.class.getResource("/program-log4j.xml"); > >> > + if (url != null) { > >> > + LOG.info("Reseting log4j config from classpath resource [{0}]", > >> url); > >> > + LogManager.resetConfiguration(); > >> > + DOMConfigurator.configure(url); > >> > + } > >> > + int res = ToolRunner.run(new Configuration(), new > ClusterDriver(), > >> args); > >> > >> Not sure what this thing does yet but it seems we should validate > >> those args since they're accessed blindly in run... > >> > >> --tim > >> >
