No worries, just a friendly reminder:) If you get time, I think it'd be helpful to add a couple sentences about any new stuff/big changes... seems like there's a new project for example...
Thanks, --tim On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <[email protected]> wrote: > I apologize for the big commits without proper messaging. It was difficult > to remember the changes and the original commit messages were lost due to an > offline git repo (which is no longer in use). I only had the diff between > the original git repo and everything after the changes. Plus the diff > didn't apply cleanly so that's why I broke it up into different sections. > > I suppose I should have broken up the changes manually out of the diff and > applied them separately and recreated all the commit messages but I didn't > have the time to work through all of them. Sorry. > > Aaron > > > On Tuesday, August 30, 2016, Tim Williams <[email protected]> wrote: > >> Not sure what this is yet, but please be more considerate with your >> commit messages... it's a lot of code to look through without having >> any context besides "N round of updates." >> >> >> On Mon, Aug 29, 2016 at 9:57 PM, <[email protected] <javascript:;>> >> wrote: >> > Third round of updates. 
>> > >> > >> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo >> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/ >> commit/ea50630a >> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ >> ea50630a >> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ >> ea50630a >> > >> > Branch: refs/heads/master >> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a >> > Parents: 0141656 >> > Author: Aaron McCurry <[email protected] <javascript:;>> >> > Authored: Sat May 7 13:11:54 2016 -0400 >> > Committer: Aaron McCurry <[email protected] <javascript:;>> >> > Committed: Sat May 7 13:11:54 2016 -0400 >> > >> > ---------------------------------------------------------------------- >> > blur-indexer/pom.xml | 58 +++ >> > blur-indexer/src/main/assemble/bin.xml | 45 ++ >> > .../mapreduce/lib/update/BlurIndexCounter.java | 17 + >> > .../mapreduce/lib/update/ClusterDriver.java | 362 ++++++++++++++ >> > .../blur/mapreduce/lib/update/FasterDriver.java | 486 >> +++++++++++++++++++ >> > .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++ >> > .../lib/update/InputSplitPruneUtil.java | 133 +++++ >> > .../lib/update/LookupBuilderMapper.java | 18 + >> > .../lib/update/LookupBuilderReducer.java | 165 +++++++ >> > .../lib/update/MapperForExistingDataMod.java | 46 ++ >> > .../MapperForExistingDataWithIndexLookup.java | 228 +++++++++ >> > .../lib/update/MapperForNewDataMod.java | 82 ++++ >> > .../lib/update/MergeSortRowIdMatcher.java | 372 ++++++++++++++ >> > .../lib/update/PrunedBlurInputFormat.java | 57 +++ >> > .../update/PrunedSequenceFileInputFormat.java | 59 +++ >> > .../src/main/resources/blur-site.properties | 1 + >> > .../src/main/resources/program-log4j.xml | 29 ++ >> > blur-indexer/src/main/resources/test-log4j.xml | 46 ++ >> > 18 files changed, 2319 insertions(+) >> > ---------------------------------------------------------------------- >> > >> > >> > 
http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ >> ea50630a/blur-indexer/pom.xml >> > ---------------------------------------------------------------------- >> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml >> > new file mode 100644 >> > index 0000000..c7c1753 >> > --- /dev/null >> > +++ b/blur-indexer/pom.xml >> > @@ -0,0 +1,58 @@ >> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi=" >> http://www.w3.org/2001/XMLSchema-instance" >> > + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 >> http://maven.apache.org/xsd/maven-4.0.0.xsd"> >> > + <modelVersion>4.0.0</modelVersion> >> > + <groupId>org.apache.blur</groupId> >> > + <artifactId>blur-indexer</artifactId> >> > + <version>0.2.8</version> >> > + <name>blur-indexer</name> >> > + <packaging>jar</packaging> >> > + >> > + <properties> >> > + <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3- >> SNAPSHOT</blur.version> >> > + </properties> >> > + <dependencies> >> > + <dependency> >> > + <groupId>org.apache.blur</groupId> >> > + <artifactId>blur-mapred</artifactId> >> > + <version>${blur.version}</version> >> > + </dependency> >> > + <dependency> >> > + <groupId>junit</groupId> >> > + <artifactId>junit</artifactId> >> > + <version>4.9</version> >> > + <scope>test</scope> >> > + </dependency> >> > + </dependencies> >> > + >> > + <build> >> > + <pluginManagement> >> > + <plugins> >> > + <plugin> >> > + <groupId>org.apache.maven. 
>> plugins</groupId> >> > + <artifactId>maven-compiler- >> plugin</artifactId> >> > + <configuration> >> > + <source>1.8</source> >> > + <target>1.8</target> >> > + </configuration> >> > + </plugin> >> > + </plugins> >> > + </pluginManagement> >> > + <plugins> >> > + <plugin> >> > + <artifactId>maven-assembly- >> plugin</artifactId> >> > + <configuration> >> > + <descriptor>src/main/assemble/ >> bin.xml</descriptor> >> > + <finalName>blur-indexer-${ >> project.version}</finalName> >> > + </configuration> >> > + <executions> >> > + <execution> >> > + <phase>package</phase> >> > + <goals> >> > + >> <goal>single</goal> >> > + </goals> >> > + </execution> >> > + </executions> >> > + </plugin> >> > + </plugins> >> > + </build> >> > +</project> >> > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ >> ea50630a/blur-indexer/src/main/assemble/bin.xml >> > ---------------------------------------------------------------------- >> > diff --git a/blur-indexer/src/main/assemble/bin.xml >> b/blur-indexer/src/main/assemble/bin.xml >> > new file mode 100644 >> > index 0000000..5fddd56 >> > --- /dev/null >> > +++ b/blur-indexer/src/main/assemble/bin.xml >> > @@ -0,0 +1,45 @@ >> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly- >> plugin/assembly/1.1.2" >> > + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" >> > + xsi:schemaLocation="http://maven.apache.org/plugins/ >> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/ >> assembly-1.1.2.xsd"> >> > + <formats> >> > + <format>tar.gz</format> >> > + </formats> >> > + <includeBaseDirectory>false</includeBaseDirectory> >> > + >> > + <dependencySets> >> > + <dependencySet> >> > + <useProjectArtifact>true</useProjectArtifact> >> > + <outputDirectory>blur-indexer-${project.version}/lib</ >> outputDirectory> >> > + <unpack>false</unpack> >> > + <includes> >> > + <include>org.apache.blur:blur-indexer</include> >> > + <include>org.apache.blur:*</include> >> > + 
<include>org.apache.zookeeper:zookeeper</include> >> > + <include>org.slf4j:slf4j-api</include> >> > + <include>org.slf4j:slf4j-log4j12</include> >> > + <include>org.json:json</include> >> > + <include>log4j:log4j</include> >> > + <include>com.yammer.metrics:*</include> >> > + <include>com.google.guava:guava</include> >> > + <include>org.apache.httpcomponents:*</include> >> > + <include>org.apache.lucene:*</include> >> > + <include>com.spatial4j:spatial4j</include> >> > + <include>commons-cli:commons-cli</include> >> > + <include>org.eclipse.jetty:*</include> >> > + <include>com.googlecode.concurrentlinkedhashmap: >> concurrentlinkedhashmap-lru</include> >> > + <include>jline:jline</include> >> > + <include>com.fasterxml.jackson.core:*</include> >> > + </includes> >> > + </dependencySet> >> > + </dependencySets> >> > + >> > + <fileSets> >> > + <fileSet> >> > + <directory>${project.build.scriptSourceDirectory}</directory> >> > + <outputDirectory>blur-indexer-${project.version}</ >> outputDirectory> >> > + <excludes> >> > + <exclude>**/.empty</exclude> >> > + </excludes> >> > + </fileSet> >> > + </fileSets> >> > +</assembly> >> > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ >> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ >> BlurIndexCounter.java >> > ---------------------------------------------------------------------- >> > diff --git >> > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ >> update/BlurIndexCounter.java >> > new file mode 100644 >> > index 0000000..a9caabb >> > --- /dev/null >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ >> update/BlurIndexCounter.java >> > @@ -0,0 +1,17 @@ >> > +package org.apache.blur.mapreduce.lib.update; >> > + >> > +public enum BlurIndexCounter { >> > + >> > + NEW_RECORDS, >> > + ROW_IDS_FROM_INDEX, >> > + ROW_IDS_TO_UPDATE_FROM_NEW_DATA, >> > + 
ROW_IDS_FROM_NEW_DATA, >> > + >> > + INPUT_FORMAT_MAPPER, >> > + INPUT_FORMAT_EXISTING_RECORDS, >> > + >> > + LOOKUP_MAPPER, >> > + LOOKUP_MAPPER_EXISTING_RECORDS, >> > + LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT >> > + >> > +} >> > >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ >> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ >> ClusterDriver.java >> > ---------------------------------------------------------------------- >> > diff --git >> > a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ >> update/ClusterDriver.java >> > new file mode 100644 >> > index 0000000..d44adf1 >> > --- /dev/null >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/ >> update/ClusterDriver.java >> > @@ -0,0 +1,362 @@ >> > +package org.apache.blur.mapreduce.lib.update; >> > + >> > +import java.io.ByteArrayInputStream; >> > +import java.io.ByteArrayOutputStream; >> > +import java.io.IOException; >> > +import java.io.InputStream; >> > +import java.net.URL; >> > +import java.util.HashMap; >> > +import java.util.HashSet; >> > +import java.util.List; >> > +import java.util.Map; >> > +import java.util.Map.Entry; >> > +import java.util.Set; >> > +import java.util.UUID; >> > +import java.util.concurrent.Callable; >> > +import java.util.concurrent.ExecutionException; >> > +import java.util.concurrent.ExecutorService; >> > +import java.util.concurrent.Executors; >> > +import java.util.concurrent.Future; >> > +import java.util.concurrent.TimeUnit; >> > +import java.util.concurrent.atomic.AtomicBoolean; >> > + >> > +import org.apache.blur.log.Log; >> > +import org.apache.blur.log.LogFactory; >> > +import org.apache.blur.mapreduce.lib.BlurInputFormat; >> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException; >> > +import org.apache.blur.thrift.BlurClient; >> > +import org.apache.blur.thrift.generated.Blur.Iface; >> > +import 
org.apache.blur.thrift.generated.BlurException; >> > +import org.apache.blur.thrift.generated.TableDescriptor; >> > +import org.apache.blur.thrift.generated.TableStats; >> > +import org.apache.blur.utils.BlurConstants; >> > +import org.apache.commons.io.IOUtils; >> > +import org.apache.hadoop.conf.Configuration; >> > +import org.apache.hadoop.conf.Configured; >> > +import org.apache.hadoop.fs.FSDataInputStream; >> > +import org.apache.hadoop.fs.FileStatus; >> > +import org.apache.hadoop.fs.FileSystem; >> > +import org.apache.hadoop.fs.Path; >> > +import org.apache.hadoop.fs.permission.FsAction; >> > +import org.apache.hadoop.mapreduce.Cluster; >> > +import org.apache.hadoop.mapreduce.Job; >> > +import org.apache.hadoop.mapreduce.JobID; >> > +import org.apache.hadoop.mapreduce.JobStatus; >> > +import org.apache.hadoop.util.Tool; >> > +import org.apache.hadoop.util.ToolRunner; >> > +import org.apache.hadoop.yarn.exceptions.YarnException; >> > +import org.apache.log4j.LogManager; >> > +import org.apache.log4j.xml.DOMConfigurator; >> > + >> > +public class ClusterDriver extends Configured implements Tool { >> > + >> > + private static final String BLUR_ENV = "blur.env"; >> > + private static final Log LOG = LogFactory.getLog( >> ClusterDriver.class); >> > + private static final String _SEP = "_"; >> > + private static final String IMPORT = "import"; >> > + >> > + public static void main(String[] args) throws Exception { >> > + String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE"); >> > + System.out.println("Log file path [" + logFilePath + "]"); >> > + System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath); >> > + URL url = ClusterDriver.class.getResource("/program-log4j.xml"); >> > + if (url != null) { >> > + LOG.info("Reseting log4j config from classpath resource [{0}]", >> url); >> > + LogManager.resetConfiguration(); >> > + DOMConfigurator.configure(url); >> > + } >> > + int res = ToolRunner.run(new Configuration(), new ClusterDriver(), >> args); >> >> Not 
sure what this thing does yet but it seems we should validate >> those args since they're accessed blindly in run... >> >> --tim >>
