http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/DefaultJobIdPartitioner.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/DefaultJobIdPartitioner.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/DefaultJobIdPartitioner.java new file mode 100644 index 0000000..451b921 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/DefaultJobIdPartitioner.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.storm; + +public class DefaultJobIdPartitioner implements JobIdPartitioner { + @Override + public int partition(int numTotalParts, String jobId) { + int hash = jobId.hashCode(); + hash = Math.abs(hash); + return hash % numTotalParts; + } +}
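For readers following the partitioning scheme, here is a minimal, self-contained sketch (the job ids and the parallelism of 6 are illustrative assumptions, not part of this commit) of how DefaultJobIdPartitioner maps MapReduce job ids onto spout partitions:

    package org.apache.eagle.jpm.mr.history.storm;

    public class DefaultJobIdPartitionerExample {
        public static void main(String[] args) {
            JobIdPartitioner partitioner = new DefaultJobIdPartitioner();
            int numTotalParts = 6; // e.g. the parallelism configured for the job-history spout
            String[] jobIds = {
                "job_1454646316996_0001",
                "job_1454646316996_0002",
                "job_1454646316996_0003"
            };
            for (String jobId : jobIds) {
                // Every job id lands in exactly one of the numTotalParts partitions.
                System.out.println(jobId + " -> partition " + partitioner.partition(numTotalParts, jobId));
            }
            // Edge case to keep in mind: Math.abs(Integer.MIN_VALUE) is still negative, so a job id
            // whose hashCode() happened to be Integer.MIN_VALUE would yield a negative partition here;
            // Math.floorMod(jobId.hashCode(), numTotalParts) would avoid that.
        }
    }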
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/HistoryJobProgressBolt.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/HistoryJobProgressBolt.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/HistoryJobProgressBolt.java new file mode 100644 index 0000000..30374c4 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/HistoryJobProgressBolt.java @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.storm; + +import backtype.storm.task.OutputCollector; +import backtype.storm.task.TopologyContext; +import backtype.storm.topology.OutputFieldsDeclarer; +import backtype.storm.topology.base.BaseRichBolt; +import backtype.storm.tuple.Tuple; + +import java.util.*; + +import org.apache.eagle.jpm.mr.history.common.JHFConfigManager; +import org.apache.eagle.jpm.mr.history.entities.JobProcessTimeStampEntity; +import org.apache.eagle.service.client.IEagleServiceClient; +import org.apache.eagle.service.client.impl.EagleServiceClientImpl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HistoryJobProgressBolt extends BaseRichBolt { + private static final Logger LOG = LoggerFactory.getLogger(HistoryJobProgressBolt.class); + + private final static int MAX_RETRY_TIMES = 3; + private Long m_minTimeStamp; + private int m_numTotalPartitions; + private JHFConfigManager configManager; + private Map<Integer, Long> m_partitionTimeStamp = new TreeMap<>(); + public HistoryJobProgressBolt(String parentName, JHFConfigManager configManager) { + this.configManager = configManager; + m_numTotalPartitions = this.configManager.getConfig().getInt("envContextConfig.parallelismConfig." 
+ parentName); + m_minTimeStamp = 0L; + } + @Override + public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) { + + } + + @Override + public void execute(Tuple tuple) { + Integer partitionId = tuple.getIntegerByField("partitionId"); + Long timeStamp = tuple.getLongByField("timeStamp"); + LOG.info("partition " + partitionId + ", timeStamp " + timeStamp); + if (!m_partitionTimeStamp.containsKey(partitionId) || (m_partitionTimeStamp.containsKey(partitionId) && m_partitionTimeStamp.get(partitionId) < timeStamp)) { + m_partitionTimeStamp.put(partitionId, timeStamp); + } + + if (m_partitionTimeStamp.size() >= m_numTotalPartitions) { + //get min timestamp + Long minTimeStamp = Collections.min(m_partitionTimeStamp.values()); + + if (m_minTimeStamp == 0L) { + m_minTimeStamp = minTimeStamp; + } + + if (m_minTimeStamp > minTimeStamp) { + //no need to update + return; + } + + m_minTimeStamp = minTimeStamp; + final JHFConfigManager.EagleServiceConfig eagleServiceConfig = configManager.getEagleServiceConfig(); + final JHFConfigManager.JobExtractorConfig jobExtractorConfig = configManager.getJobExtractorConfig(); + Map<String, String> baseTags = new HashMap<String, String>() { { + put("site", jobExtractorConfig.site); + } }; + JobProcessTimeStampEntity entity = new JobProcessTimeStampEntity(); + entity.setCurrentTimeStamp(m_minTimeStamp); + entity.setTimestamp(m_minTimeStamp); + entity.setTags(baseTags); + + IEagleServiceClient client = new EagleServiceClientImpl( + eagleServiceConfig.eagleServiceHost, + eagleServiceConfig.eagleServicePort, + eagleServiceConfig.username, + eagleServiceConfig.password); + + client.getJerseyClient().setReadTimeout(jobExtractorConfig.readTimeoutSeconds * 1000); + + List<JobProcessTimeStampEntity> entities = new ArrayList<>(); + entities.add(entity); + + int tried = 0; + while (tried <= MAX_RETRY_TIMES) { + try { + LOG.info("start flushing JobProcessTimeStampEntity entities of total number " + entities.size()); + client.create(entities); + LOG.info("finish flushing entities of total number " + entities.size()); + break; + } catch (Exception ex) { + if (tried < MAX_RETRY_TIMES) { + LOG.error("Got exception to flush, retry as " + (tried + 1) + " times", ex); + } else { + LOG.error("Got exception to flush, reach max retry times " + MAX_RETRY_TIMES, ex); + } + } + tried ++; + } + + client.getJerseyClient().destroy(); + try { + client.close(); + } catch (Exception e) { + LOG.error("failed to close eagle service client ", e); + } + } + } + + @Override + public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) { + + } + @Override + public void cleanup() { + super.cleanup(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobHistorySpout.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobHistorySpout.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobHistorySpout.java new file mode 100644 index 0000000..a10599b --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobHistorySpout.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.storm; + +import backtype.storm.spout.SpoutOutputCollector; +import backtype.storm.task.TopologyContext; +import backtype.storm.topology.OutputFieldsDeclarer; +import backtype.storm.topology.base.BaseRichSpout; +import backtype.storm.tuple.Fields; +import org.apache.eagle.dataproc.core.ValuesArray; +import org.apache.eagle.jpm.mr.history.common.JHFConfigManager; +import org.apache.eagle.jpm.mr.history.crawler.*; +import org.apache.eagle.jpm.mr.history.zkres.JobHistoryZKStateManager; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; + + +/** + * Zookeeper znode structure + * -zkRoot + * - partitions + * - 0 (20150101) + * - 1 (20150101) + * - 2 (20150101) + * - ... ... + * - N-1 (20150102) + * - jobs + * - 20150101 + * - job1 + * - job2 + * - job3 + * - 20150102 + * - job1 + * - job2 + * - job3 + * + * Spout can have multiple instances, which is supported by storm parallelism primitive. + * + * Under znode partitions, N child znodes (name is 0 based integer) would be created with each znode mapped to one spout instance. All jobs will be partitioned into N + * partitions by applying JobPartitioner class to each job Id. The value of each partition znode is the date when the last job in this partition + * is successfully processed. + * + * processing steps + * 1) In constructor, + * 2) In open(), calculate jobPartitionId for current spout (which should be exactly same to spout taskId within TopologyContext) + * 3) In open(), zkState.ensureJobPartitions to rebuild znode partitions if necessary. ensureJobPartitions is only done by one spout task as internally this is using lock + * 5) In nextTuple(), list job files by invoking hadoop API + * 6) In nextTuple(), iterate each jobId and invoke JobPartition.partition(jobId) and keep those jobs belonging to current partition Id + * 7) process job files (job history file and job configuration xml file) + * 8) add job Id to current date slot say for example 20150102 after this job is successfully processed + * 9) clean up all slots with date less than currentProcessDate - 2 days. (2 days should be configurable) + * + * Note: + * if one spout instance crashes and is brought up again, open() method would be invoked again, we need think of this scenario. 
+ * + */ + +public class JobHistorySpout extends BaseRichSpout { + private static final Logger LOG = LoggerFactory.getLogger(JobHistorySpout.class); + + private int partitionId; + private int numTotalPartitions; + private transient JobHistoryZKStateManager zkState; + private transient JHFCrawlerDriver driver; + private JobHistoryContentFilter contentFilter; + private JobHistorySpoutCollectorInterceptor interceptor; + private JHFInputStreamCallback callback; + private JHFConfigManager configManager; + private JobHistoryLCM m_jhfLCM; + + public JobHistorySpout(JobHistoryContentFilter filter, JHFConfigManager configManager) { + this(filter, configManager, new JobHistorySpoutCollectorInterceptor()); + } + + /** + * mostly this constructor signature is for unit test purpose as you can put customized interceptor here + * @param filter + * @param adaptor + */ + public JobHistorySpout(JobHistoryContentFilter filter, JHFConfigManager configManager, JobHistorySpoutCollectorInterceptor adaptor) { + this.contentFilter = filter; + this.configManager = configManager; + this.interceptor = adaptor; + callback = new DefaultJHFInputStreamCallback(contentFilter, configManager, interceptor); + } + + private int calculatePartitionId(TopologyContext context) { + int thisGlobalTaskId = context.getThisTaskId(); + String componentName = context.getComponentId(thisGlobalTaskId); + List<Integer> globalTaskIds = context.getComponentTasks(componentName); + numTotalPartitions = globalTaskIds.size(); + int index = 0; + for (Integer id : globalTaskIds) { + if (id == thisGlobalTaskId) { + return index; + } + index++; + } + throw new IllegalStateException(); + } + + @Override + public void open(Map conf, TopologyContext context, + final SpoutOutputCollector collector) { + partitionId = calculatePartitionId(context); + // sanity verify 0<=partitionId<=numTotalPartitions-1 + if (partitionId < 0 || partitionId > numTotalPartitions) { + throw new IllegalStateException("partitionId should be less than numTotalPartitions with partitionId " + + partitionId + " and numTotalPartitions " + numTotalPartitions); + } + Class<? 
extends JobIdPartitioner> partitionerCls = configManager.getControlConfig().partitionerCls; + JobIdPartitioner partitioner; + try { + partitioner = partitionerCls.newInstance(); + } catch (Exception e) { + LOG.error("failing instantiating job partitioner class " + partitionerCls,e); + throw new IllegalStateException(e); + } + JobIdFilter jobIdFilter = new JobIdFilterByPartition(partitioner, numTotalPartitions, partitionId); + zkState = new JobHistoryZKStateManager(configManager.getZkStateConfig()); + zkState.ensureJobPartitions(numTotalPartitions); + interceptor.setSpoutOutputCollector(collector); + + try { + m_jhfLCM = new JobHistoryDAOImpl(configManager.getJobHistoryEndpointConfig()); + driver = new JHFCrawlerDriverImpl(configManager.getJobHistoryEndpointConfig(), + configManager.getControlConfig(), + callback, + zkState, + m_jhfLCM, + jobIdFilter, + partitionId); + } catch (Exception e) { + LOG.error("failing creating crawler driver"); + throw new IllegalStateException(e); + } + } + + @Override + public void nextTuple() { + try { + Long modifiedTime = driver.crawl(); + interceptor.collect(new ValuesArray(partitionId, modifiedTime)); + } catch (Exception ex) { + LOG.error("fail crawling job history file and continue ...", ex); + try { + m_jhfLCM.freshFileSystem(); + } catch (Exception e) { + LOG.error("failed to fresh file system ", e); + } + } finally { + try { + Thread.sleep(1000); + } catch (Exception e) { + + } + } + } + + /** + * empty because framework will take care of output fields declaration + */ + @Override + public void declareOutputFields(OutputFieldsDeclarer declarer) { + declarer.declare(new Fields("partitionId", "timeStamp")); + } + + /** + * add to processedJob + */ + @Override + public void ack(Object jobId) { + } + + /** + * job is not fully processed + */ + @Override + public void fail(Object jobId) { + } + + @Override + public void deactivate() { + } + + @Override + public void close() { + } +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilter.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilter.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilter.java new file mode 100644 index 0000000..b58c84f --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilter.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+*/ + +package org.apache.eagle.jpm.mr.history.storm; + +public interface JobIdFilter { + boolean accept(String jobId); +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilterByPartition.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilterByPartition.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilterByPartition.java new file mode 100644 index 0000000..07b8519 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdFilterByPartition.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.storm; + +public class JobIdFilterByPartition implements JobIdFilter { + private JobIdPartitioner partitioner; + private int numTotalPartitions; + private int partitionId; + + public JobIdFilterByPartition(JobIdPartitioner partitioner, int numTotalPartitions, int partitionId) { + this.partitioner = partitioner; + this.numTotalPartitions = numTotalPartitions; + this.partitionId = partitionId; + } + + @Override + public boolean accept(String jobId) { + int part = partitioner.partition(numTotalPartitions, jobId); + if (part == partitionId) { + return true; + } + return false; + } +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdPartitioner.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdPartitioner.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdPartitioner.java new file mode 100644 index 0000000..cc7e68c --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/storm/JobIdPartitioner.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.storm; + +public interface JobIdPartitioner { + int partition(int numTotalParts, String jobId); +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateLCM.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateLCM.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateLCM.java new file mode 100644 index 0000000..308057b --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateLCM.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.zkres; + +import java.util.List; + +public interface JobHistoryZKStateLCM { + void ensureJobPartitions(int numTotalPartitions); + String readProcessedDate(int partitionId); + List<String> readProcessedJobs(String date); + void updateProcessedDate(int partitionId, String date); + void addProcessedJob(String date, String jobId); + void truncateProcessedJob(String date); + void truncateEverything(); +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateManager.java ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateManager.java b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateManager.java new file mode 100644 index 0000000..24dd7be --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/java/org/apache/eagle/jpm/mr/history/zkres/JobHistoryZKStateManager.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.eagle.jpm.mr.history.zkres; + +import org.apache.curator.framework.CuratorFramework; +import org.apache.curator.framework.CuratorFrameworkFactory; +import org.apache.curator.framework.recipes.locks.InterProcessMutex; +import org.apache.curator.retry.RetryNTimes; +import org.apache.eagle.jpm.mr.history.common.JHFConfigManager.ZKStateConfig; +import org.apache.zookeeper.CreateMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.List; + +public class JobHistoryZKStateManager implements JobHistoryZKStateLCM { + public static final Logger LOG = LoggerFactory.getLogger(JobHistoryZKStateManager.class); + private String zkRoot; + private CuratorFramework _curator; + public static final String ZNODE_LOCK_FOR_ENSURE_JOB_PARTITIONS = "lockForEnsureJobPartitions"; + public static final String ZNODE_FORCE_START_FROM = "forceStartFrom"; + public static final String ZNODE_PARTITIONS = "partitions"; + + public static final int BACKOFF_DAYS = 0; + + private CuratorFramework newCurator(ZKStateConfig config) throws Exception { + return CuratorFrameworkFactory.newClient( + config.zkQuorum, + config.zkSessionTimeoutMs, + 15000, + new RetryNTimes(config.zkRetryTimes, config.zkRetryInterval) + ); + } + + public JobHistoryZKStateManager(ZKStateConfig config) { + this.zkRoot = config.zkRoot; + + try { + _curator = newCurator(config); + _curator.start(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public void close() { + _curator.close(); + _curator = null; + } + + private String readForceStartFrom() { + String path = zkRoot + "/" + ZNODE_FORCE_START_FROM; + try { + if (_curator.checkExists().forPath(path) != null) { + return new String(_curator.getData().forPath(path), "UTF-8"); + } + } catch (Exception ex) { + LOG.error("fail reading forceStartFrom znode", ex); + } + return null; + } + + private void deleteForceStartFrom() { + String path = zkRoot + "/" + ZNODE_FORCE_START_FROM; + try { + if (_curator.checkExists().forPath(path) != null) { + _curator.delete().forPath(path); + } + } catch(Exception ex) { + LOG.error("fail reading forceStartFrom znode", ex); + } + } + + private String getProcessedDateAfterBackoff(int backOffDays) { + SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); + Calendar c = Calendar.getInstance(); + c.add(Calendar.DATE, -1); + c.add(Calendar.DATE, -1 * backOffDays); + return sdf.format(c.getTime()); + } + + /** + * under zkRoot, znode forceStartFrom is used to force job is crawled from that date + * IF + * forceStartFrom znode is provided, and its value is valid date with format "YYYYMMDD", + * THEN + * rebuild all partitions with the forceStartFrom + * ELSE + * IF + * partition structure is changed + * THEN + * IF + * there is valid mindate for existing partitions + * THEN + * rebuild job partitions from that valid mindate + * ELSE + * rebuild job partitions 
from (today - BACKOFF_DAYS) + * END + * ELSE + * do nothing + * END + * END + * + * + * forceStartFrom is deleted once its value is used, so next time when topology is restarted, program can run from where topology is stopped last time + */ + @Override + public void ensureJobPartitions(int numTotalPartitions) { + // lock before rebuild job partitions + String lockForEnsureJobPartitions = zkRoot + "/" + ZNODE_LOCK_FOR_ENSURE_JOB_PARTITIONS; + InterProcessMutex lock = new InterProcessMutex(_curator, lockForEnsureJobPartitions); + String path = zkRoot + "/" + ZNODE_PARTITIONS; + try { + lock.acquire(); + int minDate = 0; + String forceStartFrom = readForceStartFrom(); + if (forceStartFrom != null) { + try { + minDate = Integer.valueOf(forceStartFrom); + } catch(Exception ex) { + LOG.error("failing converting forceStartFrom znode value to integer with value " + forceStartFrom); + throw new IllegalStateException(); + } + } else { + boolean pathExists = _curator.checkExists().forPath(path) == null ? false : true; + boolean structureChanged = true; + if (pathExists) { + int currentCount = _curator.getChildren().forPath(path).size(); + if (numTotalPartitions == currentCount) { + structureChanged = false; + LOG.info("znode partitions structure is unchanged"); + } else { + LOG.info("znode partitions structure is changed, current partition count " + currentCount + ", future count " + numTotalPartitions); + } + } + if (!structureChanged) + return; // do nothing + + if (pathExists) { + List<String> partitions = _curator.getChildren().forPath(path); + for (String partition : partitions) { + String date = new String(_curator.getData().forPath(path + "/" + partition), "UTF-8"); + int tmp = Integer.valueOf(date); + if(tmp < minDate) + minDate = tmp; + } + } + + if (minDate == 0) { + minDate = Integer.valueOf(getProcessedDateAfterBackoff(BACKOFF_DAYS)); + } + } + rebuildJobPartitions(numTotalPartitions, String.valueOf(minDate)); + deleteForceStartFrom(); + } catch (Exception e) { + LOG.error("fail building job partitions", e); + throw new RuntimeException(e); + } finally { + try { + lock.release(); + } catch(Exception e) { + LOG.error("fail releasing lock", e); + throw new RuntimeException(e); + } + } + } + + private void rebuildJobPartitions(int numTotalPartitions, String startingDate) throws Exception { + LOG.info("rebuild job partitions with numTotalPartitions " + numTotalPartitions + " with starting date " + startingDate); + String path = zkRoot + "/" + ZNODE_PARTITIONS; + // truncate all existing partitions + if (_curator.checkExists().forPath(path) != null) { + _curator.delete().deletingChildrenIfNeeded().forPath(path); + } + + for (int i = 0; i < numTotalPartitions; i++) { + _curator.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath(path + "/" + i, startingDate.getBytes("UTF-8")); + } + } + + @Override + public String readProcessedDate(int partitionId) { + String path = zkRoot + "/partitions/" + partitionId; + try { + if (_curator.checkExists().forPath(path) != null) { + return new String(_curator.getData().forPath(path), "UTF-8"); + } else { + return null; + } + } catch (Exception e) { + LOG.error("fail read processed date", e); + throw new RuntimeException(e); + } + } + + @Override + public void updateProcessedDate(int partitionId, String date) { + String path = zkRoot + "/partitions/" + partitionId; + try { + if (_curator.checkExists().forPath(path) == null) { + _curator.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath(path, 
date.getBytes("UTF-8")); + } else { + _curator.setData().forPath(path, date.getBytes("UTF-8")); + } + } catch (Exception e) { + LOG.error("fail update processed date", e); + throw new RuntimeException(e); + } + } + + @Override + public void addProcessedJob(String date, String jobId) { + String path = zkRoot + "/jobs/" + date + "/" + jobId; + try { + if (_curator.checkExists().forPath(path) == null) { + _curator.create() + .creatingParentsIfNeeded() + .withMode(CreateMode.PERSISTENT) + .forPath(path); + } else { + _curator.setData().forPath(path); + } + } catch (Exception e) { + LOG.error("fail adding processed jobs", e); + throw new RuntimeException(e); + } + } + + @Override + public void truncateProcessedJob(String date) { + LOG.info("trying to truncate all data for day " + date); + // we need lock before we do truncate + String path = zkRoot + "/jobs/" + date; + InterProcessMutex lock = new InterProcessMutex(_curator, path); + try { + lock.acquire(); + if (_curator.checkExists().forPath(path) != null) { + _curator.delete().deletingChildrenIfNeeded().forPath(path); + LOG.info("really truncated all data for day " + date); + } + } catch (Exception e) { + LOG.error("fail truncating processed jobs", e); + throw new RuntimeException(e); + } finally { + try { + lock.release(); + } catch (Exception e) { + LOG.error("fail releasing lock", e); + throw new RuntimeException(e); + } + } + } + + @Override + public List<String> readProcessedJobs(String date) { + String path = zkRoot + "/jobs/" + date; + try { + if (_curator.checkExists().forPath(path) != null) { + return _curator.getChildren().forPath(path); + } else { + return null; + } + } catch (Exception e) { + LOG.error("fail read processed jobs", e); + throw new RuntimeException(e); + } + } + + @Override + public void truncateEverything() { + String path = zkRoot; + try { + if (_curator.checkExists().forPath(path) != null) { + _curator.delete().deletingChildrenIfNeeded().forPath(path); + } + } catch (Exception ex) { + LOG.error("fail truncating verything", ex); + throw new RuntimeException(ex); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/JobCounter.conf ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/JobCounter.conf b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/JobCounter.conf new file mode 100644 index 0000000..db62cfb --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/JobCounter.conf @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#### Sample configuration: +## counter.group0.name = groupname1 +## counter.group0.counter0.names = counterName1,counterName2,... 
+## counter.group0.counter0.description = counter description... + +counter.group0.name = org.apache.hadoop.mapreduce.FileSystemCounter +counter.group0.description = File System Counters +counter.group0.counter0.names = FILE_BYTES_READ +counter.group0.counter0.description = FILE: Number of bytes read +counter.group0.counter1.names = FILE_BYTES_WRITTEN +counter.group0.counter1.description = FILE: Number of bytes written +counter.group0.counter2.names = FILE_READ_OPS +counter.group0.counter2.description = FILE: Number of read operations +counter.group0.counter3.names = FILE_LARGE_READ_OPS +counter.group0.counter3.description = FILE: Number of large read operations +counter.group0.counter4.names = FILE_WRITE_OPS +counter.group0.counter4.description = FILE: Number of write operations +counter.group0.counter5.names = HDFS_BYTES_READ +counter.group0.counter5.description = HDFS: Number of bytes read +counter.group0.counter6.names = HDFS_BYTES_WRITTEN +counter.group0.counter6.description = HDFS: Number of bytes written +counter.group0.counter7.names = HDFS_READ_OPS +counter.group0.counter7.description = HDFS: Number of read operations +counter.group0.counter8.names = HDFS_LARGE_READ_OPS +counter.group0.counter8.description = HDFS: Number of large read operations +counter.group0.counter9.names = HDFS_WRITE_OPS +counter.group0.counter9.description = HDFS: Number of write operations + +counter.group1.name = org.apache.hadoop.mapreduce.TaskCounter +counter.group1.description = Map-Reduce Framework +counter.group1.counter0.names = MAP_INPUT_RECORDS +counter.group1.counter0.description = Map input records +counter.group1.counter1.names = MAP_OUTPUT_RECORDS +counter.group1.counter1.description = Map output records +counter.group1.counter2.names = SPLIT_RAW_BYTES +counter.group1.counter2.description = Input split bytes +counter.group1.counter3.names = SPILLED_RECORDS +counter.group1.counter3.description = Spilled Records +counter.group1.counter4.names = CPU_MILLISECONDS +counter.group1.counter4.description = CPU time spent (ms) +counter.group1.counter5.names = PHYSICAL_MEMORY_BYTES +counter.group1.counter5.description = Physical memory (bytes) snapshot +counter.group1.counter6.names = VIRTUAL_MEMORY_BYTES +counter.group1.counter6.description = Virtual memory (bytes) snapshot +counter.group1.counter7.names = COMMITTED_HEAP_BYTES +counter.group1.counter7.description = Total committed heap usage (bytes) +counter.group1.counter8.names = REDUCE_SHUFFLE_BYTES +counter.group1.counter8.description = Reduce shuffle bytes (bytes) +counter.group1.counter9.names = GC_TIME_MILLIS +counter.group1.counter9.description = GC time milliseconds +counter.group1.counter10.names = MAP_OUTPUT_BYTES +counter.group1.counter10.description = map output bytes +counter.group1.counter11.names = REDUCE_INPUT_RECORDS +counter.group1.counter11.description = reduce input records +counter.group1.counter12.names = COMBINE_INPUT_RECORDS +counter.group1.counter12.description = combine input records +counter.group1.counter13.names = COMBINE_OUTPUT_RECORDS +counter.group1.counter13.description = combine output records +counter.group1.counter14.names = REDUCE_INPUT_GROUPS +counter.group1.counter14.description = reduce input groups +counter.group1.counter15.names = REDUCE_OUTPUT_RECORDS +counter.group1.counter15.description = reduce output records +counter.group1.counter16.names = SHUFFLED_MAPS +counter.group1.counter16.description = shuffled maps +counter.group1.counter17.names = MAP_OUTPUT_MATERIALIZED_BYTES 
+counter.group1.counter17.description = MAP_OUTPUT_MATERIALIZED_BYTES +counter.group1.counter18.names = MERGED_MAP_OUTPUTS +counter.group1.counter18.description = MERGED_MAP_OUTPUTS +counter.group1.counter19.names = FAILED_SHUFFLE +counter.group1.counter19.description = FAILED_SHUFFLE + +counter.group2.name = org.apache.hadoop.mapreduce.JobCounter +counter.group2.description = Map-Reduce Job Counter +counter.group2.counter0.names = MB_MILLIS_MAPS +counter.group2.counter0.description = Total megabyte-seconds taken by all map tasks +counter.group2.counter1.names = MB_MILLIS_REDUCES +counter.group2.counter1.description = Total megabyte-seconds taken by all reduce tasks +counter.group2.counter2.names = VCORES_MILLIS_MAPS +counter.group2.counter2.description = Total vcore-seconds taken by all map tasks +counter.group2.counter3.names = VCORES_MILLIS_REDUCES +counter.group2.counter3.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter4.names = OTHER_LOCAL_MAPS +counter.group2.counter4.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter5.names = DATA_LOCAL_MAPS +counter.group2.counter5.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter6.names = MILLIS_MAPS +counter.group2.counter6.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter7.names = MILLIS_REDUCES +counter.group2.counter7.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter8.names = TOTAL_LAUNCHED_MAPS +counter.group2.counter8.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter9.names = TOTAL_LAUNCHED_REDUCES +counter.group2.counter9.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter10.names = SLOTS_MILLIS_MAPS +counter.group2.counter10.description = Total vcore-seconds taken by all reduce tasks +counter.group2.counter11.names = SLOTS_MILLIS_REDUCES +counter.group2.counter11.description = Total vcore-seconds taken by all reduce tasks + +counter.group3.name = MapTaskAttemptCounter +counter.group3.description = Reduce Task Attempt Counter Aggregation +counter.group3.counter0.names = MAP_OUTPUT_MATERIALIZED_BYTES +counter.group3.counter1.names = MAP_INPUT_RECORDS +counter.group3.counter2.names = MERGED_MAP_OUTPUTS +counter.group3.counter3.names = SPILLED_RECORDS +counter.group3.counter4.names = MAP_OUTPUT_BYTES +counter.group3.counter5.names = COMMITTED_HEAP_BYTES +counter.group3.counter6.names = FAILED_SHUFFLE +counter.group3.counter7.names = CPU_MILLISECONDS +counter.group3.counter8.names = SPLIT_RAW_BYTES +counter.group3.counter9.names = COMBINE_INPUT_RECORDS +counter.group3.counter10.names = PHYSICAL_MEMORY_BYTES +counter.group3.counter11.names = TASK_ATTEMPT_DURATION +counter.group3.counter12.names = VIRTUAL_MEMORY_BYTES +counter.group3.counter13.names = MAP_OUTPUT_RECORDS +counter.group3.counter14.names = GC_TIME_MILLIS +counter.group3.counter15.names = COMBINE_OUTPUT_RECORDS +counter.group3.counter16.names = REDUCE_INPUT_GROUPS +counter.group3.counter17.names = REDUCE_INPUT_RECORDS +counter.group3.counter18.names = REDUCE_OUTPUT_RECORDS +counter.group3.counter19.names = REDUCE_SHUFFLE_BYTES +counter.group3.counter20.names = SHUFFLED_MAPS + +counter.group4.name = ReduceTaskAttemptCounter +counter.group4.description = Reduce Task Attempt Counter Aggregation +counter.group4.counter0.names = MAP_OUTPUT_MATERIALIZED_BYTES +counter.group4.counter1.names = MAP_INPUT_RECORDS +counter.group4.counter2.names = 
MERGED_MAP_OUTPUTS +counter.group4.counter3.names = SPILLED_RECORDS +counter.group4.counter4.names = MAP_OUTPUT_BYTES +counter.group4.counter5.names = COMMITTED_HEAP_BYTES +counter.group4.counter6.names = FAILED_SHUFFLE +counter.group4.counter7.names = CPU_MILLISECONDS +counter.group4.counter8.names = SPLIT_RAW_BYTES +counter.group4.counter9.names = COMBINE_INPUT_RECORDS +counter.group4.counter10.names = PHYSICAL_MEMORY_BYTES +counter.group4.counter11.names = TASK_ATTEMPT_DURATION +counter.group4.counter12.names = VIRTUAL_MEMORY_BYTES +counter.group4.counter13.names = MAP_OUTPUT_RECORDS +counter.group4.counter14.names = GC_TIME_MILLIS +counter.group4.counter15.names = COMBINE_OUTPUT_RECORDS +counter.group4.counter16.names = REDUCE_INPUT_GROUPS +counter.group4.counter17.names = REDUCE_INPUT_RECORDS +counter.group4.counter18.names = REDUCE_OUTPUT_RECORDS +counter.group4.counter19.names = REDUCE_SHUFFLE_BYTES +counter.group4.counter20.names = SHUFFLED_MAPS + +counter.group5.name = MapTaskAttemptFileSystemCounter +counter.group5.description = Map Task Attempt File System Counter Aggregation +counter.group5.counter0.names = FILE_READ_OPS +counter.group5.counter1.names = FILE_WRITE_OPS +counter.group5.counter2.names = FILE_BYTES_READ +counter.group5.counter3.names = FILE_LARGE_READ_OPS +counter.group5.counter4.names = HDFS_BYTES_READ +counter.group5.counter5.names = FILE_BYTES_WRITTEN +counter.group5.counter6.names = HDFS_LARGE_READ_OPS +counter.group5.counter7.names = HDFS_BYTES_WRITTEN +counter.group5.counter8.names = HDFS_READ_OPS + +counter.group6.name = ReduceTaskAttemptFileSystemCounter +counter.group6.description = Reduce Task Attempt File System Counter Aggregation +counter.group6.description = Map-Reduce Job Counter +counter.group6.counter0.names = FILE_READ_OPS +counter.group6.counter1.names = FILE_WRITE_OPS +counter.group6.counter2.names = FILE_BYTES_READ +counter.group6.counter3.names = FILE_LARGE_READ_OPS +counter.group6.counter4.names = HDFS_BYTES_READ +counter.group6.counter5.names = FILE_BYTES_WRITTEN +counter.group6.counter6.names = HDFS_LARGE_READ_OPS +counter.group6.counter7.names = HDFS_BYTES_WRITTEN +counter.group6.counter8.names = HDFS_READ_OPS \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem new file mode 100644 index 0000000..21686a6 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.hdfs.DistributedFileSystem +org.apache.hadoop.hdfs.web.HftpFileSystem +org.apache.hadoop.hdfs.web.HsftpFileSystem +org.apache.hadoop.hdfs.web.WebHdfsFileSystem +org.apache.hadoop.hdfs.web.SWebHdfsFileSystem \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/MRErrorCategory.config ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/MRErrorCategory.config b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/MRErrorCategory.config new file mode 100644 index 0000000..ee8c0c5 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/MRErrorCategory.config @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ATTEMPT_TIMEOUT = false|AttemptID:\S+ Timed out after \d+ secs +INTERFACE_EXPECTED = false|but interface was expected +FILE_NOT_EXIST = false|File does not exist +HIVE_BAD_SCHEMA_EXCEPTION = false|Caused by: org.apache.hadoop.hive.serde2.avro.BadSchemaException +CONNECTION_RESET_BY_PEER = false|Connection reset by peer +CONTAINER_KILLED_BY_APPMASTER = false|Container killed by the ApplicationMaster +CONTAINER_CLEANUP_FAILURE = false|cleanup failed for container container_ +USER_NOT_FOUND = false|User \S+ not found +TASK_TREE_BEYOND_MEMORY_LIMIT = false|^TaskTree \S+ is running beyond memory-limits +EBAY_APPMON_LOG_GET_TRAVERSER = false|^Error: com.ebay.appmon.log.traverser.LogTraverser.getTraverser +MAP_OUTPUT_LOST = false|Map output lost +BEYOND_PHYSICAL_MEMORY_LIMITS = false|running beyond physical memory limits +GC_OVERHEAD_LIMIT_EXCEEDED = false|GC overhead limit exceeded +NO_SPACE_LEFT = false|No space left +MKDIR_FAILURE = false|mkdir of file:\S+ failed +KILLED_CLEAN_BY_USER = false|Task has been KILLED by the user +KILLED_UNCLEAN_BY_USER = false|Task has been KILLED_UNCLEAN by the user +FAILED_TO_REPORT_STATUS = false|failed to report status for \d+ seconds. 
Killing +EXCEPTION_FROM_CONTAINER_LAUNCH = false|^Exception from container-launch +LOST_TASK_TRACKER = false|^Lost task tracker +TOO_MANY_FETCH_FAILURES = false|^Too many fetch-failures$ +JAVA_HEAP_SPACE = false|$Error: Java heap space$ +JAVA_EXCEPTION = true|^(?:error: Error: |Error: )?(\S+Exception|\S+Error) +JAVA_THROWABLE = false|^(?:error: Error: |Error: )?java.lang.Throwable +NO_DETAIL = false|^$ +UNKNOWN = false|.* \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/application.conf ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/application.conf b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/application.conf new file mode 100644 index 0000000..8cb1aa3 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/application.conf @@ -0,0 +1,85 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{ + "envContextConfig" : { + "env" : "local", + "topologyName" : "mr_history", + "stormConfigFile" : "storm.yaml", + "parallelismConfig" : { + "mrHistoryJobExecutor" : 6 + }, + "tasks" : { + "mrHistoryJobExecutor" : 6 + }, + "workers" : 3 + }, + + "jobExtractorConfig" : { + "site" : "sandbox", + "mrVersion": "MRVer2", + "readTimeOutSeconds" : 10 + }, + + "dataSourceConfig" : { + "zkQuorum" : "sandbox.hortonworks.com:2181", + "zkPort" : "2181", + "zkRoot" : "/test_mrjobhistory", + "zkSessionTimeoutMs" : 15000, + "zkRetryTimes" : 3, + "zkRetryInterval" : 20000, + "nnEndpoint" : "hdfs://sandbox.hortonworks.com:8020", + "principal":"", #if not need, then empty + "keytab":"", + "basePath" : "/mr-history/done", + "pathContainsJobTrackerName" : false, + "jobTrackerName" : "", + "zeroBasedMonth" : false, + "dryRun" : false, + "partitionerCls" : "org.apache.eagle.jpm.mr.history.storm.DefaultJobIdPartitioner", + "timeZone" : "UTC" + }, + + "eagleProps" : { + "mailHost" : "abc.com", + "mailDebug" : "true", + "eagleService": { + "host": "sandbox.hortonworks.com", + "port": 9099, + "username": "admin", + "password": "secret" + } + }, + + "MRConfigureKeys" : [ + "mapreduce.map.output.compress", + "mapreduce.map.output.compress.codec", + "mapreduce.output.fileoutputformat.compress", + "mapreduce.output.fileoutputformat.compress.type", + "mapreduce.output.fileoutputformat.compress.codec", + "mapred.output.format.class", + "eagle.job.runid", + "eagle.job.runidfieldname", + "eagle.job.name", + "eagle.job.normalizedfieldname", + "eagle.alert.email", + "eagle.job.alertemailaddress", + "dataplatform.etl.info", + "mapreduce.map.memory.mb", + "mapreduce.reduce.memory.mb", + "mapreduce.map.java.opts", + "mapreduce.reduce.java.opts" + ] +} \ No newline at end of file 
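To show how the configuration above is wired into the classes earlier in this commit, here is a hedged sketch. It assumes the Typesafe Config library (suggested by path-style lookups such as envContextConfig.parallelismConfig in HistoryJobProgressBolt; JHFConfigManager itself is not included in this excerpt), and the example class and values below are illustrative only:

    package org.apache.eagle.jpm.mr.history.storm;

    import com.typesafe.config.Config;
    import com.typesafe.config.ConfigFactory;

    public class PartitionWiringExample {
        public static void main(String[] args) throws Exception {
            // Loads application.conf from the classpath.
            Config config = ConfigFactory.load();

            // Same lookup pattern as HistoryJobProgressBolt's constructor.
            int numTotalPartitions = config.getInt("envContextConfig.parallelismConfig.mrHistoryJobExecutor");

            // JobHistorySpout.open() instantiates the partitioner reflectively from
            // controlConfig.partitionerCls, presumably populated from dataSourceConfig.partitionerCls above.
            String partitionerClsName = config.getString("dataSourceConfig.partitionerCls");
            JobIdPartitioner partitioner =
                    Class.forName(partitionerClsName).asSubclass(JobIdPartitioner.class).newInstance();

            // Each spout task keeps only the job ids that hash into its own partition.
            int partitionId = 0; // normally derived from the task id via calculatePartitionId()
            JobIdFilter filter = new JobIdFilterByPartition(partitioner, numTotalPartitions, partitionId);
            System.out.println("accept job_1454646316996_0001: " + filter.accept("job_1454646316996_0001"));
        }
    }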
http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/core-site.xml ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/core-site.xml b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/core-site.xml new file mode 100644 index 0000000..11e8486 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/core-site.xml @@ -0,0 +1,497 @@ +<?xml version="1.0"?> +<!-- ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with ~ + this work for additional information regarding copyright ownership. ~ The + ASF licenses this file to You under the Apache License, Version 2.0 ~ (the + "License"); you may not use this file except in compliance with ~ the License. + You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ ~ Unless required by applicable law or agreed to in writing, software ~ + distributed under the License is distributed on an "AS IS" BASIS, ~ WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the + License for the specific language governing permissions and ~ limitations + under the License. --> + +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<!-- Put site-specific property overrides in this file. --> + +<configuration xmlns:xi="http://www.w3.org/2001/XInclude"> + +<!-- i/o properties --> + + <property> + <name>io.file.buffer.size</name> + <value>131072</value> + <description>The size of buffer for use in sequence files. + The size of this buffer should probably be a multiple of hardware + page size (4096 on Intel x86), and it determines how much data is + buffered during read and write operations.</description> + </property> + +<property> + <description>If users connect through a SOCKS proxy, we don't + want their SocketFactory settings interfering with the socket + factory associated with the actual daemons.</description> + <name>hadoop.rpc.socket.factory.class.default</name> + <value>org.apache.hadoop.net.StandardSocketFactory</value> +</property> + +<property> + <name>hadoop.tmp.dir</name> + <value>/tmp/hadoop/hadoop-${user.name}</value> + <description>A base for other temporary directories.</description> +</property> + +<property> + <name>hadoop.rpc.socket.factory.class.ClientProtocol</name> + <value></value> +</property> + +<property> + <name>hadoop.rpc.socket.factory.class.JobSubmissionProtocol</name> + <value></value> +</property> + + <property> + <name>io.serializations</name> + <value>org.apache.hadoop.io.serializer.WritableSerialization</value> + </property> + + <property> + <name>io.compression.codecs</name> + <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value> + </property> + + <!-- LZO: see http://www.facebook.com/notes/cloudera/hadoop-at-twitter-part-1-splittable-lzo-compression/178581952002 --> + <property> + <name>io.compression.codec.lzo.class</name> + <value>com.hadoop.compression.lzo.LzoCodec</value> + </property> + + +<!-- file system properties --> + + <property> + <name>fs.defaultFS</name> + <!-- cluster variant --> + <value>hdfs://apollo-phx-nn-ha</value> + <description>The name of the default file system. 
Either the + literal string "local" or a host:port for NDFS.</description> + <final>true</final> + </property> + + <property> + <description>Topology script</description> + <name>net.topology.script.file.name</name> + <value>/apache/hadoop/etc/hadoop/topology</value> + <final>true</final> + </property> + + <property> + <name>fs.trash.interval</name> + <value>480</value> + <description>Number of minutes between trash checkpoints. + If zero, the trash feature is disabled. + </description> + </property> + + <!-- mobius-proxyagent impersonation configurations --> +<property> + <name>hadoop.proxyuser.mobius-proxyagent.groups</name> + <value>hdmi-mm,hdmi-set,hdmi-research,hdmi-technology,hdmi-hadoopeng,hdmi-cs,hdmi-milo,hdmi-appdev,hdmi-siteanalytics,hdmi-prod,hdmi-others,hdmi-sdc,hdmi-finance,hdmi-est,hdmi-cci,hdmi-mptna,hdmi-xcom,hdmi-stu,hdmi-mobile</value> + <description>Allow user mobius-proxyagent to impersonate any members of the groups </description> +</property> + +<property> + <name>hadoop.proxyuser.mobius-proxyagent.hosts</name> + <value>10.114.118.13,10.115.201.53</value> + <description>The mobius-proxyagent can connect from hosts to impersonate a user</description> +</property> + +<property> + <name>hadoop.proxyuser.bridge_adm.groups</name> + <value>hdmi-mm,hdmi-set,hdmi-research,hdmi-technology,hdmi-hadoopeng,hdmi-cs,hdmi-milo,hdmi-appdev,hdmi-siteanalytics,hdmi-prod,hdmi-others,hdmi-sdc,hdmi-finance,hdmi-est,hdmi-cci,hdmi-mptna,hdmi-xcom,hdmi-stu,hdmi-mobile</value> + <description>Allow user bridge_adm (Teradata-Hadoop bridge) to impersonate any members of the groups </description> +</property> + +<property> + <name>hadoop.proxyuser.bridge_adm.hosts</name> + <value>10.103.47.11,10.103.47.12,10.103.47.13,10.103.47.14,10.103.47.15,10.103.47.16,10.103.47.17,10.103.47.18,10.103.47.19,10.103.47.20,10.103.47.21,10.103.47.22,10.103.48.11,10.103.48.12,10.103.48.13,10.103.48.14,10.103.48.15,10.103.48.16,10.103.48.17,10.103.48.18,10.103.48.19,10.103.48.20,10.103.48.21,10.103.48.22,10.103.88.11,10.103.88.12,10.103.88.13,10.103.88.14,10.103.88.15,10.103.88.16,10.103.88.17,10.103.88.18,10.103.88.19,10.103.88.20,10.103.88.21,10.103.88.22,10.103.88.23,10.103.88.24,10.103.88.25,10.103.88.26,10.103.88.27,10.103.88.28,10.103.88.29,10.103.88.30,10.103.88.31,10.103.88.32,10.103.88.33,10.103.88.34,10.103.89.11,10.103.89.12,10.103.89.13,10.103.89.14,10.103.89.15,10.103.89.16,10.103.89.17,10.103.89.18,10.103.89.19,10.103.89.20,10.103.89.21,10.103.89.22,10.103.89.23,10.103.89.24,10.103.89.25,10.103.89.26,10.103.89.27,10.103.89.28,10.103.89.29,10.103.89.30,10.103.89.31,10.103.89.32,10.103.89.33,10.103.89.34,10.115.37.50,10.115.37.51,10.115.37.52,10.115.37.5 3,10.115.38.50,10.115.38.51,10.115.38.52,10.115.38.53,10.115.208.11,10.115.208.12,10.115.208.13,10.115.208.14,10.115.208.15,10.115.208.16,10.115.208.17,10.115.208.18,10.115.208.19,10.115.208.20,10.115.208.21,10.115.208.22,10.115.208.23,10.115.208.24,10.115.208.25,10.115.208.26,10.103.158.101,10.103.158.102,10.103.158.103,10.103.158.104,10.103.158.105,10.103.158.106,10.103.158.107,10.103.158.108,10.103.158.109,10.103.158.110,10.103.158.111,10.103.158.112,10.103.158.113,10.103.158.114,10.103.158.115,10.103.158.116</value> + <description>The bridge_adm user (Teradata-Hadoop bridge) can connect from hosts to impersonate a user</description> +</property> + +<property> + <name>hadoop.proxyuser.hadoop.hosts</name> + <value>*</value> +</property> + +<property> + <name>hadoop.proxyuser.hadoop.groups</name> + <value>*</value> +</property> + +<property> + 
<name>hadoop.proxyuser.sg_adm.groups</name> + <value>hdmi-etl</value> + <description>Allow user sg_adm (HDMIT-4462) to impersonate any members of the groups </description> +</property> + +<property> + <name>hadoop.proxyuser.sg_adm.hosts</name> + <value>*</value> + <description>The sg_adm user (HDMIT-4462) can connect from hosts to impersonate a user</description> +</property> + + <property> + <name>fs.inmemory.size.mb</name> + <value>256</value> + </property> + + <!-- ipc properties: copied from kryptonite configuration --> + <property> + <name>ipc.client.idlethreshold</name> + <value>8000</value> + <description>Defines the threshold number of connections after which + connections will be inspected for idleness. + </description> + </property> + + <property> + <name>ipc.client.connection.maxidletime</name> + <value>30000</value> + <description>The maximum time after which a client will bring down the + connection to the server. + </description> + </property> + + <property> + <name>ipc.client.connect.max.retries</name> + <value>50</value> + <description>Defines the maximum number of retries for IPC connections.</description> + </property> + + <!-- Web Interface Configuration --> + <property> + <name>webinterface.private.actions</name> + <value>false</value> + <description> If set to true, the web interfaces of JT and NN may contain + actions, such as kill job, delete file, etc., that should + not be exposed to public. Enable this option if the interfaces + are only reachable by those who have the right authorization. + </description> + </property> + +<property> + <name>hadoop.proxyuser.hive.groups</name> + <value>*</value> + <description> + Proxy group for Hadoop. + </description> +</property> + +<property> + <name>hadoop.proxyuser.hive.hosts</name> + <value>*</value> + <description> + Proxy host for Hadoop. + </description> +</property> + +<property> + <name>hadoop.proxyuser.oozie.groups</name> + <value>*</value> + <description> + Proxy group for Hadoop. + </description> +</property> + +<property> + <name>hadoop.proxyuser.oozie.hosts</name> + <value>phxaishdc9en0007-be.phx.ebay.com</value> + <description> + Proxy host for Hadoop. + </description> +</property> + +<!-- BEGIN security configuration --> + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + <!-- A value of "simple" would disable security. --> + </property> + + <property> + <name>hadoop.security.authorization</name> + <value>true</value> + </property> + + <!-- Setting to ShellBasedUnixGroupsMapping to override the default of + JniBasedUnixGroupsMappingWithFallback. 
See HWX case 00006991 --> + <property> + <name>hadoop.security.group.mapping</name> + <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value> + </property> + + <property> + <name>hadoop.http.filter.initializers</name> + <value>org.apache.hadoop.security.AuthenticationFilterInitializer</value> + </property> + +<!-- BEGIN hadoop.http.authentication properties --> + <property> + <name>hadoop.http.authentication.type</name> + <value>org.apache.hadoop.security.authentication.server.CompositeAuthenticationHandler</value> + </property> + + <property> + <name>hadoop.http.authentication.token.validity</name> + <value>36000</value> + <!-- in seconds --> + </property> + + <property> + <name>hadoop.http.authentication.signature.secret.file</name> + <value>/etc/hadoop/http_auth_secret</value> + </property> + + <property> + <name>hadoop.http.authentication.cookie.domain</name> + <value>ebay.com</value> + </property> + + <property> + <name>hadoop.http.authentication.pingFederate.config.file</name> + <value>/etc/hadoop/pingfederate-agent-config.txt</value> + </property> + + <property> + <name>hadoop.http.authentication.pingFederate.url</name> + <value>https://sso.corp.ebay.com/sp/startSSO.ping?PartnerIdpId=eBayHadoop</value> + </property> + + <property> + <name>hadoop.http.authentication.pingFederate.anonymous.allowed</name> + <value>true</value> + </property> + +<!-- BEGIN properties enabled per HDP-2.1.3 upgrade --> + + <property> + <name>hadoop.http.authentication.composite.handlers</name> + <value>org.apache.hadoop.security.authentication.server.PingFederateAuthenticationHandler,kerberos,anonymous</value> + </property> + + <property> + <name>hadoop.http.authentication.composite.default-non-browser-handler-type</name> + <value>kerberos</value> + </property> + + <property> + <name>hadoop.http.authentication.kerberos.keytab</name> + <value>/etc/hadoop/hadoop.keytab</value> + </property> + + <property> + <name>hadoop.http.authentication.kerberos.principal</name> + <value>*</value> + </property> + +<!-- END properties enabled per HDP-2.1.3 upgrade --> + +<!-- END hadoop.http.authentication properties --> + + + <property> + <name>hadoop.security.auth_to_local</name> + <value> + RULE:[1:$1] + RULE:[2:$1] + DEFAULT + </value> + </property> + + <property> + <name>kerberos.multiplerealm.supported</name> + <value>true</value> + </property> + + <property> + <name>kerberos.multiplerealm.realms</name> + <value>CORP.EBAY.COM</value> + </property> + +<!--SSL SUPPORT --> + +<property> + <name>hadoop.ssl.keystores.factory.class</name> + <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value> + <description> + The keystores factory to use for retrieving certificates. + </description> +</property> + +<property> + <name>hadoop.ssl.require.client.cert</name> + <value>false</value> + <description>Whether client certificates are required</description> +</property> + +<property> + <name>hadoop.ssl.hostname.verifier</name> + <value>ALLOW_ALL</value> + <description> + The hostname verifier to provide for HttpsURLConnections. + Valid values are: DEFAULT, STRICT, STRICT_I6, DEFAULT_AND_LOCALHOST and + ALLOW_ALL + </description> +</property> + +<property> + <name>hadoop.ssl.server.conf</name> + <value>ssl-server.xml</value> + <description> + Resource file from which ssl server keystore information will be extracted. + This file is looked up in the classpath, typically it should be in Hadoop + conf/ directory. 
+ </description> +</property> + +<property> + <name>hadoop.ssl.client.conf</name> + <value>ssl-client.xml</value> + <description> + Resource file from which ssl client keystore information will be extracted + This file is looked up in the classpath, typically it should be in Hadoop + conf/ directory. + </description> +</property> + +<property> + <name>hadoop.ssl.enabled</name> + <value>false</value> + <description> + Whether to use SSL for the HTTP endpoints. If set to true, the + NameNode, DataNode, ResourceManager, NodeManager, HistoryServer and + MapReduceAppMaster web UIs will be served over HTTPS instead HTTP. + </description> +</property> + +<!-- User Group Resolution --> + +<property> + <name>hadoop.security.groups.cache.secs</name> + <value>3600</value> +</property> + +<!-- END security configuration --> + + + +<!-- BEGIN properties enabled per HDP-2.1.3 upgrade --> + +<!-- BEGIN Quality of Service --> + + <property> + <name>ipc.8020.callqueue.impl</name> + <value>com.ebay.hadoop.ipc.FairCallQueue</value> + </property> + + <property> + <name>ipc.8020.identity-provider.impl</name> + <value>com.ebay.hadoop.ipc.EbayUserIdentityProvider</value> + </property> + + <property> + <name>ipc.8020.faircallqueue.rpc-scheduler</name> + <value>com.ebay.hadoop.ipc.DecayRpcScheduler</value> + </property> + + <property> + <name>ipc.8020.faircallqueue.priority-levels</name> + <value>10</value> + </property> + + <property> + <name>ipc.8020.faircallqueue.decay-scheduler.thresholds</name> + <!-- <value>1,2,7,10,20,30,40,50,60</value> --> + <value>1,2,3,5,8,13,20,35,50</value> + </property> + + <property> + <name>ipc.8020.faircallqueue.decay-scheduler.period-ms</name> + <value>1000</value> + </property> + + <property> + <name>ipc.8020.faircallqueue.multiplexer.weights</name> + <!-- <value>10,5,3,2,1,1,1,1,1,1</value> --> + <value>80,30,25,20,17,12,6,3,2,1</value> + </property> + +<!-- END Quality of Service --> + + + +<!-- BEGIN Selective Encryption --> +<!-- disabled per HADP-6065 - miguenther - 26 August 2014 + <property> + <name>hadoop.rpc.protection</name> + <value>authentication,privacy</value> + <final>true</final> + </property> + + <property> + <name>hadoop.security.saslproperties.resolver.class</name> + <value>org.apache.hadoop.security.WhitelistBasedResolver</value> + <final>true</final> + </property> + + <property> + <name>hadoop.security.sasl.variablewhitelist.enable</name> + <value>true</value> + <final>true</final> + </property> +--> +<!-- END Selective Encryption --> + + +<!-- END properties enabled per HDP-2.1.3 upgrade --> + +<property> + <name>ha.zookeeper.quorum</name> + <value>apollo-phx-zk-1.vip.ebay.com:2181,apollo-phx-zk-2.vip.ebay.com:2181,apollo-phx-zk-3.vip.ebay.com:2181,apollo-phx-zk-4.vip.ebay.com:2181,apollo-phx-zk-5.vip.ebay.com:2181</value> +</property> + +<!-- NEW QOP Proposed configs below - Same as Ares Tiffany Sept 01, 2015 --> +<property> + <name>hadoop.rpc.protection</name> + <value>authentication,privacy</value> +</property> + + <property> + <name>hadoop.security.saslproperties.resolver.class</name> + <value>org.apache.hadoop.security.WhitelistBasedResolver</value> + </property> + + <property> + <name>hadoop.security.sasl.fixedwhitelist.file</name> + <value>/etc/hadoop/fixedwhitelist</value> + </property> + + <property> + <name>hadoop.security.sasl.variablewhitelist.enable</name> + <value>true</value> + </property> + + <property> + <name>hadoop.security.sasl.variablewhitelist.file</name> + <value>/etc/hadoop/whitelist</value> + </property> + + <property> + 
<name>hadoop.security.sasl.variablewhitelist.cache.secs</name> + <value>3600</value> + </property> +<!-- END NEW QOP Proposed configs below - Same as Ares Tiffany Sept 01, 2015 --> + +</configuration> http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/hdfs-site.xml ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/hdfs-site.xml b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/hdfs-site.xml new file mode 100644 index 0000000..52ba754 --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/hdfs-site.xml @@ -0,0 +1,449 @@ +<?xml version="1.0"?> +<!-- ~ Licensed to the Apache Software Foundation (ASF) under one or more + ~ contributor license agreements. See the NOTICE file distributed with ~ + this work for additional information regarding copyright ownership. ~ The + ASF licenses this file to You under the Apache License, Version 2.0 ~ (the + "License"); you may not use this file except in compliance with ~ the License. + You may obtain a copy of the License at ~ ~ http://www.apache.org/licenses/LICENSE-2.0 + ~ ~ Unless required by applicable law or agreed to in writing, software ~ + distributed under the License is distributed on an "AS IS" BASIS, ~ WITHOUT + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ~ See the + License for the specific language governing permissions and ~ limitations + under the License. --> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> + +<!-- Put site-specific property overrides in this file. --> + +<configuration> + + <!-- The directories for NN, DN and SNN configs --> + + <property> + <name>dfs.namenode.name.dir</name> + <value>/hadoop/nn1/1</value> + <final>true</final> + </property> + + <property> + <name>dfs.datanode.data.dir</name> + <value>/hadoop/1/data,/hadoop/2/data,/hadoop/3/data,/hadoop/4/data,/hadoop/5/data,/hadoop/6/data,/hadoop/7/data,/hadoop/8/data,/hadoop/9/data,/hadoop/10/data,/hadoop/11/data,/hadoop/12/data</value> + </property> + + <property> + <name>dfs.blockreport.initialDelay</name> + <value>900</value> + </property> + + <property> + <name>dfs.namenode.decommission.interval</name> + <value>150</value> + </property> + + <!-- The Nodes include and exclude --> + + <property> + <name>dfs.hosts</name> + <!-- The files containing hosts allowed to connect to namenode --> + <value>/apache/hadoop/etc/hadoop/hosts</value> + </property> + + <property> + <name>dfs.hosts.exclude</name> + <!-- The files containing hosts allowed to connect to namenode --> + <value>/apache/hadoop/etc/hadoop/hdfs-exclude</value> + </property> + + + <property> + <name>dfs.datanode.failed.volumes.tolerated</name> + <value>3</value> + </property> + + <property> + <name>dfs.datanode.balance.bandwidthPerSec</name> + <value>10485760</value> + </property> + + <property> + <!-- Amount of space which HDFS will refuse to use in bytes --> + <name>dfs.datanode.du.reserved</name> + <value>107374182400</value> <!-- 100 GB--> + </property> + + <!-- RMERCHIA AISOPS159160 2012-09-25 --> + + <property> + <name>dfs.heartbeat.interval</name> + <value>6</value> + <description>how frequently dn send a heartbeat.</description> + </property> + + <!-- RMERCHIA AISOPS159160 2012-09-25 change to 6 hours on 2012-10-02 --> + + <property> + <name>dfs.blockreport.intervalMsec</name> + <value>21600000</value> + <description>how frequently dn send a blockreport.</description> + </property> + + <property> + 
<name>dfs.namenode.safemode.threshold-pct</name> + <value>1.0f</value> + <!-- Allows 10 blocks unreported out of 10,000,000 --> + <description> + Specifies the percentage of blocks that should satisfy + the minimal replication requirement defined by dfs.replication.min. + Values less than or equal to 0 mean not to start in safe mode. + Values greater than 1 will make safe mode permanent. + </description> + </property> + + <property> + <name>dfs.namenode.safemode.extension</name> + <value>120000</value> + <!-- 2 minutes --> + <description> Determines extension of safe mode in milliseconds after the threshold level is reached. </description> + </property> + + <property> + <name>dfs.permissions.enabled</name> + <value>true</value> + <description> + If "true", enable permission checking in HDFS. + If "false", permission checking is turned off, + but all other behavior is unchanged. + Switching from one parameter value to the other does not change the mode, + owner or group of files or directories. + </description> + </property> + + <property> + <name>dfs.replication</name> + <value>3</value> + </property> + + <property> + <name>dfs.blocksize</name> + <!-- 128mb (default 64m or 67108864) --> + <value>268435456</value> + </property> + + <property> + <name>dfs.namenode.handler.count</name> + <value>128</value> + </property> + + <property> + <name>dfs.datanode.handler.count</name> + <value>50</value> + </property> + + <!-- updated from 4k to 16k as part of HADP-6065 - miguenther - 26 august 2014 --> + <property> + <name>dfs.datanode.max.transfer.threads</name> + <value>16384</value> + </property> + + <property> + <name>dfs.namenode.replication.max-streams</name> + <value>40</value> + </property> + + <property> + <name>dfs.webhdfs.enabled</name> + <value>true</value> + </property> + + <property> + <name>dfs.block.local-path-access.user</name> + <value>hadoop</value> + <description>the user who is allowed to perform short circuit reads.</description> + </property> + + <property> + <name>dfs.block.access.token.enable</name> + <value>true</value> + </property> + + <property> + <name>dfs.namenode.name.dir.restore</name> + <value>true</value> + </property> + + <property> + <name>dfs.ls.limit</name> + <value>4096</value> + </property> + + <!-- NameNode security config --> + <property> + <name>dfs.web.authentication.kerberos.keytab</name> + <value>/etc/hadoop/hadoop.keytab</value> + </property> + <property> + <name>dfs.namenode.kerberos.internal.spnego.principal</name> + <value>*</value> + </property> + <property> + <name>dfs.namenode.keytab.file</name> + <value>/etc/hadoop/hadoop.keytab</value> + </property> + <property> + <name>dfs.namenode.kerberos.principal</name> + <value>hadoop/[email protected]</value> + <!-- _HOST will be replaced by the the domain name present in fs.default.name. 
It is better to use the actual host name --> + </property> + <property> + <name>dfs.web.authentication.kerberos.principal</name> + <value>HTTP/[email protected],HTTP/[email protected]</value> + </property> + + <!-- DataNode security config --> + <property> + <name>dfs.datanode.data.dir.perm</name> + <value>700</value> + </property> + <property> + <name>dfs.datanode.address</name> + <value>0.0.0.0:1004</value> + </property> + <property> + <name>dfs.datanode.http.address</name> + <value>0.0.0.0:1006</value> + </property> + <property> + <name>dfs.datanode.keytab.file</name> + <value>/etc/hadoop/hadoop.keytab</value> + </property> + <property> + <name>dfs.datanode.kerberos.principal</name> + <value>hadoop/[email protected]</value> + <!-- _HOST will be replaced by the frst domain name mapped to the ip --> + </property> + + <property> + <name>dfs.cluster.administrators</name> + <value> hdmi-hadoopeng</value> + </property> + + <!-- HTTPS SUPPORT --> + + <property> + <name>dfs.https.need.client.auth</name> + <value>false</value> + <description>Whether SSL client certificate authentication is required + </description> + </property> + + <property> + <name>dfs.https.server.keystore.resource</name> + <value>ssl-server.xml</value> + <description>Resource file from which ssl server keystore + information will be extracted + </description> + </property> + + <property> + <name>dfs.https.client.keystore.resource</name> + <value>ssl-client.xml</value> + <description>Resource file from which ssl client keystore + information will be extracted + </description> + </property> + + <property> + <name>dfs.datanode.https.address</name> + <value>0.0.0.0:50075</value> + </property> + + <property> + <name>dfs.datanode.http.address</name> + <value>0.0.0.0:1006</value> + </property> + + + + <property> + <name>dfs.domain.socket.path</name> + <value>/var/run/hadoop-hdfs/dn</value> + </property> + + <property> + <name>dfs.client.read.shortcircuit</name> + <value>true</value> + </property> + + + <property> + <name>dfs.namenode.service.handler.count</name> + <value>55</value> + </property> + + + + + <!-- BEGIN properties enabled per HDP-2.1.3 upgrade --> + + <property> + <name>dfs.namenode.acls.enabled</name> + <value>true</value> + </property> + + <property> + <name>dfs.http.policy</name> + <value>HTTP_AND_HTTPS</value> + </property> + + <property> + <name>dfs.web.authentication.filter</name> + <value>org.apache.hadoop.hdfs.web.TokenAuthFilter,authentication</value> + </property> + + <!-- END properties enabled per HDP-2.1.3 upgrade --> + + + <!-- added as part of HAPD-6065 - miguenther 26 August 2014 --> + <property> + <name>ipc.server.read.threadpool.size</name> + <value>3</value> + </property> + + + <!-- Apollo PHX HA Configs --> + <property> + <name>dfs.nameservices</name> + <value>apollo-phx-nn-ha</value> + <description>Logical name for this new nameservice</description> + </property> + + <property> + <name>dfs.ha.namenodes.apollo-phx-nn-ha</name> + <value>nn1,nn2</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.apollo-phx-nn-ha.nn1</name> + <value>apollo-phx-nn.vip.ebay.com:8020</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.apollo-phx-nn-ha.nn2</name> + <value>apollo-phx-nn-2.vip.ebay.com:8020</value> + </property> + + <property> + <name>dfs.namenode.servicerpc-address.apollo-phx-nn-ha.nn1</name> + <value>apollo-phx-nn.vip.ebay.com:8030</value> + </property> + + <property> + <name>dfs.namenode.servicerpc-address.apollo-phx-nn-ha.nn2</name> + 
<value>apollo-phx-nn-2.vip.ebay.com:8030</value> + </property> + + <property> + <name>dfs.namenode.http-address.apollo-phx-nn-ha.nn1</name> + <value>apollo-phx-nn.vip.ebay.com:50080</value> + </property> + + <property> + <name>dfs.namenode.http-address.apollo-phx-nn-ha.nn2</name> + <value>apollo-phx-nn-2.vip.ebay.com:50080</value> + </property> + + <property> + <name>dfs.namenode.shared.edits.dir</name> + <value>qjournal://phxaishdc9en0010-be.phx.ebay.com:8485;phxaishdc9en0011-be.phx.ebay.com:8485;phxaishdc9en0012-be.phx.ebay.com:8485;phxaishdc9en0013-be.phx.ebay.com:8485;phxaishdc9en0014-be.phx.ebay.com:8485/apollo-phx-nn-ha</value> + </property> + + <property> + <name>dfs.client.failover.proxy.provider.apollo-phx-nn-ha</name> + <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> + </property> + + <property> + <name>dfs.ha.fencing.methods</name> + <value>sshfence + shell(/bin/true) + </value> + </property> + + <property> + <name>dfs.ha.fencing.ssh.private-key-files</name> + <value>/home/hadoop/.ssh/id_rsa</value> + </property> + + <property> + <name>dfs.ha.fencing.ssh.connect-timeout</name> + <value>30000</value> + </property> + + <property> + <name>dfs.ha.automatic-failover.enabled</name> + <value>true</value> + </property> + + <property> + <name>dfs.journalnode.edits.dir</name> + <value>/hadoop/qjm/apollo</value> + </property> + + <property> + <name>dfs.journalnode.kerberos.principal</name> + <value>hadoop/[email protected]</value> + </property> + + <property> + <name>dfs.journalnode.kerberos.internal.spnego.principal</name> + <value>HTTP/[email protected]</value> + </property> + + <property> + <name>dfs.namenode.https-address.apollo-phx-nn-ha.nn2</name> + <value>apollo-phx-nn-2.vip.ebay.com:50070</value> + </property> + + <property> + <name>dfs.namenode.https-address.apollo-phx-nn-ha.nn1</name> + <value>apollo-phx-nn.vip.ebay.com:50070</value> + </property> + + <property> + <name>dfs.journalnode.keytab.file</name> + <value>/etc/hadoop/hadoop.keytab</value> + </property> + + <!-- Apollo HA Configs END --> + + <!-- BEGIN Selective Encryption as in Ares - Sept 01, 2015 Tiffany --> + <property> + <name>dfs.encrypt.data.transfer</name> + <value>true</value> + </property> + + <property> + <name>dfs.encrypt.data.transfer.algorithm</name> + <value>rc4</value> + <final>true</final> + </property> + <property> + <name>dfs.trustedchannel.resolver.class</name> + <value>org.apache.hadoop.hdfs.datatransfer.FlagListTrustedChannelResolver</value> + <final>true</final> + </property> + <property> + <name>dfs.datatransfer.client.encrypt</name> + <value>false</value> + <final>true</final> + </property> + + <!-- END Selective Encryption as in Ares - Sept 01, 2015 Tiffany --> + + <!-- Post Upgrade - improve performance - Oct 23, 2015 Tiffany --> + <property> + <name>dfs.client.block.write.locateFollowingBlock.retries</name> + <value>8</value> + </property> + <!-- END Post Upgrade - improve performance - Oct 23, 2015 Tiffany --> + +</configuration> http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/fe509125/eagle-jpm/eagle-jpm-mr-history/src/main/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/eagle-jpm/eagle-jpm-mr-history/src/main/resources/log4j.properties b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/log4j.properties new file mode 100644 index 0000000..71a5dac --- /dev/null +++ b/eagle-jpm/eagle-jpm-mr-history/src/main/resources/log4j.properties @@ -0,0 +1,34 @@ +# Licensed to the Apache 
Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +log4j.rootLogger=INFO, DRFA, stdout +eagle.log.dir=./logs +eagle.log.file=eagle.log + +# standard output +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %p [%t] %c{2}[%L]: %m%n + +# Daily Rolling File Appender +log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender +log4j.appender.DRFA.File=${eagle.log.dir}/${eagle.log.file} +log4j.appender.DRFA.DatePattern=.yyyy-MM-dd +# 30-day backup +#log4j.appender.DRFA.MaxBackupIndex=30 +log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout + +# Pattern format: Date LogLevel LoggerName LogMessage +log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p [%t] %c{2}[%L]: %m%n \ No newline at end of file
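A brief illustration of how the log4j.properties added above behaves at runtime (not part of this commit; it assumes an slf4j-log4j12 binding on the classpath, and the class name is made up for the example). The root logger is INFO with two appenders, so anything at INFO or above is written both to the console and to the daily-rolling file ${eagle.log.dir}/${eagle.log.file}, i.e. ./logs/eagle.log by default.

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class Log4jPropertiesDemo {
    private static final Logger LOG = LoggerFactory.getLogger(Log4jPropertiesDemo.class);

    public static void main(String[] args) {
        // Routed by log4j.rootLogger=INFO, DRFA, stdout:
        // printed to the console and appended to ./logs/eagle.log, rolled daily (.yyyy-MM-dd).
        LOG.info("MR history feeder started");
        // Below the INFO threshold, so this line is dropped by both appenders.
        LOG.debug("verbose details suppressed");
    }
}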

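A note on the core-site.xml and hdfs-site.xml resources added earlier in this change: because they sit on the topology's classpath, a plain org.apache.hadoop.conf.Configuration picks them up automatically, and fs.defaultFS resolves to the HA nameservice hdfs://apollo-phx-nn-ha through ConfiguredFailoverProxyProvider. The sketch below is illustrative only and not part of the commit; the /mr-history/done path is a hypothetical example, and on this cluster (hadoop.security.authentication=kerberos) a keytab login would be needed before any real call succeeds.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsClientConfigDemo {
    public static void main(String[] args) throws Exception {
        // core-site.xml (and, once the HDFS client loads, hdfs-site.xml) are read from the classpath,
        // so no explicit addResource() calls are needed here.
        Configuration conf = new Configuration();

        // With Kerberos enabled, something like
        // UserGroupInformation.loginUserFromKeytab(principal, keytabPath) would normally come first.
        FileSystem fs = FileSystem.get(conf);  // binds to hdfs://apollo-phx-nn-ha (HA, automatic failover)

        // List a job-history directory; the path is a made-up example.
        for (FileStatus status : fs.listStatus(new Path("/mr-history/done"))) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}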