[
https://issues.apache.org/jira/browse/FLINK-10992?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16700478#comment-16700478
]
ASF GitHub Bot commented on FLINK-10992:
----------------------------------------
GJL closed pull request #7164: [FLINK-10992][tests] Revise Hadoop configuration.
URL: https://github.com/apache/flink/pull/7164
This is a PR merged from a forked repository.
Because GitHub hides the original diff of a foreign (fork) pull request
once it is merged, the diff is reproduced below for the sake of provenance:
diff --git a/flink-jepsen/src/jepsen/flink/hadoop.clj b/flink-jepsen/src/jepsen/flink/hadoop.clj
index 99f3f72d45c..4ba78593b44 100644
--- a/flink-jepsen/src/jepsen/flink/hadoop.clj
+++ b/flink-jepsen/src/jepsen/flink/hadoop.clj
@@ -19,12 +19,15 @@
[clojure.tools.logging :refer :all]
[jepsen
[control :as c]
- [db :as db]]
- [jepsen.control.util :as cu]))
+ [db :as db]
+ [util :refer [meh]]]
+ [jepsen.control.util :as cu]
+ [jepsen.flink.utils :as fu]))
(def install-dir "/opt/hadoop")
(def hadoop-conf-dir (str install-dir "/etc/hadoop"))
-(def yarn-log-dir "/tmp/logs/yarn")
+(def log-dir (str install-dir "/logs"))
+(def yarn-log-dir (str log-dir "/yarn"))
(defn name-node
[nodes]
@@ -51,7 +54,12 @@
(defn core-site-config
[test]
- {:fs.defaultFS (str "hdfs://" (name-node (:nodes test)) ":9000")})
+ {:hadoop.tmp.dir (str install-dir "/tmp")
+ :fs.defaultFS (str "hdfs://" (name-node (:nodes test)) ":9000")})
+
+(defn hdfs-site-config
+ [_]
+ {:dfs.replication "1"})
(defn property-value
[property value]
@@ -66,8 +74,23 @@
(xml/element :configuration
{}
(map (fn [[k v]] (property-value k v)) (seq
config))))]
- (c/exec :echo config-xml :> config-file)
- ))
+ (c/exec :echo config-xml :> config-file)))
+
+(defn- write-hadoop-env!
+ "Configures additional environment variables in hadoop-env.sh"
+ []
+ (let [env-vars ["export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64"
+ "export HADOOP_NAMENODE_OPTS=\"-Xms2G -Xmx2G
$HADOOP_NAMENODE_OPTS\""
+ "export HADOOP_DATANODE_OPTS=\"-Xms2G -Xmx2G
$HADOOP_DATANODE_OPTS\""]]
+ (doseq [env-var env-vars]
+ (c/exec :echo env-var :>> (str install-dir
"/etc/hadoop/hadoop-env.sh")))))
+
+(defn- write-configuration!
+ [test]
+ (write-config! (str install-dir "/etc/hadoop/yarn-site.xml")
(yarn-site-config test))
+ (write-config! (str install-dir "/etc/hadoop/core-site.xml")
(core-site-config test))
+ (write-config! (str install-dir "/etc/hadoop/hdfs-site.xml")
(hdfs-site-config test))
+ (write-hadoop-env!))
(defn start-name-node!
[test node]
@@ -104,12 +127,6 @@
(info "Start NodeManager")
(c/exec (str install-dir "/sbin/yarn-daemon.sh") :--config hadoop-conf-dir
:start :nodemanager)))
-(defn find-files!
- [dir]
- (->>
- (clojure.string/split (c/exec :find dir :-type :f) #"\n")
- (remove clojure.string/blank?)))
-
(defn db
[url]
(reify db/DB
@@ -117,26 +134,20 @@
(info "Install Hadoop from" url)
(c/su
(cu/install-archive! url install-dir)
- (write-config! (str install-dir "/etc/hadoop/yarn-site.xml")
(yarn-site-config test))
- (write-config! (str install-dir "/etc/hadoop/core-site.xml")
(core-site-config test))
- (c/exec :echo (c/lit "export
JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64") :>> (str install-dir
"/etc/hadoop/hadoop-env.sh"))
+ (write-configuration! test)
(start-name-node-formatted! test node)
(start-data-node! test node)
(start-resource-manager! test node)
(start-node-manager! test node)))
- (teardown! [_ test node]
+ (teardown! [_ _ _]
(info "Teardown Hadoop")
(c/su
(cu/grepkill! "hadoop")
- (c/exec (c/lit (str "rm -rf /tmp/hadoop-* ||:")))))
+ (c/exec :rm :-rf install-dir)))
db/LogFiles
(log-files [_ _ _]
(c/su
- (concat (find-files! (str install-dir "/logs"))
- (if (cu/exists? yarn-log-dir)
- (do
- (c/exec :chmod :-R :777 yarn-log-dir)
- (find-files! yarn-log-dir))
- []))))))
+ (meh (c/exec :chmod :-R :755 log-dir))
+ (fu/find-files! log-dir)))))
diff --git a/flink-jepsen/src/jepsen/flink/utils.clj b/flink-jepsen/src/jepsen/flink/utils.clj
index 2c7328f21a8..1aa53efe7ae 100644
--- a/flink-jepsen/src/jepsen/flink/utils.clj
+++ b/flink-jepsen/src/jepsen/flink/utils.clj
@@ -51,6 +51,20 @@
(recur op (assoc keys :retries (dec retries))))
(success r)))))
+(defn find-files!
+ "Lists files recursively given a directory. If the directory does not exist,
an empty collection
+ is returned."
+ [dir]
+ (let [files (try
+ (c/exec :find dir :-type :f)
+ (catch Exception e
+ (if (.contains (.getMessage e) "No such file or directory")
+ ""
+ (throw e))))]
+ (->>
+ (clojure.string/split files #"\n")
+ (remove clojure.string/blank?))))
+
;;; runit process supervisor (http://smarden.org/runit/)
(def runit-version "2.1.2-3")
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
> Jepsen: Do not use /tmp as HDFS Data Directory
> ----------------------------------------------
>
> Key: FLINK-10992
> URL: https://issues.apache.org/jira/browse/FLINK-10992
> Project: Flink
> Issue Type: Bug
> Components: Tests
> Affects Versions: 1.8.0
> Reporter: Gary Yao
> Assignee: Gary Yao
> Priority: Major
> Labels: pull-request-available
>
> {{dfs.name.dir}} and {{dfs.data.dir}} should not be located in {{/tmp}}. The
> directories might get deleted unintentionally, which can cause test failures.
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)