Try to cd to your Storm local working directory before running the supervisor.
Your log shows user.dir=/root while the process runs as clksuser, which is
consistent with the "java.io.IOException: . doesn't exist" error.
I'm not sure whether this was fixed in later versions, but the Storm
supervisor is not very good about setting its cwd, so if you start it from a
folder that it cannot write to, it can run into problems. If you run it under
supervision (e.g. supervisord), it can be useful to have the Storm process run
as a script that does the cd before starting the supervisor, instead of just
running storm supervisor directly.
On Apr 16, 2015 4:18 AM, "nanda n" <[email protected]> wrote:
> Hi All,
> We have two supervisors running in our production environment; one of
> them is working fine.
> But the other one keeps dying and tries to restart every
> time.
> Below is the exception I am seeing in the supervisor log.
>
> *Note:* I don't see any serialized topology content in the
> supervisor/stormdist folder.
> I am able to see this serialized topology content on the other supervisor
> box.
>
> Could anyone please help me sort out this issue?
>
> 2015-04-16 12:16:15 b.s.d.supervisor [INFO] Starting Supervisor with conf
> {"dev.zookeeper.path" "/tmp/dev-storm-zookeeper",
> "topology.tick.tuple.freq.secs" nil,
> "topology.builtin.metrics.bucket.size.secs" 60,
> "topology.fall.back.on.java.serialization" true,
> "topology.max.error.report.per.interval" 5, "zmq.linger.millis" 5000,
> "topology.skip.missing.kryo.registrations" false,
> "storm.messaging.netty.client_worker_threads" 1, "ui.childopts" "-Xmx768m
> -Djava.net.preferIPv4Stack=true", "storm.zookeeper.session.timeout" 20000,
> "nimbus.reassign" true, "topology.trident.batch.emit.interval.millis" 500,
> "
> storm.messaging.netty.flush.check.interval.ms" 10,
> "nimbus.monitor.freq.secs" 10, "logviewer.childopts" "-Xmx128m",
> "java.library.path" "/usr/local/lib:/opt/local/lib:/usr/lib",
> "topology.executor.send.buffer.size" 1024, "storm.local.dir"
> "/app/clksuser/storm", "storm.messaging.netty.buffer_size" 5242880,
> "supervisor.worker.start.timeout.secs" 120,
> "topology.enable.message.timeouts" true, "nimbus.cleanup.inbox.freq.secs"
> 600, "nimbus.inbox.jar.expiration.secs" 3600, "drpc.worker.threads" 64,
> "topology.worker.shared.thread.pool.size" 4, "nimbus.host" "
> nimbus-qa.shared.clks-int.com", "storm.messaging.netty.min_wait_ms" 100,
> "storm.zookeeper.port" 2181, "transactional.zookeeper.port" nil,
> "topology.executor.receive.buffer.size" 1024,
> "transactional.zookeeper.servers" nil, "storm.zookeeper.root" "/storm",
> "storm.zookeeper.retry.intervalceiling.millis" 30000, "supervisor.enable"
> true, "storm.messaging.netty.server_worker_threads" 1,
> "storm.zookeeper.servers" ["zk1-qa.shared.clks-int.com" "
> zk2-qa.shared.clks-int.com" "zk3-qa.shared.clks-int.com"],
> "transactional.zookeeper.root" "/transactional", "topology.acker.executors"
> nil, "topology.transfer.buffer.size" 1024, "topology.worker.childopts" nil,
> "drpc.queue.size" 128, "worker.childopts" "-Xmx768m
> -Djava.net.preferIPv4Stack=true", "supervisor.heartbeat.frequency.secs" 5,
> "topology.error.throttle.interval.secs" 10, "zmq.hwm" 0, "drpc.port" 3772,
> "supervisor.monitor.frequency.secs" 3, "drpc.childopts" "-Xmx768m",
> "topology.receiver.buffer.size" 8, "task.heartbeat.frequency.secs" 3,
> "topology.tasks" nil, "storm.messaging.netty.max_retries" 30,
> "topology.spout.wait.strategy"
> "backtype.storm.spout.SleepSpoutWaitStrategy",
> "nimbus.thrift.max_buffer_size" 1048576, "topology.max.spout.pending" nil,
> "storm.zookeeper.retry.interval" 1000, "
> topology.sleep.spout.wait.strategy.time.ms" 1, "nimbus.topology.validator"
> "backtype.storm.nimbus.DefaultTopologyValidator", "supervisor.slots.ports"
> [6700 6701 6702 6703], "topology.debug" false, "nimbus.task.launch.secs"
> 120, "nimbus.supervisor.timeout.secs" 60, "topology.message.timeout.secs"
> 30, "task.refresh.poll.secs" 10, "topology.workers" 1,
> "supervisor.childopts" "-Djava.net.preferIPv4Stack=true",
> "nimbus.thrift.port" 6627, "topology.stats.sample.rate" 0.05,
> "worker.heartbeat.frequency.secs" 1, "topology.tuple.serializer"
> "backtype.storm.serialization.types.ListDelegateSerializer",
> "topology.disruptor.wait.strategy"
> "com.lmax.disruptor.BlockingWaitStrategy", "topology.multilang.serializer"
> "backtype.storm.multilang.JsonSerializer", "nimbus.task.timeout.secs" 30,
> "storm.zookeeper.connection.timeout" 15000, "topology.kryo.factory"
> "backtype.storm.serialization.DefaultKryoFactory", "drpc.invocations.port"
> 3773, "logviewer.port" 8000, "zmq.threads" 1, "storm.zookeeper.retry.times"
> 5, "topology.worker.receiver.thread.count" 1, "storm.thrift.transport"
> "backtype.storm.security.auth.SimpleTransportPlugin",
> "topology.state.synchronization.timeout.secs" 60,
> "supervisor.worker.timeout.secs" 30, "nimbus.file.copy.expiration.secs"
> 600, "storm.messaging.transport" "backtype.storm.messaging.netty.Context",
> "
> logviewer.appender.name" "A1", "storm.messaging.netty.max_wait_ms" 1000,
> "drpc.request.timeout.secs" 600, "storm.local.mode.zmq" false, "ui.port"
> 8080, "nimbus.childopts" "-Xmx1024m -Djava.net.preferIPv4Stack=true",
> "storm.cluster.mode" "distributed", "topology.max.task.parallelism" nil,
> "storm.messaging.netty.transfer.batch.size" 262144}
> 2015-04-16 12:16:15 o.a.c.f.i.CuratorFrameworkImpl [INFO] Starting
> 2015-04-16 12:16:15 o.a.z.ZooKeeper [INFO] Initiating client connection,
> connectString=zk1-qa.shared.clks-int.com:2181,
> zk2-qa.shared.clks-int.com:2181,zk3-qa.shared.clks-int.com:2181
> sessionTimeout=20000 watcher=org.apache.curator.ConnectionState@104486db
> 2015-04-16 12:16:15 o.a.z.ClientCnxn [INFO] Opening socket connection to
> server 10.20.14.3/10.20.14.3:2181. Will not attempt to authenticate using
> SASL (unknown error)
> 2015-04-16 12:16:15 o.a.z.ClientCnxn [INFO] Socket connection established
> to 10.20.14.3/10.20.14.3:2181, initiating session
> 2015-04-16 12:16:15 o.a.z.ClientCnxn [INFO] Session establishment complete
> on server 10.20.14.3/10.20.14.3:2181, sessionid = 0x14c6beb5e5851c3,
> negotiated timeout = 20000
> 2015-04-16 12:16:15 o.a.c.f.s.ConnectionStateManager [INFO] State change:
> CONNECTED
> 2015-04-16 12:16:15 o.a.c.f.s.ConnectionStateManager [WARN] There are no
> ConnectionStateListeners registered.
> 2015-04-16 12:16:15 b.s.zookeeper [INFO] Zookeeper state update:
> :connected:none
> 2015-04-16 12:16:16 o.a.z.ZooKeeper [INFO] Session: 0x14c6beb5e5851c3
> closed
> 2015-04-16 12:16:16 o.a.z.ClientCnxn [INFO] EventThread shut down
> 2015-04-16 12:16:16 o.a.c.f.i.CuratorFrameworkImpl [INFO] Starting
> 2015-04-16 12:16:16 o.a.z.ZooKeeper [INFO] Initiating client connection,
> connectString=zk1-qa.shared.clks-int.com:2181,
> zk2-qa.shared.clks-int.com:2181,zk3-qa.shared.clks-int.com:2181/storm
> sessionTimeout=20000 watcher=org.apache.curator.ConnectionState@5560889
> 2015-04-16 12:16:16 o.a.z.ClientCnxn [INFO] Opening socket connection to
> server 10.20.14.4/10.20.14.4:2181. Will not attempt to authenticate using
> SASL (unknown error)
> 2015-04-16 12:16:16 o.a.z.ClientCnxn [INFO] Socket connection established
> to 10.20.14.4/10.20.14.4:2181, initiating session
> 2015-04-16 12:16:16 o.a.z.ClientCnxn [INFO] Session establishment complete
> on server 10.20.14.4/10.20.14.4:2181, sessionid = 0x24c6beb5ffa5106,
> negotiated timeout = 20000
> 2015-04-16 12:16:16 o.a.c.f.s.ConnectionStateManager [INFO] State change:
> CONNECTED
> 2015-04-16 12:16:16 o.a.c.f.s.ConnectionStateManager [WARN] There are no
> ConnectionStateListeners registered.
> 2015-04-16 12:16:16 b.s.d.supervisor [INFO] Starting supervisor with id
> f7534d0c-4b1e-4494-be15-f43266ca3de4 at host as3-qa.shared.clks-int.com
> 2015-04-16 12:16:17 b.s.d.supervisor [INFO] Shutting down and clearing
> state for id 4e5185c3-d5c6-4d98-93ae-c49e784e588b. Current supervisor time:
> 1429166777. State: :disallowed, Heartbeat:
> #backtype.storm.daemon.common.WorkerHeartbeat{:time-secs 1422699940,
> :storm-id "CommonStreamTopology-37-1422690234", :executors #{[2 2] [4 4] [6
> 6] [8 8] [-1 -1]}, :port 6703}
> 2015-04-16 12:16:17 b.s.d.supervisor [INFO] Shutting down
> f7534d0c-4b1e-4494-be15-f43266ca3de4:4e5185c3-d5c6-4d98-93ae-c49e784e588b
> 2015-04-16 12:16:17 b.s.event [ERROR] Error when processing event
> *java.io.IOException: . doesn't exist.*
> at
> org.apache.commons.exec.DefaultExecutor.execute(DefaultExecutor.java:157)
> ~[commons-exec-1.1.jar:1.1]
> at
> org.apache.commons.exec.DefaultExecutor.execute(DefaultExecutor.java:147)
> ~[commons-exec-1.1.jar:1.1]
> at backtype.storm.util$exec_command_BANG_.invoke(util.clj:378)
> ~[storm-core-0.9.2-incubating.jar:0.9.2-incubating]
> at backtype.storm.util$ensure_process_killed_BANG_.invoke(util.clj:394)
> ~[storm-core-0.9.2-incubating.jar:0.9.2-incubating]
> at
> backtype.storm.daemon.supervisor$shutdown_worker.invoke(supervisor.clj:175)
> ~[storm-core-0.9.2-incubating.jar:0.9.2-incubating]
> at
> backtype.storm.daemon.supervisor$sync_processes.invoke(supervisor.clj:240)
> ~[storm-core-0.9.2-incubating.jar:0.9.2-incubating]
> at clojure.lang.AFn.applyToHelper(AFn.java:161) [clojure-1.5.1.jar:na]
> at clojure.lang.AFn.applyTo(AFn.java:151) [clojure-1.5.1.jar:na]
> at clojure.core$apply.invoke(core.clj:619) ~[clojure-1.5.1.jar:na]
> at clojure.core$partial$fn__4190.doInvoke(core.clj:2396)
> ~[clojure-1.5.1.jar:na]
> at clojure.lang.RestFn.invoke(RestFn.java:397) ~[clojure-1.5.1.jar:na]
> at backtype.storm.event$event_manager$fn__2378.invoke(event.clj:39)
> ~[storm-core-0.9.2-incubating.jar:0.9.2-incubating]
> at clojure.lang.AFn.run(AFn.java:24) [clojure-1.5.1.jar:na]
> at java.lang.Thread.run(Thread.java:745) [na:1.7.0_67]
> 2015-04-16 12:16:17 b.s.util [INFO] Halting process: ("Error when
> processing an event")
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client environment:host.name=
> as3-qa.shared.clks-int.com
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.version=1.7.0_67
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.vendor=Oracle Corporation
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.home=/usr/lib/jvm/jdk1.7.0_67/jre
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
>
> environment:java.class.path=/app/clksuser/storm/lib/ring-devel-0.3.11.jar:/app/clksuser/storm/lib/netty-3.6.3.Final.jar:/app/clksuser/storm/lib/core.incubator-0.1.0.jar:/app/clksuser/storm/lib/commons-io-2.4.jar:/app/clksuser/storm/lib/ring-servlet-0.3.11.jar:/app/clksuser/storm/lib/curator-framework-2.4.0.jar:/app/clksuser/storm/lib/log4j-over-slf4j-1.6.6.jar:/app/clksuser/storm/lib/commons-logging-1.1.3.jar:/app/clksuser/storm/lib/json-simple-1.1.jar:/app/clksuser/storm/lib/commons-fileupload-1.2.1.jar:/app/clksuser/storm/lib/jgrapht-core-0.9.0.jar:/app/clksuser/storm/lib/compojure-1.1.3.jar:/app/clksuser/storm/lib/guava-13.0.jar:/app/clksuser/storm/lib/jline-2.11.jar:/app/clksuser/storm/lib/tools.macro-0.1.0.jar:/app/clksuser/storm/lib/logback-classic-1.0.6.jar:/app/clksuser/storm/lib/carbonite-1.4.0.jar:/app/clksuser/storm/lib/curator-client-2.4.0.jar:/app/clksuser/storm/lib/httpcore-4.3.2.jar:/app/clksuser/storm/lib/servlet-api-2.5-20081211.jar:/app/clksuser/storm/lib/servlet-api-2.5.jar:/app/clksuser/storm/lib/chill-java-0.3.5.jar:/app/clksuser/storm/lib/netty-3.2.2.Final.jar:/app/clksuser/storm/lib/commons-exec-1.1.jar:/app/clksuser/storm/lib/httpclient-4.3.3.jar:/app/clksuser/storm/lib/objenesis-1.2.jar:/app/clksuser/storm/lib/commons-lang-2.5.jar:/app/clksuser/storm/lib/logback-core-1.0.6.jar:/app/clksuser/storm/lib/ring-core-1.1.5.jar:/app/clksuser/storm/lib/slf4j-api-1.6.5.jar:/app/clksuser/storm/lib/clj-stacktrace-0.2.4.jar:/app/clksuser/storm/lib/math.numeric-tower-0.0.1.jar:/app/clksuser/storm/lib/tools.cli-0.2.4.jar:/app/clksuser/storm/lib/clout-1.0.1.jar:/app/clksuser/storm/lib/zookeeper-3.4.5.jar:/app/clksuser/storm/lib/clj-time-0.4.1.jar:/app/clksuser/storm/lib/joda-time-2.0.jar:/app/clksuser/storm/lib/tools.logging-0.2.3.jar:/app/clksuser/storm/lib/snakeyaml-1.11.jar:/app/clksuser/storm/lib/jetty-util-6.1.26.jar:/app/clksuser/storm/lib/kryo-2.21.jar:/app/clksuser/storm/lib/commons-codec-1.6.jar:/app/clksuser/storm/lib/hiccup-0.3.6.jar:/app/clksu
ser/storm/lib/asm-4.0.jar:/app/clksuser/storm/lib/minlog-1.2.jar:/app/clksuser/storm/lib/ring-jetty-adapter-0.3.11.jar:/app/clksuser/storm/lib/clojure-1.5.1.jar:/app/clksuser/storm/lib/disruptor-2.10.1.jar:/app/clksuser/storm/lib/jetty-6.1.26.jar:/app/clksuser/storm/lib/reflectasm-1.07-shaded.jar:/app/clksuser/storm/lib/storm-core-0.9.2-incubating.jar:/app/clksuser/storm/conf
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.library.path=/usr/local/lib:/opt/local/lib:/usr/lib
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.io.tmpdir=/tmp
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:java.compiler=<NA>
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client environment:os.name
> =Linux
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client environment:os.arch=amd64
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:os.version=2.6.32-504.1.3.el6.x86_64
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client environment:user.name
> =clksuser
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:user.home=/app/clksuser
> 2015-04-16 12:16:47 o.a.z.ZooKeeper [INFO] Client
> environment:user.dir=/root
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server environment:
> host.name=as3-qa.shared.clks-int.com
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.version=1.7.0_67
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.vendor=Oracle Corporation
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.home=/usr/lib/jvm/jdk1.7.0_67/jre
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
>
> environment:java.class.path=/app/clksuser/storm/lib/ring-devel-0.3.11.jar:/app/clksuser/storm/lib/netty-3.6.3.Final.jar:/app/clksuser/storm/lib/core.incubator-0.1.0.jar:/app/clksuser/storm/lib/commons-io-2.4.jar:/app/clksuser/storm/lib/ring-servlet-0.3.11.jar:/app/clksuser/storm/lib/curator-framework-2.4.0.jar:/app/clksuser/storm/lib/log4j-over-slf4j-1.6.6.jar:/app/clksuser/storm/lib/commons-logging-1.1.3.jar:/app/clksuser/storm/lib/json-simple-1.1.jar:/app/clksuser/storm/lib/commons-fileupload-1.2.1.jar:/app/clksuser/storm/lib/jgrapht-core-0.9.0.jar:/app/clksuser/storm/lib/compojure-1.1.3.jar:/app/clksuser/storm/lib/guava-13.0.jar:/app/clksuser/storm/lib/jline-2.11.jar:/app/clksuser/storm/lib/tools.macro-0.1.0.jar:/app/clksuser/storm/lib/logback-classic-1.0.6.jar:/app/clksuser/storm/lib/carbonite-1.4.0.jar:/app/clksuser/storm/lib/curator-client-2.4.0.jar:/app/clksuser/storm/lib/httpcore-4.3.2.jar:/app/clksuser/storm/lib/servlet-api-2.5-20081211.jar:/app/clksuser/storm/lib/servlet-api-2.5.jar:/app/clksuser/storm/lib/chill-java-0.3.5.jar:/app/clksuser/storm/lib/netty-3.2.2.Final.jar:/app/clksuser/storm/lib/commons-exec-1.1.jar:/app/clksuser/storm/lib/httpclient-4.3.3.jar:/app/clksuser/storm/lib/objenesis-1.2.jar:/app/clksuser/storm/lib/commons-lang-2.5.jar:/app/clksuser/storm/lib/logback-core-1.0.6.jar:/app/clksuser/storm/lib/ring-core-1.1.5.jar:/app/clksuser/storm/lib/slf4j-api-1.6.5.jar:/app/clksuser/storm/lib/clj-stacktrace-0.2.4.jar:/app/clksuser/storm/lib/math.numeric-tower-0.0.1.jar:/app/clksuser/storm/lib/tools.cli-0.2.4.jar:/app/clksuser/storm/lib/clout-1.0.1.jar:/app/clksuser/storm/lib/zookeeper-3.4.5.jar:/app/clksuser/storm/lib/clj-time-0.4.1.jar:/app/clksuser/storm/lib/joda-time-2.0.jar:/app/clksuser/storm/lib/tools.logging-0.2.3.jar:/app/clksuser/storm/lib/snakeyaml-1.11.jar:/app/clksuser/storm/lib/jetty-util-6.1.26.jar:/app/clksuser/storm/lib/kryo-2.21.jar:/app/clksuser/storm/lib/commons-codec-1.6.jar:/app/clksuser/storm/lib/hiccup-0.3.6.jar:/app/clksu
ser/storm/lib/asm-4.0.jar:/app/clksuser/storm/lib/minlog-1.2.jar:/app/clksuser/storm/lib/ring-jetty-adapter-0.3.11.jar:/app/clksuser/storm/lib/clojure-1.5.1.jar:/app/clksuser/storm/lib/disruptor-2.10.1.jar:/app/clksuser/storm/lib/jetty-6.1.26.jar:/app/clksuser/storm/lib/reflectasm-1.07-shaded.jar:/app/clksuser/storm/lib/storm-core-0.9.2-incubating.jar:/app/clksuser/storm/conf
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.library.path=/usr/local/lib:/opt/local/lib:/usr/lib
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.io.tmpdir=/tmp
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:java.compiler=<NA>
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server environment:
> os.name=Linux
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:os.arch=amd64
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:os.version=2.6.32-504.1.3.el6.x86_64
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server environment:
> user.name=clksuser
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:user.home=/app/clksuser
> 2015-04-16 12:16:47 o.a.z.s.ZooKeeperServer [INFO] Server
> environment:user.dir=/root
>
>
>
> Thanks & Regards,
> Winston
>