[ 
https://issues.apache.org/jira/browse/FLUME-1232?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13288073#comment-13288073
 ] 

Will McQueen commented on FLUME-1232:
-------------------------------------

I found a slightly simpler test case.

Steps:
1) Put custom-timestamp-interceptor.jar in /usr/lib/flume-ng/lib (built from 
the source file shown in the previous comment)
2) Start clean by wiping-out any existing /tmp/flume-{check,data} dirs
3) Start the agent. Tail the log in separate window. Monitor the presence of 
lock files in separate window using 'watch ls /tmp/flume-{check,data}'. Confirm 
lock files exist.
4) Wait 30 seconds, stop agent. Confirm no lock files.
5) Start agent. Confirm lock files exist. Wait 30 seconds. Stop agent (no need 
to stop the agent during the replay... can stop it any time after the replay is 
done). Confirm no lock files.
6) Start agent. Confirm lock files exist. After the reply is done, you should 
see the same exception repeatedly, which prevents agent from doing work.

Config file is:
{code}
agent.channels = c1
agent.sources = r1
agent.sinks = k1

agent.channels.c1.type = FILE
agent.channels.c1.checkpointDir = /tmp/flume-check
agent.channels.c1.dataDirs = /tmp/flume-data

agent.sources.r1.channels = c1
agent.sources.r1.type = EXEC
agent.sources.r1.command = /bin/echo FLUME-1232
agent.sources.r1.restart = true
agent.sources.r1.restartThrottle = 100
agent.sources.r1.interceptors = i1
agent.sources.r1.interceptors.i1.type = 
org.apache.flume.interceptor.custom.TimestampInterceptor$Builder

agent.sinks.k1.channel = c1
agent.sinks.k1.type = LOGGER
{code}

2012-06-02 20:41:27,932 DEBUG file.ReplayHandler: Pending take 
FlumeEventPointer [fileID=1, offset=42175]
2012-06-02 20:41:27,932 DEBUG file.ReplayHandler: Pending take 
FlumeEventPointer [fileID=1, offset=42268]
2012-06-02 20:41:27,932 DEBUG file.ReplayHandler: Pending take 
FlumeEventPointer [fileID=1, offset=42361]
2012-06-02 20:41:27,933 ERROR file.Log: Failed to initialize Log
java.lang.IllegalStateException: Pending takes 3 exist after the end of replay
    at com.google.common.base.Preconditions.checkState(Preconditions.java:145)
    at 
org.apache.flume.channel.file.ReplayHandler.replayLog(ReplayHandler.java:137)
    at org.apache.flume.channel.file.Log.replay(Log.java:205)
    at org.apache.flume.channel.file.FileChannel.start(FileChannel.java:180)
    at 
org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
    at 
java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
    at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$101(ScheduledThreadPoolExecutor.java:98)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.runPeriodic(ScheduledThreadPoolExecutor.java:180)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:204)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
2012-06-02 20:41:27,934 ERROR lifecycle.LifecycleSupervisor: Unable to start 
org.apache.flume.channel.file.FileChannel@37bd2664 - Exception follows.
java.lang.IllegalStateException: Log is closed
    at com.google.common.base.Preconditions.checkState(Preconditions.java:145)
    at org.apache.flume.channel.file.Log.getFlumeEventQueue(Log.java:226)
    at org.apache.flume.channel.file.FileChannel.getDepth(FileChannel.java:253)
    at org.apache.flume.channel.file.FileChannel.start(FileChannel.java:187)
    at 
org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
    at 
java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
    at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$101(ScheduledThreadPoolExecutor.java:98)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.runPeriodic(ScheduledThreadPoolExecutor.java:180)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:204)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
2012-06-02 20:41:27,934 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:28,434 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:28,934 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:29,435 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:29,935 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:30,435 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:30,934 INFO file.FileChannel: Starting FileChannel with 
dataDir [/tmp/flume-data]
2012-06-02 20:41:30,935 INFO file.Log: Cannot lock /tmp/flume-check. The 
directory is already locked.
2012-06-02 20:41:30,935 ERROR lifecycle.LifecycleSupervisor: Unable to start 
org.apache.flume.channel.file.FileChannel@37bd2664 - Exception follows.
java.lang.RuntimeException: java.io.IOException: Cannot lock /tmp/flume-check. 
The directory is already locked.
    at com.google.common.base.Throwables.propagate(Throwables.java:156)
    at org.apache.flume.channel.file.FileChannel.start(FileChannel.java:182)
    at 
org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
    at 
java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
    at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$101(ScheduledThreadPoolExecutor.java:98)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.runPeriodic(ScheduledThreadPoolExecutor.java:180)
    at 
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:204)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
    at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
    at java.lang.Thread.run(Thread.java:662)
Caused by: java.io.IOException: Cannot lock /tmp/flume-check. The directory is 
already locked.
    at org.apache.flume.channel.file.Log.lock(Log.java:574)
    at org.apache.flume.channel.file.Log.<init>(Log.java:95)
    at org.apache.flume.channel.file.FileChannel.start(FileChannel.java:178)
    ... 10 more
2012-06-02 20:41:30,936 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:31,436 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:31,936 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:32,436 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:32,937 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:33,437 INFO nodemanager.DefaultLogicalNodeManager: Waiting for 
channel: c1 to start. Sleeping for 500 ms
2012-06-02 20:41:33,936 INFO file.FileChannel: Starting FileChannel with 
dataDir [/tmp/flume-data]
2012-06-02 20:41:33,936 INFO file.Log: Cannot lock /tmp/flume-check. The 
directory is already locked.
2012-06-02 20:41:33,936 ERROR lifecycle.LifecycleSupervisor: Unable to start 
org.apache.flume.channel.file.FileChannel@37bd2664 - Exception follows.


                
> Cannot start agent a 3rd time when using FileChannel
> ----------------------------------------------------
>
>                 Key: FLUME-1232
>                 URL: https://issues.apache.org/jira/browse/FLUME-1232
>             Project: Flume
>          Issue Type: Bug
>          Components: Channel
>    Affects Versions: v1.2.0
>         Environment: RHEL 5.6 64-bit
>            Reporter: Will McQueen
>            Priority: Blocker
>             Fix For: v1.2.0
>
>
> Steps:
> 1) Start clean by wiping-out FileChannel's existing checkpoint dir and data 
> dir
> 2) Configure the agent to use filechannel (type = FILE). THe config file I 
> used is at the end of this text.
> 3) Start the agent, confirm lock files exist in data and checkpoint dirs, 
> stop agent, confirm lock files are remove from data and checkpoint dirs.
> 4) Repeat step 3
> 5) Start the agent. The following exceptions are shown in the logs:
> 2012-05-29 03:15:36,453 DEBUG file.ReplayHandler: record.getTimestamp() = 
> 1338286275813, lastCheckpoint = 1338286279596, fileId = 1, offset = 1924, 
> type = Commit, transaction 1338285619250
> 2012-05-29 03:15:36,453 DEBUG file.ReplayHandler: record.getTimestamp() = 
> 1338286279783, lastCheckpoint = 1338286279596, fileId = 1, offset = 1949, 
> type = Take, transaction 1338285619251
> 2012-05-29 03:15:36,453 DEBUG file.ReplayHandler: record.getTimestamp() = 
> 1338286279784, lastCheckpoint = 1338286279596, fileId = 1, offset = 1980, 
> type = Commit, transaction 1338285619251
> 2012-05-29 03:15:36,453 DEBUG file.ReplayHandler: Processing commit of Take
> 2012-05-29 03:15:36,453 INFO file.ReplayHandler: Replayed 1 from 
> /var/run/flume-ng/.flume/file-channel/data/log-1
> 2012-05-29 03:15:36,453 INFO file.ReplayHandler: Replaying 
> /var/run/flume-ng/.flume/file-channel/data/log-2
> 2012-05-29 03:15:36,454 DEBUG file.ReplayHandler: record.getTimestamp() = 
> 1338286370280, lastCheckpoint = 1338286279596, fileId = 2, offset = 8, type = 
> Take, transaction 1338286369982
> 2012-05-29 03:15:36,454 DEBUG file.ReplayHandler: record.getTimestamp() = 
> 1338286370287, lastCheckpoint = 1338286279596, fileId = 2, offset = 39, type 
> = Commit, transaction 1338286369982
> 2012-05-29 03:15:36,454 DEBUG file.ReplayHandler: Processing commit of Take
> 2012-05-29 03:15:36,454 INFO file.ReplayHandler: Unable to remove 
> FlumeEventPointer [fileID=1, offset=1853] added to pending list
> 2012-05-29 03:15:36,454 INFO file.ReplayHandler: Replayed 1 from 
> /var/run/flume-ng/.flume/file-channel/data/log-2
> 2012-05-29 03:15:36,454 DEBUG file.ReplayHandler: Pending take 
> FlumeEventPointer [fileID=1, offset=1853]
> 2012-05-29 03:15:36,455 ERROR file.Log: Failed to initialize Log
> java.lang.IllegalStateException: Pending takes 1 exist after the end of replay
>         at 
> com.google.common.base.Preconditions.checkState(Preconditions.java:145)
>         at 
> org.apache.flume.channel.file.ReplayHandler.replayLog(ReplayHandler.java:137)
>         at org.apache.flume.channel.file.Log.replay(Log.java:205)
>         at 
> org.apache.flume.channel.file.FileChannel.start(FileChannel.java:180)
>         at 
> org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
>         at 
> java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
>         at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$101(ScheduledThreadPoolExecutor.java:98)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.runPeriodic(ScheduledThreadPoolExecutor.java:180)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:204)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>         at java.lang.Thread.run(Thread.java:662)
> 2012-05-29 03:15:36,457 ERROR lifecycle.LifecycleSupervisor: Unable to start 
> org.apache.flume.channel.file.FileChannel@1ac88440 - Exception follows.
> java.lang.IllegalStateException: Log is closed
>         at 
> com.google.common.base.Preconditions.checkState(Preconditions.java:145)
>         at org.apache.flume.channel.file.Log.getFlumeEventQueue(Log.java:226)
>         at 
> org.apache.flume.channel.file.FileChannel.getDepth(FileChannel.java:253)
>         at 
> org.apache.flume.channel.file.FileChannel.start(FileChannel.java:187)
>         at 
> org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
>         at 
> java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
>         at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$101(ScheduledThreadPoolExecutor.java:98)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.runPeriodic(ScheduledThreadPoolExecutor.java:180)
>         at 
> java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:204)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.runTask(ThreadPoolExecutor.java:886)
>         at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:908)
>         at java.lang.Thread.run(Thread.java:662)
> 2012-05-29 03:15:36,458 ERROR flume.SinkRunner: Unhandled exception, logging 
> and sleeping for 5000ms
> java.lang.IllegalStateException: Channel closed
>         at 
> com.google.common.base.Preconditions.checkState(Preconditions.java:145)
>         at 
> org.apache.flume.channel.file.FileChannel.createTransaction(FileChannel.java:237)
>         at 
> org.apache.flume.channel.BasicChannelSemantics.getTransaction(BasicChannelSemantics.java:118)
>         at org.apache.flume.sink.LoggerSink.process(LoggerSink.java:61)
>         at 
> org.apache.flume.sink.DefaultSinkProcessor.process(DefaultSinkProcessor.java:68)
>         at org.apache.flume.SinkRunner$PollingRunner.run(SinkRunner.java:147)
>         at java.lang.Thread.run(Thread.java:662)
> 2012-05-29 03:15:39,460 INFO file.FileChannel: Starting FileChannel with 
> dataDir [/var/run/flume-ng/.flume/file-channel/data]
> 2012-05-29 03:15:39,460 INFO file.Log: Cannot lock 
> /var/run/flume-ng/.flume/file-channel/checkpoint. The directory is already 
> locked.
> 2012-05-29 03:15:39,461 ERROR lifecycle.LifecycleSupervisor: Unable to start 
> org.apache.flume.channel.file.FileChannel@1ac88440 - Exception follows.
> java.lang.RuntimeException: java.io.IOException: Cannot lock 
> /var/run/flume-ng/.flume/file-channel/checkpoint. The directory is already 
> locked.
>         at com.google.common.base.Throwables.propagate(Throwables.java:156)
>         at 
> org.apache.flume.channel.file.FileChannel.start(FileChannel.java:182)
>         at 
> org.apache.flume.lifecycle.LifecycleSupervisor$MonitorRunnable.run(LifecycleSupervisor.java:228)
>         at 
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:441)
>         at 
> java.util.concurrent.FutureTask$Sync.innerRunAndReset(FutureTask.java:317)
>         at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:150)
> Config file I used (flume.conf):
> agent.channels = c1
> agent.sources = r1
> agent.sinks = k1
> #
> agent.channels.c1.type = FILE
> #
> agent.sources.r1.channels = c1
> agent.sources.r1.type = NETCAT
> agent.sources.r1.bind = 0.0.0.0
> agent.sources.r1.port = 41414
> #
> agent.sinks.k1.channel = c1
> agent.sinks.k1.type = LOGGER

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators: 
https://issues.apache.org/jira/secure/ContactAdministrators!default.jspa
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to