[ 
https://issues.apache.org/jira/browse/MESOS-6047?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15427940#comment-15427940
 ] 

Sankar Mittapally commented on MESOS-6047:
------------------------------------------

root@ip-10-0-0-24:/var/log/mesos# vi mesos-slave.ERROR
Log file created at: 2016/08/16 10:44:56
Running on machine: ip-10-0-0-24
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
E0816 10:44:56.592535 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected
E0816 10:46:11.616282 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected
E0817 04:31:50.218361 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected
E0817 04:31:50.733708 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected
E0817 04:31:51.552975 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected
E0817 04:31:52.250030 18440 process.cpp:2105] Failed to shutdown socket with fd 
12: Transport endpoint is not connected



Log file created at: 2016/08/16 09:55:13
Running on machine: ip-10-0-0-24
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
I0816 09:55:13.757421 18421 logging.cpp:194] INFO level logging started!
I0816 09:55:13.760455 18421 containerizer.cpp:196] Using isolation: 
posix/cpu,posix/mem,filesystem/posix,network/cni
I0816 09:55:13.762310 18421 linux_launcher.cpp:101] Using 
/sys/fs/cgroup/freezer as the freezer hierarchy for the Linux launcher
I0816 09:55:13.764400 18421 main.cpp:434] Starting Mesos agent
I0816 09:55:13.765594 18437 slave.cpp:198] Agent started on 1)@10.0.0.24:5051
I0816 09:55:13.765614 18437 slave.cpp:199] Flags at startup: 
--appc_simple_discovery_uri_prefix="http://" 
--appc_store_dir="/tmp/mesos/store/appc" --authenticate_http_readonly="false" 
--authenticate_http_readwrite="false" --authenticatee="crammd5" 
--authentication_backoff_factor="1secs" --authorizer="local" 
--cgroups_cpu_enable_pids_and_tids_count="false" --cgroups_enable_cfs="false" 
--cgroups_hierarchy="/sys/fs/cgroup" --cgroups_limit_swap="false" 
--cgroups_root="mesos" --container_disk_watch_interval="15secs" 
--containerizers="mesos" --default_role="*" --disk_watch_interval="1mins" 
--docker="docker" --docker_kill_orphans="true" 
--docker_registry="https://registry-1.docker.io" --docker_remove_delay="6hrs" 
--docker_socket="/var/run/docker.sock" --docker_stop_timeout="0ns" 
--docker_store_dir="/tmp/mesos/store/docker" 
--docker_volume_checkpoint_dir="/var/run/mesos/isolators/docker/volume" 
--enforce_container_disk_quota="false" --executor_registration_timeout="1mins" 
--executor_shutdown_grace_period="5secs" --fetcher_cache_dir="/tmp/mesos/fetch" 
--fetcher_cache_size="2GB" --frameworks_home="" --gc_delay="1weeks" 
--gc_disk_headroom="0.1" --hadoop_home="" --help="false" 
--hostname_lookup="true" --http_authenticators="basic" 
--http_command_executor="false" --image_provisioner_backend="copy" 
--initialize_driver_logging="true" --isolation="posix/cpu,posix/mem" 
--launcher_dir="/usr/libexec/mesos" --log_dir="/var/log/mesos" --logbufsecs="0" 
--logging_level="INFO" --master="zk://10.0.0.197:2181,10.0.0.203:2181/mesos" 
--oversubscribed_resources_interval="15secs" --perf_duration="10secs" 
--perf_interval="1mins" --port="5051" --qos_correction_interval_min="0ns" 
--quiet="false" --recover="reconnect" --recovery_timeout="15mins" 
--registration_backoff_factor="1secs" --revocable_cpu_low_priority="true" 
--sandbox_directory="/mnt/mesos/sandbox" --strict="true" --switch_user="true" 
--systemd_enable_support="true" 
--systemd_runtime_directory="/run/systemd/system" --version="false" 
--work_dir="/var/lib/mesos"
I0816 09:55:13.766063 18437 slave.cpp:519] Agent resources: cpus(*):1; 
mem(*):1000; disk(*):3962; ports(*):[31000-32000]
I0816 09:55:13.766104 18437 slave.cpp:527] Agent attributes: [  ]
I0816 09:55:13.766113 18437 slave.cpp:532] Agent hostname: server1
I0816 09:55:13.768714 18437 state.cpp:57] Recovering state from 
'/var/lib/mesos/meta'
I0816 09:55:13.768750 18437 state.cpp:697] No checkpointed resources found at 
'/var/lib/mesos/meta/resources/resources.info'
I0816 09:55:13.769193 18437 status_update_manager.cpp:200] Recovering status 
update manager
I0816 09:55:13.769718 18434 containerizer.cpp:522] Recovering containerizer
I0816 09:55:13.770333 18434 provisioner.cpp:253] Provisioner recovery complete
I0816 09:55:13.770530 18438 slave.cpp:4782] Finished recovery
I0816 09:55:13.894096 18435 group.cpp:349] Group process 
(group(1)@10.0.0.24:5051) connected to ZooKeeper
I0816 09:55:13.894140 18435 group.cpp:837] Syncing group operations: queue size 
(joins, cancels, datas) = (0, 0, 0)
I0816 09:55:13.894150 18435 group.cpp:427] Trying to create path '/mesos' in 
ZooKeeper
I0816 09:56:11.984453 18439 detector.cpp:152] Detected a new leader: (id='3')
I0816 09:56:11.984571 18439 group.cpp:706] Trying to get 
'/mesos/json.info_0000000003' in ZooKeeper
I0816 09:56:11.984571 18439 group.cpp:706] Trying to get 
'/mesos/json.info_0000000003' in ZooKeeper
I0816 09:56:11.989174 18439 zookeeper.cpp:259] A new leading master 
([email protected]:5050) is detected
I0816 09:56:11.989357 18439 slave.cpp:895] New master detected at 
[email protected]:5050
I0816 09:56:11.989508 18439 slave.cpp:916] No credentials provided. Attempting 
to register without authentication
I0816 09:56:11.989648 18439 slave.cpp:927] Detecting new master
I0816 09:56:11.989488 18438 status_update_manager.cpp:174] Pausing sending 
status updates


> Mesos Slave is unable to register with Master
> ---------------------------------------------
>
>                 Key: MESOS-6047
>                 URL: https://issues.apache.org/jira/browse/MESOS-6047
>             Project: Mesos
>          Issue Type: Bug
>          Components: master, slave
>    Affects Versions: 1.0.0
>         Environment: Prod
>            Reporter: Sankar Mittapally
>
> Hi,
>  I am new to Mesos. I am setting up a Mesos cluster for my company for the 
> first time; my setup is 3 masters and one slave (for now). If I stop the 
> master that is currently leading, the slave is able to detect the new master 
> but is unable to register with it. The following logs are from the slave. 
> Please provide me a solution to fix this. I have tried all the possible 
> approaches that are available on the site.
> I0816 06:04:10.008586  7623 group.cpp:706] Trying to get 
> '/mesos/json.info_0000004749' in ZooKeeper
> I0816 06:04:10.009474  7623 zookeeper.cpp:259] A new leading master 
> ([email protected]:5050) is detected
> I0816 06:04:10.009521  7623 slave.cpp:895] New master detected at 
> [email protected]:5050
> I0816 06:04:10.009531  7623 slave.cpp:916] No credentials provided. 
> Attempting to register without authentication
> I0816 06:04:10.009549  7623 slave.cpp:927] Detecting new master
> I0816 06:04:10.009567  7623 status_update_manager.cpp:174] Pausing sending 
> status updates
> I0816 06:04:22.006172  7627 detector.cpp:152] Detected a new leader: 
> (id='4750')
> I0816 06:04:22.006250  7627 group.cpp:706] Trying to get 
> '/mesos/json.info_0000004750' in ZooKeeper
> I0816 06:04:22.007297  7627 zookeeper.cpp:259] A new leading master 
> ([email protected]:5050) is detected
> I0816 06:04:22.007355  7627 slave.cpp:895] New master detected at 
> [email protected]:5050
> I0816 06:04:22.007365  7627 slave.cpp:916] No credentials provided. 
> Attempting to register without authentication
> I0816 06:04:22.007382  7627 slave.cpp:927] Detecting new master
> I0816 06:04:22.007400  7627 status_update_manager.cpp:174] Pausing sending 
> status updates
> I0816 06:04:52.323489  7625 slave.cpp:4591] Current disk usage 25.94%. Max 
> allowed age: 4.484391992870625days
> I0816 06:05:22.147099  7626 slave.cpp:3732] [email protected]:5050 exited
> W0816 06:05:22.147161  7626 slave.cpp:3737] Master disconnected! Waiting for 
> a new master to be elected
> I0816 06:05:30.005616  7624 detector.cpp:152] Detected a new leader: 
> (id='4751')
> I0816 06:05:30.005691  7624 group.cpp:706] Trying to get 
> '/mesos/json.info_0000004751' in ZooKeeper
> I0816 06:05:30.006444  7624 zookeeper.cpp:259] A new leading master 
> ([email protected]:5050) is detected
> I0816 06:05:30.006499  7624 slave.cpp:895] New master detected at 
> [email protected]:5050
> I0816 06:05:30.006510  7624 slave.cpp:916] No credentials provided. 
> Attempting to register without authentication
> I0816 06:05:30.006527  7624 slave.cpp:927] Detecting new master
> I0816 06:05:30.006544  7624 status_update_manager.cpp:174] Pausing sending 
> status updates
> I0816 06:05:30.132377  7623 slave.cpp:3732] [email protected]:5050 exited
> W0816 06:05:30.132426  7623 slave.cpp:3737] Master disconnected! Waiting for 
> a new master to be elected
> I0816 06:05:42.007211  7625 detector.cpp:152] Detected a new leader: 
> (id='4752')
> I0816 06:05:42.007285  7625 group.cpp:706] Trying to get 
> '/mesos/json.info_0000004752' in ZooKeeper
> I0816 06:05:42.008180  7625 zookeeper.cpp:259] A new leading master 
> ([email protected]:5050) is detected
> I0816 06:05:42.008229  7625 slave.cpp:895] New master detected at 
> [email protected]:5050
> I0816 06:05:42.008239  7625 slave.cpp:916] No credentials provided. 
> Attempting to register without authentication
> I0816 06:05:42.008257  7625 slave.cpp:927] Detecting new master
> I0816 06:05:42.008275  7625 status_update_manager.cpp:174] Pausing sending 
> status updates
> ^C
> FYI...
> All the servers are in same VLAN and there is no blocker for communication.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to