[ 
https://issues.apache.org/jira/browse/MESOS-9216?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16606909#comment-16606909
 ] 

Till Toenshoff commented on MESOS-9216:
---------------------------------------

Also visible when running 
{noformat}
$ ./bin/mesos-tests.sh 
--gtest_filter="*SchedulerFailoverExecutorToFrameworkMessage*" 
--gtest_repeat=100 --gtest_break_on_failure --verbose
{noformat}

{noformat}
[ RUN      ] HttpFaultToleranceTest.SchedulerFailoverExecutorToFrameworkMessage
I0907 11:51:57.034297 474326400 cluster.cpp:173] Creating default 'local' 
authorizer
I0907 11:51:57.036867 175075328 master.cpp:413] Master 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd (lobomacpro4.fritz.box) started on 
192.168.178.20:56581
I0907 11:51:57.036911 175075328 master.cpp:416] Flags at startup: --acls="" 
--agent_ping_timeout="15secs" --agent_reregister_timeout="10mins" 
--allocation_interval="1secs" --allocator="hierarchical" 
--authenticate_agents="true" --authenticate_frameworks="false" 
--authenticate_http_frameworks="true" --authenticate_http_readonly="true" 
--authenticate_http_readwrite="true" --authentication_v0_timeout="15secs" 
--authenticators="crammd5" --authorizers="local" 
--credentials="/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/tqpX2K/credentials"
 --filter_gpu_resources="true" --framework_sorter="drf" --help="false" 
--hostname_lookup="true" --http_authenticators="basic" 
--http_framework_authenticators="basic" --initialize_driver_logging="true" 
--log_auto_initialize="true" --logbufsecs="0" --logging_level="INFO" 
--max_agent_ping_timeouts="5" --max_completed_frameworks="50" 
--max_completed_tasks_per_framework="1000" 
--max_unreachable_tasks_per_framework="1000" --memory_profiling="false" 
--min_allocatable_resources="cpus:0.01|mem:32" --port="5050" --quiet="false" 
--recovery_agent_removal_limit="100%" --registry="in_memory" 
--registry_fetch_timeout="1mins" --registry_gc_interval="15mins" 
--registry_max_agent_age="2weeks" --registry_max_agent_count="102400" 
--registry_store_timeout="100secs" --registry_strict="false" 
--require_agent_domain="false" --role_sorter="drf" --root_submissions="true" 
--version="false" --webui_dir="/usr/local/share/mesos/webui" 
--work_dir="/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/tqpX2K/master"
 --zk_session_timeout="10secs"
I0907 11:51:57.037212 175075328 master.cpp:467] Master allowing unauthenticated 
frameworks to register
I0907 11:51:57.037225 175075328 master.cpp:471] Master only allowing 
authenticated agents to register
I0907 11:51:57.037235 175075328 master.cpp:477] Master only allowing 
authenticated HTTP frameworks to register
I0907 11:51:57.037257 175075328 credentials.hpp:37] Loading credentials for 
authentication from 
'/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/tqpX2K/credentials'
I0907 11:51:57.037452 175075328 master.cpp:521] Using default 'crammd5' 
authenticator
I0907 11:51:57.037497 175075328 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-master-readonly'
I0907 11:51:57.037544 175075328 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-master-readwrite'
I0907 11:51:57.037612 175075328 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-master-scheduler'
I0907 11:51:57.037663 175075328 master.cpp:602] Authorization enabled
I0907 11:51:57.038981 179367936 master.cpp:2083] Elected as the leading master!
I0907 11:51:57.038993 179367936 master.cpp:1638] Recovering from registrar
I0907 11:51:57.039261 178294784 registrar.cpp:383] Successfully fetched the 
registry (0B) in 185856ns
I0907 11:51:57.039304 178294784 registrar.cpp:487] Applied 1 operations in 
17067ns; attempting to update the registry
I0907 11:51:57.039539 174538752 registrar.cpp:544] Successfully updated the 
registry in 217088ns
I0907 11:51:57.039577 174538752 registrar.cpp:416] Successfully recovered 
registrar
I0907 11:51:57.039719 177221632 master.cpp:1752] Recovered 0 agents from the 
registry (162B); allowing 10mins for agents to reregister
W0907 11:51:57.043033 474326400 process.cpp:2810] Attempted to spawn already 
running process [email protected]:56581
I0907 11:51:57.043262 474326400 cluster.cpp:485] Creating default 'local' 
authorizer
I0907 11:51:57.043828 175611904 slave.cpp:267] Mesos agent started on 
(124)@192.168.178.20:56581
I0907 11:51:57.043843 175611904 slave.cpp:268] Flags at startup: --acls="" 
--appc_simple_discovery_uri_prefix="http://" 
--appc_store_dir="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/store/appc"
 --authenticate_http_executors="true" --authenticate_http_readonly="true" 
--authenticate_http_readwrite="true" --authenticatee="crammd5" 
--authentication_backoff_factor="1secs" --authentication_timeout_max="1mins" 
--authentication_timeout_min="5secs" --authorizer="local" 
--container_disk_watch_interval="15secs" --containerizers="mesos" 
--credential="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/credential"
 --default_role="*" --disk_watch_interval="1mins" --docker="docker" 
--docker_kill_orphans="true" --docker_registry="https://registry-1.docker.io" 
--docker_remove_delay="6hrs" --docker_socket="/var/run/docker.sock" 
--docker_stop_timeout="0ns" 
--docker_store_dir="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/store/docker"
 --docker_volume_checkpoint_dir="/var/run/mesos/isolators/docker/volume" 
--enforce_container_disk_quota="false" --executor_registration_timeout="1mins" 
--executor_reregistration_timeout="2secs" 
--executor_shutdown_grace_period="5secs" 
--fetcher_cache_dir="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/fetch"
 --fetcher_cache_size="2GB" --fetcher_stall_timeout="1mins" 
--frameworks_home="" --gc_delay="1weeks" --gc_disk_headroom="0.1" 
--gc_non_executor_container_sandboxes="false" --help="false" 
--hostname_lookup="true" --http_command_executor="false" 
--http_credentials="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/http_credentials"
 --http_heartbeat_interval="30secs" --initialize_driver_logging="true" 
--isolation="posix/cpu,posix/mem" 
--jwt_secret_key="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/jwt_secret_key"
 --launcher="posix" 
--launcher_dir="/Users/till/Development/mesos-private/build/src" 
--logbufsecs="0" --logging_level="INFO" 
--max_completed_executors_per_framework="150" --memory_profiling="false" 
--network_cni_metrics="true" --oversubscribed_resources_interval="15secs" 
--port="5051" --qos_correction_interval_min="0ns" --quiet="false" 
--reconfiguration_policy="equal" --recover="reconnect" 
--recovery_timeout="15mins" --registration_backoff_factor="10ms" 
--resources="cpus:2;gpus:0;mem:1024;disk:1024;ports:[31000-32000]" 
--runtime_dir="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu"
 --sandbox_directory="/mnt/mesos/sandbox" --strict="true" --switch_user="true" 
--version="false" 
--work_dir="/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_gPLOMx"
 --zk_session_timeout="10secs"
I0907 11:51:57.044073 175611904 credentials.hpp:86] Loading credential for 
authentication from 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/credential'
I0907 11:51:57.044188 175611904 slave.cpp:300] Agent using credential for: 
test-principal
I0907 11:51:57.044214 175611904 credentials.hpp:37] Loading credentials for 
authentication from 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_ShN2uu/http_credentials'
I0907 11:51:57.044303 474326400 scheduler.cpp:189] Version: 1.8.0
I0907 11:51:57.044415 175611904 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-agent-executor'
I0907 11:51:57.044448 175611904 http.cpp:1058] Creating default 'jwt' HTTP 
authenticator for realm 'mesos-agent-executor'
I0907 11:51:57.044519 175611904 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-agent-readonly'
I0907 11:51:57.044585 175611904 http.cpp:1058] Creating default 'jwt' HTTP 
authenticator for realm 'mesos-agent-readonly'
I0907 11:51:57.044697 175611904 http.cpp:1037] Creating default 'basic' HTTP 
authenticator for realm 'mesos-agent-readwrite'
I0907 11:51:57.044725 175611904 http.cpp:1058] Creating default 'jwt' HTTP 
authenticator for realm 'mesos-agent-readwrite'
I0907 11:51:57.044831 175611904 disk_profile_adaptor.cpp:80] Creating default 
disk profile adaptor module
I0907 11:51:57.046610 175075328 scheduler.cpp:355] Using default 'basic' HTTP 
authenticatee
I0907 11:51:57.046883 174002176 scheduler.cpp:538] New master detected at 
master@192.168.178.20:56581
I0907 11:51:57.047374 175611904 slave.cpp:615] Agent resources: 
[{"name":"cpus","scalar":{"value":2.0},"type":"SCALAR"},{"name":"mem","scalar":{"value":1024.0},"type":"SCALAR"},{"name":"disk","scalar":{"value":1024.0},"type":"SCALAR"},{"name":"ports","ranges":{"range":[{"begin":31000,"end":32000}]},"type":"RANGES"}]
I0907 11:51:57.047502 175611904 slave.cpp:623] Agent attributes: [  ]
I0907 11:51:57.047514 175611904 slave.cpp:632] Agent hostname: 
lobomacpro4.fritz.box
I0907 11:51:57.047559 179367936 task_status_update_manager.cpp:181] Pausing 
sending task status updates
I0907 11:51:57.048003 176148480 state.cpp:66] Recovering state from 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_gPLOMx/meta'
I0907 11:51:57.048107 179367936 slave.cpp:6909] Finished recovering 
checkpointed state from 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_gPLOMx/meta',
 beginning agent recovery
I0907 11:51:57.048177 174538752 task_status_update_manager.cpp:207] Recovering 
task status update manager
I0907 11:51:57.048748 174002176 composing.cpp:339] Finished recovering all 
containerizers
I0907 11:51:57.048871 179904512 slave.cpp:7138] Recovering executors
I0907 11:51:57.048923 179904512 slave.cpp:7291] Finished recovery
I0907 11:51:57.049669 175075328 slave.cpp:1254] New master detected at 
master@192.168.178.20:56581
I0907 11:51:57.049692 176148480 task_status_update_manager.cpp:181] Pausing 
sending task status updates
I0907 11:51:57.049706 175075328 slave.cpp:1319] Detecting new master
I0907 11:51:57.049878 179904512 http.cpp:1177] HTTP POST for 
/master/api/v1/scheduler from 192.168.178.20:57070
I0907 11:51:57.049945 179904512 master.cpp:2502] Received subscription request 
for HTTP framework 'default'
I0907 11:51:57.049964 179904512 master.cpp:2155] Authorizing framework 
principal 'test-principal' to receive offers for roles '{ * }'
I0907 11:51:57.050127 179904512 master.cpp:2637] Subscribing framework 
'default' with checkpointing disabled and capabilities [ MULTI_ROLE, 
RESERVATION_REFINEMENT ]
I0907 11:51:57.050590 179904512 master.cpp:9883] Adding framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (default) with roles {  } suppressed
I0907 11:51:57.050808 175075328 hierarchical.cpp:306] Added framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.057564 178294784 slave.cpp:1346] Authenticating with master 
master@192.168.178.20:56581
I0907 11:51:57.057598 178294784 slave.cpp:1355] Using default CRAM-MD5 
authenticatee
I0907 11:51:57.057718 177221632 authenticatee.cpp:121] Creating new client SASL 
connection
I0907 11:51:57.057863 179367936 master.cpp:9653] Authenticating 
slave(124)@192.168.178.20:56581
I0907 11:51:57.058058 175075328 authenticator.cpp:98] Creating new server SASL 
connection
I0907 11:51:57.058168 174002176 authenticatee.cpp:213] Received SASL 
authentication mechanisms: CRAM-MD5
I0907 11:51:57.058195 174002176 authenticatee.cpp:239] Attempting to 
authenticate with mechanism 'CRAM-MD5'
I0907 11:51:57.058254 178831360 authenticator.cpp:204] Received SASL 
authentication start
I0907 11:51:57.058323 178831360 authenticator.cpp:326] Authentication requires 
more steps
I0907 11:51:57.058378 175611904 authenticatee.cpp:259] Received SASL 
authentication step
I0907 11:51:57.058435 174538752 authenticator.cpp:232] Received SASL 
authentication step
I0907 11:51:57.058470 174538752 authenticator.cpp:318] Authentication success
I0907 11:51:57.058557 177758208 authenticatee.cpp:299] Authentication success
I0907 11:51:57.058637 176685056 master.cpp:9685] Successfully authenticated 
principal 'test-principal' at slave(124)@192.168.178.20:56581
I0907 11:51:57.058761 176148480 slave.cpp:1446] Successfully authenticated with 
master master@192.168.178.20:56581
I0907 11:51:57.058995 178831360 master.cpp:6605] Received register agent 
message from slave(124)@192.168.178.20:56581 (lobomacpro4.fritz.box)
I0907 11:51:57.059082 178831360 master.cpp:3964] Authorizing agent providing 
resources 'cpus:2; mem:1024; disk:1024; ports:[31000-32000]' with principal 
'test-principal'
I0907 11:51:57.061252 179367936 master.cpp:6787] Registering agent at 
slave(124)@192.168.178.20:56581 (lobomacpro4.fritz.box) with id 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0
I0907 11:51:57.061463 178294784 registrar.cpp:487] Applied 1 operations in 
94578ns; attempting to update the registry
I0907 11:51:57.061697 179904512 registrar.cpp:544] Successfully updated the 
registry in 190976ns
I0907 11:51:57.061902 176148480 master.cpp:6880] Registered agent 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 at slave(124)@192.168.178.20:56581 
(lobomacpro4.fritz.box) with cpus:2; mem:1024; disk:1024; ports:[31000-32000]
I0907 11:51:57.061969 175611904 slave.cpp:1479] Registered with master 
master@192.168.178.20:56581; given agent ID 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0
I0907 11:51:57.062037 174538752 hierarchical.cpp:601] Added agent 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 (lobomacpro4.fritz.box) with cpus:2; 
mem:1024; disk:1024; ports:[31000-32000] (allocated: {})
I0907 11:51:57.062047 177758208 task_status_update_manager.cpp:188] Resuming 
sending task status updates
I0907 11:51:57.062598 179367936 master.cpp:9468] Sending offers [ 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-O0 ] to framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (default)
I0907 11:51:57.062849 175611904 slave.cpp:1548] Forwarding agent update 
{"operations":{},"resource_version_uuid":{"value":"OuOcBTYOSlqWBHw4ae+4rQ=="},"slave_id":{"value":"f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0"},"update_oversubscribed_resources":false}
I0907 11:51:57.063163 178831360 master.cpp:7939] Ignoring update on agent 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 at slave(124)@192.168.178.20:56581 
(lobomacpro4.fritz.box) as it reports no changes
I0907 11:51:57.065382 177758208 http.cpp:1177] HTTP POST for 
/master/api/v1/scheduler from 192.168.178.20:57071
I0907 11:51:57.065758 177758208 master.cpp:11462] Removing offer 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-O0
I0907 11:51:57.065927 177758208 master.cpp:4467] Processing ACCEPT call for 
offers: [ f62cf074-90f4-41a4-bda1-354ee7f6f7bd-O0 ] on agent 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 at slave(124)@192.168.178.20:56581 
(lobomacpro4.fritz.box) for framework f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 
(default)
I0907 11:51:57.065968 177758208 master.cpp:3541] Authorizing framework 
principal 'test-principal' to launch task 78999c25-de49-4e96-80bf-988d957a0302
W0907 11:51:57.066761 175611904 validation.cpp:1444] Executor 'default' for 
task '78999c25-de49-4e96-80bf-988d957a0302' uses less CPUs (None) than the 
minimum required (0.01). Please update your executor, as this will be mandatory 
in future releases.
W0907 11:51:57.066781 175611904 validation.cpp:1456] Executor 'default' for 
task '78999c25-de49-4e96-80bf-988d957a0302' uses less memory (None) than the 
minimum required (32MB). Please update your executor, as this will be mandatory 
in future releases.
I0907 11:51:57.066956 175611904 master.cpp:12209] Adding task 
78999c25-de49-4e96-80bf-988d957a0302 with resources cpus(allocated: *):2; 
mem(allocated: *):1024; disk(allocated: *):1024; ports(allocated: 
*):[31000-32000] on agent f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 at 
slave(124)@192.168.178.20:56581 (lobomacpro4.fritz.box)
I0907 11:51:57.067075 175611904 master.cpp:5439] Launching task 
78999c25-de49-4e96-80bf-988d957a0302 of framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (default) with resources 
[{"allocation_info":{"role":"*"},"name":"cpus","scalar":{"value":2.0},"type":"SCALAR"},{"allocation_info":{"role":"*"},"name":"mem","scalar":{"value":1024.0},"type":"SCALAR"},{"allocation_info":{"role":"*"},"name":"disk","scalar":{"value":1024.0},"type":"SCALAR"},{"allocation_info":{"role":"*"},"name":"ports","ranges":{"range":[{"begin":31000,"end":32000}]},"type":"RANGES"}]
 on agent f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0 at 
slave(124)@192.168.178.20:56581 (lobomacpro4.fritz.box) on  new executor
I0907 11:51:57.067495 177221632 slave.cpp:2014] Got assigned task 
'78999c25-de49-4e96-80bf-988d957a0302' for framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.067931 177221632 slave.cpp:2388] Authorizing task 
'78999c25-de49-4e96-80bf-988d957a0302' for framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.067965 177221632 slave.cpp:8466] Authorizing framework principal 
'test-principal' to launch task 78999c25-de49-4e96-80bf-988d957a0302
I0907 11:51:57.068640 177221632 slave.cpp:2831] Launching task 
'78999c25-de49-4e96-80bf-988d957a0302' for framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.068683 177221632 paths.cpp:752] Creating sandbox 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_gPLOMx/slaves/f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0/frameworks/f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000/executors/default/runs/c563fbf6-8d25-4592-b9fe-a59ed3b400ef'
 for user 'till'
I0907 11:51:57.069314 177221632 slave.cpp:8994] Launching executor 'default' of 
framework f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 with resources [] in work 
directory 
'/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_gPLOMx/slaves/f62cf074-90f4-41a4-bda1-354ee7f6f7bd-S0/frameworks/f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000/executors/default/runs/c563fbf6-8d25-4592-b9fe-a59ed3b400ef'
I0907 11:51:57.069581 177221632 slave.cpp:3028] Queued task 
'78999c25-de49-4e96-80bf-988d957a0302' for executor 'default' of framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.069715 177221632 slave.cpp:3509] Launching container 
c563fbf6-8d25-4592-b9fe-a59ed3b400ef for executor 'default' of framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.070297 176148480 executor.cpp:201] Version: 1.8.0
W0907 11:51:57.070315 176148480 process.cpp:2810] Attempted to spawn already 
running process [email protected]:56581
I0907 11:51:57.072620 175075328 http.cpp:1177] HTTP POST for 
/slave(124)/api/v1/executor from 192.168.178.20:57072
I0907 11:51:57.072690 175075328 slave.cpp:4607] Received Subscribe request for 
HTTP executor 'default' of framework f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.073210 176148480 slave.cpp:3241] Sending queued task 
'78999c25-de49-4e96-80bf-988d957a0302' to executor 'default' of framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (via HTTP)
I0907 11:51:57.074273 474326400 scheduler.cpp:189] Version: 1.8.0
I0907 11:51:57.075779 176685056 scheduler.cpp:355] Using default 'basic' HTTP 
authenticatee
I0907 11:51:57.075950 179367936 scheduler.cpp:538] New master detected at 
master@192.168.178.20:56581
I0907 11:51:57.077976 177221632 http.cpp:1177] HTTP POST for 
/master/api/v1/scheduler from 192.168.178.20:57074
I0907 11:51:57.078054 177221632 master.cpp:2502] Received subscription request 
for HTTP framework 'default'
I0907 11:51:57.078078 177221632 master.cpp:2155] Authorizing framework 
principal 'test-principal' to receive offers for roles '{ * }'
I0907 11:51:57.078271 179904512 master.cpp:2637] Subscribing framework 
'default' with checkpointing disabled and capabilities [ MULTI_ROLE, 
RESERVATION_REFINEMENT ]
I0907 11:51:57.078297 179904512 master.cpp:7760] Updating framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (default) with roles {  } suppressed
I0907 11:51:57.078600 179904512 master.cpp:1226] Ignoring disconnection for 
framework f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000 (default) as it has already 
reconnected
I0907 11:51:57.078716 176148480 slave.cpp:4067] Updating info for framework 
f62cf074-90f4-41a4-bda1-354ee7f6f7bd-0000
I0907 11:51:57.078842 174538752 task_status_update_manager.cpp:188] Resuming 
sending task status updates
I0907 11:51:57.078866 174002176 scheduler.cpp:512] Re-detecting master
I0907 11:51:57.079116 174002176 scheduler.cpp:538] New master detected at 
master@192.168.178.20:56581
../../src/tests/http_fault_tolerance_tests.cpp:647: Failure
Failed to wait 15secs for error
*** Aborted at 1536313932 (unix time) try "date -d @1536313932" if you are 
using GNU date ***
PC: @        0x10b314ded testing::UnitTest::AddTestPartResult()
*** SIGSEGV (@0x0) received by PID 22533 (TID 0x11c45a580) stack trace: ***
    @     0x7fff72af7b3d _sigtramp
    @        0x118b84a00 (unknown)
    @        0x10b3145e7 testing::internal::AssertHelper::operator=()
    @        0x109b052ed 
mesos::internal::tests::HttpFaultToleranceTest_SchedulerFailoverExecutorToFrameworkMessage_Test::TestBody()
    @        0x10b387c4e 
testing::internal::HandleSehExceptionsInMethodIfSupported<>()
    @        0x10b32ca9b 
testing::internal::HandleExceptionsInMethodIfSupported<>()
    @        0x10b32c9c6 testing::Test::Run()
    @        0x10b32e79d testing::TestInfo::Run()
    @        0x10b32fddc testing::TestCase::Run()
    @        0x10b3402cc testing::internal::UnitTestImpl::RunAllTests()
    @        0x10b38a79e 
testing::internal::HandleSehExceptionsInMethodIfSupported<>()
    @        0x10b33fceb 
testing::internal::HandleExceptionsInMethodIfSupported<>()
    @        0x10b33fbac testing::UnitTest::Run()
    @        0x109c782f1 RUN_ALL_TESTS()
    @        0x109c73c9c main
    @     0x7fff7290e0a1 start
Segmentation fault: 11
{noformat}

> SchedulerTest.SchedulerFailover is flaky and times out.
> -------------------------------------------------------
>
>                 Key: MESOS-9216
>                 URL: https://issues.apache.org/jira/browse/MESOS-9216
>             Project: Mesos
>          Issue Type: Bug
>          Components: scheduler api, test
>    Affects Versions: 1.8.0
>         Environment: debian-9, centos-6, ubuntu-16.04, ..., macOS
>            Reporter: Till Toenshoff
>            Priority: Major
>              Labels: flaky, test
>
> Easy to reproduce for me on macOS but also observed on the ASF CI;
> {noformat}
> $ ./bin/mesos-tests.sh --gtest_filter="*SchedulerTest.SchedulerFailover*" 
> --gtest_repeat=100 --gtest_break_on_failure --verbose
> {noformat}
> {noformat}
> [...]
> Repeating all tests (iteration 61) . . .
> [...]
> [ RUN      ] ContentType/SchedulerTest.SchedulerFailover/1
> I0907 11:31:42.409766 311620992 cluster.cpp:173] Creating default 'local' 
> authorizer
> I0907 11:31:42.411957 110624768 master.cpp:413] Master 
> 4450e893-595f-48c2-9ea2-31325fda2c76 (lobomacpro4.fritz.box) started on 
> 192.168.178.20:54546
> I0907 11:31:42.411975 110624768 master.cpp:416] Flags at startup: --acls="" 
> --agent_ping_timeout="15secs" --agent_reregister_timeout="10mins" 
> --allocation_interval="1secs" --allocator="hierarchical" 
> --authenticate_agents="true" --authenticate_frameworks="true" 
> --authenticate_http_frameworks="true" --authenticate_http_readonly="true" 
> --authenticate_http_readwrite="true" --authentication_v0_timeout="15secs" 
> --authenticators="crammd5" --authorizers="local" 
> --credentials="/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/aVGDNy/credentials"
>  --filter_gpu_resources="true" --framework_sorter="drf" --help="false" 
> --hostname_lookup="true" --http_authenticators="basic" 
> --http_framework_authenticators="basic" --initialize_driver_logging="true" 
> --log_auto_initialize="true" --logbufsecs="0" --logging_level="INFO" 
> --max_agent_ping_timeouts="5" --max_completed_frameworks="50" 
> --max_completed_tasks_per_framework="1000" 
> --max_unreachable_tasks_per_framework="1000" --memory_profiling="false" 
> --min_allocatable_resources="cpus:0.01|mem:32" --port="5050" --quiet="false" 
> --recovery_agent_removal_limit="100%" --registry="in_memory" 
> --registry_fetch_timeout="1mins" --registry_gc_interval="15mins" 
> --registry_max_agent_age="2weeks" --registry_max_agent_count="102400" 
> --registry_store_timeout="100secs" --registry_strict="false" 
> --require_agent_domain="false" --role_sorter="drf" --root_submissions="true" 
> --version="false" --webui_dir="/usr/local/share/mesos/webui" 
> --work_dir="/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/aVGDNy/master"
>  --zk_session_timeout="10secs"
> I0907 11:31:42.412191 110624768 master.cpp:465] Master only allowing 
> authenticated frameworks to register
> I0907 11:31:42.412202 110624768 master.cpp:471] Master only allowing 
> authenticated agents to register
> I0907 11:31:42.412210 110624768 master.cpp:477] Master only allowing 
> authenticated HTTP frameworks to register
> I0907 11:31:42.412219 110624768 credentials.hpp:37] Loading credentials for 
> authentication from 
> '/private/var/folders/66/mgr662nx7t90lspb7wjg8ctr0000gn/T/aVGDNy/credentials'
> I0907 11:31:42.412322 110624768 master.cpp:521] Using default 'crammd5' 
> authenticator
> I0907 11:31:42.412355 110624768 http.cpp:1037] Creating default 'basic' HTTP 
> authenticator for realm 'mesos-master-readonly'
> I0907 11:31:42.412390 110624768 http.cpp:1037] Creating default 'basic' HTTP 
> authenticator for realm 'mesos-master-readwrite'
> I0907 11:31:42.412417 110624768 http.cpp:1037] Creating default 'basic' HTTP 
> authenticator for realm 'mesos-master-scheduler'
> I0907 11:31:42.412439 110624768 master.cpp:602] Authorization enabled
> I0907 11:31:42.413738 110624768 master.cpp:2083] Elected as the leading 
> master!
> I0907 11:31:42.413750 110624768 master.cpp:1638] Recovering from registrar
> I0907 11:31:42.413913 109551616 registrar.cpp:383] Successfully fetched the 
> registry (0B) in 128us
> I0907 11:31:42.413962 109551616 registrar.cpp:487] Applied 1 operations in 
> 19755ns; attempting to update the registry
> I0907 11:31:42.414093 109551616 registrar.cpp:544] Successfully updated the 
> registry in 107008ns
> I0907 11:31:42.414126 109551616 registrar.cpp:416] Successfully recovered 
> registrar
> I0907 11:31:42.414232 110624768 master.cpp:1752] Recovered 0 agents from the 
> registry (162B); allowing 10mins for agents to reregister
> I0907 11:31:42.414614 311620992 scheduler.cpp:189] Version: 1.8.0
> I0907 11:31:42.415856 113844224 scheduler.cpp:355] Using default 'basic' HTTP 
> authenticatee
> I0907 11:31:42.415974 112771072 scheduler.cpp:538] New master detected at 
> master@192.168.178.20:54546
> I0907 11:31:42.417650 113844224 http.cpp:1177] HTTP POST for 
> /master/api/v1/scheduler from 192.168.178.20:55273
> I0907 11:31:42.417768 113844224 master.cpp:2502] Received subscription 
> request for HTTP framework 'default'
> I0907 11:31:42.417788 113844224 master.cpp:2155] Authorizing framework 
> principal 'test-principal' to receive offers for roles '{ * }'
> I0907 11:31:42.417914 113844224 master.cpp:2637] Subscribing framework 
> 'default' with checkpointing disabled and capabilities [ MULTI_ROLE, 
> RESERVATION_REFINEMENT ]
> I0907 11:31:42.418388 113844224 master.cpp:9883] Adding framework 
> 4450e893-595f-48c2-9ea2-31325fda2c76-0000 (default) with roles {  } suppressed
> I0907 11:31:42.418522 110624768 hierarchical.cpp:306] Added framework 
> 4450e893-595f-48c2-9ea2-31325fda2c76-0000
> I0907 11:31:42.419454 311620992 scheduler.cpp:189] Version: 1.8.0
> I0907 11:31:42.420704 110088192 scheduler.cpp:355] Using default 'basic' HTTP 
> authenticatee
> I0907 11:31:42.420807 111161344 scheduler.cpp:538] New master detected at 
> master@192.168.178.20:54546
> I0907 11:31:42.422297 113844224 http.cpp:1177] HTTP POST for 
> /master/api/v1/scheduler from 192.168.178.20:55275
> I0907 11:31:42.422423 113844224 master.cpp:2502] Received subscription 
> request for HTTP framework 'default'
> I0907 11:31:42.422446 113844224 master.cpp:2155] Authorizing framework 
> principal 'test-principal' to receive offers for roles '{ * }'
> I0907 11:31:42.422591 113844224 master.cpp:2637] Subscribing framework 
> 'default' with checkpointing disabled and capabilities [ MULTI_ROLE, 
> RESERVATION_REFINEMENT ]
> I0907 11:31:42.422608 113844224 master.cpp:7760] Updating framework 
> 4450e893-595f-48c2-9ea2-31325fda2c76-0000 (default) with roles {  } suppressed
> I0907 11:31:42.422904 111161344 master.cpp:1226] Ignoring disconnection for 
> framework 4450e893-595f-48c2-9ea2-31325fda2c76-0000 (default) as it has 
> already reconnected
> I0907 11:31:42.423132 113844224 scheduler.cpp:512] Re-detecting master
> I0907 11:31:42.423475 113844224 scheduler.cpp:538] New master detected at 
> master@192.168.178.20:54546
> ../../src/tests/scheduler_tests.cpp:251: Failure
> Failed to wait 15secs for error
> *** Aborted at 1536312717 (unix time) try "date -d @1536312717" if you are 
> using GNU date ***
> PC: @        0x10d891ded testing::UnitTest::AddTestPartResult()
> *** SIGSEGV (@0x0) received by PID 16639 (TID 0x11292f580) stack trace: ***
>     @     0x7fff72af7b3d _sigtramp
>     @        0x1108a1a00 (unknown)
>     @        0x10d8915e7 testing::internal::AssertHelper::operator=()
>     @        0x10cf83948 
> mesos::internal::tests::SchedulerTest_SchedulerFailover_Test::TestBody()
>     @        0x10d904c4e 
> testing::internal::HandleSehExceptionsInMethodIfSupported<>()
>     @        0x10d8a9a9b 
> testing::internal::HandleExceptionsInMethodIfSupported<>()
>     @        0x10d8a99c6 testing::Test::Run()
>     @        0x10d8ab79d testing::TestInfo::Run()
>     @        0x10d8acddc testing::TestCase::Run()
>     @        0x10d8bd2cc testing::internal::UnitTestImpl::RunAllTests()
>     @        0x10d90779e 
> testing::internal::HandleSehExceptionsInMethodIfSupported<>()
>     @        0x10d8bcceb 
> testing::internal::HandleExceptionsInMethodIfSupported<>()
>     @        0x10d8bcbac testing::UnitTest::Run()
>     @        0x10c1f52f1 RUN_ALL_TESTS()
>     @        0x10c1f0c9c main
>     @     0x7fff7290e0a1 start
> Segmentation fault: 11
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to