Hi,

I have a pool of grpc channels and quite sophisticated client-side load 
balancing on top of it. Some hosts are temporarily offline, so I want to 
monitor channel states in order to make calls only through a ready channel.

I use NotifyOnStateChange with a gpr_inf_future(GPR_CLOCK_REALTIME) deadline, 
and a dedicated CompletionQueue and processing thread for it.
The grpc version is 1.43.2.

However, there are several issues with this approach:
1. The more channels are monitored, the more state change notifications are 
missed. With a single thread and 2k+ channels, monitoring stalls. 
Increasing the number of queues and threads to 4 helps, but some notifications 
are still missed.
Is there another approach to monitor states of multiple channels? Is adding 
more queues and threads ok?

2. Sometimes it crashes inside grpc, probably on bulk channel removal. Any 
ideas where to start looking?

#0 
 
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>, 
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> > > >::__root (this=0xe0) at 
.../cxxsupp/libcxx/include/__tree:1081
#1 
 
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>, 
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> > > 
>::find<grpc_core::ConnectivityStateWatcherInterface*> (this=0xe0, 
__v=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at 
.../cxxsupp/libcxx/include/__tree:2468
#2 
 
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> >, 
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>, 
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> > > 
>::__erase_unique<grpc_core::ConnectivityStateWatcherInterface*> 
(this=this@entry=0xe0, __k=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at 
.../cxxsupp/libcxx/include/__tree:2444
#3  0x0000000002967997 in 
std::__y1::map<grpc_core::ConnectivityStateWatcherInterface*, 
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete>, 
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, 
std::__y1::allocator<std::__y1::pair<grpc_core::ConnectivityStateWatcherInterface*
 
const, std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface, 
grpc_core::OrphanableDelete> > > >::erase (this=<optimized out>, 
__k=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at .../cxxsupp/libcxx/include/map:1367
#4  grpc_core::ConnectivityStateTracker::RemoveWatcher (this=<optimized 
out>, watcher=0x7fa7bd7f5770) at 
.../grpc/src/core/lib/transport/connectivity_state.cc:149
#5  0x0000000002968452 in std::__y1::__function::__value_func<void 
()>::operator()() const (this=0x7fa7f2f5f470) at 
.../cxxsupp/libcxx/include/__functional/function.h:507
#6  std::__y1::function<void ()>::operator()() const (this=0x7fa7f2f5f470) 
at .../cxxsupp/libcxx/include/__functional/function.h:1191
#7  grpc_core::WorkSerializer::WorkSerializerImpl::DrainQueueOwned 
(this=this@entry=0x7fa80c9cc0a0) at 
.../grpc/src/core/lib/iomgr/work_serializer.cc:200
#8  0x00000000029682c9 in 
grpc_core::WorkSerializer::WorkSerializerImpl::Run(std::__y1::function<void 
()>, grpc_core::DebugLocation const&) (this=<optimized out>, 
this@entry=0x7fa80c9cc0a0, callback=..., location=...) at 
.../grpc/src/core/lib/iomgr/work_serializer.cc:93
#9  0x0000000002968706 in 
grpc_core::WorkSerializer::Run(std::__y1::function<void ()>, 
grpc_core::DebugLocation const&) (this=<optimized out>, callback=..., 
location=...) at .../grpc/src/core/lib/iomgr/work_serializer.cc:216
#10 0x0000000002ad6dbd in 
grpc_core::ClientChannel::SubchannelWrapper::WatcherWrapper::OnConnectivityStateChange
 
(this=<optimized out>) at 
.../grpc/src/core/ext/filters/client_channel/client_channel.cc:589
#11 0x0000000002a72aad in 
grpc_core::Subchannel::AsyncWatcherNotifierLocked::AsyncWatcherNotifierLocked(grpc_core::RefCountedPtr<grpc_core::Subchannel::ConnectivityStateWatcherInterface>,
 
grpc_connectivity_state, y_absl::lts_y_20211102::Status 
const&)::{lambda(void*, grpc_error*)#1}::operator()(void*, grpc_error*) 
const (this=<optimized out>, arg=0x7fa7dc390e20) at 
.../grpc/src/core/ext/filters/client_channel/subchannel.cc:368
#12 
grpc_core::Subchannel::AsyncWatcherNotifierLocked::AsyncWatcherNotifierLocked(grpc_core::RefCountedPtr<grpc_core::Subchannel::ConnectivityStateWatcherInterface>,
 
grpc_connectivity_state, y_absl::lts_y_20211102::Status 
const&)::{lambda(void*, grpc_error*)#1}::__invoke(void*, grpc_error*) 
(arg=0x7fa7dc390e20) at 
.../grpc/src/core/ext/filters/client_channel/subchannel.cc:368
#13 0x0000000002912699 in exec_ctx_run (closure=<optimized out>, error=0x0) 
at .../grpc/src/core/lib/iomgr/exec_ctx.cc:40
#14 grpc_core::ExecCtx::Flush (this=0x7fa93bdf40e0) at 
.../grpc/src/core/lib/iomgr/exec_ctx.cc:156
#15 0x0000000002945bb1 in run_some_timers () at 
.../grpc/src/core/lib/iomgr/timer_manager.cc:134
#16 timer_main_loop () at .../grpc/src/core/lib/iomgr/timer_manager.cc:237
#17 timer_thread (completed_thread_ptr=0x7fa93be00000) at 
.../grpc/src/core/lib/iomgr/timer_manager.cc:284
#18 0x000000000291820c in grpc_core::(anonymous 
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void 
(*)(void*), void*, bool*, grpc_core::Thread::Options 
const&)::{lambda(void*)#1}::operator()(void*) const (this=<optimized out>, 
v=<optimized out>) at .../grpc/src/core/lib/gprpp/thd_posix.cc:129
#19 grpc_core::(anonymous 
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void 
(*)(void*), void*, bool*, grpc_core::Thread::Options 
const&)::{lambda(void*)#1}::__invoke(void*) (v=<optimized out>) at 
.../grpc/src/core/lib/gprpp/thd_posix.cc:111
#20 0x00007fa9c0bdc6db in start_thread () from 
/lib/x86_64-linux-gnu/libpthread.so.0
#21 0x00007fa9c06eaa3f in clone () from /lib/x86_64-linux-gnu/libc.so.6

Best regards,
Sergey Gulyaevsky

-- 
You received this message because you are subscribed to the Google Groups 
"grpc.io" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/grpc-io/e085ad75-9cda-41bf-8c37-5baf94ecd30en%40googlegroups.com.

Reply via email to