Hi,
I have a pool of grpc channels and quite sophisticated client-side load
balancing on top of it. Some hosts are temporarily offline, so I want to
monitor channel states to call through a ready channel.
I use NotifyOnStateChange with a gpr_inf_future(GPR_CLOCK_REALTIME) deadline,
and a dedicated CompletionQueue and processing thread for it.
The grpc version is 1.43.2.
However, there are several issues with this approach:
1. The more channels are monitored, the more state change notifications are
missed. With a single thread and 2k+ channels, monitoring stalls.
Increasing the number of queues and threads to 4 helps, but some notifications
are still missed.
Is there another approach to monitor states of multiple channels? Is adding
more queues and threads ok?
2. Sometimes it crashes inside grpc, probably on bulk channel removal. Any
ideas where to start looking?
#0
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>,
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> > > >::__root (this=0xe0) at
.../cxxsupp/libcxx/include/__tree:1081
#1
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>,
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> > >
>::find<grpc_core::ConnectivityStateWatcherInterface*> (this=0xe0,
__v=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at
.../cxxsupp/libcxx/include/__tree:2468
#2
std::__y1::__tree<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::__map_value_compare<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> >,
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>, true>,
std::__y1::allocator<std::__y1::__value_type<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> > >
>::__erase_unique<grpc_core::ConnectivityStateWatcherInterface*>
(this=this@entry=0xe0, __k=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at
.../cxxsupp/libcxx/include/__tree:2444
#3 0x0000000002967997 in
std::__y1::map<grpc_core::ConnectivityStateWatcherInterface*,
std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete>,
std::__y1::less<grpc_core::ConnectivityStateWatcherInterface*>,
std::__y1::allocator<std::__y1::pair<grpc_core::ConnectivityStateWatcherInterface*
const, std::__y1::unique_ptr<grpc_core::ConnectivityStateWatcherInterface,
grpc_core::OrphanableDelete> > > >::erase (this=<optimized out>,
__k=@0x7fa93bdf3f88: 0x7fa7bd7f5770) at .../cxxsupp/libcxx/include/map:1367
#4 grpc_core::ConnectivityStateTracker::RemoveWatcher (this=<optimized
out>, watcher=0x7fa7bd7f5770) at
.../grpc/src/core/lib/transport/connectivity_state.cc:149
#5 0x0000000002968452 in std::__y1::__function::__value_func<void
()>::operator()() const (this=0x7fa7f2f5f470) at
.../cxxsupp/libcxx/include/__functional/function.h:507
#6 std::__y1::function<void ()>::operator()() const (this=0x7fa7f2f5f470)
at .../cxxsupp/libcxx/include/__functional/function.h:1191
#7 grpc_core::WorkSerializer::WorkSerializerImpl::DrainQueueOwned
(this=this@entry=0x7fa80c9cc0a0) at
.../grpc/src/core/lib/iomgr/work_serializer.cc:200
#8 0x00000000029682c9 in
grpc_core::WorkSerializer::WorkSerializerImpl::Run(std::__y1::function<void
()>, grpc_core::DebugLocation const&) (this=<optimized out>,
this@entry=0x7fa80c9cc0a0, callback=..., location=...) at
.../grpc/src/core/lib/iomgr/work_serializer.cc:93
#9 0x0000000002968706 in
grpc_core::WorkSerializer::Run(std::__y1::function<void ()>,
grpc_core::DebugLocation const&) (this=<optimized out>, callback=...,
location=...) at .../grpc/src/core/lib/iomgr/work_serializer.cc:216
#10 0x0000000002ad6dbd in
grpc_core::ClientChannel::SubchannelWrapper::WatcherWrapper::OnConnectivityStateChange
(this=<optimized out>) at
.../grpc/src/core/ext/filters/client_channel/client_channel.cc:589
#11 0x0000000002a72aad in
grpc_core::Subchannel::AsyncWatcherNotifierLocked::AsyncWatcherNotifierLocked(grpc_core::RefCountedPtr<grpc_core::Subchannel::ConnectivityStateWatcherInterface>,
grpc_connectivity_state, y_absl::lts_y_20211102::Status
const&)::{lambda(void*, grpc_error*)#1}::operator()(void*, grpc_error*)
const (this=<optimized out>, arg=0x7fa7dc390e20) at
.../grpc/src/core/ext/filters/client_channel/subchannel.cc:368
#12
grpc_core::Subchannel::AsyncWatcherNotifierLocked::AsyncWatcherNotifierLocked(grpc_core::RefCountedPtr<grpc_core::Subchannel::ConnectivityStateWatcherInterface>,
grpc_connectivity_state, y_absl::lts_y_20211102::Status
const&)::{lambda(void*, grpc_error*)#1}::__invoke(void*, grpc_error*)
(arg=0x7fa7dc390e20) at
.../grpc/src/core/ext/filters/client_channel/subchannel.cc:368
#13 0x0000000002912699 in exec_ctx_run (closure=<optimized out>, error=0x0)
at .../grpc/src/core/lib/iomgr/exec_ctx.cc:40
#14 grpc_core::ExecCtx::Flush (this=0x7fa93bdf40e0) at
.../grpc/src/core/lib/iomgr/exec_ctx.cc:156
#15 0x0000000002945bb1 in run_some_timers () at
.../grpc/src/core/lib/iomgr/timer_manager.cc:134
#16 timer_main_loop () at .../grpc/src/core/lib/iomgr/timer_manager.cc:237
#17 timer_thread (completed_thread_ptr=0x7fa93be00000) at
.../grpc/src/core/lib/iomgr/timer_manager.cc:284
#18 0x000000000291820c in grpc_core::(anonymous
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void
(*)(void*), void*, bool*, grpc_core::Thread::Options
const&)::{lambda(void*)#1}::operator()(void*) const (this=<optimized out>,
v=<optimized out>) at .../grpc/src/core/lib/gprpp/thd_posix.cc:129
#19 grpc_core::(anonymous
namespace)::ThreadInternalsPosix::ThreadInternalsPosix(char const*, void
(*)(void*), void*, bool*, grpc_core::Thread::Options
const&)::{lambda(void*)#1}::__invoke(void*) (v=<optimized out>) at
.../grpc/src/core/lib/gprpp/thd_posix.cc:111
#20 0x00007fa9c0bdc6db in start_thread () from
/lib/x86_64-linux-gnu/libpthread.so.0
#21 0x00007fa9c06eaa3f in clone () from /lib/x86_64-linux-gnu/libc.so.6
Best regards,
Sergey Gulyaevsky
--
You received this message because you are subscribed to the Google Groups
"grpc.io" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To view this discussion on the web visit
https://groups.google.com/d/msgid/grpc-io/e085ad75-9cda-41bf-8c37-5baf94ecd30en%40googlegroups.com.