[ 
https://issues.apache.org/jira/browse/IMPALA-11653?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17621353#comment-17621353
 ] 

Qifan Chen edited comment on IMPALA-11653 at 10/20/22 8:17 PM:
---------------------------------------------------------------

Before the very first communication with the client, impalad needs to setup the 
connection by calling SetupConnection(). It is in this setup method that the 
above "super long wait" can happen. 


{code:java}
(gdb) bt
#0  0x00007f3b972ee20d in poll () from ./lib64/libc.so.6
#1  0x0000000002dcd5bc in apache::thrift::transport::TSocket::read(unsigned 
char*, unsigned int) ()
#2  0x0000000002dd1803 in unsigned int 
apache::thrift::transport::readAll<apache::thrift::transport::TSocket>(apache::thrift::transport::TSocket&,
 unsigned char*, unsigned int) ()
#3  0x0000000001330cc9 in readAll (len=5, buf=0x7f3277ea4f8b "", 
this=<optimized out>) at 
../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TTransport.h:121
#4  apache::thrift::transport::TSaslTransport::receiveSaslMessage 
(this=this@entry=0x278a96b0, status=status@entry=0x7f3277ea500c, 
length=length@entry=0x7f3277ea5008) at TSaslTransport.cpp:259
#5  0x000000000132db14 in 
apache::thrift::transport::TSaslServerTransport::handleSaslStartMessage 
(this=0x278a96b0) at TSaslServerTransport.cpp:95
#6  0x0000000001330e33 in 
apache::thrift::transport::TSaslTransport::doSaslNegotiation (this=0x278a96b0) 
at TSaslTransport.cpp:81
#7  0x000000000132e723 in open (this=0x12e29750) at 
../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TBufferTransports.h:218.
   void open() { transport_->open(); }

132   void TSaslTransport::open() {                                             
               
133     // Only client should open the underlying transport.                    
               
134     if (isClient_ && !transport_->isOpen()) {                               
               
135       transport_->open();                                                   
               
136     }                                                                       
               
137                                                                             
               
138     // Start the SASL negotiation protocol.                                 
               
139     doSaslNegotiation();                                                    
               
140   } 

#8  apache::thrift::transport::TSaslServerTransport::Factory::getTransport 
(this=0xf825a70, trans=...) at TSaslServerTransport.cpp:173
#9  0x00000000010cd49d in 
apache::thrift::server::TAcceptQueueServer::SetupConnection (this=0x174270c0, 
entry=...) at TAcceptQueueServer.cpp:233
#10 0x00000000010cef4d in operator() 
{code}




{code:java}
130 std::shared_ptr<TTransport> TSaslServerTransport::Factory::getTransport(    
               
131     std::shared_ptr<TTransport> trans) {                                    
               
132   // Thrift servers use both an input and an output transport to 
communicate with          
133   // clients. In principal, these can be different, but for SASL clients we 
require them   
134   // to be the same so that the authentication state is identical for 
communication in     
135   // both directions. In order to do this, we share the same TTransport 
object for both    
136   // input and output set in TAcceptQueueServer::SetupConnection.           
               
137   std::shared_ptr<TBufferedTransport> ret_transport;                        
               
138   std::shared_ptr<TTransport> wrapped(                                      
               
139       new TSaslServerTransport(serverDefinitionMap_, trans));               
               
140   // Set socket timeouts to prevent TSaslServerTransport->open from 
blocking the server    
141   // from accepting new connections if a read/write blocks during the 
handshake            
142   TSocket* socket = static_cast<TSocket*>(trans.get());                     
               
143   socket->setRecvTimeout(FLAGS_sasl_connect_tcp_timeout_ms);        <== 
5min timeout for read() calls invoked indirectly at line 147: open()            
           
144   socket->setSendTimeout(FLAGS_sasl_connect_tcp_timeout_ms);                
               
145   ret_transport.reset(new TBufferedTransport(wrapped,                       
               
146         
impala::ThriftServer::BufferedTransportFactory::DEFAULT_BUFFER_SIZE_BYTES));    
   
147   ret_transport.get()->open();                                              
               
148   // Reset socket timeout back to zero, so idle clients do not timeout      
               
149   socket->setRecvTimeout(0);                                                
               
150   socket->setSendTimeout(0);                                                
               
151   return ret_transport;                                                     
               
152 }                                                                           
               
153                                                                             
               
154 }}}                                                                         
               
155 #endif                                                                      
               
transport/TSaslServerTransport.cpp
{code}




was (Author: sql_forever):
Before the very first communication with the client, impalad needs to setup the 
connection by calling SetupConnection(). It is in this setup method that the 
above "infinite wait" can happen. 


{code:java}
(gdb) bt
#0  0x00007f3b972ee20d in poll () from ./lib64/libc.so.6
#1  0x0000000002dcd5bc in apache::thrift::transport::TSocket::read(unsigned 
char*, unsigned int) ()
#2  0x0000000002dd1803 in unsigned int 
apache::thrift::transport::readAll<apache::thrift::transport::TSocket>(apache::thrift::transport::TSocket&,
 unsigned char*, unsigned int) ()
#3  0x0000000001330cc9 in readAll (len=5, buf=0x7f3277ea4f8b "", 
this=<optimized out>) at 
../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TTransport.h:121
#4  apache::thrift::transport::TSaslTransport::receiveSaslMessage 
(this=this@entry=0x278a96b0, status=status@entry=0x7f3277ea500c, 
length=length@entry=0x7f3277ea5008) at TSaslTransport.cpp:259
#5  0x000000000132db14 in 
apache::thrift::transport::TSaslServerTransport::handleSaslStartMessage 
(this=0x278a96b0) at TSaslServerTransport.cpp:95
#6  0x0000000001330e33 in 
apache::thrift::transport::TSaslTransport::doSaslNegotiation (this=0x278a96b0) 
at TSaslTransport.cpp:81
#7  0x000000000132e723 in open (this=0x12e29750) at 
../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TBufferTransports.h:218.
   void open() { transport_->open(); }

132   void TSaslTransport::open() {                                             
               
133     // Only client should open the underlying transport.                    
               
134     if (isClient_ && !transport_->isOpen()) {                               
               
135       transport_->open();                                                   
               
136     }                                                                       
               
137                                                                             
               
138     // Start the SASL negotiation protocol.                                 
               
139     doSaslNegotiation();                                                    
               
140   } 

#8  apache::thrift::transport::TSaslServerTransport::Factory::getTransport 
(this=0xf825a70, trans=...) at TSaslServerTransport.cpp:173
#9  0x00000000010cd49d in 
apache::thrift::server::TAcceptQueueServer::SetupConnection (this=0x174270c0, 
entry=...) at TAcceptQueueServer.cpp:233
#10 0x00000000010cef4d in operator() 
{code}




{code:java}
130 std::shared_ptr<TTransport> TSaslServerTransport::Factory::getTransport(    
               
131     std::shared_ptr<TTransport> trans) {                                    
               
132   // Thrift servers use both an input and an output transport to 
communicate with          
133   // clients. In principal, these can be different, but for SASL clients we 
require them   
134   // to be the same so that the authentication state is identical for 
communication in     
135   // both directions. In order to do this, we share the same TTransport 
object for both    
136   // input and output set in TAcceptQueueServer::SetupConnection.           
               
137   std::shared_ptr<TBufferedTransport> ret_transport;                        
               
138   std::shared_ptr<TTransport> wrapped(                                      
               
139       new TSaslServerTransport(serverDefinitionMap_, trans));               
               
140   // Set socket timeouts to prevent TSaslServerTransport->open from 
blocking the server    
141   // from accepting new connections if a read/write blocks during the 
handshake            
142   TSocket* socket = static_cast<TSocket*>(trans.get());                     
               
143   socket->setRecvTimeout(FLAGS_sasl_connect_tcp_timeout_ms);        <== 
5min timeout for read() calls invoked indirectly at line 147: open()            
           
144   socket->setSendTimeout(FLAGS_sasl_connect_tcp_timeout_ms);                
               
145   ret_transport.reset(new TBufferedTransport(wrapped,                       
               
146         
impala::ThriftServer::BufferedTransportFactory::DEFAULT_BUFFER_SIZE_BYTES));    
   
147   ret_transport.get()->open();                                              
               
148   // Reset socket timeout back to zero, so idle clients do not timeout      
               
149   socket->setRecvTimeout(0);                                                
               
150   socket->setSendTimeout(0);                                                
               
151   return ret_transport;                                                     
               
152 }                                                                           
               
153                                                                             
               
154 }}}                                                                         
               
155 #endif                                                                      
               
transport/TSaslServerTransport.cpp
{code}



> Identify and time out connections that are not from a supported Impala client 
> more eagerly
> ------------------------------------------------------------------------------------------
>
>                 Key: IMPALA-11653
>                 URL: https://issues.apache.org/jira/browse/IMPALA-11653
>             Project: IMPALA
>          Issue Type: Improvement
>    Affects Versions: Impala 4.1.0
>            Reporter: Vincent Tran
>            Assignee: Qifan Chen
>            Priority: Major
>         Attachments: simple_tcp_client.py
>
>
> When a tcp client opens a connection to an Impala client interface (hs2 or 
> beeswax), the connection is accepted immediately after the 3-way handshake 
> (SYN, SYN-ACK, ACK) and is queued forĀ 
> *TAcceptQueueServer::SetupConnection()*.  However, if the client sends 
> nothing else, the ImpalaServer will block in 
> *apache::thrift::transport::TSocket::read()* until the client sends a RST/FIN 
> or until *sasl_connect_tcp_timeout_ms* elapses (which is by default, 5 
> minutes).
> The connection setup thread stack trace can be observed below during this 
> period.
> {noformat}
> (gdb) bt
> #0  0x00007f3b972ee20d in poll () from ./lib64/libc.so.6
> #1  0x0000000002dcd5bc in apache::thrift::transport::TSocket::read(unsigned 
> char*, unsigned int) ()
> #2  0x0000000002dd1803 in unsigned int 
> apache::thrift::transport::readAll<apache::thrift::transport::TSocket>(apache::thrift::transport::TSocket&,
>  unsigned char*, unsigned int) ()
> #3  0x0000000001330cc9 in readAll (len=5, buf=0x7f3277ea4f8b "", 
> this=<optimized out>) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TTransport.h:121
> #4  apache::thrift::transport::TSaslTransport::receiveSaslMessage 
> (this=this@entry=0x278a96b0, status=status@entry=0x7f3277ea500c, 
> length=length@entry=0x7f3277ea5008) at TSaslTransport.cpp:259
> #5  0x000000000132db14 in 
> apache::thrift::transport::TSaslServerTransport::handleSaslStartMessage 
> (this=0x278a96b0) at TSaslServerTransport.cpp:95
> #6  0x0000000001330e33 in 
> apache::thrift::transport::TSaslTransport::doSaslNegotiation 
> (this=0x278a96b0) at TSaslTransport.cpp:81
> #7  0x000000000132e723 in open (this=0x12e29750) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/thrift-0.9.3-p8/include/thrift/transport/TBufferTransports.h:218
> #8  apache::thrift::transport::TSaslServerTransport::Factory::getTransport 
> (this=0xf825a70, trans=...) at TSaslServerTransport.cpp:173
> #9  0x00000000010cd49d in 
> apache::thrift::server::TAcceptQueueServer::SetupConnection (this=0x174270c0, 
> entry=...) at TAcceptQueueServer.cpp:233
> #10 0x00000000010cef4d in operator() (tid=<optimized out>, item=..., 
> __closure=<optimized out>) at TAcceptQueueServer.cpp:323
> #11 
> boost::detail::function::void_function_obj_invoker2<apache::thrift::server::TAcceptQueueServer::serve()::<lambda(int,
>  const boost::shared_ptr<apache::thrift::server::TAcceptQueueEntry>&)>, void, 
> int, const 
> boost::shared_ptr<apache::thrift::server::TAcceptQueueEntry>&>::invoke(boost::detail::function::function_buffer
>  &, int, const boost::shared_ptr<apache::thrift::server::TAcceptQueueEntry> 
> &) (function_obj_ptr=..., a0=<optimized out>, a1=...)
>     at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/function/function_template.hpp:159
> #12 0x00000000010d3e59 in operator() (a1=..., a0=1, this=0x7f3279ea9510) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/function/function_template.hpp:770
> #13 
> impala::ThreadPool<boost::shared_ptr<apache::thrift::server::TAcceptQueueEntry>
>  >::WorkerThread (this=0x7f3279ea94c0, thread_id=1) at 
> ../util/thread-pool.h:166
> #14 0x000000000144f8f2 in operator() (this=0x7f3277ea5b40) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/function/function_template.hpp:770
> #15 impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, 
> std::char_traits<char>, std::allocator<char> > const&, 
> std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> 
> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, 
> impala::Promise<long, (impala::PromiseMode)0>*) (name=..., category=..., 
> functor=..., parent_thread_info=<optimized out>, 
> thread_started=0x7f3279ea9110) at thread.cc:360
> #16 0x0000000001450d6b in operator()<void (*)(const 
> std::__cxx11::basic_string<char>&, const std::__cxx11::basic_string<char>&, 
> boost::function<void()>, const impala::ThreadDebugInfo*, impala::Promise<long 
> int>*), boost::_bi::list0> (a=<synthetic pointer>,
>     f=@0x1417ccf8: 0x144f5f0 
> <impala::Thread::SuperviseThread(std::__cxx11::basic_string<char, 
> std::char_traits<char>, std::allocator<char> > const&, 
> std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> 
> > const&, boost::function<void ()>, impala::ThreadDebugInfo const*, 
> impala::Promise<long, (impala::PromiseMode)0>*)>, this=0x1417cd00) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/bind.hpp:531
> #17 operator() (this=0x1417ccf8) at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/bind/bind.hpp:1222
> #18 boost::detail::thread_data<boost::_bi::bind_t<void, void 
> (*)(std::__cxx11::basic_string<char, std::char_traits<char>, 
> std::allocator<char> > const&, std::__cxx11::basic_string<char, 
> std::char_traits<char>, std::allocator<char> > const&, boost::function<void 
> ()>, impala::ThreadDebugInfo const*, impala::Promise<long, 
> (impala::PromiseMode)0>*), 
> boost::_bi::list5<boost::_bi::value<std::__cxx11::basic_string<char, 
> std::char_traits<char>, std::allocator<char> > >, 
> boost::_bi::value<std::__cxx11::basic_string<char, std::char_traits<char>, 
> std::allocator<char> > >, boost::_bi::value<boost::function<void ()> >, 
> boost::_bi::value<impala::ThreadDebugInfo*>, 
> boost::_bi::value<impala::Promise<long, (impala::PromiseMode)0>*> > > 
> >::run() (this=0x1417cb40)
>     at 
> ../../../toolchain/toolchain-packages-gcc7.5.0/boost-1.61.0-p2/include/boost/thread/detail/thread.hpp:116
> #19 0x0000000001ca17c2 in thread_proxy ()
> #20 0x00007f3b9a7f2dd5 in start_thread () from ./lib64/libpthread.so.0
> #21 0x00007f3b972f8ead in clone () from ./lib64/libc.so.6
> {noformat}
> As discussed in IMPALA-7638, we need to distinguish between a client that is 
> slow to finish sasl negotiation (due to slow Kerberos negotiation) from a 
> client that is never going to do anything beyond the tcp handshake and time 
> these out much sooner than the *sasl_connect_tcp_timeout_ms* duration.
> The logging pattern below captures some instances of these abnormal 
> connections
> {noformat}
> I1007 12:16:07.636166 185038 TAcceptQueueServer.cpp:227] TAcceptQueueServer: 
> hiveserver2-frontend started connection setup for client <Host: x.x.x.x Port: 
> 32878>
> I1007 12:21:06.634896 185038 thrift-util.cc:96] TAcceptQueueServer: 
> hiveserver2-frontend connection setup failed for client <Host: x.x.x.x Port: 
> 32878>. Caught TException: No more data to read.
> ====
> I1007 12:16:23.488011 185039 TAcceptQueueServer.cpp:227] TAcceptQueueServer: 
> hiveserver2-frontend started connection setup for client <Host: x.x.x.x Port: 
> 33934>
> I1007 12:21:22.488610 185039 thrift-util.cc:96] TAcceptQueueServer: 
> hiveserver2-frontend connection setup failed for client <Host: x.x.x.x Port: 
> 33934>. Caught TException: No more data to read.
> {noformat}
> This instance of ImpalaServer was running with 
> *accepted_cnxn_setup_thread_pool_size*=2. That means that both of these 
> threads (185038 &185039) were tied up during this 5-minute window. Subsequent 
> incoming client connections to the HS2 interface will be in the accept queue 
> until one of these threads frees up.  If more of those accepted connections 
> do not start sasl negotiation, the problem will snowball.
> Attached a simple tcp client  [^simple_tcp_client.py] that when called more 
> than once in quick succession, will block port 21050 and cause otherwise 
> legitimate connections from Impala-supported clients to wait in the "accept" 
> queue for at least 5 minutes.
> {noformat}
> # python simple_tcp_client.py &
> [1] 19986
> 2022-10-12 11:28:16 INFO     Created a tcp client
> 2022-10-12 11:28:16 INFO     Connecting to: c908086-2.vpc.cloudera.com:21050
> 2022-10-12 11:28:16 INFO     Client1 connected
> 2022-10-12 11:28:16 INFO     Sleeping for 5 minutes
> # python simple_tcp_client.py &
> [2] 19989
> 2022-10-12 11:28:20 INFO     Created a tcp client
> 2022-10-12 11:28:20 INFO     Connecting to: c908086-2.vpc.cloudera.com:21050
> 2022-10-12 11:28:20 INFO     Client1 connected
> 2022-10-12 11:28:20 INFO     Sleeping for 5 minutes
> {noformat}
> {noformat}
> # impala-shell -i c908086-2.vpc.cloudera.com -d default -k --protocol=hs2
> Starting Impala Shell using Kerberos authentication
> Using service name 'impala'
> Socket error None: timed out
> ***********************************************************************************
> Welcome to the Impala shell.
> (Impala Shell v3.4.0-SNAPSHOT (a1dfdfd) built on Sun Aug 21 10:10:08 UTC 2022)
> When pretty-printing is disabled, you can use the '--output_delimiter' flag 
> to set
> the delimiter for fields in the same row. The default is '\t'.
> ***********************************************************************************
> [Not connected] >
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to