372046933 commented on issue #2265:
URL: https://github.com/apache/brpc/issues/2265#issuecomment-1648878368

   日志不多的。其中有些日志是我在BRPC里面加的
   ```patch
   diff --git a/src/brpc/rdma/rdma_endpoint.cpp b/src/brpc/rdma/rdma_endpoint.cpp
   index d3a91560..e7e60ff4 100644
   --- a/src/brpc/rdma/rdma_endpoint.cpp
   +++ b/src/brpc/rdma/rdma_endpoint.cpp
   @@ -251,7 +251,9 @@ void RdmaConnect::StartConnect(const Socket* socket,
        }
    }
    
   -void RdmaConnect::StopConnect(Socket* socket) { }
   +void RdmaConnect::StopConnect(Socket* socket) {
   +    LOG(INFO) << "RdmaConnect StopConnect called";
   +}
    
    void RdmaConnect::Run() {
        _done(errno, _data);
   @@ -274,6 +276,7 @@ static void TryReadOnTcpDuringRdmaEst(Socket* s) {
                    break;
                }
            } else if (nr == 0) {
   +            LOG(INFO) << "Got socket EOF, " << *s;
                s->SetEOF();
                return;
            } else {
   @@ -330,7 +333,7 @@ void RdmaEndpoint::OnNewDataFromTcp(Socket* m) {
        }
    }
    
   -bool HelloNegotiationValid(HelloMessage& msg) {
   +bool HelloNegotiationValid(const HelloMessage& msg) {
        if (msg.hello_ver == g_rdma_hello_version &&
            msg.impl_ver == g_rdma_impl_version &&
            msg.block_size >= MIN_BLOCK_SIZE &&
   @@ -360,6 +363,7 @@ int RdmaEndpoint::ReadFromFd(void* data, size_t len) {
                        }
                    }
                } else {
   +                PLOG(WARNING) << "Failed to read";
                    return -1;
                }
            } else if (nr == 0) {  // Got EOF
   @@ -1030,7 +1034,7 @@ int RdmaEndpoint::PostRecv(uint32_t num, bool zerocopy) {
            if (_rq_received == _rq_size) {
                _rq_received = 0;
            }
   -    };
   +    }
        return 0;
    }
    
   @@ -1168,11 +1172,6 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) {
            return -1;
        }
    
   -    if (PostRecv(_rq_size, true) < 0) {
   -        PLOG(WARNING) << "Fail to post recv wr";
   -        return -1;
   -    }
   -
        attr.qp_state = IBV_QPS_RTR;
        attr.path_mtu = IBV_MTU_1024;  // TODO: support more mtu in future
        attr.ah_attr.grh.dgid = gid;
   @@ -1202,6 +1201,11 @@ int RdmaEndpoint::BringUpQp(uint16_t lid, ibv_gid gid, uint32_t qp_num) {
            return -1;
        }
    
   +    if (PostRecv(_rq_size, true) < 0) {
   +        PLOG(WARNING) << "Fail to post recv wr";
   +        return -1;
   +    }
   +
        attr.qp_state = IBV_QPS_RTS;
        attr.timeout = TIMEOUT;
        attr.retry_cnt = RETRY_CNT;
   @@ -1293,7 +1297,8 @@ void RdmaEndpoint::DeallocateResources() {
    static const int MAX_CQ_EVENTS = 128;
    
    int RdmaEndpoint::GetAndAckEvents() {
   -    int events = 0; void* context = NULL;
   +    int events = 0;
   +    void* context = NULL;
        while (1) {
            if (IbvGetCqEvent(_resource->comp_channel, &_resource->cq, &context) < 0) {
                if (errno != EAGAIN) {
   @@ -1354,11 +1359,13 @@ void RdmaEndpoint::PollCq(Socket* m) {
                    // that the event arrives after the poll but before the 
notify,
                    // we should re-poll the CQ once after the notify to check 
if
                    // there is an available CQE.
   -                if (ibv_req_notify_cq(ep->_resource->cq, 1) < 0) {
   +                if (ibv_req_notify_cq(ep->_resource->cq, /*solicited_only=*/1) <
   +                    0) {
                        const int saved_errno = errno;
   -                    PLOG(WARNING) << "Fail to arm CQ comp channel: " << s->description();
   +                    PLOG(WARNING)
   +                        << "Fail to arm CQ comp channel: " << s->description();
                        s->SetFailed(saved_errno, "Fail to arm cq channel from %s: %s",
   -                            s->description().c_str(), berror(saved_errno));
   +                                s->description().c_str(), berror(saved_errno));
                        return;
                    }
                    notified = true;
   @@ -1387,7 +1394,8 @@ void RdmaEndpoint::PollCq(Socket* m) {
                    PLOG(WARNING) << "Fail to handle RDMA completion, error status("
                                  << wc[i].status << "): " << s->description();
                    s->SetFailed(ERDMA, "RDMA completion error(%d) from %s: %s",
   -                             wc[i].status, s->description().c_str(), berror(ERDMA));
   +                             wc[i].status, s->description().c_str(),
   +                             berror(ERDMA));
                    continue;
                }
    
   diff --git a/src/brpc/socket.cpp b/src/brpc/socket.cpp
   index c49ca083..4ebba8d2 100644
   --- a/src/brpc/socket.cpp
   +++ b/src/brpc/socket.cpp
   @@ -803,6 +803,7 @@ int Socket::WaitAndReset(int32_t expected_nref) {
    
    #if BRPC_WITH_RDMA
        if (_rdma_ep) {
   +        LOG(WARNING) << "Reset RdmaEndpoint, " << description();
            _rdma_ep->Reset();
            _rdma_state = RDMA_UNKNOWN;
        }
   
   ```
   
   server
   ```
   BRPC 日志:
   W0724 21:29:37.355377   978 external/brpc/src/brpc/input_messenger.cpp:247] Close Socket{id=8589934594 fd=297 addr=10.156.8.29:29765:37376} (0x560a5f5cc480): absolutely wrong message
   
   应用层日志:
   W0724 21:29:37.355577   838 ./xxx/yyy/zzz.h:119] Method:[FuncFoo] Fail to get response from shard#0 [E22]Close Socket{id=8589934594 fd=297 addr=10.156.8.29:29765:37376} (0x0x560a5f5cc480): absolutely wrong message
   
   BRPC 日志
   I0724 21:29:37.458484  1004 external/brpc/src/brpc/socket.cpp:2465] Checking Socket{id=8589934594 addr=10.156.8.29:29765} (0x560a5f5cc480)
   I0724 21:29:37.458786  1016 external/brpc/src/brpc/socket.cpp:2525] Revived Socket{id=8589934594 addr=10.156.8.29:29765} (0x560a5f5cc480) (Connectable)
   I0724 21:29:42.355888   985 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:418] Start handshake on 10.156.0.21:54384
   I0724 21:29:42.362131   995 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:543] Handshake ends (use rdma) on Socket{id=8589934594 fd=297 addr=10.156.8.29:29765:54384} (0x0x560a5f5cc480)
   
   ```
   
   client
   ```
   I0724 21:29:37.446674  1045 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:279] Got socket EOF, Socket{id=25769805360 fd=300 addr=10.156.0.21:37376:29765} (0x557bf1110480)
   I0724 21:29:37.449893  1090 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:560] Start handshake on Socket{id=8589943858 fd=1413 addr=10.156.0.21:54366:29765} (0x0x557ef5f2c000)
   W0724 21:29:37.449923  1090 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:568] Fail to read Hello Message from client:Socket{id=8589943858 fd=1413 addr=10.156.0.21:54366:29765} (0x0x557ef5f2c000) 10.156.0.21:54366: Got EOF
   I0724 21:29:42.347077  1047 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:560] Start handshake on Socket{id=17179869414 fd=1358 addr=10.156.0.21:54384:29765} (0x0x557bf0b22900)
   I0724 21:29:42.353112  1097 external/brpc/src/brpc/rdma/rdma_endpoint.cpp:696] Handshake ends (use rdma) on Socket{id=17179869414 fd=1358 addr=10.156.0.21:54384:29765} (0x0x557bf0b22900)
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to