This is an automated email from the ASF dual-hosted git repository.
humingcheng pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/servicecomb-service-center.git
The following commit(s) were added to refs/heads/dev by this push:
new 215f4b51 bugfix: add restart empty sd protection (#1494)
215f4b51 is described below
commit 215f4b5151fdedfe585e26dca912bf54d8a1a472
Author: Wanghb1 <[email protected]>
AuthorDate: Mon Dec 2 18:05:18 2024 +0800
bugfix: add restart empty sd protection (#1494)
* bugfix:
The SDK switches back to a service center that is recovering from a crash as
soon as the server starts, even though the service center has not yet synced
data from its peer, which interrupts service discovery.
Add unit tests.
* Update first_launch.go
* Update first_launch.go
* Update first_launch.go
* Update first_launch.go
* Update first_launch.go
* Update first_launch.go
* review
* review
---
datasource/etcd/ops.go | 3 --
go.sum | 2 +-
pkg/protect/protect.go | 73 ++++++++++++++++++++++++++++++
pkg/protect/protect_test.go | 30 ++++++++++++
pkg/rpc/client.go | 5 ++
server/resource/disco/instance_resource.go | 12 ++++-
server/server.go | 8 +++-
server/service/disco/instance.go | 5 +-
8 files changed, 129 insertions(+), 9 deletions(-)
diff --git a/datasource/etcd/ops.go b/datasource/etcd/ops.go
index 2bbdd5f8..6f48775a 100644
--- a/datasource/etcd/ops.go
+++ b/datasource/etcd/ops.go
@@ -98,9 +98,6 @@ func (ds *MetadataManager) CountEnvironment(ctx
context.Context, request *ev.Get
if err != nil {
return nil, err
}
- if err != nil {
- return nil, err
- }
return &ev.GetEnvironmentCountResponse{
Count: all - preEnvNum,
}, nil
diff --git a/go.sum b/go.sum
index ff31b5d2..01243bb3 100644
--- a/go.sum
+++ b/go.sum
@@ -99,7 +99,7 @@ github.com/armon/go-metrics v0.3.10
h1:FR+drcQStOe+32sYyJYyZ7FIdgoGGBnwLl+flodp8
github.com/armon/go-metrics v0.3.10/go.mod
h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod
h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0
h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
-github.com/armon/go-radix v1.0.0/go.mod
h1:TsTFsXBVHVK4HQ+UrFSsQEhBXZGCDqoY+cr+sUq5ZmA=
+github.com/armon/go-radix v1.0.0/go.mod
h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a
h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod
h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/aws/aws-sdk-go v1.34.28
h1:sscPpn/Ns3i0F4HPEWAVcwdIRaZZCuL7llJ2/60yPIk=
diff --git a/pkg/protect/protect.go b/pkg/protect/protect.go
new file mode 100644
index 00000000..59cf5d9f
--- /dev/null
+++ b/pkg/protect/protect.go
@@ -0,0 +1,73 @@
+package protect
+
+import (
+ "fmt"
+ "net/http"
+ "time"
+
+ "github.com/apache/servicecomb-service-center/server/config"
+
+ "github.com/apache/servicecomb-service-center/pkg/log"
+)
+
+/**
+After the service center restarts, the discovery APIs return RestartProtectHttpCode
+instead of an empty instance list for a restartProtectInterval time window,
+indicating that the SDK does not need to clear its cache.
+*/
+
+var (
+ // isWithinProtection is set to true at the end of Init and set to false by
+ // IsWithinRestartProtection once the protection window has elapsed.
+ isWithinProtection bool
+ // startupTimestamp is the time.Now().UnixNano() value recorded by Init.
+ startupTimestamp int64
+ // enableInstanceNullProtect mirrors the "instance_null_protect.enable" setting.
+ enableInstanceNullProtect bool
+ // restartProtectInterval is the length of the protection window, read by Init
+ // from "instance_null_protect.restart_protect_interval" (in seconds).
+ restartProtectInterval time.Duration
+ // RestartProtectHttpCode is the HTTP status written by the discovery handlers
+ // while protection is active; Init reads it from "instance_null_protect.http_status".
+ RestartProtectHttpCode int
+ // validProtectCode is the set of status codes Init accepts for RestartProtectHttpCode.
+ validProtectCode = map[int]struct{}{http.StatusNotModified:
{}, http.StatusUnprocessableEntity: {}, http.StatusInternalServerError: {}}
+)
+
+const (
+ // maxInterval and minInterval bound the configured restartProtectInterval (inclusive).
+ maxInterval = 120 * time.Second
+ minInterval = 0 * time.Second
+ // defaultRestartProtectInterval replaces an out-of-range configured interval.
+ defaultRestartProtectInterval = 120 * time.Second
+)
+
+func Init() {
+ enableInstanceNullProtect =
config.GetBool("instance_null_protect.enable", false)
+ if !enableInstanceNullProtect {
+ return
+ }
+ restartProtectInterval =
time.Duration(config.GetInt("instance_null_protect.restart_protect_interval",
120)) * time.Second
+ if restartProtectInterval > maxInterval || restartProtectInterval <
minInterval {
+ log.Warn(fmt.Sprintf("invalid
instance_null_protect.restart_protect_interval: %d,"+
+ " must between %d-%ds inclusively",
restartProtectInterval, minInterval, maxInterval))
+ restartProtectInterval = defaultRestartProtectInterval
+ }
+ RestartProtectHttpCode =
config.GetInt("instance_null_protect.http_status", http.StatusNotModified)
+ if _, ok := validProtectCode[RestartProtectHttpCode]; !ok {
+ log.Warn(fmt.Sprintf("invalid
instance_null_protect.http_status: %d, must be %v", RestartProtectHttpCode,
validProtectCode))
+ RestartProtectHttpCode = http.StatusNotModified
+ }
+
+ log.Info(fmt.Sprintf("instance_null_protect.enable: %t",
enableInstanceNullProtect))
+ log.Info(fmt.Sprintf("instance_null_protect.restart_protect_interval:
%d", restartProtectInterval))
+ log.Info(fmt.Sprintf("instance_null_protect.http_status: %d",
RestartProtectHttpCode))
+ startupTimestamp = time.Now().UnixNano()
+ isWithinProtection = true
+}
+
+// IsWithinRestartProtection reports whether the restart-protection window
+// opened by Init is still in effect. It returns false when the feature is
+// disabled or the window has already been closed. The first call made after
+// restartProtectInterval has elapsed since startupTimestamp closes the window,
+// so every later call returns false.
+func IsWithinRestartProtection() bool {
+	if !(enableInstanceNullProtect && isWithinProtection) {
+		return false
+	}
+
+	windowElapsed := time.Now().Add(-restartProtectInterval).UnixNano() > startupTimestamp
+	if !windowElapsed {
+		log.Info("within restart protection")
+		return true
+	}
+
+	// Latch the flag off so later calls return without consulting the clock.
+	log.Info("restart protection stop")
+	isWithinProtection = false
+	return false
+}
diff --git a/pkg/protect/protect_test.go b/pkg/protect/protect_test.go
new file mode 100644
index 00000000..c7c2dd50
--- /dev/null
+++ b/pkg/protect/protect_test.go
@@ -0,0 +1,30 @@
+package protect
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// TestIsWithinRestartProtection covers the disabled switch, a startup time
+// inside the window, a startup time beyond the window, and the latch that
+// keeps protection off once the window has expired.
+func TestIsWithinRestartProtection(t *testing.T) {
+	restartProtectInterval = 2 * time.Minute
+
+	// protection switch off
+	enableInstanceNullProtect = false
+	assert.False(t, IsWithinRestartProtection())
+
+	// within protection: started one minute ago, window is two minutes
+	enableInstanceNullProtect = true
+	isWithinProtection = true
+	startupTimestamp = time.Now().Add(-1 * time.Minute).UnixNano()
+	assert.True(t, IsWithinRestartProtection())
+
+	// protection delay exceeded: the timestamp must be in nanoseconds, the
+	// unit IsWithinRestartProtection compares against; three minutes leaves a
+	// clear margin over the two-minute window
+	enableInstanceNullProtect = true
+	isWithinProtection = true
+	startupTimestamp = time.Now().Add(-3 * time.Minute).UnixNano()
+	assert.False(t, IsWithinRestartProtection())
+
+	// always false after exceed, even if the timestamp would be in the window again
+	startupTimestamp = time.Now().UnixNano()
+	assert.False(t, IsWithinRestartProtection())
+}
diff --git a/pkg/rpc/client.go b/pkg/rpc/client.go
index b67ccee6..3ca42814 100644
--- a/pkg/rpc/client.go
+++ b/pkg/rpc/client.go
@@ -20,8 +20,10 @@ package rpc
import (
"crypto/tls"
"errors"
+ "time"
"google.golang.org/grpc"
+ "google.golang.org/grpc/backoff"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/resolver"
@@ -50,9 +52,12 @@ func GetPickFirstLbConn(config *Config) (*grpc.ClientConn,
error) {
}
// GetRoundRobinLbConn returns the connection built by getLbConn with the
// "round_robin" load-balancing service config. Connection attempts use gRPC's
// default backoff configuration with MaxDelay set to 30 seconds.
func GetRoundRobinLbConn(config *Config) (*grpc.ClientConn, error) {
+ connBackoff := backoff.DefaultConfig
+ connBackoff.MaxDelay = 30 * time.Second
return getLbConn(config, func() []grpc.DialOption {
return []grpc.DialOption{
grpc.WithDefaultServiceConfig(`{"loadBalancingConfig":
[{"round_robin":{}}]}`),
+ grpc.WithConnectParams(grpc.ConnectParams{Backoff:
connBackoff}),
}
})
}
diff --git a/server/resource/disco/instance_resource.go
b/server/resource/disco/instance_resource.go
index ec336b46..df5fb145 100644
--- a/server/resource/disco/instance_resource.go
+++ b/server/resource/disco/instance_resource.go
@@ -19,18 +19,20 @@ package disco
import (
"fmt"
+ "github.com/apache/servicecomb-service-center/pkg/protect"
"io"
"net/http"
"strings"
"github.com/go-chassis/go-chassis/v2/pkg/codec"
+ pb "github.com/go-chassis/cari/discovery"
+
"github.com/apache/servicecomb-service-center/datasource"
"github.com/apache/servicecomb-service-center/pkg/log"
"github.com/apache/servicecomb-service-center/pkg/rest"
"github.com/apache/servicecomb-service-center/pkg/util"
discosvc
"github.com/apache/servicecomb-service-center/server/service/disco"
- pb "github.com/go-chassis/cari/discovery"
)
type InstanceResource struct {
@@ -167,6 +169,10 @@ func (s *InstanceResource) FindInstances(w
http.ResponseWriter, r *http.Request)
w.WriteHeader(http.StatusNotModified)
return
}
+ if len(resp.Instances) == 0 && protect.IsWithinRestartProtection() {
+ w.WriteHeader(protect.RestartProtectHttpCode)
+ return
+ }
rest.WriteResponse(w, r, nil, resp)
}
@@ -266,6 +272,10 @@ func (s *InstanceResource) ListInstance(w
http.ResponseWriter, r *http.Request)
w.WriteHeader(http.StatusNotModified)
return
}
+ if len(resp.Instances) == 0 && protect.IsWithinRestartProtection() {
+ w.WriteHeader(protect.RestartProtectHttpCode)
+ return
+ }
rest.WriteResponse(w, r, nil, resp)
}
diff --git a/server/server.go b/server/server.go
index cf996801..b05be2f3 100644
--- a/server/server.go
+++ b/server/server.go
@@ -20,15 +20,19 @@ package server
import (
"context"
"crypto/tls"
+ "github.com/apache/servicecomb-service-center/pkg/protect"
"os"
+ "github.com/gofiber/fiber/v2"
+
"github.com/apache/servicecomb-service-center/server/middleware"
"github.com/apache/servicecomb-service-center/server/resource/disco"
- "github.com/gofiber/fiber/v2"
"github.com/go-chassis/go-chassis/v2"
chassisServer "github.com/go-chassis/go-chassis/v2/core/server"
+ "github.com/go-chassis/foundation/gopool"
+
"github.com/apache/servicecomb-service-center/datasource"
nf "github.com/apache/servicecomb-service-center/pkg/event"
"github.com/apache/servicecomb-service-center/pkg/log"
@@ -41,7 +45,6 @@ import (
"github.com/apache/servicecomb-service-center/server/plugin/security/tlsconf"
"github.com/apache/servicecomb-service-center/server/service/grc"
"github.com/apache/servicecomb-service-center/server/service/rbac"
- "github.com/go-chassis/foundation/gopool"
)
var sc ServiceCenterServer
@@ -211,6 +214,7 @@ func (s *ServiceCenterServer) startServices() {
// startAPIService sets the API server's host and port from s.Endpoint, calls
// protect.Init to set up restart protection, and then starts the API server.
func (s *ServiceCenterServer) startAPIService() {
s.APIServer.SetHostPort(s.Endpoint.Host, s.Endpoint.Port)
+ protect.Init()
s.APIServer.Start()
}
diff --git a/server/service/disco/instance.go b/server/service/disco/instance.go
index 45b8cc4d..b2c71d22 100644
--- a/server/service/disco/instance.go
+++ b/server/service/disco/instance.go
@@ -25,6 +25,9 @@ import (
"sync"
"time"
+ pb "github.com/go-chassis/cari/discovery"
+ "github.com/go-chassis/cari/pkg/errsvc"
+
"github.com/apache/servicecomb-service-center/datasource"
"github.com/apache/servicecomb-service-center/pkg/log"
"github.com/apache/servicecomb-service-center/pkg/util"
@@ -33,8 +36,6 @@ import (
"github.com/apache/servicecomb-service-center/server/health"
quotasvc
"github.com/apache/servicecomb-service-center/server/service/quota"
"github.com/apache/servicecomb-service-center/server/service/validator"
- pb "github.com/go-chassis/cari/discovery"
- "github.com/go-chassis/cari/pkg/errsvc"
)
const (