This is an automated email from the ASF dual-hosted git repository.
alexstocks pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/dubbo-go-pixiu.git
The following commit(s) were added to refs/heads/develop by this push:
new 13542ab0 [AI] feat: Add a new field "LLMMeta" for endpoint, simplify
the use of llm endpoint (#678)
13542ab0 is described below
commit 13542ab0b6cf042bbdbe52461697fb44129822d0
Author: Alan <[email protected]>
AuthorDate: Tue May 27 23:30:55 2025 +0800
[AI] feat: Add a new field "LLMMeta" for endpoint, simplify the use of llm
endpoint (#678)
* move file
* add llm endpoint
* import formatter
* update copilot
* add comments
* add endpoint tracking for LLM providers and update YAML configuration
* update dependency
* refactor: improve endpoint naming and ID assignment in cluster management
---
go.mod | 19 +++---
go.sum | 32 +++++-----
pkg/filter/{ => llm}/tokenizer/tokenizer.go | 0
pkg/filter/{ => llm}/tokenizer/tokenizer_test.go | 0
pkg/model/cluster.go | 4 +-
pkg/model/llm.go | 76 ++++++++++++++++++++++++
pkg/model/llmprovider.yaml | 38 ++++++++++++
pkg/pluginregistry/registry.go | 2 +-
pkg/server/cluster_manager.go | 49 +++++++++++++--
9 files changed, 188 insertions(+), 32 deletions(-)
diff --git a/go.mod b/go.mod
index 8cc62525..3c21c813 100644
--- a/go.mod
+++ b/go.mod
@@ -16,7 +16,7 @@ require (
github.com/creasty/defaults v1.5.2
github.com/dubbo-go-pixiu/pixiu-api v0.1.6-0.20220612115254-d9a176b25b99
github.com/dubbogo/go-zookeeper v1.0.4-0.20211212162352-f9d2183d89d5
- github.com/dubbogo/gost v1.14.0
+ github.com/dubbogo/gost v1.14.1
github.com/dubbogo/grpc-go v1.42.10
github.com/dubbogo/triple v1.2.2-rc3
github.com/envoyproxy/go-control-plane
v0.11.1-0.20230524094728-9239064ad72f
@@ -57,8 +57,7 @@ require (
)
require (
- cloud.google.com/go/compute v1.19.1 // indirect
- cloud.google.com/go/compute/metadata v0.2.3 // indirect
+ cloud.google.com/go/compute/metadata v0.3.0 // indirect
github.com/RoaringBitmap/roaring v1.2.3 // indirect
github.com/Workiva/go-datastructures v1.0.52 // indirect
github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5 //
indirect
@@ -143,6 +142,7 @@ require (
github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 //
indirect
github.com/robfig/cron/v3 v3.0.1 // indirect
github.com/shirou/gopsutil/v3 v3.22.2 // indirect
+ github.com/sirupsen/logrus v1.8.1 // indirect
github.com/smartystreets/assertions v1.2.0 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
@@ -162,13 +162,12 @@ require (
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/multierr v1.8.0 // indirect
- golang.org/x/arch v0.11.0 // indirect
- golang.org/x/oauth2 v0.7.0 // indirect
- golang.org/x/sync v0.13.0 // indirect
- golang.org/x/sys v0.32.0 // indirect
- golang.org/x/text v0.24.0 // indirect
- golang.org/x/time v0.1.0 // indirect
- google.golang.org/appengine v1.6.7 // indirect
+ golang.org/x/arch v0.17.0 // indirect
+ golang.org/x/oauth2 v0.30.0 // indirect
+ golang.org/x/sync v0.14.0 // indirect
+ golang.org/x/sys v0.33.0 // indirect
+ golang.org/x/text v0.25.0 // indirect
+ golang.org/x/time v0.11.0 // indirect
google.golang.org/genproto v0.0.0-20230410155749-daa745c078e1 //
indirect
gopkg.in/ini.v1 v1.66.2 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
diff --git a/go.sum b/go.sum
index 0260750d..e84f14e6 100644
--- a/go.sum
+++ b/go.sum
@@ -115,13 +115,12 @@ cloud.google.com/go/compute v1.12.0/go.mod
h1:e8yNOBcBONZU1vJKCvCoDw/4JQsA0dpM4x
cloud.google.com/go/compute v1.12.1/go.mod
h1:e8yNOBcBONZU1vJKCvCoDw/4JQsA0dpM4x/6PIIOocU=
cloud.google.com/go/compute v1.13.0/go.mod
h1:5aPTS0cUNMIc1CE546K+Th6weJUNQErARyZtRXDJ8GE=
cloud.google.com/go/compute v1.14.0/go.mod
h1:YfLtxrj9sU4Yxv+sXzZkyPjEyPBZfXHUvjxega5vAdo=
-cloud.google.com/go/compute v1.19.1
h1:am86mquDUgjGNWxiGn+5PGLbmgiWXlE/yNWpIpNvuXY=
-cloud.google.com/go/compute v1.19.1/go.mod
h1:6ylj3a05WF8leseCdIf77NK0g1ey+nj5IKd5/kvShxE=
cloud.google.com/go/compute/metadata v0.1.0/go.mod
h1:Z1VN+bulIf6bt4P/C37K4DyZYZEXYonfTBHHFPO/4UU=
cloud.google.com/go/compute/metadata v0.2.0/go.mod
h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
cloud.google.com/go/compute/metadata v0.2.1/go.mod
h1:jgHgmJd2RKBGzXqF5LR2EZMGxBkeanZ9wwa75XHJgOM=
-cloud.google.com/go/compute/metadata v0.2.3
h1:mg4jlk7mCAj6xXp9UJ4fjI9VUI5rubuGBW5aJ7UnBMY=
cloud.google.com/go/compute/metadata v0.2.3/go.mod
h1:VAV5nSsACxMJvgaAuX6Pk2AawlZn8kiOGuCv6gTkwuA=
+cloud.google.com/go/compute/metadata v0.3.0
h1:Tz+eQXMEqDIKRsmY3cHTL6FVaynIjX2QxYC4trgAKZc=
+cloud.google.com/go/compute/metadata v0.3.0/go.mod
h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k=
cloud.google.com/go/contactcenterinsights v1.3.0/go.mod
h1:Eu2oemoePuEFc/xKFPjbTuPSj0fYJcPls9TFlPNnHHY=
cloud.google.com/go/contactcenterinsights v1.4.0/go.mod
h1:L2YzkGbPsv+vMQMCADxJoT9YiTTnSEd6fEvCeHTYVck=
cloud.google.com/go/container v1.6.0/go.mod
h1:Xazp7GjJSeUYo688S+6J5V+n/t+G5sKBTFkKNudGRxg=
@@ -550,8 +549,9 @@ github.com/dubbogo/gost v1.9.0/go.mod
h1:pPTjVyoJan3aPxBPNUX0ADkXjPibLo+/Ib0/fAD
github.com/dubbogo/gost v1.11.18/go.mod
h1:vIcP9rqz2KsXHPjsAwIUtfJIJjppQLQDcYaZTy/61jI=
github.com/dubbogo/gost v1.12.6-0.20220824084206-300e27e9e524/go.mod
h1:0YHTGJtjHiYlWtVEkZnyrvhr7rR+23GczNaJrgc2v38=
github.com/dubbogo/gost v1.13.1/go.mod
h1:9HMXBv+WBMRWhF3SklpqDjkS/01AKWm2SrVdz/A0xJI=
-github.com/dubbogo/gost v1.14.0 h1:yc5YfozvUBAChAox8H7CkmHb6/TvF6cKdqZNJNv2jdE=
github.com/dubbogo/gost v1.14.0/go.mod
h1:YP28JweR+hhJdikP3bZ3bVKUWWI313xX1rgLaEE0FvQ=
+github.com/dubbogo/gost v1.14.1 h1:veA+5FqwUHvqaB5OPvsWz4iDkGGpyL4lig2rSOZANEs=
+github.com/dubbogo/gost v1.14.1/go.mod
h1:m3VJVqCjQ87SwYRrxN2s4T33cOCDrMVbRh4+MAUitnQ=
github.com/dubbogo/grpc-go v1.42.9/go.mod
h1:F1T9hnUvYGW4JLK1QNriavpOkhusU677ovPzLkk6zHM=
github.com/dubbogo/grpc-go v1.42.10
h1:CoyCdtqKJEar/3rPa6peZbDqYZ/mVsCqAxB6TfTSkhQ=
github.com/dubbogo/grpc-go v1.42.10/go.mod
h1:JMkPt1mIHL96GAFeYsMoMjew6f1ROKycikGzZQH1s5U=
@@ -1401,8 +1401,8 @@ go.uber.org/zap v1.17.0/go.mod
h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo=
go.uber.org/zap v1.21.0 h1:WefMeulhovoZ2sYXz7st6K0sLj7bBhpiFaud4r4zST8=
go.uber.org/zap v1.21.0/go.mod h1:wjWOCqI0f2ZZrJF/UufIOkiC8ii6tm1iqIsLo76RfJw=
golang.org/x/arch v0.0.0-20200826200359-b19915210f00/go.mod
h1:flIaEI6LNU6xOCD5PaJvn9wGP0agmIOqjrtsKGRguv4=
-golang.org/x/arch v0.11.0 h1:KXV8WWKCXm6tRpLirl2szsO5j/oOODwZf4hATmGVNs4=
-golang.org/x/arch v0.11.0/go.mod
h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys=
+golang.org/x/arch v0.17.0 h1:4O3dfLzd+lQewptAHqjewQZQDyEdejz3VwgeYwkZneU=
+golang.org/x/arch v0.17.0/go.mod
h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
golang.org/x/crypto v0.0.0-20180807104621-f027049dab0a/go.mod
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20181029021203-45a5f77698d3/go.mod
h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
@@ -1571,8 +1571,8 @@ golang.org/x/oauth2
v0.0.0-20220909003341-f21342109be1/go.mod h1:h4gKUeWbJ4rQPri
golang.org/x/oauth2 v0.0.0-20221006150949-b44042a4b9c1/go.mod
h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg=
golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783/go.mod
h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg=
golang.org/x/oauth2 v0.6.0/go.mod
h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw=
-golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
-golang.org/x/oauth2 v0.7.0/go.mod
h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
+golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI=
+golang.org/x/oauth2 v0.30.0/go.mod
h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -1588,8 +1588,8 @@ golang.org/x/sync
v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220929204114-8fcdb60fdcc0/go.mod
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.13.0 h1:AauUjRAJ9OSnvULf/ARrrVywoJDy0YS2AwQ98I37610=
-golang.org/x/sync v0.13.0/go.mod
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ=
+golang.org/x/sync v0.14.0/go.mod
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.0.0-20180807162357-acbc56fc7007/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1702,8 +1702,8 @@ golang.org/x/sys v0.2.0/go.mod
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
-golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
+golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
+golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod
h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.2.0/go.mod h1:TVmDHMZPmdnySmBfhjOoOdhjzdE1h4u1VwSiw2l1Nuc=
@@ -1725,8 +1725,8 @@ golang.org/x/text v0.4.0/go.mod
h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.5.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
-golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0=
-golang.org/x/text v0.24.0/go.mod
h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU=
+golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4=
+golang.org/x/text v0.25.0/go.mod
h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -1735,8 +1735,9 @@ golang.org/x/time
v0.0.0-20200630173020-3af7569d3a1e/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20220722155302-e5dcc9cfc0b9/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20220922220347-f3bd1da661af/go.mod
h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
-golang.org/x/time v0.1.0 h1:xYY+Bajn2a7VBmTM5GikTmnK8ZuX8YgnQCqZpbBNtmA=
golang.org/x/time v0.1.0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
+golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
+golang.org/x/time v0.11.0/go.mod
h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
@@ -1885,7 +1886,6 @@ google.golang.org/appengine v1.5.0/go.mod
h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/appengine v1.6.1/go.mod
h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5/go.mod
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.6/go.mod
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
-google.golang.org/appengine v1.6.7
h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
google.golang.org/appengine v1.6.7/go.mod
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20170818010345-ee236bd376b0/go.mod
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod
h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
diff --git a/pkg/filter/tokenizer/tokenizer.go
b/pkg/filter/llm/tokenizer/tokenizer.go
similarity index 100%
rename from pkg/filter/tokenizer/tokenizer.go
rename to pkg/filter/llm/tokenizer/tokenizer.go
diff --git a/pkg/filter/tokenizer/tokenizer_test.go
b/pkg/filter/llm/tokenizer/tokenizer_test.go
similarity index 100%
rename from pkg/filter/tokenizer/tokenizer_test.go
rename to pkg/filter/llm/tokenizer/tokenizer_test.go
diff --git a/pkg/model/cluster.go b/pkg/model/cluster.go
index 4fd66ba9..33be18a8 100644
--- a/pkg/model/cluster.go
+++ b/pkg/model/cluster.go
@@ -90,10 +90,12 @@ type (
// Endpoint
Endpoint struct {
ID string `yaml:"ID" json:"ID"`
// ID indicate one endpoint
- Name string `yaml:"name" json:"name"`
// Name the cluster unique name
+ Name string `yaml:"name" json:"name"`
// Name the endpoint unique name
Address SocketAddress `yaml:"socket_address"
json:"socket_address" mapstructure:"socket_address"` // Address socket address
Metadata map[string]string `yaml:"meta" json:"meta"`
// Metadata extra info such as label or
other meta data
UnHealthy bool
+
+ LLMMeta *LLMMeta `yaml:"llm_meta" json:"llm_meta"` // LLMMeta
extra info such as label or other meta data
}
// ConsistentHash methods include: RingHash, MaglevHash
diff --git a/pkg/model/llm.go b/pkg/model/llm.go
new file mode 100644
index 00000000..d74b1250
--- /dev/null
+++ b/pkg/model/llm.go
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package model
+
+import (
+ "sync"
+)
+
+import (
+ perrors "github.com/pkg/errors"
+)
+
+import (
+ "github.com/apache/dubbo-go-pixiu/pkg/common/yaml"
+)
+
+type (
+ // LLMMeta LLM metadata for llm call
+ LLMMeta struct {
+ Provider string `yaml:"provider" json:"provider"`
// Provider the cluster unique name
+ APIKeys []LLMAPIKey `yaml:"api_keys" json:"api_keys"
mapstructure:"api_keys"` // APIKey the cluster unique name
+ }
+
+ LLMAPIKey struct {
+ Name string `yaml:"name" json:"name"` // Name of the api key
+ Key string `yaml:"key" json:"key"` // Real Key
+ }
+
+ LLMProviderDomains struct {
+ Providers map[string]LLMProvider `yaml:"providers"
mapstructure:"providers"`
+ }
+
+ LLMProvider struct {
+ Name string `yaml:"name" json:"name"` //
provider' name
+ Description string `yaml:"description"
json:"description"`
+ BaseUrl string `yaml:"base_url" json:"base_url"`
// Target domain
+ Endpoints map[string]string `yaml:"endpoints"
json:"endpoints" mapstructure:"endpoints"` // Endpoints for the provider
+ }
+)
+
+var (
+ loadLLMProviderDomains sync.Once
+ domains *LLMProviderDomains
+ err error
+)
+
+// GetLLMProviderDomains get llm provider domains
+func GetLLMProviderDomains(id string) (*LLMProvider, error) {
+ loadLLMProviderDomains.Do(func() {
+ domains = &LLMProviderDomains{}
+ err = yaml.UnmarshalYMLConfig("pkg/model/llmprovider.yaml",
domains)
+ })
+ if err != nil {
+ return nil, perrors.Wrap(err, "failed to load llm provider
domains")
+ }
+
+ if p, ok := domains.Providers[id]; ok {
+ return &p, nil
+ }
+ return nil, perrors.Errorf("provider %s not found", id)
+}
diff --git a/pkg/model/llmprovider.yaml b/pkg/model/llmprovider.yaml
new file mode 100644
index 00000000..5b913b95
--- /dev/null
+++ b/pkg/model/llmprovider.yaml
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+providers:
+ deepseek:
+ name: DeepSeek
+ description: DeepSeek uses advanced llms to improve search accuracy and
relevance, seamlessly integrating AI into existing search systems for smarter
information retrieval across various fields.
+ base_url: api.deepseek.com
+ endpoints:
+ chatComplete: /chat/completions
+ openai:
+ name: OpenAI
+ description: OpenAI is an AI research and deployment company. Their
mission is to ensure that artificial general intelligence benefits all of
humanity.
+ base_url: api.openai.com/v1
+ endpoints:
+ chatComplete: /chat/completions
+ dashscope:
+ name: Dashscope
+ description: Alibaba Cloud's Tongyi Generative AI Platform (BaiLian)
offers official APIs for the Tongyi series of large models. This
high-performance, cost-effective model service helps you focus on business
innovation, freeing you from the burden of technical operations and maintenance.
+ base_url: dashscope.aliyuncs.com/compatible-mode/v1
+ endpoints:
+ chatComplete: /chat/completions
\ No newline at end of file
diff --git a/pkg/pluginregistry/registry.go b/pkg/pluginregistry/registry.go
index 1dd1d599..1ad0926e 100644
--- a/pkg/pluginregistry/registry.go
+++ b/pkg/pluginregistry/registry.go
@@ -39,6 +39,7 @@ import (
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/http/loadbalancer"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/http/proxyrewrite"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/http/remote"
+ _ "github.com/apache/dubbo-go-pixiu/pkg/filter/llm/tokenizer"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/metric"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/network/dubboproxy"
_
"github.com/apache/dubbo-go-pixiu/pkg/filter/network/dubboproxy/filter/http"
@@ -47,7 +48,6 @@ import (
_
"github.com/apache/dubbo-go-pixiu/pkg/filter/network/httpconnectionmanager"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/prometheus"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/sentinel/ratelimit"
- _ "github.com/apache/dubbo-go-pixiu/pkg/filter/tokenizer"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/tracing"
_ "github.com/apache/dubbo-go-pixiu/pkg/filter/traffic"
_ "github.com/apache/dubbo-go-pixiu/pkg/listener/http"
diff --git a/pkg/server/cluster_manager.go b/pkg/server/cluster_manager.go
index 8e33046f..acbbebf4 100644
--- a/pkg/server/cluster_manager.go
+++ b/pkg/server/cluster_manager.go
@@ -19,6 +19,7 @@ package server
import (
"fmt"
+ "strconv"
"sync"
"sync/atomic"
)
@@ -26,6 +27,7 @@ import (
import (
"github.com/apache/dubbo-go-pixiu/pkg/cluster"
"github.com/apache/dubbo-go-pixiu/pkg/cluster/loadbalancer"
+ "github.com/apache/dubbo-go-pixiu/pkg/common/constant"
"github.com/apache/dubbo-go-pixiu/pkg/common/yaml"
"github.com/apache/dubbo-go-pixiu/pkg/logger"
"github.com/apache/dubbo-go-pixiu/pkg/model"
@@ -33,7 +35,9 @@ import (
)
// generate cluster name for unnamed cluster
-var clusterIndex int32 = 1
+var (
+ clusterIndex int32 = 1
+)
type (
ClusterManager struct {
@@ -216,16 +220,53 @@ func (cm *ClusterManager) HasCluster(clusterName string)
bool {
func (s *ClusterStore) AddCluster(c *model.ClusterConfig) {
if c.Name == "" {
- index := atomic.AddInt32(&clusterIndex, 1)
- c.Name = fmt.Sprintf("cluster%d", index)
+ c.Name = fmt.Sprintf("cluster-%d", clusterIndex)
+ atomic.AddInt32(&clusterIndex, 1)
}
+
+ s.AssembleClusterEndpoints(c)
+
s.Config = append(s.Config, c)
s.clustersMap[c.Name] = cluster.NewCluster(c)
c.CreateConsistentHash()
}
-func (s *ClusterStore) UpdateCluster(new *model.ClusterConfig) {
+// AssembleClusterEndpoints assembles the cluster endpoints
+// by formatting the ID, name and domains for each endpoint
+// If endpoint.LLMMeta is not nil, the default name and domains are derived from
+// the LLM provider denoted in the endpoint LLMMeta.
+func (s *ClusterStore) AssembleClusterEndpoints(c *model.ClusterConfig) {
+ if c == nil {
+ return
+ }
+
+ for i, endpoint := range c.Endpoints {
+ // If the endpoint ID is not set, set it to the index + 1
+ if endpoint.ID == "" {
+ endpoint.ID = strconv.Itoa(i + 1)
+ }
+ // If the endpoint has no name, set a default name
+ if endpoint.Name == "" && endpoint.LLMMeta != nil {
+ endpoint.Name = fmt.Sprintf("endpoint-%d#%s", i+1,
endpoint.LLMMeta.Provider)
+ } else if endpoint.Name == "" && endpoint.LLMMeta == nil {
+ endpoint.Name = fmt.Sprintf("endpoint-%d", i+1)
+ }
+
+ // If the endpoint address and domain are not set, set them
based on the LLM provider.
+ // If the endpoint address or domain is set, do not modify them.
+ if endpoint.LLMMeta != nil && endpoint.Address.Address ==
constant.PprofDefaultAddress && endpoint.Address.Domains == nil {
+ domain, err :=
model.GetLLMProviderDomains(endpoint.LLMMeta.Provider)
+ if err != nil {
+ logger.Errorf("failed to get llm provider
domains, err: %v", err)
+ continue
+ }
+ endpoint.Address.Domains = []string{domain.BaseUrl}
+ }
+ }
+}
+
+func (s *ClusterStore) UpdateCluster(new *model.ClusterConfig) {
for i, c := range s.Config {
if c.Name == new.Name {
s.Config[i] = new