Skip to content

Commit aae13df

Browse files
authored
feat(ai-proxy): support token usage statistics (#6609)
1 parent 6068b45 commit aae13df

File tree

37 files changed

+2480
-19
lines changed

37 files changed

+2480
-19
lines changed
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
syntax = "proto3";
2+
3+
package erda.apps.aiproxy.usage;
4+
option go_package = "github.com/erda-project/erda-proto-go/apps/aiproxy/usage/token/pb";
5+
6+
import "google/api/annotations.proto";
7+
import "apps/aiproxy/metadata/metadata.proto";
8+
import "google/protobuf/timestamp.proto";
9+
import "github.com/envoyproxy/protoc-gen-validate/validate/validate.proto";
10+
import "common/http.proto";
11+
import "common/openapi.proto";
12+
13+
// TokenUsageService exposes token usage records of the ai-proxy over HTTP.
// It offers three RPCs: creating a record, paging through records, and
// aggregating records.
service TokenUsageService {
  // Create accepts a TokenUsageCreateRequest and returns the resulting
  // TokenUsage. Bound to POST /api/ai-proxy/usages/token-usage and flagged
  // `private: true` in the Erda OpenAPI options.
  rpc Create(TokenUsageCreateRequest) returns (TokenUsage) {
    option (google.api.http) = {
      post: "/api/ai-proxy/usages/token-usage"
    };
    option (erda.common.openapi) = {
      private: true
    };
  }

  // Paging returns a page of TokenUsage records selected by the filters in
  // TokenUsagePagingRequest. Bound to GET /api/ai-proxy/usages/token-usage.
  rpc Paging(TokenUsagePagingRequest) returns (TokenUsagePagingResponse) {
    option (google.api.http) = {
      get: "/api/ai-proxy/usages/token-usage"
    };
  }

  // Aggregate takes the same request message as Paging and returns summed
  // totals plus per-record details.
  // Bound to GET /api/ai-proxy/usages/token-usage/aggregate.
  rpc Aggregate(TokenUsagePagingRequest) returns (TokenUsageAggregateResponse) {
    option (google.api.http) = {
      get: "/api/ai-proxy/usages/token-usage/aggregate"
    };
  }
}
35+
36+
// TokenUsage is one token usage record, as returned by
// TokenUsageService.Create and listed by TokenUsageService.Paging.
message TokenUsage {
  // Identifier of this record.
  uint64 id = 1;

  // request correlation
  // Call identifier; validated to be 1 to 64 characters long.
  string callId = 2 [(validate.rules).string = {min_len: 1, max_len: 64}];
  // Request identifier; may be empty, otherwise 1 to 64 characters long.
  string xRequestId = 3 [(validate.rules).string = {ignore_empty: true, min_len: 1, max_len: 64}];

  // attribution
  // The id fields below are validated to be exactly 36 characters long
  // (presumably UUID strings; confirm against the producers).
  string clientId = 4 [(validate.rules).string = {len: 36}];
  // May be empty; when set it must be exactly 36 characters long.
  string clientTokenId = 5 [(validate.rules).string = {ignore_empty: true, len: 36}];
  string providerId = 6 [(validate.rules).string = {len: 36}];
  string modelId = 7 [(validate.rules).string = {len: 36}];

  google.protobuf.Timestamp createdAt = 8; // http request begin time
  google.protobuf.Timestamp updatedAt = 9;

  // usage
  uint64 inputTokens = 10;
  uint64 outputTokens = 11;
  uint64 totalTokens = 12;
  // Usage details carried as a string; the format is not defined in this file.
  string usageDetails = 13;
  // NOTE(review): presumably true when the counts were estimated rather than
  // reported by the upstream provider; confirm against the writer.
  bool isEstimated = 14;

  // Additional metadata attached to the record.
  metadata.Metadata metadata = 15;
}
61+
62+
// TokenUsageCreateRequest is the payload of TokenUsageService.Create.
message TokenUsageCreateRequest {
  // request correlation
  // Call identifier; validated to be 1 to 64 characters long.
  string callId = 1 [(validate.rules).string = {min_len: 1, max_len: 64}];
  // Request identifier; validated to be 1 to 64 characters long. There is no
  // ignore_empty here, so an empty value is rejected.
  string xRequestId = 2 [(validate.rules).string = {min_len: 1, max_len: 64}];

  // attribution
  // The id fields below are validated to be exactly 36 characters long
  // (presumably UUID strings; confirm against the callers).
  string clientId = 3 [(validate.rules).string = {len: 36}];
  // May be empty; when set it must be exactly 36 characters long.
  string clientTokenId = 4 [(validate.rules).string = {ignore_empty: true, len: 36}];
  string providerId = 5 [(validate.rules).string = {len: 36}];
  string modelId = 6 [(validate.rules).string = {len: 36}];

  google.protobuf.Timestamp createdAt = 7;

  // usage
  // NOTE(review): `gte: 0` can never fail on an unsigned field; the rules are
  // kept unchanged so the generated validators stay the same.
  uint64 inputTokens = 8 [(validate.rules).uint64 = {gte: 0}];
  uint64 outputTokens = 9 [(validate.rules).uint64 = {gte: 0}];
  uint64 totalTokens = 10 [(validate.rules).uint64 = {gte: 0}];
  // Usage details carried as a string; the format is not defined in this file.
  string usageDetails = 11;
  bool isEstimated = 12;

  // Additional metadata to attach to the record.
  metadata.Metadata metadata = 13;
}
85+
86+
// TokenUsagePagingRequest carries the paging parameters and filters used by
// both TokenUsageService.Paging and TokenUsageService.Aggregate.
// Every filter may be left empty.
message TokenUsagePagingRequest {
  // paging
  // Page number; when set it must be >= 1.
  int64 pageNum = 1 [(validate.rules).int64 = {ignore_empty: true, gte: 1}];
  // Page size; when set it must be within [1, 1000].
  int64 pageSize = 2 [(validate.rules).int64 = {ignore_empty: true, gte: 1, lte: 1000}];

  // request correlation
  // May be empty; otherwise 1 to 64 characters long.
  string callId = 3 [(validate.rules).string = {ignore_empty: true, min_len: 1, max_len: 64}];
  // At most 64 characters long; empty is allowed (min_len is 0).
  string xRequestId = 4 [(validate.rules).string = {min_len: 0, max_len: 64}];

  // attribution
  // Each id filter may be empty; when set it must be exactly 36 characters long.
  string clientId = 5 [(validate.rules).string = {ignore_empty: true, len: 36}];
  string clientTokenId = 6 [(validate.rules).string = {ignore_empty: true, len: 36}];
  string providerId = 7 [(validate.rules).string = {ignore_empty: true, len: 36}];
  string modelId = 8 [(validate.rules).string = {ignore_empty: true, len: 36}];

  // Record ids (presumably matched against TokenUsage.id; confirm in the
  // handler).
  repeated uint64 ids = 9;
  // Declared `optional`, so an unset filter is distinguishable from an
  // explicit false.
  optional bool isEstimated = 10;

  // time range
  // unix milliseconds, used for http query parsing convenience
  // NOTE(review): `gte: 0` can never fail on an unsigned field.
  uint64 timeRangeAfterMs = 11 [(validate.rules).uint64 = {ignore_empty: true, gte: 0}];
  uint64 timeRangeBeforeMs = 12 [(validate.rules).uint64 = {ignore_empty: true, gte: 0}];
}
109+
110+
// TokenUsagePagingResponse is the result of TokenUsageService.Paging.
message TokenUsagePagingResponse {
  // Record count (presumably the number of records matching the filters, not
  // just the size of this page; confirm in the handler).
  uint64 total = 1;
  // The records returned for this request.
  repeated TokenUsage list = 2;
}
114+
115+
// TokenUsageAggregateResponse is the result of TokenUsageService.Aggregate:
// overall totals plus a list of per-record details.
message TokenUsageAggregateResponse {
  // Total cost, expressed in `currency`.
  // NOTE(review): a double is subject to floating-point rounding; treat the
  // value as approximate unless the producer guarantees otherwise.
  double totalCost = 1;
  // Currency of totalCost; the code set (e.g. ISO 4217) is not defined in
  // this file.
  string currency = 2;
  uint64 totalInputTokens = 3;
  uint64 totalOutputTokens = 4;
  uint64 totalTokens = 5;
  // Number of records (presumably those included in the aggregation;
  // confirm in the handler).
  uint64 recordCount = 6;
  // Per-record breakdown.
  repeated TokenUsageDetail details = 7;
}
124+
125+
// TokenUsageDetail is one entry of TokenUsageAggregateResponse.details.
message TokenUsageDetail {
  // Cost of this entry, expressed in `currency`.
  double cost = 1;
  string currency = 2;
  // usage
  // Record identifier (presumably the TokenUsage.id this entry was computed
  // from; confirm in the handler).
  uint64 recordId = 3;
  uint64 inputTokens = 4;
  uint64 outputTokens = 5;
  uint64 totalTokens = 6;
  // Model identifier; validated to be exactly 36 characters long.
  string modelId = 7 [(validate.rules).string = {len: 36}];
}

cmd/ai-proxy/conf/routes/openai-compatible/chat-completions.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ routes:
77
- name: context
88
- name: context-chat
99
- name: extra-body
10+
- name: force-stream-usage
1011
- name: thinking-handler
1112
- name: openai-compatible-director
1213
- name: anthropic-compatible-director

cmd/ai-proxy/conf/routes/openai-compatible/responses.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ routes:
66
- name: auth
77
- name: context
88
- name: context-responses
9+
- name: force-stream-usage
910
- name: thinking-handler
1011
- name: openai-compatible-director
1112
- name: set-response-chunk-splitter

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ require (
4040
github.com/cespare/xxhash v1.1.0
4141
github.com/cespare/xxhash/v2 v2.3.0
4242
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
43-
github.com/dlclark/regexp2 v1.4.0
43+
github.com/dlclark/regexp2 v1.11.5
4444
github.com/docker/docker v25.0.3+incompatible
4545
github.com/docker/spdystream v0.2.0
4646
github.com/doug-martin/goqu/v9 v9.18.0
@@ -428,6 +428,7 @@ require (
428428
github.com/syndtr/goleveldb v1.0.1-0.20190625010220-02440ea7a285 // indirect
429429
github.com/tealeg/xlsx v1.0.5 // indirect
430430
github.com/tebeka/strftime v0.1.5 // indirect
431+
github.com/tiktoken-go/tokenizer v0.7.0
431432
github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2 // indirect
432433
github.com/tjfoc/gmsm v1.3.2 // indirect
433434
github.com/tklauser/go-sysconf v0.3.12 // indirect

go.sum

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,8 +1164,8 @@ github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2 h1:aB
11641164
github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2/go.mod h1:WHNsWjnIn2V1LYOrME7e8KxSeKunYHsxEm4am0BUtcI=
11651165
github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0=
11661166
github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
1167-
github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
1168-
github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
1167+
github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
1168+
github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
11691169
github.com/docker/cli v25.0.1+incompatible h1:mFpqnrS6Hsm3v1k7Wa/BO23oz0k121MTbTO1lpcGSkU=
11701170
github.com/docker/cli v25.0.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
11711171
github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
@@ -2635,6 +2635,8 @@ github.com/tidwall/rtree v0.0.0-20180113144539-6cd427091e0e h1:+NL1GDIUOKxVfbp2K
26352635
github.com/tidwall/rtree v0.0.0-20180113144539-6cd427091e0e/go.mod h1:/h+UnNGt0IhNNJLkGikcdcJqm66zGD/uJGMRxK/9+Ao=
26362636
github.com/tidwall/tinyqueue v0.0.0-20180302190814-1e39f5511563 h1:Otn9S136ELckZ3KKDyCkxapfufrqDqwmGjcHfAyXRrE=
26372637
github.com/tidwall/tinyqueue v0.0.0-20180302190814-1e39f5511563/go.mod h1:mLqSmt7Dv/CNneF2wfcChfN1rvapyQr01LGKnKex0DQ=
2638+
github.com/tiktoken-go/tokenizer v0.7.0 h1:VMu6MPT0bXFDHr7UPh9uii7CNItVt3X9K90omxL54vw=
2639+
github.com/tiktoken-go/tokenizer v0.7.0/go.mod h1:6UCYI/DtOallbmL7sSy30p6YQv60qNyU/4aVigPOx6w=
26382640
github.com/tikv/pd v1.0.8/go.mod h1:v6C/D7ONC49SgjI4jbGnooSizvijaO/bdIm62DVR4tI=
26392641
github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2 h1:cC5v/gn9NdcmAlpBrWI5x3MiYmQcW2k7EHccg8837p4=
26402642
github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2/go.mod h1:6OYi62ks7nFIBtWWpOjnngr5LNos4Hvi1BzArCWAlBc=

internal/apps/ai-proxy/common/auth/akutil/ak.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ func GetClientInfo(ak string, dao dao.DAO) (*clienttokenpb.ClientToken, *clientp
122122
return nil, client, nil
123123
}
124124

125-
func AutoCheckAndSetClientId(clientId string, req any, skipSet bool) error {
125+
func AutoCheckAndSetClientInfo(clientId string, clientTokenId string, req any, skipSet bool) error {
126126
// use reflect to set req's clientId field if have
127127
clientIdField := reflect.ValueOf(req).Elem().FieldByName("ClientId")
128128
if clientIdField != (reflect.Value{}) {
@@ -135,5 +135,19 @@ func AutoCheckAndSetClientId(clientId string, req any, skipSet bool) error {
135135
clientIdField.SetString(clientId)
136136
}
137137
}
138+
// check client token id
139+
if clientTokenId != "" {
140+
clientTokenIdField := reflect.ValueOf(req).Elem().FieldByName("ClientTokenId")
141+
if clientTokenIdField != (reflect.Value{}) {
142+
// compare if ClientTokenId already exists
143+
currentClientTokenId := clientTokenIdField.String()
144+
if currentClientTokenId != "" && currentClientTokenId != clientTokenId {
145+
return handlers.ErrTokenNotMatch
146+
}
147+
if !skipSet {
148+
clientTokenIdField.SetString(clientTokenId)
149+
}
150+
}
151+
}
138152
return nil
139153
}
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Copyright (c) 2021 Terminus, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package akutil
16+
17+
import (
18+
"errors"
19+
"testing"
20+
21+
"github.com/erda-project/erda/internal/apps/ai-proxy/handlers"
22+
)
23+
24+
// mockRequest is a minimal request stand-in for the tests in this file. It
// exposes the two exported string fields, ClientId and ClientTokenId, that the
// tests pre-populate and read back around AutoCheckAndSetClientInfo calls.
type mockRequest struct {
	ClientId      string
	ClientTokenId string
}
28+
29+
func TestAutoCheckAndSetClientInfoSetsFields(t *testing.T) {
30+
req := &mockRequest{}
31+
32+
clientID := "client-id"
33+
tokenID := "token-id"
34+
35+
if err := AutoCheckAndSetClientInfo(clientID, tokenID, req, false); err != nil {
36+
t.Fatalf("AutoCheckAndSetClientInfo returned error: %v", err)
37+
}
38+
if req.ClientId != clientID {
39+
t.Fatalf("expected ClientId to be set to %q, got %q", clientID, req.ClientId)
40+
}
41+
if req.ClientTokenId != tokenID {
42+
t.Fatalf("expected ClientTokenId to be set to %q, got %q", tokenID, req.ClientTokenId)
43+
}
44+
}
45+
46+
func TestAutoCheckAndSetClientInfoSkipSet(t *testing.T) {
47+
req := &mockRequest{}
48+
49+
if err := AutoCheckAndSetClientInfo("client-id", "token-id", req, true); err != nil {
50+
t.Fatalf("AutoCheckAndSetClientInfo returned error: %v", err)
51+
}
52+
if req.ClientId != "" {
53+
t.Fatalf("expected ClientId to remain empty when skipSet is true, got %q", req.ClientId)
54+
}
55+
if req.ClientTokenId != "" {
56+
t.Fatalf("expected ClientTokenId to remain empty when skipSet is true, got %q", req.ClientTokenId)
57+
}
58+
}
59+
60+
func TestAutoCheckAndSetClientInfoClientMismatch(t *testing.T) {
61+
req := &mockRequest{ClientId: "other"}
62+
63+
err := AutoCheckAndSetClientInfo("client-id", "", req, false)
64+
if !errors.Is(err, handlers.ErrAkNotMatch) {
65+
t.Fatalf("expected ErrAkNotMatch, got %v", err)
66+
}
67+
}
68+
69+
func TestAutoCheckAndSetClientInfoTokenMismatch(t *testing.T) {
70+
req := &mockRequest{ClientId: "client-id", ClientTokenId: "other-token"}
71+
72+
err := AutoCheckAndSetClientInfo("client-id", "token-id", req, false)
73+
if !errors.Is(err, handlers.ErrTokenNotMatch) {
74+
t.Fatalf("expected ErrTokenNotMatch, got %v", err)
75+
}
76+
}

internal/apps/ai-proxy/common/ctxhelper/keys.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package ctxhelper
1818

1919
import (
2020
"net/http"
21+
"time"
2122

2223
"github.com/erda-project/erda-infra/base/logs"
2324
clientpb "github.com/erda-project/erda-proto-go/apps/aiproxy/client/pb"
@@ -68,6 +69,9 @@ type (
6869
mapKeyReverseProxyRequestRewriteError struct{ *ReverseProxyFilterError }
6970
mapKeyReverseProxyResponseModifyError struct{ *ReverseProxyFilterError }
7071
mapKeyReverseProxyRequestInSnapshot struct{ *http.Request }
72+
mapKeyReverseProxyRequestOutSnapshot struct{ *http.Request }
73+
74+
mapKeyReverseProxyWholeHandledResponseBodyStr struct{ string }
7175

7276
// Keys for migrated context keys
7377
mapKeyDBClient struct{ dao.DAO }
@@ -82,6 +86,8 @@ type (
8286

8387
mapKeyRequestBodyTransformChanges struct{ any }
8488
mapKeyRequestThinkingTransformChanges struct{ any }
89+
90+
mapKeyRequestBeginAt struct{ time.Time }
8591
)
8692

8793
// KeysWithCustomMustGet defines keys with custom MustGet implementations (should not generate default MustGet)

0 commit comments

Comments
 (0)