erda-project
diff --git a/‎api/proto/apps/aiproxy/usage/token/usage.proto‎
Lines changed: 134 additions & 0 deletions b/‎api/proto/apps/aiproxy/usage/token/usage.proto‎
Lines changed: 134 additions & 0 deletions
diff --git a/‎cmd/ai-proxy/conf/routes/openai-compatible/chat-completions.yaml‎
Lines changed: 1 addition & 0 deletions b/‎cmd/ai-proxy/conf/routes/openai-compatible/chat-completions.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎cmd/ai-proxy/conf/routes/openai-compatible/responses.yaml‎
Lines changed: 1 addition & 0 deletions b/‎cmd/ai-proxy/conf/routes/openai-compatible/responses.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎go.mod‎
Lines changed: 2 additions & 1 deletion b/‎go.mod‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎go.sum‎
Lines changed: 4 additions & 2 deletions b/‎go.sum‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎internal/apps/ai-proxy/common/auth/akutil/ak.go‎
Lines changed: 15 additions & 1 deletion b/‎internal/apps/ai-proxy/common/auth/akutil/ak.go‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎internal/apps/ai-proxy/common/auth/akutil/ak_test.go‎
Lines changed: 76 additions & 0 deletions b/‎internal/apps/ai-proxy/common/auth/akutil/ak_test.go‎
Lines changed: 76 additions & 0 deletions
diff --git a/‎internal/apps/ai-proxy/common/ctxhelper/keys.go‎
Lines changed: 6 additions & 0 deletions b/‎internal/apps/ai-proxy/common/ctxhelper/keys.go‎
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,134 @@
+syntax = "proto3";
+
+package erda.apps.aiproxy.usage;
+option go_package = "github.com/erda-project/erda-proto-go/apps/aiproxy/usage/token/pb";
+
+import "google/api/annotations.proto";
+import "apps/aiproxy/metadata/metadata.proto";
+import "google/protobuf/timestamp.proto";
+import "github.com/envoyproxy/protoc-gen-validate/validate/validate.proto";
+import "common/http.proto";
+import "common/openapi.proto";
+
+service TokenUsageService { // token-usage record API; every route lives under /api/ai-proxy/usages/token-usage
+    rpc Create(TokenUsageCreateRequest) returns (TokenUsage) { // POST; the returned TokenUsage additionally carries id and updatedAt
+        option(google.api.http) = {
+            post: "/api/ai-proxy/usages/token-usage"
+        };
+        option (erda.common.openapi) = {
+            private: true
+        }; // NOTE(review): presumably keeps Create off the public OpenAPI surface — confirm against common/openapi.proto
+    }
+
+    rpc Paging(TokenUsagePagingRequest) returns (TokenUsagePagingResponse) { // GET with no body, so the request fields are supplied as query parameters
+        option(google.api.http) = {
+            get: "/api/ai-proxy/usages/token-usage"
+        };
+    }
+
+    rpc Aggregate(TokenUsagePagingRequest) returns (TokenUsageAggregateResponse) { // GET; reuses the Paging request message but returns totals and a TokenUsageDetail list
+        option(google.api.http) = {
+            get: "/api/ai-proxy/usages/token-usage/aggregate"
+        };
+    }
+}
+
+message TokenUsage { // one token-usage record; returned by Create and listed in TokenUsagePagingResponse
+    uint64 id = 1;
+
+    // request correlation
+    string callId = 2 [(validate.rules).string = {min_len: 1, max_len: 64}]; // required, 1-64 characters
+    string xRequestId = 3 [(validate.rules).string = {ignore_empty: true, min_len: 1, max_len: 64}]; // optional; at most 64 characters when set
+
+    // attribution
+    string clientId = 4 [(validate.rules).string = {len: 36}]; // required, exactly 36 characters (presumably a UUID string — confirm)
+    string clientTokenId = 5 [(validate.rules).string = {ignore_empty: true, len: 36}]; // optional; exactly 36 characters when set
+    string providerId = 6 [(validate.rules).string = {len: 36}]; // required, exactly 36 characters
+    string modelId = 7 [(validate.rules).string = {len: 36}]; // required, exactly 36 characters
+
+    google.protobuf.Timestamp createdAt = 8; // http request begin time
+    google.protobuf.Timestamp updatedAt = 9;
+
+    // usage
+    uint64 inputTokens = 10;
+    uint64 outputTokens = 11;
+    uint64 totalTokens = 12; // NOTE(review): presumably inputTokens + outputTokens; nothing here enforces it — confirm
+    string usageDetails = 13; // NOTE(review): free-form string whose format is not visible here — confirm with the producer
+    bool isEstimated = 14; // NOTE(review): presumably true when the counts were estimated rather than reported by the provider — confirm
+
+    metadata.Metadata metadata = 15;
+}
+
+// TokenUsageCreateRequest is the input of TokenUsageService.Create.
+// It carries the same fields as TokenUsage except id and updatedAt.
+message TokenUsageCreateRequest {
+    // request correlation
+    // both are required here: 1-64 characters, no ignore_empty
+    string callId = 1 [(validate.rules).string = {min_len: 1, max_len: 64}];
+    string xRequestId = 2 [(validate.rules).string = {min_len: 1, max_len: 64}];
+
+    // attribution
+    // every id must be exactly 36 characters; only clientTokenId may be left empty
+    string clientId = 3 [(validate.rules).string = {len: 36}];
+    string clientTokenId = 4 [(validate.rules).string = {ignore_empty: true, len: 36}];
+    string providerId = 5 [(validate.rules).string = {len: 36}];
+    string modelId = 6 [(validate.rules).string = {len: 36}];
+
+    // NOTE(review): presumably the http request begin time, as on TokenUsage.createdAt — confirm
+    google.protobuf.Timestamp createdAt = 7;
+
+    // usage
+    // uint64 can never be negative, so the counters carry no `gte: 0` rule
+    uint64 inputTokens = 8;
+    uint64 outputTokens = 9;
+    uint64 totalTokens = 10;
+    string usageDetails = 11;
+    bool isEstimated = 12;
+
+    metadata.Metadata metadata = 13;
+}
+
+// TokenUsagePagingRequest selects token-usage records. It is the request of
+// both TokenUsageService.Paging and TokenUsageService.Aggregate, and every
+// field may be left empty.
+message TokenUsagePagingRequest {
+    // paging
+    // when set: pageNum >= 1 and 1 <= pageSize <= 1000
+    // NOTE(review): the defaults used when these are 0 are chosen by the server — confirm
+    int64 pageNum = 1 [(validate.rules).int64 = {ignore_empty: true, gte: 1}];
+    int64 pageSize = 2 [(validate.rules).int64 = {ignore_empty: true, gte: 1, lte: 1000}];
+
+    // request correlation
+    // optional filters; at most 64 characters when set
+    string callId = 3 [(validate.rules).string = {ignore_empty: true, min_len: 1, max_len: 64}];
+    string xRequestId = 4 [(validate.rules).string = {ignore_empty: true, min_len: 1, max_len: 64}];
+
+    // attribution
+    // optional filters; exactly 36 characters when set
+    string clientId = 5 [(validate.rules).string = {ignore_empty: true, len: 36}];
+    string clientTokenId = 6 [(validate.rules).string = {ignore_empty: true, len: 36}];
+    string providerId = 7 [(validate.rules).string = {ignore_empty: true, len: 36}];
+    string modelId = 8 [(validate.rules).string = {ignore_empty: true, len: 36}];
+
+    repeated uint64 ids = 9;
+    // declared optional so that "not set" can be told apart from false
+    optional bool isEstimated = 10;
+
+    // time range
+    // unix milliseconds, used for http query parsing convenience
+    // uint64 can never be negative, so no `gte: 0` rule is attached
+    uint64 timeRangeAfterMs = 11;
+    uint64 timeRangeBeforeMs = 12;
+}
+
+message TokenUsagePagingResponse { // response of TokenUsageService.Paging
+    uint64 total = 1; // NOTE(review): presumably the number of records matching the filters, not just those in list — confirm
+    repeated TokenUsage list = 2; // the returned records
+}
+
+message TokenUsageAggregateResponse { // response of TokenUsageService.Aggregate
+    double totalCost = 1; // floating-point amount, expressed in currency
+    string currency = 2; // NOTE(review): code format (e.g. ISO 4217) is not visible here — confirm
+    uint64 totalInputTokens = 3;
+    uint64 totalOutputTokens = 4;
+    uint64 totalTokens = 5;
+    uint64 recordCount = 6; // NOTE(review): presumably the number of usage records aggregated — confirm
+    repeated TokenUsageDetail details = 7; // itemized entries; see TokenUsageDetail
+}
+
+message TokenUsageDetail { // one itemized entry of TokenUsageAggregateResponse.details
+    double cost = 1; // floating-point amount, expressed in currency
+    string currency = 2;
+    // usage
+    uint64 recordId = 3; // NOTE(review): presumably the TokenUsage.id this entry was computed from — confirm
+    uint64 inputTokens = 4;
+    uint64 outputTokens = 5;
+    uint64 totalTokens = 6;
+    string modelId = 7 [(validate.rules).string = {len: 36}]; // exactly 36 characters, same rule as TokenUsage.modelId
+}
@@ -7,6 +7,7 @@ routes:
       - name: context
       - name: context-chat
       - name: extra-body
+      - name: force-stream-usage
       - name: thinking-handler
       - name: openai-compatible-director
       - name: anthropic-compatible-director
 
@@ -6,6 +6,7 @@ routes:
       - name: auth
       - name: context
       - name: context-responses
+      - name: force-stream-usage
       - name: thinking-handler
       - name: openai-compatible-director
       - name: set-response-chunk-splitter
 
@@ -40,7 +40,7 @@ require (
 	github.com/cespare/xxhash v1.1.0
 	github.com/cespare/xxhash/v2 v2.3.0
 	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
-	github.com/dlclark/regexp2 v1.4.0
+	github.com/dlclark/regexp2 v1.11.5
 	github.com/docker/docker v25.0.3+incompatible
 	github.com/docker/spdystream v0.2.0
 	github.com/doug-martin/goqu/v9 v9.18.0
@@ -428,6 +428,7 @@ require (
 	github.com/syndtr/goleveldb v1.0.1-0.20190625010220-02440ea7a285 // indirect
 	github.com/tealeg/xlsx v1.0.5 // indirect
 	github.com/tebeka/strftime v0.1.5 // indirect
+	github.com/tiktoken-go/tokenizer v0.7.0
 	github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2 // indirect
 	github.com/tjfoc/gmsm v1.3.2 // indirect
 	github.com/tklauser/go-sysconf v0.3.12 // indirect
 
@@ -1164,8 +1164,8 @@ github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2 h1:aB
 github.com/distribution/distribution/v3 v3.0.0-20221208165359-362910506bc2/go.mod h1:WHNsWjnIn2V1LYOrME7e8KxSeKunYHsxEm4am0BUtcI=
 github.com/distribution/reference v0.5.0 h1:/FUIFXtfc/x2gpa5/VGfiGLuOIdYa1t65IKK2OFGvA0=
 github.com/distribution/reference v0.5.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
-github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=
-github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
+github.com/dlclark/regexp2 v1.11.5 h1:Q/sSnsKerHeCkc/jSTNq1oCm7KiVgUMZRDUoRu0JQZQ=
+github.com/dlclark/regexp2 v1.11.5/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
 github.com/docker/cli v25.0.1+incompatible h1:mFpqnrS6Hsm3v1k7Wa/BO23oz0k121MTbTO1lpcGSkU=
 github.com/docker/cli v25.0.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
 github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk=
@@ -2635,6 +2635,8 @@ github.com/tidwall/rtree v0.0.0-20180113144539-6cd427091e0e h1:+NL1GDIUOKxVfbp2K
 github.com/tidwall/rtree v0.0.0-20180113144539-6cd427091e0e/go.mod h1:/h+UnNGt0IhNNJLkGikcdcJqm66zGD/uJGMRxK/9+Ao=
 github.com/tidwall/tinyqueue v0.0.0-20180302190814-1e39f5511563 h1:Otn9S136ELckZ3KKDyCkxapfufrqDqwmGjcHfAyXRrE=
 github.com/tidwall/tinyqueue v0.0.0-20180302190814-1e39f5511563/go.mod h1:mLqSmt7Dv/CNneF2wfcChfN1rvapyQr01LGKnKex0DQ=
+github.com/tiktoken-go/tokenizer v0.7.0 h1:VMu6MPT0bXFDHr7UPh9uii7CNItVt3X9K90omxL54vw=
+github.com/tiktoken-go/tokenizer v0.7.0/go.mod h1:6UCYI/DtOallbmL7sSy30p6YQv60qNyU/4aVigPOx6w=
 github.com/tikv/pd v1.0.8/go.mod h1:v6C/D7ONC49SgjI4jbGnooSizvijaO/bdIm62DVR4tI=
 github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2 h1:cC5v/gn9NdcmAlpBrWI5x3MiYmQcW2k7EHccg8837p4=
 github.com/tikv/pd v1.1.0-beta.0.20200907080620-6830f5bb92a2/go.mod h1:6OYi62ks7nFIBtWWpOjnngr5LNos4Hvi1BzArCWAlBc=
 
@@ -122,7 +122,7 @@ func GetClientInfo(ak string, dao dao.DAO) (*clienttokenpb.ClientToken, *clientp
 	return nil, client, nil
 }
 
-func AutoCheckAndSetClientId(clientId string, req any, skipSet bool) error {
+func AutoCheckAndSetClientInfo(clientId string, clientTokenId string, req any, skipSet bool) error {
 	// use reflect to set req's clientId field if have
 	clientIdField := reflect.ValueOf(req).Elem().FieldByName("ClientId")
 	if clientIdField != (reflect.Value{}) {
@@ -135,5 +135,19 @@ func AutoCheckAndSetClientId(clientId string, req any, skipSet bool) error {
 			clientIdField.SetString(clientId)
 		}
 	}
+	// check client token id
+	if clientTokenId != "" {
+		clientTokenIdField := reflect.ValueOf(req).Elem().FieldByName("ClientTokenId")
+		if clientTokenIdField != (reflect.Value{}) {
+			// compare if ClientTokenId already exists
+			currentClientTokenId := clientTokenIdField.String()
+			if currentClientTokenId != "" && currentClientTokenId != clientTokenId {
+				return handlers.ErrTokenNotMatch
+			}
+			if !skipSet {
+				clientTokenIdField.SetString(clientTokenId)
+			}
+		}
+	}
 	return nil
 }
@@ -0,0 +1,76 @@
+// Copyright (c) 2021 Terminus, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package akutil
+
+import (
+	"errors"
+	"testing"
+
+	"github.com/erda-project/erda/internal/apps/ai-proxy/handlers"
+)
+
+type mockRequest struct { // minimal request stand-in passed to AutoCheckAndSetClientInfo by the tests in this file
+	ClientId      string // name must stay "ClientId": the helper finds it via reflect FieldByName("ClientId")
+	ClientTokenId string // name must stay "ClientTokenId": the helper finds it via reflect FieldByName("ClientTokenId")
+}
+
+func TestAutoCheckAndSetClientInfoSetsFields(t *testing.T) {
+	req := &mockRequest{}
+
+	clientID := "client-id"
+	tokenID := "token-id"
+
+	if err := AutoCheckAndSetClientInfo(clientID, tokenID, req, false); err != nil {
+		t.Fatalf("AutoCheckAndSetClientInfo returned error: %v", err)
+	}
+	if req.ClientId != clientID {
+		t.Fatalf("expected ClientId to be set to %q, got %q", clientID, req.ClientId)
+	}
+	if req.ClientTokenId != tokenID {
+		t.Fatalf("expected ClientTokenId to be set to %q, got %q", tokenID, req.ClientTokenId)
+	}
+}
+
+// TestAutoCheckAndSetClientInfoSkipSet checks that with skipSet enabled the
+// call succeeds but leaves both request fields empty.
+func TestAutoCheckAndSetClientInfoSkipSet(t *testing.T) {
+	target := new(mockRequest)
+
+	err := AutoCheckAndSetClientInfo("client-id", "token-id", target, true)
+	if err != nil {
+		t.Fatalf("AutoCheckAndSetClientInfo returned error: %v", err)
+	}
+	if got := target.ClientId; len(got) > 0 {
+		t.Fatalf("expected ClientId to remain empty when skipSet is true, got %q", got)
+	}
+	if got := target.ClientTokenId; len(got) > 0 {
+		t.Fatalf("expected ClientTokenId to remain empty when skipSet is true, got %q", got)
+	}
+}
+
+// TestAutoCheckAndSetClientInfoClientMismatch checks that a request already
+// carrying a different ClientId is rejected with handlers.ErrAkNotMatch.
+func TestAutoCheckAndSetClientInfoClientMismatch(t *testing.T) {
+	preset := mockRequest{ClientId: "other"}
+
+	got := AutoCheckAndSetClientInfo("client-id", "", &preset, false)
+	if errors.Is(got, handlers.ErrAkNotMatch) {
+		return
+	}
+	t.Fatalf("expected ErrAkNotMatch, got %v", got)
+}
+
+// TestAutoCheckAndSetClientInfoTokenMismatch checks that a request whose
+// ClientId matches but whose ClientTokenId differs from the supplied token id
+// is rejected with handlers.ErrTokenNotMatch.
+func TestAutoCheckAndSetClientInfoTokenMismatch(t *testing.T) {
+	preset := mockRequest{
+		ClientId:      "client-id",
+		ClientTokenId: "other-token",
+	}
+
+	got := AutoCheckAndSetClientInfo("client-id", "token-id", &preset, false)
+	if errors.Is(got, handlers.ErrTokenNotMatch) {
+		return
+	}
+	t.Fatalf("expected ErrTokenNotMatch, got %v", got)
+}
@@ -18,6 +18,7 @@ package ctxhelper
 
 import (
 	"net/http"
+	"time"
 
 	"github.com/erda-project/erda-infra/base/logs"
 	clientpb "github.com/erda-project/erda-proto-go/apps/aiproxy/client/pb"
@@ -68,6 +69,9 @@ type (
 	mapKeyReverseProxyRequestRewriteError struct{ *ReverseProxyFilterError }
 	mapKeyReverseProxyResponseModifyError struct{ *ReverseProxyFilterError }
 	mapKeyReverseProxyRequestInSnapshot   struct{ *http.Request }
+	mapKeyReverseProxyRequestOutSnapshot  struct{ *http.Request }
+
+	mapKeyReverseProxyWholeHandledResponseBodyStr struct{ string }
 
 	// Keys for migrated context keys
 	mapKeyDBClient     struct{ dao.DAO }
@@ -82,6 +86,8 @@ type (
 
 	mapKeyRequestBodyTransformChanges     struct{ any }
 	mapKeyRequestThinkingTransformChanges struct{ any }
+
+	mapKeyRequestBeginAt struct{ time.Time }
 )
 
 // KeysWithCustomMustGet defines keys with custom MustGet implementations (should not generate default MustGet)