Skip to content

Commit 2289a7b

Browse files
Copilotpelikhan
andauthored
Fix audit tool type undercount for Copilot MCP-only runs (#26689)
* Initial plan * chore: plan audit tool_types fix Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> * fix(audit): count MCP tools in tool_types for copilot runs Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> * refactor(audit): tighten MCP duration merge logic Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530 Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com> Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
1 parent 2ebf132 commit 2289a7b

File tree

4 files changed

+153
-0
lines changed

4 files changed

+153
-0
lines changed

pkg/cli/audit_agentic_analysis.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,89 @@ func buildToolUsageInfo(metrics LogMetrics) []ToolUsageInfo {
8989
return toolUsage
9090
}
9191

92+
func mergeMCPToolUsageInfo(toolUsage []ToolUsageInfo, mcpToolUsage *MCPToolUsageData) []ToolUsageInfo {
93+
if mcpToolUsage == nil {
94+
return toolUsage
95+
}
96+
97+
toolStats := make(map[string]*ToolUsageInfo)
98+
for _, info := range toolUsage {
99+
cloned := info
100+
toolStats[info.Name] = &cloned
101+
}
102+
103+
addOrUpdateToolUsage := func(name string, callCount, maxInputSize, maxOutputSize int, maxDuration string) {
104+
normalizedName := strings.TrimSpace(name)
105+
if normalizedName == "" {
106+
return
107+
}
108+
displayKey := workflow.PrettifyToolName(normalizedName)
109+
if existing, exists := toolStats[displayKey]; exists {
110+
existing.CallCount += callCount
111+
if maxInputSize > existing.MaxInputSize {
112+
existing.MaxInputSize = maxInputSize
113+
}
114+
if maxOutputSize > existing.MaxOutputSize {
115+
existing.MaxOutputSize = maxOutputSize
116+
}
117+
if maxDuration != "" {
118+
maxDurationValue := parseDurationString(maxDuration)
119+
if existing.MaxDuration == "" {
120+
existing.MaxDuration = maxDuration
121+
} else {
122+
existingMaxDurationValue := parseDurationString(existing.MaxDuration)
123+
if maxDurationValue > existingMaxDurationValue {
124+
existing.MaxDuration = maxDuration
125+
}
126+
}
127+
}
128+
return
129+
}
130+
131+
toolStats[displayKey] = &ToolUsageInfo{
132+
Name: displayKey,
133+
CallCount: callCount,
134+
MaxInputSize: maxInputSize,
135+
MaxOutputSize: maxOutputSize,
136+
MaxDuration: maxDuration,
137+
}
138+
}
139+
140+
if len(mcpToolUsage.Summary) > 0 {
141+
for _, summary := range mcpToolUsage.Summary {
142+
switch {
143+
case summary.ServerName != "" && summary.ToolName != "":
144+
addOrUpdateToolUsage(summary.ServerName+"."+summary.ToolName, summary.CallCount, summary.MaxInputSize, summary.MaxOutputSize, summary.MaxDuration)
145+
case summary.ToolName != "":
146+
addOrUpdateToolUsage(summary.ToolName, summary.CallCount, summary.MaxInputSize, summary.MaxOutputSize, summary.MaxDuration)
147+
}
148+
}
149+
} else {
150+
for _, call := range mcpToolUsage.ToolCalls {
151+
switch {
152+
case call.ServerName != "" && call.ToolName != "":
153+
addOrUpdateToolUsage(call.ServerName+"."+call.ToolName, 1, call.InputSize, call.OutputSize, call.Duration)
154+
case call.ToolName != "":
155+
addOrUpdateToolUsage(call.ToolName, 1, call.InputSize, call.OutputSize, call.Duration)
156+
}
157+
}
158+
}
159+
160+
mergedToolUsage := make([]ToolUsageInfo, 0, len(toolStats))
161+
for _, info := range toolStats {
162+
mergedToolUsage = append(mergedToolUsage, *info)
163+
}
164+
165+
slices.SortFunc(mergedToolUsage, func(a, b ToolUsageInfo) int {
166+
if a.CallCount != b.CallCount {
167+
return b.CallCount - a.CallCount
168+
}
169+
return strings.Compare(a.Name, b.Name)
170+
})
171+
172+
return mergedToolUsage
173+
}
174+
92175
func deriveRunAgenticAnalysis(processedRun ProcessedRun, metrics LogMetrics) (*AwContext, []ToolUsageInfo, []CreatedItemReport, *TaskDomainInfo, *BehaviorFingerprint, []AgenticAssessment) {
93176
auditAgenticLog.Printf("Deriving agentic analysis for run: id=%d workflow=%s", processedRun.Run.DatabaseID, processedRun.Run.WorkflowName)
94177
var awContext *AwContext

pkg/cli/audit_agentic_analysis_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,36 @@ func TestBuildAuditDataToolUsageMatchesBuildToolUsageInfo(t *testing.T) {
325325
require.Equal(t, expected, auditData.ToolUsage, "buildAuditData tool usage should match buildToolUsageInfo output")
326326
}
327327

328+
func TestMergeMCPToolUsageInfoUsesSummaryWhenMetricsToolCallsEmpty(t *testing.T) {
329+
merged := mergeMCPToolUsageInfo(nil, &MCPToolUsageData{
330+
Summary: []MCPToolSummary{
331+
{ServerName: "safeoutputs", ToolName: "create_discussion", CallCount: 2, MaxInputSize: 50, MaxOutputSize: 1200},
332+
{ServerName: "safeoutputs", ToolName: "push_repo_memory", CallCount: 1, MaxInputSize: 32, MaxOutputSize: 240},
333+
},
334+
})
335+
336+
require.Len(t, merged, 2, "MCP summary should be represented as tool usage entries")
337+
assert.Equal(t, "safeoutputs.create_discussion", merged[0].Name, "higher call-count MCP tool should sort first")
338+
assert.Equal(t, 2, merged[0].CallCount, "call count should come from MCP summary")
339+
assert.Equal(t, "safeoutputs.push_repo_memory", merged[1].Name, "second MCP tool should be preserved")
340+
}
341+
342+
func TestMergeMCPToolUsageInfoFallsBackToToolCalls(t *testing.T) {
343+
merged := mergeMCPToolUsageInfo(nil, &MCPToolUsageData{
344+
ToolCalls: []MCPToolCall{
345+
{ServerName: "safeoutputs", ToolName: "create_discussion", InputSize: 20, OutputSize: 200},
346+
{ServerName: "safeoutputs", ToolName: "create_discussion", InputSize: 40, OutputSize: 250},
347+
{ServerName: "safeoutputs", ToolName: "push_repo_memory", InputSize: 10, OutputSize: 100},
348+
},
349+
})
350+
351+
require.Len(t, merged, 2, "fallback should aggregate MCP tool calls when summary is absent")
352+
assert.Equal(t, "safeoutputs.create_discussion", merged[0].Name, "tool call fallback should aggregate by MCP tool name")
353+
assert.Equal(t, 2, merged[0].CallCount, "fallback should count repeated MCP tool calls")
354+
assert.Equal(t, 40, merged[0].MaxInputSize, "fallback should keep max input size from calls")
355+
assert.Equal(t, 250, merged[0].MaxOutputSize, "fallback should keep max output size from calls")
356+
}
357+
328358
// TestDeriveRunAgenticAnalysisFingerprintConsistency verifies that the fingerprint
329359
// produced by deriveRunAgenticAnalysis is consistent when Run.Turns is correctly
330360
// populated from log metrics. This guards against the bug where logs_orchestrator.go

pkg/cli/audit_mcp_tool_usage_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,3 +207,42 @@ func TestBuildAuditDataWithMCPToolUsage(t *testing.T) {
207207
assert.Equal(t, 1024, tool.TotalInputSize)
208208
assert.Equal(t, 5120, tool.TotalOutputSize)
209209
}
210+
211+
func TestBuildAuditDataUsesMCPToolUsageForToolTypes(t *testing.T) {
212+
processedRun := ProcessedRun{
213+
Run: WorkflowRun{
214+
DatabaseID: 2468,
215+
WorkflowName: "Copilot MCP Workflow",
216+
Status: "completed",
217+
Conclusion: "success",
218+
Turns: 20,
219+
},
220+
}
221+
metrics := LogMetrics{
222+
Turns: 20,
223+
// Intentionally empty to emulate copilot runs where ToolCalls parsing misses MCP tool calls.
224+
ToolCalls: nil,
225+
}
226+
mcpData := &MCPToolUsageData{
227+
Summary: []MCPToolSummary{
228+
{ServerName: "safeoutputs", ToolName: "create_discussion", CallCount: 3},
229+
{ServerName: "safeoutputs", ToolName: "push_repo_memory", CallCount: 1},
230+
},
231+
}
232+
233+
auditData := buildAuditData(processedRun, metrics, mcpData)
234+
235+
require.Len(t, auditData.ToolUsage, 2, "MCP tools should contribute to tool usage when metrics tool calls are empty")
236+
require.NotNil(t, auditData.BehaviorFingerprint, "behavior fingerprint should be present")
237+
assert.Equal(t, "narrow", auditData.BehaviorFingerprint.ToolBreadth, "2 tool types should still be narrow but no longer zero")
238+
239+
var executionInsight *ObservabilityInsight
240+
for i := range auditData.ObservabilityInsights {
241+
if auditData.ObservabilityInsights[i].Category == "execution" {
242+
executionInsight = &auditData.ObservabilityInsights[i]
243+
break
244+
}
245+
}
246+
require.NotNil(t, executionInsight, "execution insight should be present")
247+
assert.Contains(t, executionInsight.Evidence, "tool_types=2", "execution insight should report MCP-derived tool type count")
248+
}

pkg/cli/audit_report.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,7 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage
321321
}
322322

323323
toolUsage := buildToolUsageInfo(metrics)
324+
toolUsage = mergeMCPToolUsageInfo(toolUsage, mcpToolUsage)
324325

325326
createdItems := extractCreatedItemsFromManifest(run.LogsPath)
326327
taskDomain := detectTaskDomain(processedRun, createdItems, toolUsage, overview.AwContext)

0 commit comments

Comments
 (0)