Fix audit tool type undercount for Copilot MCP-only runs (#26689)

Copilot · pelikhan · web-flow · commit 2289a7b6fe7c · 2026-04-16T11:26:12.000-07:00
* Initial plan
* chore: plan audit tool_types fix
  Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* fix(audit): count MCP tools in tool_types for copilot runs
  Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
* refactor(audit): tighten MCP duration merge logic
  Agent-Logs-Url: https://github.com/github/gh-aw/sessions/d7000781-6a64-476c-84a4-6eb743492530
  Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
---------
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: pelikhan <4175913+pelikhan@users.noreply.github.com>
Co-authored-by: Peli de Halleux <pelikhan@users.noreply.github.com>
diff --git a/pkg/cli/audit_agentic_analysis.go b/pkg/cli/audit_agentic_analysis.go
@@ -89,6 +89,89 @@ func buildToolUsageInfo(metrics LogMetrics) []ToolUsageInfo {
 	return toolUsage
 }
 
+// mergeMCPToolUsageInfo folds MCP tool usage into the tool usage entries derived
+// from log metrics and returns the combined list.
+//
+// A nil mcpToolUsage returns toolUsage unchanged. Otherwise MCP entries are keyed
+// by workflow.PrettifyToolName applied to "server.tool" (or to the bare tool name
+// when no server name is recorded); entries without a tool name are ignored.
+// Tools already present in toolUsage get the MCP call count added to theirs and
+// keep the larger input size, output size, and duration. Tools seen only in the
+// MCP data get new entries. The per-tool Summary is used when it is non-empty;
+// individual ToolCalls are aggregated only as a fallback.
+//
+// The result is a new slice sorted by descending call count, then by name. The
+// elements of the input slice are copied, not modified.
+//
+// NOTE(review): call counts are summed, so this assumes the metrics-derived tool
+// calls and the MCP data do not describe the same invocations under the same
+// display name — confirm for engines whose logs already include MCP tool calls.
+func mergeMCPToolUsageInfo(toolUsage []ToolUsageInfo, mcpToolUsage *MCPToolUsageData) []ToolUsageInfo {
+	if mcpToolUsage == nil {
+		return toolUsage
+	}
+
+	// Index copies of the existing entries so merging never touches the caller's slice.
+	toolStats := make(map[string]*ToolUsageInfo, len(toolUsage))
+	for _, info := range toolUsage {
+		cloned := info
+		toolStats[info.Name] = &cloned
+	}
+
+	// addOrUpdateToolUsage merges one MCP observation (a summary row or a single
+	// call) into toolStats under its prettified display name.
+	addOrUpdateToolUsage := func(serverName, toolName string, callCount, maxInputSize, maxOutputSize int, maxDuration string) {
+		if toolName == "" {
+			return
+		}
+		name := toolName
+		if serverName != "" {
+			name = serverName + "." + toolName
+		}
+		normalizedName := strings.TrimSpace(name)
+		if normalizedName == "" {
+			return
+		}
+		displayKey := workflow.PrettifyToolName(normalizedName)
+
+		existing, exists := toolStats[displayKey]
+		if !exists {
+			toolStats[displayKey] = &ToolUsageInfo{
+				Name:          displayKey,
+				CallCount:     callCount,
+				MaxInputSize:  maxInputSize,
+				MaxOutputSize: maxOutputSize,
+				MaxDuration:   maxDuration,
+			}
+			return
+		}
+
+		existing.CallCount += callCount
+		existing.MaxInputSize = max(existing.MaxInputSize, maxInputSize)
+		existing.MaxOutputSize = max(existing.MaxOutputSize, maxOutputSize)
+		// Durations are kept as strings; parse only when both sides are present
+		// and an actual comparison is needed.
+		if maxDuration != "" &&
+			(existing.MaxDuration == "" || parseDurationString(maxDuration) > parseDurationString(existing.MaxDuration)) {
+			existing.MaxDuration = maxDuration
+		}
+	}
+
+	if len(mcpToolUsage.Summary) > 0 {
+		for _, summary := range mcpToolUsage.Summary {
+			addOrUpdateToolUsage(summary.ServerName, summary.ToolName, summary.CallCount, summary.MaxInputSize, summary.MaxOutputSize, summary.MaxDuration)
+		}
+	} else {
+		// No aggregated summary available: count each recorded call once.
+		for _, call := range mcpToolUsage.ToolCalls {
+			addOrUpdateToolUsage(call.ServerName, call.ToolName, 1, call.InputSize, call.OutputSize, call.Duration)
+		}
+	}
+
+	mergedToolUsage := make([]ToolUsageInfo, 0, len(toolStats))
+	for _, info := range toolStats {
+		mergedToolUsage = append(mergedToolUsage, *info)
+	}
+
+	// Map iteration order is random, so sort for deterministic output:
+	// most-called tools first, ties broken by name.
+	slices.SortFunc(mergedToolUsage, func(a, b ToolUsageInfo) int {
+		// Compare explicitly rather than subtracting so extreme counts cannot overflow.
+		switch {
+		case a.CallCount > b.CallCount:
+			return -1
+		case a.CallCount < b.CallCount:
+			return 1
+		}
+		return strings.Compare(a.Name, b.Name)
+	})
+
+	return mergedToolUsage
+}
+
 func deriveRunAgenticAnalysis(processedRun ProcessedRun, metrics LogMetrics) (*AwContext, []ToolUsageInfo, []CreatedItemReport, *TaskDomainInfo, *BehaviorFingerprint, []AgenticAssessment) {
 	auditAgenticLog.Printf("Deriving agentic analysis for run: id=%d workflow=%s", processedRun.Run.DatabaseID, processedRun.Run.WorkflowName)
 	var awContext *AwContext
diff --git a/pkg/cli/audit_agentic_analysis_test.go b/pkg/cli/audit_agentic_analysis_test.go
@@ -325,6 +325,36 @@ func TestBuildAuditDataToolUsageMatchesBuildToolUsageInfo(t *testing.T) {
 	require.Equal(t, expected, auditData.ToolUsage, "buildAuditData tool usage should match buildToolUsageInfo output")
 }
 
+// TestMergeMCPToolUsageInfoUsesSummaryWhenMetricsToolCallsEmpty checks that, with
+// no metrics-derived tool usage, each MCP summary row becomes a tool usage entry
+// and that entries are ordered by descending call count.
+func TestMergeMCPToolUsageInfoUsesSummaryWhenMetricsToolCallsEmpty(t *testing.T) {
+	mcpData := &MCPToolUsageData{
+		Summary: []MCPToolSummary{
+			{ServerName: "safeoutputs", ToolName: "create_discussion", CallCount: 2, MaxInputSize: 50, MaxOutputSize: 1200},
+			{ServerName: "safeoutputs", ToolName: "push_repo_memory", CallCount: 1, MaxInputSize: 32, MaxOutputSize: 240},
+		},
+	}
+
+	got := mergeMCPToolUsageInfo(nil, mcpData)
+
+	// Length is a hard requirement: the indexed reads below depend on it.
+	require.Len(t, got, 2, "MCP summary should be represented as tool usage entries")
+	first, second := got[0], got[1]
+	assert.Equal(t, "safeoutputs.create_discussion", first.Name, "higher call-count MCP tool should sort first")
+	assert.Equal(t, 2, first.CallCount, "call count should come from MCP summary")
+	assert.Equal(t, "safeoutputs.push_repo_memory", second.Name, "second MCP tool should be preserved")
+}
+
+// TestMergeMCPToolUsageInfoFallsBackToToolCalls checks that, when no MCP summary
+// is supplied, individual tool calls are aggregated per tool: repeated calls are
+// counted and the largest input and output sizes are kept.
+func TestMergeMCPToolUsageInfoFallsBackToToolCalls(t *testing.T) {
+	calls := []MCPToolCall{
+		{ServerName: "safeoutputs", ToolName: "create_discussion", InputSize: 20, OutputSize: 200},
+		{ServerName: "safeoutputs", ToolName: "create_discussion", InputSize: 40, OutputSize: 250},
+		{ServerName: "safeoutputs", ToolName: "push_repo_memory", InputSize: 10, OutputSize: 100},
+	}
+
+	got := mergeMCPToolUsageInfo(nil, &MCPToolUsageData{ToolCalls: calls})
+
+	// Length is a hard requirement: the indexed read below depends on it.
+	require.Len(t, got, 2, "fallback should aggregate MCP tool calls when summary is absent")
+	top := got[0]
+	assert.Equal(t, "safeoutputs.create_discussion", top.Name, "tool call fallback should aggregate by MCP tool name")
+	assert.Equal(t, 2, top.CallCount, "fallback should count repeated MCP tool calls")
+	assert.Equal(t, 40, top.MaxInputSize, "fallback should keep max input size from calls")
+	assert.Equal(t, 250, top.MaxOutputSize, "fallback should keep max output size from calls")
+}
+
 // TestDeriveRunAgenticAnalysisFingerprintConsistency verifies that the fingerprint
 // produced by deriveRunAgenticAnalysis is consistent when Run.Turns is correctly
 // populated from log metrics. This guards against the bug where logs_orchestrator.go
diff --git a/pkg/cli/audit_mcp_tool_usage_test.go b/pkg/cli/audit_mcp_tool_usage_test.go
@@ -207,3 +207,42 @@ func TestBuildAuditDataWithMCPToolUsage(t *testing.T) {
 	assert.Equal(t, 1024, tool.TotalInputSize)
 	assert.Equal(t, 5120, tool.TotalOutputSize)
 }
+
+// TestBuildAuditDataUsesMCPToolUsageForToolTypes checks that buildAuditData takes
+// its tool usage from the MCP summary when the log metrics carry no tool calls,
+// and that the behavior fingerprint and the "execution" observability insight
+// reflect the resulting two tool types.
+func TestBuildAuditDataUsesMCPToolUsageForToolTypes(t *testing.T) {
+	run := WorkflowRun{
+		DatabaseID:   2468,
+		WorkflowName: "Copilot MCP Workflow",
+		Status:       "completed",
+		Conclusion:   "success",
+		Turns:        20,
+	}
+	processedRun := ProcessedRun{Run: run}
+
+	// ToolCalls is deliberately left nil to mimic copilot runs in which log
+	// parsing does not pick up MCP tool calls.
+	metrics := LogMetrics{
+		Turns:     20,
+		ToolCalls: nil,
+	}
+
+	mcpData := &MCPToolUsageData{
+		Summary: []MCPToolSummary{
+			{ServerName: "safeoutputs", ToolName: "create_discussion", CallCount: 3},
+			{ServerName: "safeoutputs", ToolName: "push_repo_memory", CallCount: 1},
+		},
+	}
+
+	auditData := buildAuditData(processedRun, metrics, mcpData)
+
+	require.Len(t, auditData.ToolUsage, 2, "MCP tools should contribute to tool usage when metrics tool calls are empty")
+	require.NotNil(t, auditData.BehaviorFingerprint, "behavior fingerprint should be present")
+	assert.Equal(t, "narrow", auditData.BehaviorFingerprint.ToolBreadth, "2 tool types should still be narrow but no longer zero")
+
+	// Pick the first insight in the "execution" category, if any.
+	insights := auditData.ObservabilityInsights
+	var executionInsight *ObservabilityInsight
+	for i := range insights {
+		if insights[i].Category != "execution" {
+			continue
+		}
+		executionInsight = &insights[i]
+		break
+	}
+	require.NotNil(t, executionInsight, "execution insight should be present")
+	assert.Contains(t, executionInsight.Evidence, "tool_types=2", "execution insight should report MCP-derived tool type count")
+}
diff --git a/pkg/cli/audit_report.go b/pkg/cli/audit_report.go
@@ -321,6 +321,7 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage
 	}
 
 	toolUsage := buildToolUsageInfo(metrics)
+	toolUsage = mergeMCPToolUsageInfo(toolUsage, mcpToolUsage)
 
 	createdItems := extractCreatedItemsFromManifest(run.LogsPath)
 	taskDomain := detectTaskDomain(processedRun, createdItems, toolUsage, overview.AwContext)

Original file line number	Diff line number	Diff line change
`@@ -321,6 +321,7 @@ func buildAuditData(processedRun ProcessedRun, metrics LogMetrics, mcpToolUsage`
`321`	`321`	`}`
`322`	`322`
`323`	`323`	`toolUsage := buildToolUsageInfo(metrics)`
	`324`	`+ toolUsage = mergeMCPToolUsageInfo(toolUsage, mcpToolUsage)`
`324`	`325`
`325`	`326`	`createdItems := extractCreatedItemsFromManifest(run.LogsPath)`
`326`	`327`	`taskDomain := detectTaskDomain(processedRun, createdItems, toolUsage, overview.AwContext)`