Skip to content

Commit 1de9ebc

Browse files
committed
add include_raw_events option to snapshot config
1 parent ac7c493 commit 1de9ebc

3 files changed

Lines changed: 196 additions & 6 deletions

File tree

src/ghdcbot/config/models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ class SnapshotConfig(BaseModel):
214214
repo_path: str = "" # Format: "owner/repo" (e.g., "org/gitcord-data")
215215
# Optional: branch to write to (default: main/master)
216216
branch: str | None = None
217+
# Optional: export raw ContributionEvent records to events.json (can be large)
218+
include_raw_events: bool = False
217219

218220

219221
class BotConfig(BaseModel):

src/ghdcbot/engine/snapshots.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ def _write_snapshots(
114114
contribution_summaries=contribution_summaries,
115115
run_id=run_id,
116116
generated_at=now,
117+
include_raw_events=getattr(snapshot_config, "include_raw_events", False),
117118
)
118119

119120
# Write each snapshot file to GitHub
@@ -164,6 +165,7 @@ def _collect_snapshot_data(
164165
contribution_summaries: list[ContributionSummary] | None,
165166
run_id: str,
166167
generated_at: datetime,
168+
include_raw_events: bool = False,
167169
) -> dict[str, dict[str, Any]]:
168170
"""Collect all snapshot data into structured dictionaries."""
169171
org = config.github.org
@@ -302,7 +304,7 @@ def _collect_snapshot_data(
302304
"data": notifications_data,
303305
}
304306

305-
return {
307+
files: dict[str, dict[str, Any]] = {
306308
"meta.json": meta,
307309
"identities.json": identities,
308310
"scores.json": scores_snapshot,
@@ -312,6 +314,30 @@ def _collect_snapshot_data(
312314
"notifications.json": notifications,
313315
}
314316

317+
if include_raw_events:
318+
raw_events = storage.list_contributions(period_start)
319+
events_data = [
320+
{
321+
"github_user": event.github_user,
322+
"event_type": event.event_type,
323+
"repo": event.repo,
324+
"created_at": event.created_at.isoformat(),
325+
"payload": event.payload,
326+
}
327+
for event in raw_events
328+
]
329+
files["events.json"] = {
330+
"schema_version": SCHEMA_VERSION,
331+
"generated_at": generated_at.isoformat(),
332+
"org": org,
333+
"run_id": run_id,
334+
"period_start": period_start.isoformat(),
335+
"period_end": period_end.isoformat(),
336+
"data": events_data,
337+
}
338+
339+
return files
340+
315341

316342
def _parse_repo_path(repo_path: str) -> tuple[str, str]:
317343
"""Parse 'owner/repo' or 'owner/repo/path' into (owner, repo).

tests/test_snapshots.py

Lines changed: 167 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
SnapshotConfig,
1919
)
2020
from ghdcbot.core.modes import RunMode
21-
from ghdcbot.core.models import ContributionSummary, Score
21+
from ghdcbot.core.models import ContributionEvent, ContributionSummary, Score
2222
from ghdcbot.engine.snapshots import (
2323
SCHEMA_VERSION,
2424
_collect_snapshot_data,
@@ -29,16 +29,20 @@
2929

3030
class MockStorage:
    """In-memory stand-in for the storage adapter used by the snapshot tests."""

    def __init__(self) -> None:
        # Tests seed the fake store by assigning to these lists directly.
        self.notifications: list[dict] = []
        self.contributions: list[ContributionEvent] = []

    def list_recent_notifications(self, limit: int = 1000) -> list[dict]:
        """Return the first ``limit`` stored notifications."""
        capped = self.notifications[:limit]
        return capped

    def list_pending_issue_requests(self) -> list[dict]:
        """Return an empty list; this fake never holds pending requests."""
        return []

    def list_contributions(self, since: datetime) -> list[ContributionEvent]:
        """Return stored contributions whose ``created_at`` is at or after ``since``."""
        matching = []
        for event in self.contributions:
            if event.created_at >= since:
                matching.append(event)
        return matching
45+
4246

4347
class MockGitHubWriter:
4448
"""Mock GitHub writer for testing."""
@@ -334,3 +338,161 @@ def test_write_snapshots_handles_errors() -> None:
334338

335339
# Should not have written files due to error
336340
assert len(github_writer.files_written) == 0
341+
342+
343+
def _make_config(*, snapshots: "SnapshotConfig | None" = None) -> "BotConfig":
    """Build a dry-run ``BotConfig`` for the ``test-org`` organisation.

    Every adapter name and token is the placeholder string ``"test"``.

    Args:
        snapshots: Value forwarded unchanged as ``BotConfig(snapshots=...)``;
            defaults to ``None``.

    Returns:
        The assembled ``BotConfig``.
    """
    runtime_cfg = RuntimeConfig(
        mode=RunMode.DRY_RUN,
        log_level="INFO",
        data_dir="/tmp/test",
        github_adapter="test",
        discord_adapter="test",
        storage_adapter="test",
    )
    github_cfg = GitHubConfig(
        org="test-org",
        token="test",
        api_base="https://api.github.com",
        permissions=PermissionConfig(),
    )
    discord_cfg = DiscordConfig(
        guild_id="123",
        token="test",
        permissions=PermissionConfig(),
    )
    scoring_cfg = ScoringConfig(period_days=30, weights={})
    contributor_role = RoleMappingConfig(discord_role="Contributor", min_score=10)
    assignments_cfg = AssignmentConfig()

    return BotConfig(
        runtime=runtime_cfg,
        github=github_cfg,
        discord=discord_cfg,
        scoring=scoring_cfg,
        role_mappings=[contributor_role],
        assignments=assignments_cfg,
        snapshots=snapshots,
    )
360+
361+
362+
def test_raw_events_excluded_by_default() -> None:
    """With ``include_raw_events=False`` the result has no ``events.json`` key.

    Storage is seeded with one contribution inside the period, so the key is
    absent because of the flag rather than because there is nothing to export.
    """
    fake_storage = MockStorage()
    alice_pr = ContributionEvent(
        github_user="alice",
        event_type="pr_merged",
        repo="org/repo",
        created_at=datetime(2024, 1, 15, tzinfo=timezone.utc),
        payload={"pr_number": 1},
    )
    fake_storage.contributions = [alice_pr]

    collected = _collect_snapshot_data(
        storage=fake_storage,
        config=_make_config(),
        identity_mappings=[],
        scores=[],
        member_roles={},
        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
        contribution_summaries=None,
        run_id="test-run",
        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=timezone.utc),
        include_raw_events=False,
    )

    assert "events.json" not in collected
388+
389+
390+
def test_raw_events_included_when_enabled() -> None:
    """With ``include_raw_events=True`` the result gains an ``events.json`` entry.

    Checks the envelope fields (``schema_version``, ``org``) and that both
    seeded events appear in ``data`` in storage order with their fields intact.
    """
    fake_storage = MockStorage()
    alice_pr = ContributionEvent(
        github_user="alice",
        event_type="pr_merged",
        repo="org/repo",
        created_at=datetime(2024, 1, 15, tzinfo=timezone.utc),
        payload={"pr_number": 42},
    )
    bob_issue = ContributionEvent(
        github_user="bob",
        event_type="issue_opened",
        repo="org/repo",
        created_at=datetime(2024, 1, 20, tzinfo=timezone.utc),
        payload={"issue_number": 7},
    )
    fake_storage.contributions = [alice_pr, bob_issue]

    collected = _collect_snapshot_data(
        storage=fake_storage,
        config=_make_config(),
        identity_mappings=[],
        scores=[],
        member_roles={},
        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
        contribution_summaries=None,
        run_id="test-run",
        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=timezone.utc),
        include_raw_events=True,
    )

    assert "events.json" in collected
    events_file = collected["events.json"]
    assert events_file["schema_version"] == SCHEMA_VERSION
    assert events_file["org"] == "test-org"

    rows = events_file["data"]
    assert len(rows) == 2
    assert rows[0]["github_user"] == "alice"
    assert rows[0]["event_type"] == "pr_merged"
    assert rows[0]["payload"] == {"pr_number": 42}
    assert rows[1]["github_user"] == "bob"
431+
432+
433+
def test_raw_events_respects_period_start() -> None:
    """Events dated before ``period_start`` are left out of ``events.json``.

    Storage holds one event five days before the period start and one five
    days after it; only the later one (bob's) should be exported.
    """
    period_start = datetime(2024, 1, 10, tzinfo=timezone.utc)

    too_early = ContributionEvent(
        github_user="alice",
        event_type="pr_merged",
        repo="org/repo",
        created_at=datetime(2024, 1, 5, tzinfo=timezone.utc),  # before period_start
        payload={},
    )
    in_period = ContributionEvent(
        github_user="bob",
        event_type="pr_merged",
        repo="org/repo",
        created_at=datetime(2024, 1, 15, tzinfo=timezone.utc),  # within period
        payload={},
    )
    fake_storage = MockStorage()
    fake_storage.contributions = [too_early, in_period]

    collected = _collect_snapshot_data(
        storage=fake_storage,
        config=_make_config(),
        identity_mappings=[],
        scores=[],
        member_roles={},
        period_start=period_start,
        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
        contribution_summaries=None,
        run_id="test-run",
        generated_at=datetime(2024, 1, 31, 12, 0, 0, tzinfo=timezone.utc),
        include_raw_events=True,
    )

    exported = collected["events.json"]["data"]
    assert len(exported) == 1
    assert exported[0]["github_user"] == "bob"
469+
470+
471+
def test_write_snapshots_raw_events_via_config() -> None:
    """Enabling ``include_raw_events`` on ``SnapshotConfig`` gets ``events.json`` written.

    Goes through the public ``write_snapshots_to_github`` entry point and
    inspects the paths recorded by the mock GitHub writer.
    """
    fake_storage = MockStorage()
    alice_pr = ContributionEvent(
        github_user="alice",
        event_type="pr_merged",
        repo="org/repo",
        created_at=datetime(2024, 1, 15, tzinfo=timezone.utc),
        payload={"pr_number": 1},
    )
    fake_storage.contributions = [alice_pr]

    snapshot_settings = SnapshotConfig(
        enabled=True,
        repo_path="org/repo",
        include_raw_events=True,
    )
    config = _make_config(snapshots=snapshot_settings)
    writer = MockGitHubWriter()

    write_snapshots_to_github(
        storage=fake_storage,
        config=config,
        github_writer=writer,
        identity_mappings=[],
        scores=[],
        member_roles={},
        period_start=datetime(2024, 1, 1, tzinfo=timezone.utc),
        period_end=datetime(2024, 1, 31, tzinfo=timezone.utc),
    )

    # Each recorded write is unpacked as a 4-tuple; the third item is the path.
    recorded_paths = [
        file_path for _, _, file_path, _ in writer.files_written
    ]
    assert any("events.json" in file_path for file_path in recorded_paths)

0 commit comments

Comments
 (0)