Skip to content

Commit d0dcfd2

Browse files
committed
Add --tables-per-iteration arg, separate chart PNGs, disable telemetry
- All 4 profiling scripts now accept --tables-per-iteration (defaults to --threads, i.e. 1 table per thread per iteration).
- NUM_TABLES bumped to 128.
- plot_comparison.py generates separate PNGs for table-level comparison (wall-clock, tables/sec) and individual operation detail (ops/sec, P50, P99, max). Column tags and table tags get their own PNGs.
- Chart labels spell out parameters (columns, tags_per_column, tables).
- All scripts pass enable_telemetry=False to sql.connect().

Co-authored-by: Isaac
1 parent 53a3088 commit d0dcfd2

5 files changed

Lines changed: 166 additions & 70 deletions

File tree

examples/plot_comparison.py

Lines changed: 82 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def parse_report(filepath):
3838

3939
m = re.search(r"\*\*(ALTERs/sec|SELECTs/sec|Operations/sec)\*\*:\s*([\d.]+)", content)
4040
if m:
41-
metrics["throughput"] = float(m.group(2))
41+
metrics["throughput_ops"] = float(m.group(2))
4242

4343
for pct in ["p50", "p90", "p95", "p99"]:
4444
m = re.search(rf"\|\s*{pct}\s*\|\s*([\d.]+)\s*\|", content)
@@ -65,6 +65,17 @@ def parse_report(filepath):
6565
if m:
6666
metrics["columns"] = int(m.group(1))
6767

68+
m = re.search(r"\*\*Tables per iteration\*\*:\s*(\d+)", content)
69+
if m:
70+
metrics["tables_per_iteration"] = int(m.group(1))
71+
72+
# Fallback for older reports that lack "Tables per iteration": derive it as Total SELECTs // iterations
73+
if "tables_per_iteration" not in metrics:
74+
m = re.search(r"\*\*Total SELECTs\*\*:\s*(\d+)", content)
75+
iters = metrics.get("iterations", 1)
76+
if m and iters:
77+
metrics["tables_per_iteration"] = int(float(m.group(1))) // iters
78+
6879
m = re.search(r"\*\*Tags per ALTER\*\*:\s*(\d+)", content)
6980
if m:
7081
metrics["tags"] = int(m.group(1))
@@ -111,17 +122,19 @@ def discover_reports():
111122

112123
threads = metrics["threads"]
113124

125+
tbl = metrics.get("tables_per_iteration", "?")
126+
114127
if report_type == "alter" and category == "column":
115128
cols = metrics.get("columns", "?")
116129
tags = metrics.get("tags", "?")
117-
label = f"ALTER column tags (c={cols}, t={tags})"
130+
label = f"ALTER column tags (columns={cols}, tags_per_column={tags}, tables={tbl})"
118131
elif report_type == "alter" and category == "table":
119132
tags = metrics.get("tags", "?")
120-
label = f"ALTER table tags (t={tags})"
133+
label = f"ALTER table tags (tags={tags}, tables={tbl})"
121134
elif report_type == "info_schema" and category == "column":
122-
label = "info_schema column_tags SELECT"
135+
label = f"info_schema column_tags SELECT (tables={tbl})"
123136
elif report_type == "info_schema" and category == "table":
124-
label = "info_schema table_tags SELECT"
137+
label = f"info_schema table_tags SELECT (tables={tbl})"
125138
else:
126139
continue
127140

@@ -130,23 +143,24 @@ def discover_reports():
130143
if existing and metrics.get("iterations", 0) <= existing.get("iterations", 0):
131144
continue
132145

146+
# Compute tables/sec from wall-clock and tables_per_iteration
147+
tpi = metrics.get("tables_per_iteration")
148+
wc = metrics.get("wall_clock_s")
149+
if tpi and wc and wc > 0:
150+
metrics["tables_per_sec"] = round(tpi / wc, 2)
151+
133152
categories[category][label][threads] = metrics
134153
print(f" [{category}] {label} threads={threads}: "
135154
f"wall={metrics.get('wall_clock_s', '?')}s, "
136155
f"p50={metrics.get('p50', '?')}ms, "
137-
f"throughput={metrics.get('throughput', '?')} ops/s "
156+
f"tables/s={metrics.get('tables_per_sec', '?')} "
138157
f"[{fname}]")
139158

140159
return categories
141160

142161

143-
def plot_category(category_name, series, output_path):
144-
"""Generate a 2x2 chart PNG for one category (column or table)."""
145-
if not series:
146-
print(f" No data for {category_name}, skipping.")
147-
return
148-
149-
# Color/style assignment
162+
def build_style_map(series):
163+
"""Assign colors and styles to series labels."""
150164
colors_info = ["#d62728", "#ff7f0e"]
151165
colors_alter = ["#1f77b4", "#2ca02c", "#9467bd", "#17becf", "#8c564b"]
152166
info_idx = 0
@@ -161,16 +175,20 @@ def plot_category(category_name, series, output_path):
161175
style_map[label] = {"color": colors_alter[alter_idx % len(colors_alter)], "marker": "s", "linestyle": "-"}
162176
alter_idx += 1
163177

164-
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
178+
return style_map
165179

166-
chart_configs = [
167-
(axes[0][0], "wall_clock_s", "Wall-Clock Time (seconds)", "Wall-Clock Time vs Thread Count"),
168-
(axes[0][1], "throughput", "Operations / second", "Throughput vs Thread Count"),
169-
(axes[1][0], "p50", "P50 Latency (ms)", "P50 Latency vs Thread Count"),
170-
(axes[1][1], "p99", "P99 Latency (ms)", "P99 Latency vs Thread Count"),
171-
]
172180

173-
for ax, metric_key, ylabel, title in chart_configs:
181+
def plot_charts(series, style_map, chart_configs, suptitle, output_path):
182+
"""Generate a chart PNG with len(chart_configs) subplots."""
183+
n = len(chart_configs)
184+
cols = 2
185+
rows = (n + 1) // 2
186+
fig, axes = plt.subplots(rows, cols, figsize=(16, 6 * rows))
187+
if rows == 1:
188+
axes = [axes]
189+
190+
for idx, (metric_key, ylabel, title) in enumerate(chart_configs):
191+
ax = axes[idx // cols][idx % cols]
174192
for label, thread_data in sorted(series.items()):
175193
threads = sorted(thread_data.keys())
176194
values = [thread_data[t].get(metric_key) for t in threads]
@@ -184,15 +202,51 @@ def plot_category(category_name, series, output_path):
184202
ax.legend(fontsize=8)
185203
ax.grid(True, alpha=0.3)
186204

187-
title_label = "Column Tags" if category_name == "column" else "Table Tags"
188-
plt.suptitle(f"SET TAGS Profiling: {title_label} — info_schema SELECT vs Direct ALTER",
189-
fontsize=14, fontweight="bold")
205+
# Hide unused subplot if odd number of charts
206+
if n % 2 == 1:
207+
axes[rows - 1][1].set_visible(False)
208+
209+
plt.suptitle(suptitle, fontsize=14, fontweight="bold")
190210
plt.tight_layout()
191211
plt.savefig(output_path, dpi=150, bbox_inches="tight")
192212
plt.close(fig)
193213
print(f" Chart saved to: {output_path}")
194214

195215

216+
def plot_category(category_name, series, output_dir):
217+
"""Generate two PNGs per category: table-level comparison + individual operation detail."""
218+
if not series:
219+
print(f" No data for {category_name}, skipping.")
220+
return
221+
222+
style_map = build_style_map(series)
223+
title_label = "Column Tags" if category_name == "column" else "Table Tags"
224+
225+
# Chart 1: Table-level comparison (apples-to-apples across approaches)
226+
table_charts = [
227+
("wall_clock_s", "Wall-Clock Time (seconds)", "Wall-Clock Time vs Thread Count (Lower is better)"),
228+
("tables_per_sec", "Tables / second", "Tables Processed per Second vs Thread Count (Higher is better)"),
229+
]
230+
plot_charts(
231+
series, style_map, table_charts,
232+
f"{title_label}: Table-Level Comparison — info_schema SELECT vs Direct ALTER",
233+
os.path.join(output_dir, f"comparison_{category_name}_tags_tables.png"),
234+
)
235+
236+
# Chart 2: Individual operation detail (per-op latency)
237+
op_charts = [
238+
("throughput_ops", "Individual Operations / second", "Individual Op Throughput vs Thread Count (Higher is better)"),
239+
("p50", "P50 Latency per Op (ms)", "P50 Latency vs Thread Count (Lower is better)"),
240+
("p99", "P99 Latency per Op (ms)", "P99 Latency vs Thread Count (Lower is better)"),
241+
("max", "Max Latency per Op (ms)", "Max Latency vs Thread Count (Lower is better)"),
242+
]
243+
plot_charts(
244+
series, style_map, op_charts,
245+
f"{title_label}: Individual Operation Detail",
246+
os.path.join(output_dir, f"comparison_{category_name}_tags_ops.png"),
247+
)
248+
249+
196250
if __name__ == "__main__":
197251
print("Discovering results...\n")
198252
categories = discover_reports()
@@ -202,14 +256,12 @@ def plot_category(category_name, series, output_path):
202256
print(f"\nFound {total_series} series across {total_points} data points.\n")
203257

204258
if "column" in categories:
205-
print("Generating column tags chart...")
206-
plot_category("column", categories["column"],
207-
os.path.join(RESULTS_DIR, "comparison_column_tags.png"))
259+
print("Generating column tags charts...")
260+
plot_category("column", categories["column"], RESULTS_DIR)
208261

209262
if "table" in categories:
210-
print("Generating table tags chart...")
211-
plot_category("table", categories["table"],
212-
os.path.join(RESULTS_DIR, "comparison_table_tags.png"))
263+
print("Generating table tags charts...")
264+
plot_category("table", categories["table"], RESULTS_DIR)
213265

214266
if not categories:
215267
print("No results found. Run experiments first.")

examples/profile_column_tags.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
SCHEMA = _creds["SCHEMA"]
5656
# ============================================================
5757

58-
NUM_TABLES = 64
58+
NUM_TABLES = 128 # total tables available (table1..table128)
5959
MAX_COLUMNS = 128 # tables always created with this many columns
6060
RESULTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "results", "column_tags")
6161

@@ -279,11 +279,14 @@ def run_iteration(
279279
num_columns: int,
280280
num_tags: int,
281281
num_threads: int,
282+
tables_per_iteration: int,
282283
) -> tuple:
283-
"""Run a single iteration: distribute 20 tables across threads."""
284+
"""Run a single iteration: tables_per_iteration tables distributed across num_threads threads."""
284285
table_queue = Queue()
285-
for t in range(1, NUM_TABLES + 1):
286-
table_queue.put(f"table{t}")
286+
start = ((iteration - 1) * tables_per_iteration) % NUM_TABLES
287+
for i in range(tables_per_iteration):
288+
table_idx = start + i + 1
289+
table_queue.put(f"table{table_idx}")
287290

288291
alter_results: list = []
289292
table_results: list = []
@@ -353,7 +356,7 @@ def w(text=""):
353356
w(f"- **Server**: `{SERVER_HOSTNAME}`")
354357
w(f"- **HTTP Path**: `{HTTP_PATH}`")
355358
w(f"- **Catalog.Schema**: `{CATALOG}.{SCHEMA}`")
356-
w(f"- **Tables**: {NUM_TABLES}")
359+
w(f"- **Tables per iteration**: {args.tables_per_iteration}")
357360
w(f"- **Columns tagged per table**: {args.columns}")
358361
w(f"- **Tags per ALTER**: {args.tags}")
359362
w(f"- **Threads**: {args.threads}")
@@ -583,18 +586,26 @@ def w(text=""):
583586

584587
def main():
585588
parser = argparse.ArgumentParser(description="Profile SET COLUMN TAGS performance")
586-
parser.add_argument("--columns", type=int, required=True, help="Number of columns to tag per table (1, 2, 4)")
587-
parser.add_argument("--tags", type=int, required=True, help="Number of tags per ALTER command (1, 2, 4)")
588-
parser.add_argument("--threads", type=int, required=True, help="Number of concurrent threads (1, 2, 4, 8, 16)")
589-
parser.add_argument("--iterations", type=int, required=True, help="Number of times to repeat the full sweep")
589+
parser.add_argument("--columns", type=int, required=True, help="Number of columns to tag per table")
590+
parser.add_argument("--tags", type=int, required=True, help="Number of tags per ALTER command")
591+
parser.add_argument("--threads", type=int, required=True, help="Number of concurrent threads")
592+
parser.add_argument("--iterations", type=int, required=True, help="Number of iterations")
593+
parser.add_argument("--tables-per-iteration", type=int, default=None, help="Tables to process per iteration (default = --threads, i.e. 1 table per thread)")
590594
parser.add_argument("--validate", action="store_true", help="Quick validation: override to 1 iteration, print result")
591595
parser.add_argument("--skip-setup", action="store_true", help="Skip table creation (tables already exist)")
592596
args = parser.parse_args()
593597

598+
if args.tables_per_iteration is None:
599+
args.tables_per_iteration = args.threads
600+
594601
if args.columns > MAX_COLUMNS:
595602
print(f"Error: --columns {args.columns} exceeds MAX_COLUMNS={MAX_COLUMNS}")
596603
sys.exit(1)
597604

605+
if args.tables_per_iteration > NUM_TABLES:
606+
print(f"Error: --tables-per-iteration {args.tables_per_iteration} exceeds NUM_TABLES={NUM_TABLES}")
607+
sys.exit(1)
608+
598609
if args.validate:
599610
args.iterations = 1
600611
print("=== VALIDATION MODE: 1 iteration only ===\n")
@@ -609,9 +620,9 @@ def main():
609620
# Logging
610621
profile_handler = setup_logging(log_path)
611622

612-
print(f"Profile: columns={args.columns}, tags={args.tags}, threads={args.threads}, iterations={args.iterations}")
613-
print(f"ALTERs per iteration: {NUM_TABLES * args.columns}")
614-
print(f"Total ALTERs: {NUM_TABLES * args.columns * args.iterations}")
623+
print(f"Profile: columns={args.columns}, tags={args.tags}, threads={args.threads}, iterations={args.iterations}, tables_per_iteration={args.tables_per_iteration}")
624+
print(f"ALTERs per iteration: {args.tables_per_iteration * args.columns} ({args.tables_per_iteration} tables x {args.columns} columns)")
625+
print(f"Total ALTERs: {args.tables_per_iteration * args.columns * args.iterations}")
615626
print(f"Output: {report_path}")
616627
print()
617628

@@ -631,6 +642,7 @@ def main():
631642
num_columns=args.columns,
632643
num_tags=args.tags,
633644
num_threads=args.threads,
645+
tables_per_iteration=args.tables_per_iteration,
634646
)
635647
all_alter_results.extend(alter_results)
636648
all_table_results.extend(table_results)

examples/profile_read_then_write_table_tags.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
SCHEMA = _creds["SCHEMA"]
4646
# ============================================================
4747

48-
NUM_TABLES = 64
48+
NUM_TABLES = 128
4949
RESULTS_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "results", "read_then_write_table_tags")
5050

5151
SELECT_TEMPLATE = """SELECT tag_name, tag_value
@@ -215,10 +215,12 @@ def worker(
215215
# Run one iteration
216216
# ---------------------------------------------------------------------------
217217

218-
def run_iteration(iteration: int, num_threads: int) -> tuple:
218+
def run_iteration(iteration: int, num_threads: int, tables_per_iteration: int) -> tuple:
219219
table_queue = Queue()
220-
for t in range(1, NUM_TABLES + 1):
221-
table_queue.put(f"table{t}")
220+
start = ((iteration - 1) * tables_per_iteration) % NUM_TABLES
221+
for i in range(tables_per_iteration):
222+
table_idx = start + i + 1
223+
table_queue.put(f"table{table_idx}")
222224

223225
results: list = []
224226
results_lock = threading.Lock()
@@ -484,10 +486,18 @@ def main():
484486
description="Profile read-from-information_schema then write-table-tag pattern"
485487
)
486488
parser.add_argument("--threads", type=int, required=True, help="Number of concurrent threads")
487-
parser.add_argument("--iterations", type=int, required=True, help="Number of times to repeat the full sweep")
489+
parser.add_argument("--iterations", type=int, required=True, help="Number of iterations")
490+
parser.add_argument("--tables-per-iteration", type=int, default=None, help="Tables per iteration (default = --threads)")
488491
parser.add_argument("--validate", action="store_true", help="Quick validation: override to 1 iteration")
489492
args = parser.parse_args()
490493

494+
if args.tables_per_iteration is None:
495+
args.tables_per_iteration = args.threads
496+
497+
if args.tables_per_iteration > NUM_TABLES:
498+
print(f"Error: --tables-per-iteration {args.tables_per_iteration} exceeds NUM_TABLES={NUM_TABLES}")
499+
sys.exit(1)
500+
491501
if args.validate:
492502
args.iterations = 1
493503
print("=== VALIDATION MODE: 1 iteration only ===\n")
@@ -500,9 +510,9 @@ def main():
500510

501511
profile_handler = setup_logging(log_path)
502512

503-
print(f"Profile (information_schema.table_tags): threads={args.threads}, iterations={args.iterations}")
504-
print(f"SELECTs per iteration: {NUM_TABLES} (1 per table)")
505-
print(f"Total SELECTs: {NUM_TABLES * args.iterations}")
513+
print(f"Profile (information_schema.table_tags): threads={args.threads}, iterations={args.iterations}, tables_per_iteration={args.tables_per_iteration}")
514+
print(f"SELECTs per iteration: {args.tables_per_iteration} (1 per table)")
515+
print(f"Total SELECTs: {args.tables_per_iteration * args.iterations}")
506516
print(f"Output: {report_path}")
507517
print()
508518

@@ -511,7 +521,7 @@ def main():
511521

512522
for i in range(1, args.iterations + 1):
513523
print(f"Iteration {i}/{args.iterations}...", end=" ", flush=True)
514-
results, duration = run_iteration(iteration=i, num_threads=args.threads)
524+
results, duration = run_iteration(iteration=i, num_threads=args.threads, tables_per_iteration=args.tables_per_iteration)
515525
all_results.extend(results)
516526
iteration_durations.append(duration)
517527

0 commit comments

Comments
 (0)