Skip to content

Commit 1878a46

Browse files
timsaucer and claude committed
tpch examples: align reference SQL constants with DataFrame queries
The reference SQL embedded in each q01..q22 module docstring was carried over verbatim from ``benchmarks/tpch/queries/`` and uses a different set of TPC-H substitution parameters than the DataFrame examples (answer-file-validated at scale factor 1). Update each reference SQL to use the substitution parameters the DataFrame uses, so both expressions describe the same query and would produce the same results against the same data.

Constants aligned:

- Q01: ``90 days`` cutoff (DataFrame ``DAYS_BEFORE_FINAL = 90``).
- Q02: ``p_size = 15``, ``p_type like '%BRASS'``, ``r_name = 'EUROPE'``.
- Q04: base date ``1993-07-01`` (``3 month`` interval preserved per the "quarter of a year" wording).
- Q05: ``r_name = 'ASIA'``.
- Q06: ``l_discount between 0.06 - 0.01 and 0.06 + 0.01``.
- Q07: nations ``'FRANCE'`` / ``'GERMANY'``.
- Q08: ``r_name = 'AMERICA'``, ``p_type = 'ECONOMY ANODIZED STEEL'``, inner-case ``nation = 'BRAZIL'``.
- Q09: ``p_name like '%green%'``.
- Q10: base date ``1993-10-01`` (``3 month`` interval preserved).
- Q11: ``n_name = 'GERMANY'``.
- Q12: ship modes ``('MAIL', 'SHIP')``, base date ``1994-01-01``.
- Q13: ``o_comment not like '%special%requests%'``.
- Q14: base date ``1995-09-01``.
- Q15: base date ``1996-01-01``.
- Q16: ``p_brand <> 'Brand#45'``, ``p_type not like 'MEDIUM POLISHED%'``, sizes ``(49, 14, 23, 45, 19, 3, 36, 9)``.
- Q17: ``p_brand = 'Brand#23'``, ``p_container = 'MED BOX'``.
- Q18: ``sum(l_quantity) > 300``.
- Q19: brands ``Brand#12`` / ``Brand#23`` / ``Brand#34`` with the matching minimum quantities (1, 10, 20).
- Q20: ``p_name like 'forest%'``, base date ``1994-01-01``, ``n_name = 'CANADA'``.
- Q21: ``n_name = 'SAUDI ARABIA'``.
- Q22: country codes ``('13', '31', '23', '29', '30', '18', '17')``.

Interval units (month / year) are preserved where the problem-statement text reads "given quarter", "given year", "given month". Q01 keeps the literal "days" unit because the TPC-H problem statement itself describes the cutoff in days.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 91f96cb commit 1878a46

21 files changed

Lines changed: 46 additions & 46 deletions

examples/tpch/q01_pricing_summary_report.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@
4444
from
4545
lineitem
4646
where
47-
l_shipdate <= date '1998-12-01' - interval '68 days'
47+
l_shipdate <= date '1998-12-01' - interval '90 days'
4848
group by
4949
l_returnflag,
5050
l_linestatus

examples/tpch/q02_minimum_cost_supplier.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,11 +48,11 @@
4848
where
4949
p_partkey = ps_partkey
5050
and s_suppkey = ps_suppkey
51-
and p_size = 48
52-
and p_type like '%TIN'
51+
and p_size = 15
52+
and p_type like '%BRASS'
5353
and s_nationkey = n_nationkey
5454
and n_regionkey = r_regionkey
55-
and r_name = 'ASIA'
55+
and r_name = 'EUROPE'
5656
and ps_supplycost = (
5757
select
5858
min(ps_supplycost)
@@ -66,7 +66,7 @@
6666
and s_suppkey = ps_suppkey
6767
and s_nationkey = n_nationkey
6868
and n_regionkey = r_regionkey
69-
and r_name = 'ASIA'
69+
and r_name = 'EUROPE'
7070
)
7171
order by
7272
s_acctbal desc,

examples/tpch/q04_order_priority_checking.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@
3333
from
3434
orders
3535
where
36-
o_orderdate >= date '1995-04-01'
37-
and o_orderdate < date '1995-04-01' + interval '3' month
36+
o_orderdate >= date '1993-07-01'
37+
and o_orderdate < date '1993-07-01' + interval '3' month
3838
and exists (
3939
select
4040
*

examples/tpch/q05_local_supplier_volume.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@
4747
and c_nationkey = s_nationkey
4848
and s_nationkey = n_nationkey
4949
and n_regionkey = r_regionkey
50-
and r_name = 'AFRICA'
50+
and r_name = 'ASIA'
5151
and o_orderdate >= date '1994-01-01'
5252
and o_orderdate < date '1994-01-01' + interval '1' year
5353
group by

examples/tpch/q06_forecasting_revenue_change.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@
3737
where
3838
l_shipdate >= date '1994-01-01'
3939
and l_shipdate < date '1994-01-01' + interval '1' year
40-
and l_discount between 0.04 - 0.01 and 0.04 + 0.01
40+
and l_discount between 0.06 - 0.01 and 0.06 + 0.01
4141
and l_quantity < 24;
4242
"""
4343

examples/tpch/q07_volume_shipping.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,8 @@
5555
and s_nationkey = n1.n_nationkey
5656
and c_nationkey = n2.n_nationkey
5757
and (
58-
(n1.n_name = 'GERMANY' and n2.n_name = 'IRAQ')
59-
or (n1.n_name = 'IRAQ' and n2.n_name = 'GERMANY')
58+
(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
59+
or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
6060
)
6161
and l_shipdate between date '1995-01-01' and date '1996-12-31'
6262
) as shipping

examples/tpch/q08_market_share.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
select
3232
o_year,
3333
sum(case
34-
when nation = 'IRAQ' then volume
34+
when nation = 'BRAZIL' then volume
3535
else 0
3636
end) / sum(volume) as mkt_share
3737
from
@@ -56,10 +56,10 @@
5656
and o_custkey = c_custkey
5757
and c_nationkey = n1.n_nationkey
5858
and n1.n_regionkey = r_regionkey
59-
and r_name = 'MIDDLE EAST'
59+
and r_name = 'AMERICA'
6060
and s_nationkey = n2.n_nationkey
6161
and o_orderdate between date '1995-01-01' and date '1996-12-31'
62-
and p_type = 'LARGE PLATED STEEL'
62+
and p_type = 'ECONOMY ANODIZED STEEL'
6363
) as all_nations
6464
group by
6565
o_year

examples/tpch/q09_product_type_profit_measure.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@
5454
and p_partkey = l_partkey
5555
and o_orderkey = l_orderkey
5656
and s_nationkey = n_nationkey
57-
and p_name like '%moccasin%'
57+
and p_name like '%green%'
5858
) as profit
5959
group by
6060
nation,

examples/tpch/q10_returned_item_reporting.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -47,8 +47,8 @@
4747
where
4848
c_custkey = o_custkey
4949
and l_orderkey = o_orderkey
50-
and o_orderdate >= date '1993-07-01'
51-
and o_orderdate < date '1993-07-01' + interval '3' month
50+
and o_orderdate >= date '1993-10-01'
51+
and o_orderdate < date '1993-10-01' + interval '3' month
5252
and l_returnflag = 'R'
5353
and c_nationkey = n_nationkey
5454
group by

examples/tpch/q11_important_stock_identification.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@
3838
where
3939
ps_suppkey = s_suppkey
4040
and s_nationkey = n_nationkey
41-
and n_name = 'ALGERIA'
41+
and n_name = 'GERMANY'
4242
group by
4343
ps_partkey having
4444
sum(ps_supplycost * ps_availqty) > (
@@ -51,7 +51,7 @@
5151
where
5252
ps_suppkey = s_suppkey
5353
and s_nationkey = n_nationkey
54-
and n_name = 'ALGERIA'
54+
and n_name = 'GERMANY'
5555
)
5656
order by
5757
value desc;

0 commit comments

Comments
 (0)