#!/usr/bin/env python3
# Copyright 2025, University of Freiburg
# Chair of Algorithms and Data Structures
# Author: Hannah Bast <bast@cs.uni-freiburg.de>
class QueryPlan:
"""
A class representing a simplified query plan (joins only) with cost and
size estimates.
The plan is represented as a parenthesized string. For example, we write
`((AB)C)` for the query plan that first joins A and B, and then joins the
result with C.
We store the list of tables being joined as a string of sorted table names.
For example, `ABC` for the plan `((AB)(C))`. That way, we can use this
string as a key for a dictionary storing the best query plan for each set
of tables.
"""
def __init__(self, plan: str, cost: int, size: int):
self.plan = plan
self.tables = "".join(sorted(plan.replace("(", "").replace(")", "")))
self.cost = cost
self.size = size
@staticmethod
def join(plan1: "QueryPlan", plan2: "QueryPlan") -> "QueryPlan":
"""
Join two query plans and return the new query plan, using the
simplistic cost and size estimates from the lecture.
"""
plan = f"({plan1.plan}{plan2.plan})"
size = min(plan1.size, plan2.size)
cost = plan1.cost + plan2.cost + size
return QueryPlan(plan, cost, size)
class JoinGraph:
"""
A class representing the join graph for a set of input tables. We use an
adjacency list for each input table. See `main` for some examples.
"""
def __init__(self, adjacency_list: dict[str, list[str]]):
self.adjacency_list = adjacency_list
def joinable(self, plan1: QueryPlan, plan2: QueryPlan) -> bool:
"""
Check whether the two plans are joinable, that is, whether the two
subsects of tables do not intersect and there is an edge in the query
graph between some table in the first plan and some table in the second
plan.
NOTE: This is a very inefficient implementation, but it suffices for
our purposes here.
"""
tables_disjoint = set(plan1.tables).isdisjoint(set(plan2.tables))
edge_exists = False
for table1 in plan1.tables:
for table2 in plan2.tables:
if table2 in self.adjacency_list[table1]:
edge_exists = True
return tables_disjoint and edge_exists
class QueryPlanner:
"""
Compute the optimal query plan for joining a set of input tables, as
explained in the lecture.
"""
def __init__(self, input_plans: list[QueryPlan], join_graph: JoinGraph):
"""
The input plans simply consist of one table each, with zero cost and
the size of the table as size estimate.
"""
self.input_plans = input_plans
self.join_graph = join_graph
def compute_optimal_plan(self) -> QueryPlan:
"""
Compute the optimal query plan using dynamic programming, as explained
in the lecture. In round i, we compute the optimal plans for all
subsets of size i.
"""
# In the first round, start with the singleton query plans.
optimal_plans = {}
optimal_plans[1] = {}
print("Optimal plans with one table:")
for plan in self.input_plans:
optimal_plans[1][plan.tables] = plan
print(f" Table {plan.tables} [size={plan.size}]")
for k in range(2, len(self.input_plans) + 1):
# Compute all optimal plans of k tables.
optimal_plans[k] = {}
for k1 in range(1, k // 2 + 1):
k2 = k - k1
for plan1 in optimal_plans[k1].values():
for plan2 in optimal_plans[k2].values():
if self.join_graph.joinable(plan1, plan2):
plan = QueryPlan.join(plan1, plan2)
if (
plan.tables not in optimal_plans[k]
or plan.cost < optimal_plans[k][plan.tables].cost
):
optimal_plans[k][plan.tables] = plan
# Show all optimal plans of k tables.
print(f"Optimal plans with {k} tables:")
for plan in optimal_plans[k].values():
print(f" Tables {plan.tables} [size={plan.size}, cost={plan.cost}]")
if __name__ == "__main__":
# Five input tables A, B, C, D, E, with their sizes.
A = QueryPlan("A", 0, 100)
B = QueryPlan("B", 0, 200)
C = QueryPlan("C", 0, 150)
D = QueryPlan("D", 0, 300)
E = QueryPlan("E", 0, 250)
# Example join graph (a "star").
join_graph = JoinGraph(
{
"A": ["B", "C", "D", "E"],
"B": ["A"],
"C": ["A"],
"D": ["A"],
"E": ["A"],
}
)
# Compute the optimal query plan.
planner = QueryPlanner([A, B, C, D, E], join_graph)
optimal_plan = planner.compute_optimal_plan()