refactor core a bit, prep for better algo

This commit is contained in:
Thomas (Tom) C. Gorordo 2026-05-25 02:26:48 -07:00
parent a66116712b
commit 92b464fccf
Signed by: tgorordo
GPG key ID: 0CBED22BB0D94490
3 changed files with 90 additions and 34 deletions

View file

@ -13,7 +13,7 @@ sys.path.insert(0,
) )
) )
) )
from smithy import smith_set from smithy import smith_set_from_rcv
print("Content-Type: text/html\n") print("Content-Type: text/html\n")
message = "" message = ""
@ -75,7 +75,7 @@ if spreadsheet is not None:
] ]
) )
smiths = smith_set(df) # Solve! smiths = smith_set_from_rcv(df) # Solve!
message = f""" message = f"""
<h1>The Smith set winners are:</h1> <h1>The Smith set winners are:</h1>

View file

@ -1 +1,74 @@
from .rcv import smith_set import polars as pl
from itertools import combinations
from .rcv import pairmaj_from_rcv
def smith_set_brutefrom_pairmaj(pairmaj_graph: dict[str, set[str]]) -> list:
    """
    Brute-force the Smith set from a pairwise majority winner graph.

    Candidate subsets are tried in order of increasing size; the first subset in
    which every member beats every candidate outside the subset is returned.
    The number of subsets grows exponentially with the number of candidates.

    parameters
    ---
    pairmaj_graph: dict[str, set[str]]
        A graph whose nodes correspond to candidates and (directed) edges show
        which candidates they beat pairwise.

    returns
    ---
    smith_set: list
        A list of the Smith set candidates - all are equally good winners;
        ordering is determined lexicographically. If there is a Condorcet winner
        (single Majority winner), the Smith set will contain that single candidate.
        An empty graph yields an empty list.
    """
    # Sort once up front: the enumeration order then no longer depends on set
    # iteration order, and every combination comes out already sorted.
    candidates = sorted(pairmaj_graph)
    everyone = set(candidates)

    for size in range(1, len(candidates) + 1):
        for subset in combinations(candidates, size):
            outsiders = everyone.difference(subset)
            # A dominating subset: each member beats all candidates outside it.
            if all(outsiders.issubset(pairmaj_graph[member]) for member in subset):
                return list(subset)

    # Only reachable when there are no candidates at all; the full candidate
    # set has no outsiders and therefore always dominates.
    return []
def smith_set_from_rcv(rcv_ballots: pl.DataFrame) -> list:
    """
    Compute the Smith set from a box of Ranked-Choice ballots.

    The Smith set is the minimal set of candidates which can beat all others pairwise -
    if there is a single winner in the set, they are guaranteed to be the Condorcet
    (i.e. Majority) winner.

    The ballots are first reduced to a pairwise majority winner graph, from which
    the Smith set is then brute-forced.

    parameters
    ---
    rcv_ballots : pl.DataFrame
        A Polars DataFrame representing ballots. Each column is a candidate and each
        row is a voter's ranking of the candidates. Lower numbers indicate higher
        preference (1 = top-choice).

    returns
    ---
    smith_set : list
        A list of the Smith set candidates - all are equally good winners;
        ordering is determined lexicographically. If there is a Condorcet winner
        (single Majority winner), the Smith set will contain that single candidate.
    """
    pairmaj_graph = pairmaj_from_rcv(rcv_ballots)
    return smith_set_brutefrom_pairmaj(pairmaj_graph)
def smith_set(df: pl.DataFrame, ballotkind="rcv") -> list:
    """
    Compute the Smith set from a box of ballots of the given kind.

    parameters
    ---
    df : pl.DataFrame
        The ballots, passed through unchanged to the solver for `ballotkind`.
    ballotkind : str
        The kind of ballot held in `df`. Only "rcv" is currently handled.

    returns
    ---
    smith_set : list
        Whatever `smith_set_from_rcv` returns for `df`.

    raises
    ---
    NotImplementedError
        If `ballotkind` is anything other than "rcv".
    """
    # Guard clause: reject unsupported ballot kinds before dispatching.
    if ballotkind != "rcv":
        raise NotImplementedError(f"`smith_set` ballotkind={ballotkind} is not implemented.")
    return smith_set_from_rcv(df)

View file

@ -1,36 +1,29 @@
import polars as pl import polars as pl
from itertools import combinations from itertools import combinations
def smith_set(df: pl.DataFrame) -> list: def pairmaj_from_rcv(rcv_ballots: pl.DataFrame) -> dict[str, set[str]]:
""" """
Compute the Smith set from a Ranked-Choice ballot. Build a pairwise majority winner graph from a box of Ranked-Choice Ballots.
The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
in the set they are guaranteed the Condorcet i.e. Majority winner.
parameters parameters
--- ---
df : pl.DataFrame rcv_ballots : pl.DataFrame
A Polars DataFrame representing ballots. Each column is a candidate and each A Polars DataFrame representing ballots. Each column is a candidate and each
row is is a voter's ranking of the candidates. Lower numbers indicate higher row is is a voter's ranking of the candidates. Lower numbers indicate higher
preference (1 = top-choice). preference (1 = top-choice).
returns returns
--- ---
smith_set : list pairmaj_graph: dict[str, set[str]]
A list of the Smith set candidates - all are equally good winners; ordering is determined lexicographically. A pairwise majority winner graph whose nodes correspond to candidates and
If there is a Condorcet winner (single Majority winner), the Smith set will contain that single candidate. (directed) edges show which candidates they beat pairwise.
""" """
candidates = rcv_ballots.columns
candidates = df.columns pairmaj_graph: dict[str, set[str]] = {c: set() for c in candidates}
# Build pairwise majority graph
graph: dict[str, set[str]] = {c: set() for c in candidates}
for a, b in combinations(candidates, 2): for a, b in combinations(candidates, 2):
result = df.select( result = rcv_ballots.select(
[ [
(pl.col(a) < pl.col(b)).sum().alias("a_wins"), (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
(pl.col(b) < pl.col(a)).sum().alias("b_wins"), (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
@ -40,24 +33,14 @@ def smith_set(df: pl.DataFrame) -> list:
a_wins, b_wins = result a_wins, b_wins = result
if a_wins > b_wins: if a_wins > b_wins:
graph[a].add(b) pairmaj_graph[a].add(b)
elif b_wins > a_wins: elif b_wins > a_wins:
graph[b].add(a) pairmaj_graph[b].add(a)
return pairmaj_graph
# Find Smith set
for size in range(1, len(candidates) + 1):
for sub in combinations(candidates, size):
subset = set(sub)
out = set(candidates) - subset
dom = True
for member in subset:
if not out.issubset(graph[member]):
dom = False
break
if dom:
return sorted(subset)
return []