init. core algorithm, initial testing, basic cli.

This commit is contained in:
Thomas (Tom) C. Gorordo 2026-05-20 11:39:03 -07:00
commit 8836c49091
Signed by: tgorordo
GPG key ID: 0CBED22BB0D94490
12 changed files with 2297 additions and 0 deletions

0
src/main.py Normal file
View file

79
src/smithy/__init__.py Normal file
View file

@ -0,0 +1,79 @@
import click
import polars as pl
from rich.console import Console
from rich.markup import escape
from rich.panel import Panel
from rich.table import Table

from .rcv import smith_set
# Shared rich console; the CLI prints its messages, preview table and result panel through it.
console = Console()
@click.command()
@click.argument(
    "spreadsheet",
    type=click.Path(exists=True, dir_okay=False),
)
def main(spreadsheet: str) -> None:
    """
    Compute the Smith set from a ranked-choice ballot spreadsheet.

    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
    in the set they are guaranteed the Condorcet i.e. Majority winner.
    """
    # NOTE: click shows the docstring above as the --help text, so the
    # implementation notes are kept in comments instead.
    #
    # spreadsheet: path to an existing CSV/Excel file (existence is validated
    #   by click); one column per candidate, one row per ballot.
    # Exits with status 1 on an unsupported file type or on any error raised
    # while loading, normalizing or tallying the ballots.
    try:
        # Load spreadsheet. The extension is matched case-insensitively so
        # that e.g. "BALLOTS.CSV" is accepted as well.
        lowered = spreadsheet.lower()
        if lowered.endswith(".csv"):
            df = pl.read_csv(spreadsheet)
        elif lowered.endswith((".xlsx", ".xls")):
            df = pl.read_excel(spreadsheet)
        else:
            console.print(
                "[bold red]Unsupported file type.[/bold red]\n"
                "Use CSV or Excel."
            )
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            raise SystemExit(1)

        # Normalize numerical dataframe entries: anything that is blank or
        # does not parse as an integer becomes 0 ("unranked").
        df = df.with_columns([
            pl.col(c)
            .cast(pl.Utf8)
            .str.strip_chars()
            .cast(pl.Int64, strict=False)
            .fill_null(0)
            for c in df.columns
        ])

        # smith_set treats a lower number as a stronger preference, so the 0
        # placeholder would make an unranked candidate beat every ranked one.
        # For the tally, move every non-positive entry to a shared last place
        # (one past the worst rank that appears anywhere in the sheet).
        worst_rank = max((df[c].max() or 0 for c in df.columns), default=0)
        last_place = pl.lit(worst_rank + 1, dtype=pl.Int64)
        ballots = df.with_columns([
            pl.when(pl.col(c) > 0)
            .then(pl.col(c))
            .otherwise(last_place)
            .alias(c)
            for c in df.columns
        ])

        # Compute Smith set
        smiths = smith_set(ballots)

        # Preview table (shows the normalized sheet, with 0 for unranked).
        # Header text is escaped so that a candidate name containing
        # square brackets is not interpreted as rich markup.
        preview = Table(title="Ballot Box")
        for col in df.columns:
            preview.add_column(escape(col))
        for row in df.head(5).iter_rows():
            preview.add_row(*map(str, row))
        console.print(preview)

        # Results
        console.print()
        console.print(
            Panel.fit(
                "\n".join(escape(f"{c}") for c in smiths),
                title="Resulting Smith Set",
                border_style="green",
            )
        )
    except Exception as e:
        # Escape the message: exception text can contain bracketed paths or
        # values which rich would otherwise try to parse as markup tags.
        console.print(
            f"[bold red]Error:[/bold red] {escape(str(e))}"
        )
        raise SystemExit(1) from e

67
src/smithy/rcv.py Normal file
View file

@ -0,0 +1,67 @@
import polars as pl
from itertools import combinations
def smith_set(df: pl.DataFrame) -> list:
    """
    Compute the Smith set from a Ranked-Choice ballot.

    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
    in the set they are guaranteed the Condorcet i.e. Majority winner.

    parameters
    ---
    df : pl.DataFrame
        A Polars DataFrame representing ballots. Each column is a candidate and each
        row is a voter's ranking of the candidates. Lower numbers indicate higher
        preference (1 = top-choice).

    returns
    ---
    smith_set : list
        A list of the Smith set candidates - all are equally good winners; ordering is determined lexicographically.
        If there is a Condorcet winner (single Majority winner), the Smith set will contain that single candidate.
        An empty list is returned when `df` has no columns.
    """
    candidates = df.columns
    if not candidates:
        return []

    # Build pairwise majority graph: graph[a] holds every candidate that `a`
    # strictly beats head-to-head. A tied pair adds no edge in either direction.
    graph: dict[str, set[str]] = {c: set() for c in candidates}
    for a, b in combinations(candidates, 2):
        a_wins, b_wins = df.select(
            [
                (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
                (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
            ]
        ).row(0)
        if a_wins > b_wins:
            graph[a].add(b)
        elif b_wins > a_wins:
            graph[b].add(a)

    # Find Smith set (DIRECT dominance only: every member must itself beat
    # every non-member).
    #
    # Instead of testing every subset (exponential in the number of
    # candidates), grow the set from a candidate that is certain to belong:
    # a Smith member beats all k outsiders, while an outsider can beat at most
    # the other k - 1 outsiders, so a candidate with the most pairwise wins is
    # always a member.
    seed = max(candidates, key=lambda c: len(graph[c]))
    smith = {seed}
    pending = [seed]

    # Closure step: anyone a member fails to beat cannot be left outside, so
    # pull them in and check their match-ups too. When nothing is pending,
    # every member beats every outsider, which is exactly the Smith set.
    while pending:
        beaten = graph[pending.pop()]
        for other in candidates:
            if other not in smith and other not in beaten:
                smith.add(other)
                pending.append(other)

    return sorted(smith)