init. core algorithm, initial testing, basic cli.

2026-06-13 02:42:14 -07:00 · 2026-05-20 11:39:03 -07:00 · 2026-05-20 11:39:03 -07:00 · 8836c49091
commit 8836c49091
12 changed files with 2297 additions and 0 deletions
--- a/src/main.py
+++ b/src/main.py
--- a/src/smithy/init.py
+++ b/src/smithy/init.py
@ -0,0 +1,79 @@
+import click
+import polars as pl
+
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+
+from .rcv import smith_set
+
+console = Console()  # module-level Rich console; main() sends every table, panel and error message through it
+
+@click.command()
+@click.argument(
+    "spreadsheet",
+    type=click.Path(exists=True, dir_okay=False)
+)
+def main(spreadsheet: str) -> None:
+    """
+    Compute the Smith set from a ranked-choice ballot spreadsheet.
+    
+    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
+    in the set they are guaranteed the Condorcet i.e. Majority winner.
+    """
+
+    # NOTE: the docstring above doubles as the `--help` text rendered by click, so
+    # maintainer-facing notes live in comments rather than in the docstring.
+    #
+    # spreadsheet : path to a CSV or Excel file. Every column is handed to smith_set()
+    #               as a candidate and every row as one ballot.
+    # Exits with status 1 on an unsupported file type or on any error raised while
+    # loading, normalizing or tallying the ballots.
+
+    # Rank substituted for cells the voter left blank (or that are not numeric).
+    # It must be larger than any real rank: smith_set() treats LOWER numbers as MORE
+    # preferred, so filling with 0 would turn "unranked" into "ranked above first choice".
+    unranked = 2**62
+
+    try:
+
+        # Load spreadsheet (extension is matched case-insensitively, so "BALLOTS.CSV" works too)
+        suffix = spreadsheet.lower()
+
+        if suffix.endswith(".csv"):
+            df = pl.read_csv(spreadsheet)
+
+        elif suffix.endswith((".xlsx", ".xls")):
+            df = pl.read_excel(spreadsheet)
+
+        else:
+            console.print(
+                "[bold red]Unsupported file type.[/bold red]\n"
+                "Use CSV or Excel."
+            )
+            # SystemExit is not an Exception subclass, so the handler below does not swallow it
+            raise SystemExit(1)
+
+        # Normalize numerical dataframe entries; blank / non-numeric cells become null
+        ranks = df.with_columns([ pl.col(c)
+                                .cast(pl.Utf8)
+                                .str.strip_chars()
+                                .cast(pl.Int64, strict=False)
+                                for c in df.columns ])
+
+        # Compute Smith set - unranked candidates lose to every ranked one and tie with each other
+        smiths = smith_set(ranks.fill_null(unranked))
+
+        # The preview keeps displaying unranked cells as 0, as before
+        df = ranks.fill_null(0)
+
+        # Preview table
+        preview = Table(title="Ballot Box")
+
+        for col in df.columns:
+            preview.add_column(col)
+
+        for row in df.head(5).iter_rows():
+            preview.add_row(*map(str, row))
+
+        console.print(preview)
+
+        # Results
+        console.print()
+
+        console.print(
+            Panel.fit(
+                "\n".join(f"• {c}" for c in smiths),
+                title="Resulting Smith Set",
+                border_style="green"
+            )
+        )
+
+    except Exception as e:
+
+        # Top-level CLI boundary: report the failure readably instead of dumping a traceback
+        console.print(
+            f"[bold red]Error:[/bold red] {e}"
+        )
+
+        raise SystemExit(1)
--- a/src/smithy/rcv.py
+++ b/src/smithy/rcv.py
@ -0,0 +1,67 @@
+import polars as pl
+from itertools import combinations
+
+def smith_set(df: pl.DataFrame) -> list[str]:
+    """
+    Compute the Smith set from a Ranked-Choice ballot.
+    
+    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
+    in the set they are guaranteed the Condorcet i.e. Majority winner.
+    
+    parameters
+    ---
+    df : pl.DataFrame
+        A Polars DataFrame representing ballots. Each column is a candidate and each
+        row is a voter's ranking of the candidates. Lower numbers indicate higher
+        preference (1 = top-choice).
+
+    returns
+    ---
+    smith_set : list
+        A list of the Smith set candidates - all are equally good winners; ordering is determined lexicographically.
+        If there is a Condorcet winner (single Majority winner), the Smith set will contain that single candidate.
+        An empty list is returned when df has no columns.
+
+    """
+
+    candidates = df.columns
+
+    if not candidates:
+        return []
+
+    # Build pairwise majority graph: graph[x] holds every candidate that x beats head-to-head
+    graph: dict[str, set[str]] = { c: set() for c in candidates }
+
+    for a, b in combinations(candidates, 2):
+        a_wins, b_wins = df.select(
+            [
+                (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
+                (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
+            ]
+        ).row(0)
+
+        # A pairwise tie adds no edge in either direction
+        if a_wins > b_wins:
+            graph[a].add(b)
+        elif b_wins > a_wins:
+            graph[b].add(a)
+
+    # Find Smith set without enumerating subsets (that search is exponential in the candidate count).
+    #
+    # Seed: a candidate with the most pairwise wins is always a Smith member. If the Smith set has
+    # k of n candidates, each member beats all n - k outsiders, while an outsider loses to all k
+    # members and so wins at most n - k - 1 contests.
+    seed = max(candidates, key=lambda c: len(graph[c]))
+
+    smith = {seed}
+    pending = [seed]
+
+    # Closure: a Smith member beats every non-member, so anyone a member fails to beat
+    # (a win for the rival, or a tie) must be a member too. Once nothing more can be added,
+    # every member directly beats every outsider, i.e. the set is dominating and minimal.
+    while pending:
+
+        member = pending.pop()
+
+        for rival in candidates:
+            if rival not in smith and rival not in graph[member]:
+                smith.add(rival)
+                pending.append(rival)
+
+    return sorted(smith)