refactor core a bit, prep for better algo

2026-06-05 16:22:15 -07:00 · 2026-05-25 02:26:48 -07:00 · 2026-05-25 02:26:48 -07:00 · 92b464fccf
commit 92b464fccf
parent a66116712b
3 changed files with 90 additions and 34 deletions
--- a/src/cgi/script.py
+++ b/src/cgi/script.py
@ -13,7 +13,7 @@ sys.path.insert(0,
        )
    )
 )
-from smithy import smith_set
+from smithy import smith_set_from_rcv
 print("Content-Type: text/html\n")
 message = ""
@ -75,7 +75,7 @@ if spreadsheet is not None:
                    ]
                )
-                smiths = smith_set(df) # Solve!
+                smiths = smith_set_from_rcv(df) # Solve!
                message = f"""
                <h1>The Smith set winners are:</h1>
--- a/src/smithy/init.py
+++ b/src/smithy/init.py
@ -1 +1,74 @@
-from .rcv import smith_set
+import polars as pl
 from itertools import combinations
 from .rcv import pairmaj_from_rcv
 def smith_set_brutefrom_pairmaj(pairmaj_graph: dict[str, set[str]]) -> list:
    """
    Brute-force the Smith set from a pairwise majority winner graph.

    Candidate subsets are tried in order of increasing size. The first subset
    whose every member beats every candidate outside the subset is returned,
    so the result is the smallest such dominating subset.

    parameters
    ---
    pairmaj_graph: dict[str, set[str]]
        A graph whose nodes correspond to candidates and (directed) edges show
        which candidates they beat pairwise.

    returns
    ---
    smith_set: list
        A list of the Smith set candidates - all are equally good winners;
        ordering is determined lexicographically. If there is a Condorcet winner
        (single Majority winner), the Smith set will contain that single candidate.
        An empty graph yields an empty list.
    """
    # Enumerate in the dict's own key order so runs are reproducible; the
    # returned subset is sorted, so this order never leaks into the result.
    candidates = list(pairmaj_graph)
    everyone = set(candidates)
    for size in range(1, len(candidates) + 1):
        for sub in combinations(candidates, size):
            out = everyone.difference(sub)
            # Dominating: each member beats all candidates outside the subset.
            if all(out.issubset(pairmaj_graph[member]) for member in sub):
                return sorted(sub)
    return []
 def smith_set_from_rcv(rcv_ballots: pl.DataFrame) -> list:
    """
    Find the Smith set for a box of Ranked-Choice ballots.

    The ballots are first reduced to a pairwise majority winner graph with
    `pairmaj_from_rcv`, which is then solved by `smith_set_brutefrom_pairmaj`.
    The Smith set is the minimal set of candidates which can beat all others
    pairwise - if there is a single winner in the set they are guaranteed the
    Condorcet i.e. Majority winner.

    parameters
    ---
    rcv_ballots : pl.DataFrame
        A Polars DataFrame representing ballots. Each column is a candidate and
        each row is a voter's ranking of the candidates. Lower numbers indicate
        higher preference (1 = top-choice).

    returns
    ---
    smith_set : list
        A list of the Smith set candidates - all are equally good winners;
        ordering is determined lexicographically. If there is a Condorcet winner
        (single Majority winner), the Smith set will contain that single candidate.
    """
    pairmaj_graph = pairmaj_from_rcv(rcv_ballots)
    return smith_set_brutefrom_pairmaj(pairmaj_graph)
 def smith_set(df: pl.DataFrame, ballotkind="rcv") -> list:
    """
    Compute the Smith set for ballots of the given kind.

    parameters
    ---
    df : pl.DataFrame
        The ballots, passed unchanged to the solver selected by `ballotkind`.
    ballotkind : str
        The ballot format. Only "rcv" is handled, by `smith_set_from_rcv`.

    returns
    ---
    smith_set : list
        The result of `smith_set_from_rcv(df)`.

    raises
    ---
    NotImplementedError
        If `ballotkind` is anything other than "rcv".
    """
    # Guard clause: reject unsupported ballot kinds before dispatching.
    if ballotkind != "rcv":
        raise NotImplementedError(f"`smith_set` ballotkind={ballotkind} is not implemented.")
    return smith_set_from_rcv(df)
--- a/src/smithy/rcv.py
+++ b/src/smithy/rcv.py
@ -1,36 +1,29 @@
 import polars as pl
 from itertools import combinations
-def smith_set(df: pl.DataFrame) -> list:
+def pairmaj_from_rcv(rcv_ballots: pl.DataFrame) -> dict[str, set[str]]:
    """
-    Compute the Smith set from a Ranked-Choice ballot.
+    Build a pairwise majority winner graph from a box of Ranked-Choice Ballots.
    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
    in the set they are guaranteed the Condorcet i.e. Majority winner.
    parameters
    ---
-    df : pl.DataFrame
+    rcv_ballots : pl.DataFrame
        A Polars DataFrame representing ballots. Each column is a candidate and each
        row is is a voter's ranking of the candidates. Lower numbers indicate higher
        preference (1 = top-choice).
    returns
    ---
-    smith_set : list
+    pairmaj_graph: dict[str, set[str]]
-        A list of the Smith set candidates - all are equally good winners; ordering is determined lexicographically.
+        A pairwise majority winner graph whose nodes correspond to candidates and 
-        If there is a Condorcet winner (single Majority winner), the Smith set will contain that single candidate.
+        (directed) edges show which candidates they beat pairwise.
    """
    candidates = rcv_ballots.columns
-    candidates = df.columns
+    pairmaj_graph: dict[str, set[str]] = {c: set() for c in candidates}
    # Build pairwise majority graph
    graph: dict[str, set[str]] = {c: set() for c in candidates}
    for a, b in combinations(candidates, 2):
-        result = df.select(
+        result = rcv_ballots.select(
            [
                (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
                (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
@ -40,24 +33,14 @@ def smith_set(df: pl.DataFrame) -> list:
        a_wins, b_wins = result
        if a_wins > b_wins:
-            graph[a].add(b)
+            pairmaj_graph[a].add(b)
        elif b_wins > a_wins:
-            graph[b].add(a)
+            pairmaj_graph[b].add(a)
    return pairmaj_graph
    # Find Smith set
    for size in range(1, len(candidates) + 1):
        for sub in combinations(candidates, size):
            subset = set(sub)
            out = set(candidates) - subset
            dom = True
            for member in subset:
                if not out.issubset(graph[member]):
                    dom = False
                    break
            if dom:
                return sorted(subset)
    return []