init. core algorithm, initial testing, basic cli.

2026-06-05 16:22:15 -07:00 · 2026-05-20 11:39:03 -07:00 · 2026-05-20 11:39:03 -07:00 · 8836c49091
commit 8836c49091
12 changed files with 2297 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,5 @@
+# Smithy
+*A simple Smith set solver for ranked-choice ballots.*
+
+The Smith set is the smallest non-empty set of candidates in which every member beats every candidate outside the set
+in a pairwise contest. If the set contains a single candidate, that candidate is the Condorcet (i.e. majority) winner: they beat all others pairwise.
--- a/30
+++ b/30
@ -0,0 +1,30 @@
+list:
+  just --list
+
+run spreadsheet:
+  uv run smithy {{spreadsheet}}
+
+check:
+  uv run pyright src
+
+test:
+  uv run pytest -vvv --tb=short --log-cli-level=INFO
+
+format:
+  uv run ruff format src test
+
+compile:
+  uv run pyinstaller src/main.py
+
+clean:
+  uv run pyclean src test
+  uv run ruff clean
+  rm -rf main.spec cli.spec build dist .pytest_cache .hypothesis .benchmarks __marimo__
+  
+wipe:
+  just clean
+  rm -rf .venv
+
+lock:
+  uv lock
+  uv pip compile pyproject.toml -o requirements.txt --group dev
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,35 @@
+[project]
+name = "smithy"
+version = "0.1.0"
+description = "A simple Smith set solver for ranked-choice ballots."
+readme = "README.md"
+authors = [
+    { name = "Thomas (Tom) C. Gorordo", email = "tcgorordo@gmail.com" }
+]
+requires-python = ">=3.13"
+dependencies = [
+    "click>=8.4.0",
+    "polars>=1.40.1",
+    "rich>=15.0.0",
+]
+
+[project.scripts]
+smithy = "smithy:main"
+
+[build-system]
+requires = ["uv_build>=0.11.7,<0.12.0"]
+build-backend = "uv_build"
+
+[dependency-groups]
+dev = [
+    "marimo[recommended]>=0.23.6",
+    "py-spy>=0.4.2",
+    "pyclean>=3.6.0",
+    "pyinstaller>=6.20.0",
+    "pyinstrument>=5.1.2",
+    "pyperf>=2.10.0",
+    "pyright>=1.1.409",
+    "pytest>=9.0.3",
+    "pytest-benchmark>=5.2.3",
+    "ruff>=0.15.13",
+]
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,238 @@
+# This file was autogenerated by uv via the following command:
+#    uv pip compile pyproject.toml -o requirements.txt --group dev
+altair==6.1.0
+    # via marimo
+altgraph==0.17.5
+    # via pyinstaller
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.13.0
+    # via
+    #   httpx
+    #   openai
+    #   starlette
+attrs==26.1.0
+    # via
+    #   jsonschema
+    #   referencing
+certifi==2026.5.20
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+charset-normalizer==3.4.7
+    # via requests
+click==8.4.0
+    # via
+    #   smithy (pyproject.toml)
+    #   marimo
+    #   uvicorn
+distro==1.9.0
+    # via openai
+docutils==0.22.4
+    # via marimo
+duckdb==1.5.3
+    # via marimo
+fastjsonschema==2.21.2
+    # via nbformat
+genai-prices==0.0.61
+    # via pydantic-ai-slim
+griffelib==2.0.2
+    # via pydantic-ai-slim
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   genai-prices
+    #   openai
+    #   pydantic-ai-slim
+    #   pydantic-graph
+idna==3.15
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+iniconfig==2.3.0
+    # via pytest
+itsdangerous==2.2.0
+    # via marimo
+jedi==0.19.2
+    # via marimo
+jinja2==3.1.6
+    # via altair
+jiter==0.15.0
+    # via openai
+jsonschema==4.26.0
+    # via
+    #   altair
+    #   nbformat
+jsonschema-specifications==2025.9.1
+    # via jsonschema
+jupyter-core==5.9.1
+    # via nbformat
+logfire-api==4.33.0
+    # via pydantic-graph
+loro==1.10.3
+    # via marimo
+marimo==0.23.6
+    # via smithy (pyproject.toml:dev)
+markdown==3.10.2
+    # via
+    #   marimo
+    #   pymdown-extensions
+markdown-it-py==4.2.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+msgspec==0.21.1
+    # via marimo
+narwhals==2.21.2
+    # via
+    #   altair
+    #   marimo
+nbformat==5.10.4
+    # via marimo
+nodeenv==1.10.0
+    # via pyright
+openai==2.37.0
+    # via pydantic-ai-slim
+opentelemetry-api==1.42.0
+    # via pydantic-ai-slim
+packaging==26.2
+    # via
+    #   altair
+    #   marimo
+    #   pyinstaller
+    #   pyinstaller-hooks-contrib
+    #   pytest
+parso==0.8.7
+    # via jedi
+platformdirs==4.9.6
+    # via jupyter-core
+pluggy==1.6.0
+    # via pytest
+polars==1.40.1
+    # via marimo
+polars-runtime-32==1.40.1
+    # via polars
+psutil==7.2.2
+    # via
+    #   marimo
+    #   pyperf
+py-cpuinfo==9.0.0
+    # via pytest-benchmark
+py-spy==0.4.2
+    # via smithy (pyproject.toml:dev)
+pyarrow==24.0.0
+    # via polars
+pyclean==3.6.0
+    # via smithy (pyproject.toml:dev)
+pydantic==2.13.4
+    # via
+    #   genai-prices
+    #   openai
+    #   pydantic-ai-slim
+    #   pydantic-graph
+pydantic-ai-slim==1.99.0
+    # via marimo
+pydantic-core==2.46.4
+    # via pydantic
+pydantic-graph==1.99.0
+    # via pydantic-ai-slim
+pygments==2.20.0
+    # via
+    #   marimo
+    #   pytest
+    #   rich
+pyinstaller==6.20.0
+    # via smithy (pyproject.toml:dev)
+pyinstaller-hooks-contrib==2026.5
+    # via pyinstaller
+pyinstrument==5.1.2
+    # via smithy (pyproject.toml:dev)
+pymdown-extensions==10.21.3
+    # via marimo
+pyperf==2.10.0
+    # via smithy (pyproject.toml:dev)
+pyright==1.1.409
+    # via smithy (pyproject.toml:dev)
+pytest==9.0.3
+    # via
+    #   smithy (pyproject.toml:dev)
+    #   pytest-benchmark
+pytest-benchmark==5.2.3
+    # via smithy (pyproject.toml:dev)
+pyyaml==6.0.3
+    # via
+    #   marimo
+    #   pymdown-extensions
+pyzmq==27.1.0
+    # via marimo
+referencing==0.37.0
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2026.5.9
+    # via tiktoken
+requests==2.34.2
+    # via tiktoken
+rich==15.0.0
+    # via smithy (pyproject.toml)
+rpds-py==0.30.0
+    # via
+    #   jsonschema
+    #   referencing
+ruff==0.15.13
+    # via
+    #   smithy (pyproject.toml:dev)
+    #   marimo
+setuptools==82.0.1
+    # via
+    #   pyinstaller
+    #   pyinstaller-hooks-contrib
+sniffio==1.3.1
+    # via openai
+sqlglot==30.8.0
+    # via marimo
+sqlglotc==30.8.0
+    # via sqlglot
+starlette==1.0.0
+    # via marimo
+tiktoken==0.13.0
+    # via pydantic-ai-slim
+tomlkit==0.15.0
+    # via marimo
+tqdm==4.67.3
+    # via openai
+traitlets==5.15.0
+    # via
+    #   jupyter-core
+    #   nbformat
+typing-extensions==4.15.0
+    # via
+    #   altair
+    #   openai
+    #   opentelemetry-api
+    #   pydantic
+    #   pydantic-core
+    #   pyright
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   pydantic
+    #   pydantic-ai-slim
+    #   pydantic-graph
+urllib3==2.7.0
+    # via requests
+uv==0.11.15
+    # via marimo
+uvicorn==0.47.0
+    # via marimo
+websockets==16.0
+    # via marimo
--- a/shell.nix
+++ b/shell.nix
@ -0,0 +1,5 @@
+{ pkgs ? import <nixpkgs> {}}:
+
+pkgs.mkShellNoCC {
+    packages = with pkgs; [ just uv ];
+}
--- a/src/main.py
+++ b/src/main.py
--- a/src/smithy/init.py
+++ b/src/smithy/init.py
@ -0,0 +1,79 @@
+import click
+import polars as pl
+
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+
+from .rcv import smith_set
+
+console = Console()
+
+@click.command()
+@click.argument(
+    "spreadsheet",
+    type=click.Path(exists=True, dir_okay=False)
+)
+def main(spreadsheet: str) -> None:
+    """
+    Compute the Smith set from a ranked-choice ballot spreadsheet.
+    
+    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
+    in the set they are guaranteed the Condorcet i.e. Majority winner.
+    """
+
+    try:
+
+        # Load spreadsheet
+        if spreadsheet.endswith(".csv"):
+            df = pl.read_csv(spreadsheet)
+
+        elif spreadsheet.endswith((".xlsx", ".xls")):
+            df = pl.read_excel(spreadsheet)
+
+        else:
+            console.print(
+                "[bold red]Unsupported file type.[/bold red]\n"
+                "Use CSV or Excel."
+            )
+            raise SystemExit(1)
+
+        # Normalize numerical dataframe entries
+        df = df.with_columns([ pl.col(c)
+                             .cast(pl.Utf8)
+                             .str.strip_chars()
+                             .cast(pl.Int64, strict=False).fill_null(0)
+                             for c in df.columns ])
+
+        # Compute Smith set
+        smiths = smith_set(df)
+
+        # Preview table
+        preview = Table(title="Ballot Box")
+
+        for col in df.columns:
+            preview.add_column(col)
+
+        for row in df.head(5).iter_rows():
+            preview.add_row(*map(str, row))
+
+        console.print(preview)
+
+        # Results
+        console.print()
+
+        console.print(
+            Panel.fit(
+                "\n".join(f"• {c}" for c in smiths),
+                title="Resulting Smith Set",
+                border_style="green"
+            )
+        )
+
+    except Exception as e:
+
+        console.print(
+            f"[bold red]Error:[/bold red] {e}"
+        )
+
+        raise SystemExit(1)
--- a/src/smithy/rcv.py
+++ b/src/smithy/rcv.py
@ -0,0 +1,67 @@
+import polars as pl
+from itertools import combinations
+
+def smith_set(df: pl.DataFrame) -> list:
+    """
+    Compute the Smith set from a Ranked-Choice ballot.
+    
+    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
+    in the set they are guaranteed the Condorcet i.e. Majority winner.
+    
+    parameters
+    ---
+    df : pl.DataFrame
+        A Polars DataFrame representing ballots. Each column is a candidate and each
+        row is is a voter's ranking of the candidates. Lower numbers indicate higher
+        preference (1 = top-choice).
+
+    returns
+    ---
+    smith_set : list
+        A list of the Smith set candidates - all are equally good winners; ordering is determined lexicographically.
+        If there is a Condorcet winner (single Majority winner), the Smith set will contain that single candidate.
+
+
+    """
+
+    candidates = df.columns
+
+    # Build pairwise majority graph
+    graph: dict[str, set[str]] = { c: set() for c in candidates }
+
+    for a, b in combinations(candidates, 2):
+        result = df.select(
+            [
+                (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
+                (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
+            ]
+        ).row(0)
+
+        a_wins, b_wins = result
+
+        if a_wins > b_wins:
+            graph[a].add(b)
+        elif b_wins > a_wins:
+            graph[b].add(a)
+
+    # Find Smith set
+    for size in range(1, len(candidates) + 1):
+
+        for sub in combinations(candidates, size):
+
+            subset = set(sub)
+            out = set(candidates) - subset
+
+            dom = True
+
+            for member in subset:
+
+                # DIRECT dominance only
+                if not out.issubset(graph[member]):
+                    dom = False
+                    break
+
+            if dom:
+                return sorted(subset)
+
+    return []
--- a/test/test_ballot.csv
+++ b/test/test_ballot.csv
@ -0,0 +1,5 @@
+Alice,Bob,Carol
+1    ,2  ,3
+2    ,1  ,3
+1    ,3  ,2
+3    ,1  ,2
--- a/test/test_nb.py
+++ b/test/test_nb.py
@ -0,0 +1,47 @@
+import marimo
+
+__generated_with = "0.23.6"
+app = marimo.App(width="medium")
+
+
+@app.cell
+def _():
+    # Setup cell: import marimo itself so later cells can use `mo`.
+    import marimo as mo
+    # NOTE(review): Path is imported but not returned from this cell or used in it.
+    from pathlib import Path
+
+    return (mo,)
+
+
+@app.cell
+def _():
+    # Import polars and the solver under test, and expose both to later cells.
+    import polars as pl
+    from smithy import smith_set
+
+    return pl, smith_set
+
+
+@app.cell
+def _(mo, pl):
+    # Load the sample ballot stored next to this notebook.
+    df = pl.read_csv(mo.notebook_dir() / "test_ballot.csv")
+    # Normalize every column: cast to text, strip surrounding whitespace, then
+    # parse as integers. Entries that fail to parse become null and are filled
+    # with df.width + 1, a rank one past the number of columns.
+    df = df.with_columns([ pl.col(c) # make safe, clean up
+                             .cast(pl.Utf8)
+                              .str.strip_chars()
+                              .cast(pl.Int64, strict=False).fill_null(df.width + 1)
+                                 for c in df.columns ])
+    # Bare expression so the notebook displays the cleaned frame.
+    df
+    return (df,)
+
+
+@app.cell
+def _(df, smith_set):
+    # In test_ballot.csv Alice and Bob tie each other 2-2 and each beats Carol 3-1,
+    # so neither is a lone winner and the expected result is the pair.
+    smith_set(df) # find the smith set (should be "Alice" and "Bob" as a pair)
+    return
+
+
+@app.cell
+def _():
+    # Empty placeholder cell.
+    return
+
+
+if __name__ == "__main__":
+    app.run()
--- a/test/test_rcvs.py
+++ b/test/test_rcvs.py
@ -0,0 +1,35 @@
+import polars as pl
+from smithy import smith_set
+
+def test_condorcet():
+    df = pl.DataFrame({
+                          'A': [1, 1, 2, 1],
+                          'B': [2, 2, 1, 2],
+                          'C': [3, 3, 3, 3],
+                      })
+    assert smith_set(df) == ['A']
+
+def test_rockpprscrcycle():
+    df = pl.DataFrame({
+                             'A': [1, 2, 3],
+                             'B': [2, 3, 1],
+                             'C': [3, 1, 2],
+                         })
+    assert smith_set(df) == ['A', 'B', 'C']
+
+def test_abpair():
+    df = pl.DataFrame({
+        "A": [1, 2, 1, 3],
+        "B": [2, 1, 3, 1],
+        "C": [3, 3, 2, 2]
+        })
+    assert smith_set(df) == ['A', 'B']
+
+def test_fourcycle():
+    df = pl.DataFrame({
+        "A": [1,2,3,4],
+        "B": [2,3,4,1],
+        "C": [3,4,1,2],
+        "D": [4,1,2,3],
+    })
+    assert smith_set(df) == ['A', 'B', 'C', 'D']
--- a/uv.lock
+++ b/uv.lock