init. core algorithm, initial testing, basic cli.

This commit is contained in:
Thomas (Tom) C. Gorordo 2026-05-20 11:39:03 -07:00
commit 8836c49091
Signed by: tgorordo
GPG key ID: 0CBED22BB0D94490
12 changed files with 2297 additions and 0 deletions

5
README.md Normal file
View file

@ -0,0 +1,5 @@
# Smithy
*A simple Smith set solver for ranked-choice ballots.*
The Smith set is the smallest non-empty set of candidates in which every member beats every candidate outside the set
in a pairwise (head-to-head) contest. If the set contains a single candidate, that candidate is the Condorcet, i.e. pairwise-majority, winner (they beat all others pairwise).

30
justfile Normal file
View file

@ -0,0 +1,30 @@
# Show the available recipes.
list:
    just --list

# Run the smithy CLI on the given spreadsheet.
run spreadsheet:
    uv run smithy {{spreadsheet}}

# Type-check the sources under src with pyright.
check:
    uv run pyright src

# Run the test suite with verbose output and INFO-level log capture.
test:
    uv run pytest -vvv --tb=short --log-cli-level=INFO

# Format the src and test trees with ruff.
format:
    uv run ruff format src test

# Build an executable from src/main.py with pyinstaller.
compile:
    uv run pyinstaller src/main.py

# Remove bytecode, ruff caches, and build/test/notebook artifacts.
clean:
    uv run pyclean src test
    uv run ruff clean
    rm -rf main.spec cli.spec build dist .pytest_cache .hypothesis .benchmarks __marimo__

# Run `clean`, then also delete the virtual environment.
wipe:
    just clean
    rm -rf .venv

# Refresh uv.lock and regenerate requirements.txt (including the dev group).
lock:
    uv lock
    uv pip compile pyproject.toml -o requirements.txt --group dev

35
pyproject.toml Normal file
View file

@ -0,0 +1,35 @@
[project]
name = "smithy"
version = "0.1.0"
description = "A simple Smith set solver for ranked-choice ballots."
readme = "README.md"
authors = [
{ name = "Thomas (Tom) C. Gorordo", email = "tcgorordo@gmail.com" }
]
requires-python = ">=3.13"
dependencies = [
"click>=8.4.0",
"polars>=1.40.1",
"rich>=15.0.0",
]
[project.scripts]
smithy = "smithy:main"
[build-system]
requires = ["uv_build>=0.11.7,<0.12.0"]
build-backend = "uv_build"
[dependency-groups]
dev = [
"marimo[recommended]>=0.23.6",
"py-spy>=0.4.2",
"pyclean>=3.6.0",
"pyinstaller>=6.20.0",
"pyinstrument>=5.1.2",
"pyperf>=2.10.0",
"pyright>=1.1.409",
"pytest>=9.0.3",
"pytest-benchmark>=5.2.3",
"ruff>=0.15.13",
]

238
requirements.txt Normal file
View file

@ -0,0 +1,238 @@
# This file was autogenerated by uv via the following command:
# uv pip compile pyproject.toml -o requirements.txt --group dev
altair==6.1.0
# via marimo
altgraph==0.17.5
# via pyinstaller
annotated-types==0.7.0
# via pydantic
anyio==4.13.0
# via
# httpx
# openai
# starlette
attrs==26.1.0
# via
# jsonschema
# referencing
certifi==2026.5.20
# via
# httpcore
# httpx
# requests
charset-normalizer==3.4.7
# via requests
click==8.4.0
# via
# smithy (pyproject.toml)
# marimo
# uvicorn
distro==1.9.0
# via openai
docutils==0.22.4
# via marimo
duckdb==1.5.3
# via marimo
fastjsonschema==2.21.2
# via nbformat
genai-prices==0.0.61
# via pydantic-ai-slim
griffelib==2.0.2
# via pydantic-ai-slim
h11==0.16.0
# via
# httpcore
# uvicorn
httpcore==1.0.9
# via httpx
httpx==0.28.1
# via
# genai-prices
# openai
# pydantic-ai-slim
# pydantic-graph
idna==3.15
# via
# anyio
# httpx
# requests
iniconfig==2.3.0
# via pytest
itsdangerous==2.2.0
# via marimo
jedi==0.19.2
# via marimo
jinja2==3.1.6
# via altair
jiter==0.15.0
# via openai
jsonschema==4.26.0
# via
# altair
# nbformat
jsonschema-specifications==2025.9.1
# via jsonschema
jupyter-core==5.9.1
# via nbformat
logfire-api==4.33.0
# via pydantic-graph
loro==1.10.3
# via marimo
marimo==0.23.6
# via smithy (pyproject.toml:dev)
markdown==3.10.2
# via
# marimo
# pymdown-extensions
markdown-it-py==4.2.0
# via rich
markupsafe==3.0.3
# via jinja2
mdurl==0.1.2
# via markdown-it-py
msgspec==0.21.1
# via marimo
narwhals==2.21.2
# via
# altair
# marimo
nbformat==5.10.4
# via marimo
nodeenv==1.10.0
# via pyright
openai==2.37.0
# via pydantic-ai-slim
opentelemetry-api==1.42.0
# via pydantic-ai-slim
packaging==26.2
# via
# altair
# marimo
# pyinstaller
# pyinstaller-hooks-contrib
# pytest
parso==0.8.7
# via jedi
platformdirs==4.9.6
# via jupyter-core
pluggy==1.6.0
# via pytest
polars==1.40.1
# via marimo
polars-runtime-32==1.40.1
# via polars
psutil==7.2.2
# via
# marimo
# pyperf
py-cpuinfo==9.0.0
# via pytest-benchmark
py-spy==0.4.2
# via smithy (pyproject.toml:dev)
pyarrow==24.0.0
# via polars
pyclean==3.6.0
# via smithy (pyproject.toml:dev)
pydantic==2.13.4
# via
# genai-prices
# openai
# pydantic-ai-slim
# pydantic-graph
pydantic-ai-slim==1.99.0
# via marimo
pydantic-core==2.46.4
# via pydantic
pydantic-graph==1.99.0
# via pydantic-ai-slim
pygments==2.20.0
# via
# marimo
# pytest
# rich
pyinstaller==6.20.0
# via smithy (pyproject.toml:dev)
pyinstaller-hooks-contrib==2026.5
# via pyinstaller
pyinstrument==5.1.2
# via smithy (pyproject.toml:dev)
pymdown-extensions==10.21.3
# via marimo
pyperf==2.10.0
# via smithy (pyproject.toml:dev)
pyright==1.1.409
# via smithy (pyproject.toml:dev)
pytest==9.0.3
# via
# smithy (pyproject.toml:dev)
# pytest-benchmark
pytest-benchmark==5.2.3
# via smithy (pyproject.toml:dev)
pyyaml==6.0.3
# via
# marimo
# pymdown-extensions
pyzmq==27.1.0
# via marimo
referencing==0.37.0
# via
# jsonschema
# jsonschema-specifications
regex==2026.5.9
# via tiktoken
requests==2.34.2
# via tiktoken
rich==15.0.0
# via smithy (pyproject.toml)
rpds-py==0.30.0
# via
# jsonschema
# referencing
ruff==0.15.13
# via
# smithy (pyproject.toml:dev)
# marimo
setuptools==82.0.1
# via
# pyinstaller
# pyinstaller-hooks-contrib
sniffio==1.3.1
# via openai
sqlglot==30.8.0
# via marimo
sqlglotc==30.8.0
# via sqlglot
starlette==1.0.0
# via marimo
tiktoken==0.13.0
# via pydantic-ai-slim
tomlkit==0.15.0
# via marimo
tqdm==4.67.3
# via openai
traitlets==5.15.0
# via
# jupyter-core
# nbformat
typing-extensions==4.15.0
# via
# altair
# openai
# opentelemetry-api
# pydantic
# pydantic-core
# pyright
# typing-inspection
typing-inspection==0.4.2
# via
# pydantic
# pydantic-ai-slim
# pydantic-graph
urllib3==2.7.0
# via requests
uv==0.11.15
# via marimo
uvicorn==0.47.0
# via marimo
websockets==16.0
# via marimo

5
shell.nix Normal file
View file

@ -0,0 +1,5 @@
# Development shell that provides the `just` and `uv` command-line tools.
{ pkgs ? import <nixpkgs> {}}:
pkgs.mkShellNoCC {
  packages = with pkgs; [ just uv ];
}

0
src/main.py Normal file
View file

79
src/smithy/__init__.py Normal file
View file

@ -0,0 +1,79 @@
import click
import polars as pl
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
from .rcv import smith_set
# Shared rich console used for all CLI output (preview table, result panel, error messages).
console = Console()
# NOTE: click renders the function docstring as the `--help` text, so
# implementation notes are kept in comments rather than in the docstring.
@click.command()
@click.argument(
    "spreadsheet",
    type=click.Path(exists=True, dir_okay=False),
)
def main(spreadsheet: str) -> None:
    """
    Compute the Smith set from a ranked-choice ballot spreadsheet.

    The Smith set is the minimal set of candidates which can beat all others pairwise - if there is a single winner
    in the set they are guaranteed the Condorcet i.e. Majority winner.
    """
    try:
        # Load spreadsheet. The extension is matched case-insensitively so
        # that names such as "BALLOTS.CSV" are accepted too.
        lowered = spreadsheet.lower()
        if lowered.endswith(".csv"):
            df = pl.read_csv(spreadsheet)
        elif lowered.endswith((".xlsx", ".xls")):
            df = pl.read_excel(spreadsheet)
        else:
            console.print(
                "[bold red]Unsupported file type.[/bold red]\n"
                "Use CSV or Excel."
            )
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            raise SystemExit(1)

        # Normalize entries to integer ranks. Blank or non-numeric cells
        # become null after the non-strict cast. They are filled with a rank
        # one past the number of candidates so an unranked candidate sorts
        # last; filling with 0 would outrank a genuine first choice (1),
        # because lower numbers mean higher preference.
        unranked = df.width + 1
        df = df.with_columns(
            [
                pl.col(c)
                .cast(pl.Utf8)
                .str.strip_chars()
                .cast(pl.Int64, strict=False)
                .fill_null(unranked)
                for c in df.columns
            ]
        )

        # Compute Smith set
        smiths = smith_set(df)

        # Preview table: only the first five (normalized) ballots are shown.
        preview = Table(title="Ballot Box")
        for col in df.columns:
            preview.add_column(col)
        for row in df.head(5).iter_rows():
            preview.add_row(*map(str, row))
        console.print(preview)

        # Results: one candidate per line inside a green panel.
        console.print()
        console.print(
            Panel.fit(
                "\n".join(f"{c}" for c in smiths),
                title="Resulting Smith Set",
                border_style="green",
            )
        )
    except Exception as e:
        # Top-level CLI boundary: report any failure and exit non-zero
        # instead of printing a traceback.
        console.print(
            f"[bold red]Error:[/bold red] {e}"
        )
        raise SystemExit(1)

67
src/smithy/rcv.py Normal file
View file

@ -0,0 +1,67 @@
import polars as pl
from itertools import combinations
def smith_set(df: pl.DataFrame) -> list[str]:
    """
    Compute the Smith set from a Ranked-Choice ballot.

    The Smith set is the smallest non-empty set of candidates in which every
    member beats every candidate outside the set pairwise. If the set holds a
    single candidate, that candidate is the Condorcet (pairwise-majority) winner.

    parameters
    ---
    df : pl.DataFrame
        A Polars DataFrame representing ballots. Each column is a candidate and each
        row is a voter's ranking of the candidates. Lower numbers indicate higher
        preference (1 = top-choice).

    returns
    ---
    smith_set : list
        A list of the Smith set candidates, sorted lexicographically.
        If there is a Condorcet winner, the list contains that single candidate.
        The list is empty only when `df` has no columns.
    """
    candidates = df.columns
    if not candidates:
        return []

    # Build the pairwise majority graph: beats[x] holds every candidate that x
    # defeats head-to-head. A tied pair adds no edge in either direction.
    beats: dict[str, set[str]] = {c: set() for c in candidates}
    losses: dict[str, int] = dict.fromkeys(candidates, 0)
    for a, b in combinations(candidates, 2):
        a_wins, b_wins = df.select(
            [
                (pl.col(a) < pl.col(b)).sum().alias("a_wins"),
                (pl.col(b) < pl.col(a)).sum().alias("b_wins"),
            ]
        ).row(0)
        if a_wins > b_wins:
            beats[a].add(b)
            losses[b] += 1
        elif b_wins > a_wins:
            beats[b].add(a)
            losses[a] += 1

    # A candidate with the fewest pairwise losses is always in the Smith set:
    # every outsider loses to all k members (at least k losses), while a
    # member can only lose to the other members (at most k - 1 losses).
    seed = min(candidates, key=losses.__getitem__)

    # Grow the set from the seed: whenever a member fails to beat an outsider
    # (a loss or a tie), that outsider must be a member too. The fixed point is
    # the smallest dominating set containing the seed, i.e. the Smith set.
    # This replaces enumerating all 2^n subsets with O(n^2) work.
    smith = {seed}
    pending = [seed]
    while pending:
        member = pending.pop()
        for other in candidates:
            if other not in smith and other not in beats[member]:
                smith.add(other)
                pending.append(other)

    return sorted(smith)

5
test/test_ballot.csv Normal file
View file

@ -0,0 +1,5 @@
Alice,Bob,Carol
1 ,2 ,3
2 ,1 ,3
1 ,3 ,2
3 ,1 ,2
1 Alice Bob Carol
2 1 2 3
3 2 1 3
4 1 3 2
5 3 1 2

47
test/test_nb.py Normal file
View file

@ -0,0 +1,47 @@
import marimo
# Version string written into the notebook file by marimo.
__generated_with = "0.23.6"
app = marimo.App(width="medium")


@app.cell
def _():
    # Make the marimo API available to later cells as `mo`.
    import marimo as mo
    from pathlib import Path
    return (mo,)


@app.cell
def _():
    # Import polars and the function under test for later cells.
    import polars as pl
    from smithy import smith_set
    return pl, smith_set


@app.cell
def _(mo, pl):
    # Load the sample ballot that sits next to this notebook.
    df = pl.read_csv(mo.notebook_dir() / "test_ballot.csv")
    # Clean up: strip whitespace, parse as integers, and give cells that do
    # not parse a rank one past the number of columns (df.width + 1).
    df = df.with_columns([ pl.col(c)
                             .cast(pl.Utf8)
                             .str.strip_chars()
                             .cast(pl.Int64, strict=False).fill_null(df.width + 1)
                           for c in df.columns ])
    df
    return (df,)


@app.cell
def _(df, smith_set):
    smith_set(df) # find the Smith set (should be "Alice" and "Bob" as a pair)
    return


@app.cell
def _():
    # Empty cell.
    return


# Run the notebook app when this file is executed as a script.
if __name__ == "__main__":
    app.run()

35
test/test_rcvs.py Normal file
View file

@ -0,0 +1,35 @@
import polars as pl
from smithy import smith_set
def test_condorcet():
    """A beats both B and C head-to-head, so the Smith set is A alone."""
    rankings = {
        "A": [1, 1, 2, 1],
        "B": [2, 2, 1, 2],
        "C": [3, 3, 3, 3],
    }
    ballots = pl.DataFrame(rankings)
    expected = ["A"]
    assert smith_set(ballots) == expected
def test_rockpprscrcycle():
    """A beats B, B beats C and C beats A, so the whole cycle is the Smith set."""
    rankings = {
        "A": [1, 2, 3],
        "B": [2, 3, 1],
        "C": [3, 1, 2],
    }
    ballots = pl.DataFrame(rankings)
    expected = ["A", "B", "C"]
    assert smith_set(ballots) == expected
def test_abpair():
    """A and B tie 2-2 and each beats C 3-1, so the Smith set is the pair A, B."""
    rankings = {
        "A": [1, 2, 1, 3],
        "B": [2, 1, 3, 1],
        "C": [3, 3, 2, 2],
    }
    ballots = pl.DataFrame(rankings)
    expected = ["A", "B"]
    assert smith_set(ballots) == expected
def test_fourcycle():
    """A > B > C > D > A with A-C and B-D tied: all four form the Smith set."""
    rankings = {
        "A": [1, 2, 3, 4],
        "B": [2, 3, 4, 1],
        "C": [3, 4, 1, 2],
        "D": [4, 1, 2, 3],
    }
    ballots = pl.DataFrame(rankings)
    expected = ["A", "B", "C", "D"]
    assert smith_set(ballots) == expected

1751
uv.lock generated Normal file

File diff suppressed because it is too large Load diff