commit c571fa1d171d6f67b0d704bf561ea7a7fa99ba0d Author: Thomas (Tom) C. Gorordo Date: Fri Apr 18 01:52:37 2025 -0700 init commit; data shapes mostly defined. some tests and sketched documentation in README. core algorithm TODO next diff --git a/README.md b/README.md new file mode 100644 index 0000000..e41680a --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +--- +bibliography: REFERENCES.bib +... + +# Carousel +*A simple Stable Matching Solver.* + +`carousel` is a solver for the +[Envy-free](https://en.wikipedia.org/wiki/Envy-free_matching) +[Stable matching problem](https://en.wikipedia.org/wiki/Stable_marriage_problem) based on some naive modifications to the +[Gale-Shapley Algorithm](https://en.wikipedia.org/wiki/Gale%E2%80%93Shapley_algorithm), written in Python. + +## Algorithms + +### Gale-Shapley Deferred Acceptance + +The most basic version of the stable matching problem was outlined and solved by [@gale&shapley1962]. + +TODO + +## Usage +Using `carousel` is pretty simple once it's set up: given some input rankings and some post-selection criteria, +the program should generate a landscape of valid matching solutions for you to choose from (and can generate more on request). + +### Installation, Setup, Dependencies & Tooling +There are a number of ways to guarantee you have the required dependencies to run `carousel`. +The most complete method is using `uv` (with `nix` and `direnv`), but a plain/more barebones setup using `venv` is also possible. + +#### Setup and run with `uv` +`carousel` was developed using the [`uv`](https://github.com/astral-sh/uv) package and project manager. + +TODO + +#### Raw setup with `venv` +It's possible to use only default Python tooling, if so desired, via the +[`venv` module](https://docs.python.org/3/library/venv.html). + +TODO + +#### Convenience `direnv` and `nix` environment management. 
+TODO + +### Matching: Input & Output + +All [input table formats supported by `polars`](https://docs.pola.rs/user-guide/io/) are supported by `carousel`. +Input data should be in one of three forms: + +#### Preferences +Preferences list each person's choices by name, in descending order of preference, +e.g. the fruit preferences of Alice, Bob and Charlie are: + +| Alice | Bob | Charlie | +|--------|--------|---------| +| apple | banana | cherry | +| cherry | apple | banana | +| banana | cherry | apple | + +where, e.g., Alice prefers apples to cherries (so apples appear higher in her preferences). + +#### Rankings +Rankings are like preferences, but are numerically ordered against a list of things; +e.g. Alice, Bob and Charlie rank the fruit apples, bananas and cherries as: + +| fruit | Alice | Bob | Charlie | +|--------|-------|-----|---------| +| apple | 1 | 2 | 3 | +| banana | 3 | 1 | 2 | +| cherry | 2 | 3 | 1 | + +#### Ranking Matrix +In order to perform a matching, `carousel` either needs a pair of preferences +(e.g. a set of doctors' preferences for residencies, and a set of residencies' preferences for doctors), +a pair of corresponding rankings, *or* a matrix encoding both rankings at once: + +| names | Alice | Bob | Charlie | +|---------|------------|---------|-----------| +| Baylor | (1, 3) | (2, 2) | (3, 1) | +| CaseMed | (3, 2) | (1, 1) | (2, 3) | +| Emory | (2, 1) | (3, 3) | (1, 2) | + +#### Matching +A matching is a table whose columns list the applicants matched to each reviewer, +e.g. a matching from the med-school ranking matrix in the previous section might look like + +| Baylor | CaseMed | Emory | +|--------|---------|---------| +| Alice | Bob | Charlie | +| `None` | Daina | `None` | + +#### Assignments +An assignment is a table whose row lists which reviewer each applicant was matched to, +e.g. + +| Alice | Bob | Charlie | Daina | +|--------|---------|---------|---------| +| Baylor | CaseMed | Emory | CaseMed | + +TODO check/make stable. 
+ +TODO matching more people per school e.g. + +### Matching: Post-Selection +It's often desirable to enforce additional criteria on solutions +that are not well-posed within the core optimization problem. +Since the solver itself is stochastic, these are often most easily implemented +by a post-selection. + + +## Examples +Here are some usage examples: + +### Departmental TA Assignments +TODO + +### Caltech Housing Rotation +TODO + +## References +*See [`REFERENCES.bib`](REFERENCES.bib)* + +[1]: + +TODO diff --git a/REFERENCES.bib b/REFERENCES.bib new file mode 100644 index 0000000..9c14c0e --- /dev/null +++ b/REFERENCES.bib @@ -0,0 +1,16 @@ + + +@article{gale&shapley1962, + ISSN = {0002989, 19300972}, + URL = {https://www.jstor.org/stable/2312726}, + author = {D. Gale and L. S. Shapley}, + journal = {The American Mathematical Monthly}, + number = {1}, + pages = {9--15}, + publisher = {[Taylor & Francis, Ltd., Mathematical Association of America]}, + title = {College Admissions and the Stability of Marriage}, + urldate = {2025-04-01}, + volume = {69}, + year = {1962}, +} + diff --git a/justfile b/justfile new file mode 100644 index 0000000..33fbe1b --- /dev/null +++ b/justfile @@ -0,0 +1,20 @@ +run: + uv run carousel + +check: + uv run pyright src + +test: + uv run pytest -vv --tb=short + +format: + uv run ruff format src test + +clean: + uv run pyclean src test + uv run ruff clean + rm -rf .pytest_cache .hypothesis + +wipe: + just clean + rm -rf .venv diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5651aa8 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,41 @@ +[project] +name = "carousel" +version = "0.1.0" +description = "A Stable Marriage Solver." +readme = "README.md" +authors = [{ name = "Thomas (Tom) C. 
Gorordo", email = "tcgorordo@gmail.com" }] +requires-python = ">=3.13" +dependencies = [ + "click>=8.1.8", + "numpy>=2.2.4", + "polars>=1.26.0", + "pytest-benchmark>=5.1.0", + "rich>=14.0.0", +] + +[project.scripts] +carousel = "carousel:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[dependency-groups] +dev = [ + "hypothesis>=6.130.8", + "pyclean>=3.1.0", + "pyinstaller>=6.12.0", + "pyright>=1.1.398", + "pytest>=8.3.5", + "ruff>=0.11.2", +] + +[pytest] +testpaths = "test" + +[tool.pyright] +include = ["src"] +exclude = ["test"] + +reportMissingImports = "error" +reportMissingTypeStubs = false diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..d6f7251 --- /dev/null +++ b/shell.nix @@ -0,0 +1,16 @@ +#let +# nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-24.11"; +# pkgs = import nixpkgs { config = {}; overlays = []; }; +#in + +{ pkgs ? import {}}: + +#pkgs.mkShellNoCC { +# packages = with pkgs; [ +# uv +# ]; +# } + +pkgs.mkShellNoCC { + packages = with pkgs; [ uv ]; +} diff --git a/src/carousel/__init__.py b/src/carousel/__init__.py new file mode 100644 index 0000000..1c7ebb9 --- /dev/null +++ b/src/carousel/__init__.py @@ -0,0 +1,121 @@ +import rich + +import polars as pl +import polars.selectors as pls + +import itertools as it + +def rank_to_pref(R): + """Converts a ranking to a preference.""" + id_col_name = R.select(pls.by_index(0)).to_series().name + P = R.select( + [pl.col(id_col_name).sort_by(c).alias(c) for c in R.columns if c != id_col_name] + ) + return P + + +def pref_to_rank(P): + """Converts a preference to a ranking.""" + o = P.select( + pl.concat_list(P.columns).explode().unique().sort().alias("") + ) # .with_row_index(offset=1) + + r = pl.concat( + [ + o.join( + P.with_row_index(offset=1), + how="full", + left_on="", + right_on=c, + maintain_order="left", + ).select(pl.col("index").alias(c)) + for c in P.columns + ], + how="horizontal", + ) + return pl.concat([o, r], 
how="horizontal") + + +"""" +def ranking_matrix(A, B): + T = pl.concat([A, B], how="horizontal") + + TT = T.with_columns(pl.concat_list(A.columns[0], B.columns[0])) + for ab in zip(A.columns[1:], B.columns[1:]): + TT = TT.with_columns(pl.concat_list(*ab)) + TTT = TT.select(pl.col(A.columns)) + + return TTT.insert_column(0, pl.Series("names", B.columns)) +""" + + +def check_valid_pref(P): + repeats = P.select( + (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( + "repeats" + ) + ).get_column("repeats")[0] + return not repeats + + +def check_valid_rank(R): + ties = R.select( + (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( + "ties" + ) + ).get_column("ties")[0] + return not ties + +def check_valid_match(match, applicants, reviewers): + # TODO + pass + +def check_valid_assgn(assgn, applicants, reviewers): + # TODO + pass + +def get_rank(ranking, ranker, rankee): + idx = ranking.select(pl.arg_where(pl.col("") == rankee)).item() + return ranking[ranker][idx] + +def check_unstable(match, applicant_ranking, reviewer_ranking): + applicants = applicant_ranking.columns[1:] # assume unique applicants + for a, b in it.combinations(applicants, 2): + A = match.select(c for c in match.iter_columns() if a in c).to_series().name # the reviewer a is matched to + B = match.select(c for c in match.iter_columns() if b in c).to_series().name # the reviewer b is matched to + + b_prefers_A = get_rank(applicant_ranking, b, A) < get_rank(applicant_ranking, b, B) + A_prefers_b = get_rank(reviewer_ranking, A, b) < get_rank(reviewer_ranking, A, a) + if b_prefers_A and A_prefers_b: + return True + + # or + a_prefers_B = get_rank(applicant_ranking, a, B) < get_rank(applicant_ranking, a, A) + B_prefers_a = get_rank(reviewer_ranking, B, a) < get_rank(reviewer_ranking, B, b) + if a_prefers_B and B_prefers_a: + return True + # else + return False + +def check_stable(*args, **kwargs): + return not check_unstable(*args, **kwargs) + +def 
deferred_acceptance(A, R): + """Find the Gale-Shapley deferred-acceptance stable matching for preferences A, R.""" + # TODO - the core algorithm! + pass + +def assgn_to_match(assgn): + # TODO + pass + +def match_to_assgn(match): + # TODO + pass + +def main() -> None: + rich.print("Hello from [italic red]carousel[/italic red]!") + + +if __name__ == "__main__": + main() diff --git a/src/carousel/deferred_acceptance.py b/src/carousel/deferred_acceptance.py new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/carousel/deferred_acceptance.py @@ -0,0 +1 @@ + diff --git a/test/galeshapley_test.py b/test/galeshapley_test.py new file mode 100644 index 0000000..7c984f0 --- /dev/null +++ b/test/galeshapley_test.py @@ -0,0 +1,96 @@ +import rich +import polars as pl +import polars.selectors as pls +import numpy as np + +rng = np.random.default_rng() + +from polars.testing import assert_frame_equal + +import pytest +from hypothesis import given, strategies as st + + +@st.composite +def rankings(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): + h = pl.DataFrame({"": choices}) + r = pl.DataFrame( + {n: draw(st.just(rng.permutation(len(choices)) + 1)) for n in names} + ) # should add None option in generation of valid rankings + return pl.concat([h, r], how="horizontal") + + +@st.composite +def preferences(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): + p = pl.DataFrame( + { + n: draw(st.just(rng.choice(choices, size=len(choices), replace=False))) + for n in names + } + ) + return p + + +import carousel as crsl + +p = pl.DataFrame({"a": ["A", "C", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]}) +r = pl.DataFrame({"": ["A", "B", "C"], "a": [1, 3, 2], "b": [2, 1, 3], "c": [3, 2, 1]}) + + +def test_invalid_pref(): + pp = pl.DataFrame( + {"a": ["A", "A", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]} + ) + assert crsl.check_valid_pref(pp) is False + + +def test_pref_to_rank(): + rr = crsl.pref_to_rank(p) + rich.print(p, rr, r) + 
assert_frame_equal(crsl.pref_to_rank(p), r, check_dtypes=False) + + +def test_invalid_rank(): + rr = pl.DataFrame( + {"": ["A", "B", "C"], "a": [1, 1, 2], "b": [2, 1, 3], "c": [3, 2, 1]} + ) + assert crsl.check_valid_pref(rr) is False + + +def test_rank_to_pref(): + assert_frame_equal(crsl.rank_to_pref(r), p, check_dtypes=False) + + +@given(rankings()) +def test_valid_rank(R): + assert crsl.check_valid_rank(R) + + +@given(rankings()) +def test_ranks_tofrom_prefs(R): + assert_frame_equal(crsl.pref_to_rank(crsl.rank_to_pref(R)), R, check_dtypes=False) + + +@given(preferences()) +def test_valid_pref(P): + assert crsl.check_valid_pref(P) + + +@given(preferences()) +def test_prefs_tofrom_ranks(P): + assert_frame_equal(crsl.rank_to_pref(crsl.pref_to_rank(P)), P, check_dtypes=False) + + +def test_eg3_unstable(): + applicant_rankings = pl.DataFrame({"": ["A", "B", "C", "D"], "a": [1, 2, 3, 4], "b": [1, 4, 3, 2], "c": [2, 1, 3, 4], "d": [4, 2, 3, 1]}) + reviewer_rankings = pl.DataFrame({"": ["a", "b", "c", "d"], "A": [3, 4, 2, 1], "B": [3, 1, 4, 2], "C": [2, 3, 4, 1], "D": [3, 2, 1, 4] }) + match = pl.DataFrame({"A" : ["a"], "B": ["b"], "C": ["c"], "D": ["d"]}) + + assert crsl.check_unstable(match, applicant_rankings, reviewer_rankings) + +def test_eg3_isstable(): + applicant_rankings = pl.DataFrame({"": ["A", "B", "C", "D"], "a": [1, 2, 3, 4], "b": [1, 4, 3, 2], "c": [2, 1, 3, 4], "d": [4, 2, 3, 1]}) + reviewer_rankings = pl.DataFrame({"": ["a", "b", "c", "d"], "A": [3, 4, 2, 1], "B": [3, 1, 4, 2], "C": [2, 3, 4, 1], "D": [3, 2, 1, 4] }) + match = pl.DataFrame({"A" : ["c"], "B": ["d"], "C": ["a"], "D": ["b"]}) + + assert crsl.check_stable(match, applicant_rankings, reviewer_rankings)