diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..1d953f4 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use nix diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e4b21d0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ + +__pycache__/ + +.venv +.venv/ +venv/ + +cli.spec +gui.spec +build/ +dist/ + +.ipynb_checkpoints +.ruff_cache/ +.pytest_cache/ +.benchmarks/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..24ee5b1 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.13 diff --git a/README.md b/README.md index b29a419..7e45d05 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,172 @@ +--- +bibliography: REFERENCES.bib +... + # Carousel -*A simple gale-shapley stable matcher for applicants to positions/assignments.* +*A simple Stable Matching solver.* + +`carousel` is a solver for the +[Envy-free](https://en.wikipedia.org/wiki/Envy-free_matching) +[Stable matching problem](https://en.wikipedia.org/wiki/Stable_marriage_problem) based on some naive modifications to the +[Gale-Shapley Algorithm](https://en.wikipedia.org/wiki/Gale%E2%80%93Shapley_algorithm), written in Python. + +## Algorithms + +### Gale-Shapley Deferred Acceptance + +- Gives a solution to the basic problem. + +### GS + Rotation Enumeration of Solutions + +- Can implement post-selection measures/constraints/criteria. + +### Integer Programming (Huang++) + +### Polytope Solution Sampling (Large Problems) + +### Brute-Force Combinatoric Search +A benchmark/testbed implementation. Beware. + +### More? +Contribute! + +## Data - Input/Output +All [input table formats supported by `polars`](https://docs.pola.rs/user-guide/io/) are supported by `carousel` (`csv`, `excel`, `json` to name a few), +which accepts a few inter-related tabular schemes for the input/output data. 
+ +### Input +Input describes the preferences/rankings of the "applicants" for the "reviewers" to which they will be matched (possibly many-to-one, as in the "College Admission Problem"), as well as the preferences/rankings of the reviewers for the applicants. +Input should be in one of two forms: + +#### Preferences +Preferences enumerate by-name some preferences in descending order, +e.g. the fruit preferences of Alice, Bob and Charlie are: + +| Alice | Bob | Charlie | +|--------|--------|---------| +| apple | banana | cherry | +| cherry | apple | banana | +| banana | cherry | apple | + +where, e.g., Alice prefers apples to cherries (so they appear higher in her preferences). + +#### Rankings +Rankings are like preferences, but are numerically ordered against a list of things; +e.g. Alice, Bob and Charlie rank the fruit apples, bananas and cherries as: + +| fruit | Alice | Bob | Charlie | +|--------|-------|-----|---------| +| apple | 1 | 2 | 3 | +| banana | 3 | 1 | 2 | +| cherry | 2 | 3 | 1 | + +### Intermediate + +#### Ranking Matrix +In order to perform a matching, `carousel` either needs a pair of preferences +(e.g. a set of doctors' preferences for residencies, and a set of residencies' preferences for doctors), +or a pair of corresponding rankings. A ranking matrix is a concise way to express +a pair of rankings - used to display rankings: + +| names | Alice | Bob | Charlie | +|---------|------------|---------|-----------| +| Baylor | (1, 3) | (2, 2) | (3, 1) | +| CaseMed | (3, 2) | (1, 1) | (2, 3) | +| Emory | (2, 1) | (3, 3) | (1, 2) | + + +#### Priorities +Internally, preferences/rankings are often serialized to rows as a priority listing of the form: + +| App. | Rank | Rev | +|-------|---|--------| +| Alice | 1 | apple | +| Alice | 2 | cherry | +| Alice | 3 | banana | +| Bob | 1 | banana | +| Bob | 2 | apple | +| Bob | 3 | cherry | +| Charlie | 1 | cherry | +| Charlie | 2 | banana | +| Charlie | 3 | apple | + +(in no specific row order). 
This is not a supported input format in the frontends, but may be relevant +when interacting with the package programmatically - priorities can be followed as edges in a graph. + +### Output + +#### Matching +A matching is a table whose rows list the applicants matched to each reviewer +e.g. a matching from the med-school ranking matrix in the previous section might look like + +| Baylor | CaseMed | Emory | +|--------|---------|---------| +| Alice | Bob | Charlie | +| `None` | Daina | `None` | + +#### Assignments +An assignment is a table whose row lists which reviewer each applicant was matched to. +e.g. + +| Alice | Bob | Charlie | Daina | +|--------|---------|---------|---------| +| Baylor | CaseMed | Emory | CaseMed | + +TODO check/make stable. + +TODO matching more people per school e.g. + + + + +## Usage + +There are 4 main ways to use Carousel: + +### UO Pages Server - CGI Form +An HTML form submission interface is hosted at + +> [`https://pages.uoregon.edu/tgorordo/forms/carousel.html`](https://pages.uoregon.edu/tgorordo/forms/carousel.html) + +using the [pages.uoregon.edu CGI feature](https://service.uoregon.edu/TDClient/2030/Portal/KB/ArticleDet?ID=43069). + +Submit your applicant and reviewer preferences in tabular form, +or as excel uploads and the server will return a table of matches for you to choose from. + +This resource has *very* limited compute, so excessive usage might result in limitations or restricted access. +Try not to ruin a good thing! A moderate number of problems on the scale of those in the Examples section below should be sustainable. 
+ +### Command Line Binary + +TODO + +### GUI Binary + +TODO + +### Python Library/Development +If you prefer to invoke `carousel` directly (or incorporate it as a library into another script) +in a python environment instead of using any of the bundled/released versions of the program described above (or wish to +reproduce those bundles), you can do so using the [`uv` environment/package/project manager](https://github.com/astral-sh/uv) +or a raw python virtual environment using the [`venv` module](https://docs.python.org/3/library/venv.html) +(if you need an intro to python `venv`s see [this page](https://pages.uoregon.edu/tgorordo/uoph410-510a_Image-Analysis/venvs.html)). + +Some extra command-line development conveniences are available if you use the tools: + +- [`just`](https://github.com/casey/just) is a taskrunner that can execute the provided `justfile` of some common useful commands. +- [`direnv`](https://github.com/direnv/direnv) with [`nix` (shell)](https://github.com/NixOS/nix) can guarantee minimal development tooling without polluting your broader environment. i.e. they can auto-install and run all of carousel's tooling in an environment specific to your development directory. + +but everything provided by these tools can also be done using more standard/default shell tooling. +[`uv`](https://github.com/astral-sh/uv) as your package/environment manager is highly recommended, however. + +TODO + +## Post-Selection +It's often desirable to enforce additional criteria on solutions +that are not well-posed within the core optimization problem. +Since the solver itself is stochastic to some extent, these are often most easily implemented +by a post-selection on a sampling of solutions. 
+ ## Examples Here are some usage examples: @@ -9,3 +176,10 @@ TODO ### Caltech Housing Rotation TODO + +## References +*See [`REFERENCES.bib`](REFERENCES.bib)* + +[1]: + +TODO diff --git a/REFERENCES.bib b/REFERENCES.bib new file mode 100644 index 0000000..940ba25 --- /dev/null +++ b/REFERENCES.bib @@ -0,0 +1,75 @@ +@article{gale&shapley1962, + ISSN = {0002989, 19300972}, + URL = {https://www.jstor.org/stable/2312726}, + author = {D. Gale and L. S. Shapley}, + journal = {The American Mathematical Monthly}, + number = {1}, + pages = {9--15}, + publisher = {[Taylor & Francis, Ltd., Mathematical Association of America]}, + title = {College Admissions and the Stability of Marriage}, + urldate = {2025-04-01}, + volume = {69}, + year = {1962}, +} + +@article{doi:10.1137/0215048, +author = {Irving, Robert W. and Leather, Paul}, +title = {The Complexity of Counting Stable Marriages}, +journal = {SIAM Journal on Computing}, +volume = {15}, +number = {3}, +pages = {655-667}, +year = {1986}, +doi = {10.1137/0215048}, +URL = {https://doi.org/10.1137/0215048}, +eprint = {https://doi.org/10.1137/0215048} +} + +@article{doi:10.1137/0216010, +author = {Gusfield, Dan}, +title = {Three Fast Algorithms for Four Problems in Stable Marriage}, +journal = {SIAM Journal on Computing}, +volume = {16}, +number = {1}, +pages = {111-128}, +year = {1987}, +doi = {10.1137/0216010}, +URL = { https://doi.org/10.1137/0216010}, +eprint = { https://doi.org/10.1137/0216010 } +} + +@article{https://doi.org/10.3982/TE4830, +author = {Huang, Chao}, +title = {Stable matching: An integer programming approach}, +journal = {Theoretical Economics}, +volume = {18}, +number = {1}, +pages = {37-63}, +keywords = {Two-sided matching, stability, integer programming, many-to-one matching, complementarity, total unimodularity, demand type, C61, C78, D47, D63}, +doi = {https://doi.org/10.3982/TE4830}, +url = {https://onlinelibrary.wiley.com/doi/abs/10.3982/TE4830}, +eprint = 
{https://onlinelibrary.wiley.com/doi/pdf/10.3982/TE4830} +} + +@article{DELORME2019426, +title = {Mathematical models for stable matching problems with ties and incomplete lists}, +journal = {European Journal of Operational Research}, +volume = {277}, +number = {2}, +pages = {426-441}, +year = {2019}, +issn = {0377-2217}, +doi = {https://doi.org/10.1016/j.ejor.2019.03.017}, +url = {https://www.sciencedirect.com/science/article/pii/S0377221719302565}, +author = {Maxence Delorme and Sergio García and Jacek Gondzio and Jörg Kalcsics and David Manlove and William Pettersson}, +} + +@misc{gutin2024findingstablematchingsassignment, + title={Finding all stable matchings with assignment constraints}, + author={Gregory Gutin and Philip R. Neary and Anders Yeo}, + year={2024}, + eprint={2204.03989}, + archivePrefix={arXiv}, + primaryClass={econ.TH}, + url={https://arxiv.org/abs/2204.03989}, +} diff --git a/pyproject.toml b/pyproject.toml index a464690..5beda68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ requires-python = ">=3.13" dependencies = [ "numpy>=2.2.4", "polars>=1.26.0", + "rustworkx>=0.17.1", ] #[project.scripts] diff --git a/requirements.txt b/requirements.txt index 2beb6d6..bfb8bbc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -85,7 +85,9 @@ nbformat==5.10.4 nodeenv==1.9.1 # via pyright numpy==2.2.4 - # via carousel (pyproject.toml) + # via + # carousel (pyproject.toml) + # rustworkx openai==1.106.1 # via marimo packaging==24.2 @@ -149,6 +151,8 @@ ruff==0.11.6 # via # carousel (pyproject.toml:dev) # marimo +rustworkx==0.17.1 + # via carousel (pyproject.toml) setuptools==78.1.0 # via # pyinstaller diff --git a/src/carousel/__init__.py b/src/carousel/__init__.py index 7530edc..4e2d375 100644 --- a/src/carousel/__init__.py +++ b/src/carousel/__init__.py @@ -1,57 +1,37 @@ +import logging, rich +from rich.logging import RichHandler + +import itertools as it +import numpy as np + +import polars as pl +import polars.selectors as pls + 
+rich.traceback.install() + +logging.basicConfig( + level=logging.INFO, + format="%(message)s", + datefmt="[%X]", + handlers=[ + RichHandler( + show_time=True, + markup=True, + rich_tracebacks=True, + tracebacks_suppress=[pl, pls, np], + ) + ], +) +log = logging.getLogger(__name__) + +from .util import * +from .brute import * from .def_acc import * -def check_match_unstable( - match, - applicant_prefs, - position_prefs, - capacities, - *, - app_col: str = "applicant", - pos_col: str = "position", - rank_col: str = "rank", -): - """ - Check match stability between applicants and positions. - - parameters - --- - match: pl.DataFrame - | applicant | position | - - applicant_prefs: pl.DataFrame - | applicant | position | rank | - - position_prefs: pl.DataFrame - | position | applicant | rank | - - """ - pass # TODO +def main() -> None: + rich.print("Hello from [italic red]carousel[/italic red]!") -def check_match_stable( - match, - applicant_prefs, - position_prefs, - capacities, - *, - app_col: str = "applicant", - pos_col: str = "position", - rank_col: str = "rank", -) -> bool: - """ - Check match stability between applicants and positions. 
- - parameters - --- - match: pl.DataFrame - | applicant | position | - - applicant_prefs: pl.DataFrame - | applicant | position | rank | - - position_prefs: pl.DataFrame - | position | applicant | rank | - - """ - pass # TODO \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/src/carousel/brute.py b/src/carousel/brute.py new file mode 100644 index 0000000..d14d8e9 --- /dev/null +++ b/src/carousel/brute.py @@ -0,0 +1,3 @@ +def brute_match(applicant_rankings, reviewer_rankings): + """Brute force combinatoric search for stable matches.""" + pass diff --git a/src/carousel/def_acc.py b/src/carousel/def_acc.py index 7b9d07f..4933bfc 100644 --- a/src/carousel/def_acc.py +++ b/src/carousel/def_acc.py @@ -1,137 +1,177 @@ -from collections import deque -import heapq - -import numpy as np -import polars as pl +from .util import * -def GS_deferred_acceptance( - applicant_prefs: pl.DataFrame, - position_prefs: pl.DataFrame, - capacities: pl.DataFrame, - *, - app_col: str = "applicant", - pos_col: str = "position", - rank_col: str = "rank", -) -> pl.DataFrame: - """ - Compute the proposer-optimal Gale-Shapley deferred acceptance stable matching for a - college-admissions problem between "applicants" and "positions" with specified capacities. - - parameters - --- - applicant_prefs: pl.DataFrame - A 3-column ranking of positions by applicants. | applicant | position | rank | - (lower rank is more preferred). +def preparefor_def_acc(applicant_rankings, reviewer_rankings): + """Sanitize/Format applicant and reviewer rankings for the deferred acceptance solver.""" + pass + # return app_ranks, rev_ranks - position_prefs: pl.DataFrame - A 3-column ranking of applicants by positions. | position | applicant | rank | - (lower rank is more preferred). +def matchby_deferred_acceptance(applicant_rankings, reviewer_rankings, revfirst=False): + """Find the Gale-Shapley deferred-acceptance stable matching for rankings A, R. 
Default to A-first unless `revfirst=true`.""" - capacities: pl.DataFrame - A listing of position capacities. | position | capacity | - - returns - --- - matches: pl.DataFrame - A two-column match between applicants and positions (e.g. students and colleges). - | applicant | position | - """ - - app_idxs = ( - applicant_prefs.select(app_col).unique().sort(app_col).with_row_index("app_idx") + app_prio = rank_to_prio( + applicant_rankings, prioritizer="applicant", priority="reviewer" + ) + rev_prio = rank_to_prio( + reviewer_rankings, prioritizer="reviewer", priority="applicant" ) - pos_idxs = ( - position_prefs.select(pos_col).unique().sort(pos_col).with_row_index("pos_idx") + state = app_prio.select( + [ + pl.col("applicant").unique().alias("applicant"), + ] + ).with_columns([pl.lit(0).alias("next_rank"), pl.lit(True).alias("is_free")]) + + matches = pl.DataFrame( + { + "applicant": pl.Series([]), + "reviewer": pl.Series([]), + "current": pl.Series([], dtype=pl.Int64), + } ) - n_apps = app_idxs.height - n_poss = pos_idxs.height + max_iters = len(state) * len(rev_prio.select("applicant").unique()) - ap = ( - applicant_prefs.join(app_idxs, on=app_col) - .join(pos_idxs, on=pos_col) - .sort(["app_idx", rank_col]) - ) - al = ( - ap.group_by("app_idx", maintain_order=True) - .agg(pl.col("pos_idx")) - .sort("app_idx") - ) - app_prefs = al["pos_idx"].to_list() + for _ in range(max_iters): + props = ( + state.filter(pl.col("is_free")) + .join(app_prio, on="applicant") + .filter(pl.col("rank") == pl.col("next_rank")) + .select(["applicant", "reviewer"]) + ) - max_pref_len = max((len(x) for x in app_prefs), default=0) + if len(props) == 0: + break - pmat = np.full((n_apps, max_pref_len), -1, dtype=np.int32) - for i, r in enumerate(app_prefs): - pmat[i, : len(r)] = r + props = props.join(rev_prio, on=["reviewer", "applicant"], how="left") - pp = ( - position_prefs.join(app_idxs, on=app_col) - .join(pos_idxs, on=pos_col) - .sort(["pos_idx", rank_col]) - ) - worst_rank = 
np.iinfo(np.int32).max + props = props.join( + matches.select( + ["reviewer", pl.col("applicant").alias("proposer"), pl.col("current")] + ), + on="reviewer", + how="left", + ) - ranking = np.full((n_poss, n_apps), worst_rank, dtype=np.int32) - for r in pp.iter_rows(named=True): - ranking[r["pos_idx"], r["app_idx"]] = r[rank_col] + props = props.with_columns( + [ + ( + pl.col("proposer").is_null() | (pl.col("rank") < pl.col("current")) + ).alias("accepted") + ] + ) - caps = capacities.join(pos_idxs, on=pos_col).sort("pos_idx") - cap = caps["capacity"].to_numpy().astype(np.int32) + accepted = props.filter(pl.col("accepted")) + rejected = props.filter(~pl.col("accepted")) - # --- + displaced = accepted.filter(pl.col("proposer").is_not_null()).select( + pl.col("proposer").alias("applicant") + ) - next_c = np.zeros(n_apps, dtype=np.int32) - matched_pos = np.full(n_apps, -1, dtype=np.int32) + matches = matches.join(accepted.select("reviewer"), on="reviewer", how="anti") - free = deque(np.arange(n_apps, dtype=np.int32)) + matches = pl.concat( + [ + matches, + accepted.select( + ["applicant", "reviewer", pl.col("rank").alias("current")] + ), + ] + ) - pos_heaps: list[list[tuple[int, int]]] = [[] for _ in range(n_poss)] + rejected_apps = rejected.select("applicant") + accepted_apps = accepted.select("applicant") - while free: - a = free.popleft() + state = ( + state.join( + rejected_apps.with_columns(pl.lit(True).alias("was_rejected")), + on="applicant", + how="left", + ) + .join( + accepted_apps.with_columns(pl.lit(True).alias("was_accepted")), + on="applicant", + how="left", + ) + .join( + displaced.with_columns(pl.lit(True).alias("was_displaced")), + on="applicant", + how="left", + ) + .with_columns( + [ + pl.when(pl.col("was_rejected").fill_null(False)) + .then(pl.col("next_rank") + 1) + .otherwise(pl.col("next_rank")) + .alias("next_rank"), + pl.when(pl.col("was_accepted").fill_null(False)) + .then(False) + .when(pl.col("was_displaced").fill_null(False)) + 
.then(True) + .otherwise(pl.col("is_free")) + .alias("is_free"), + ] + ) + .select(["applicant", "next_rank", "is_free"]) + ) - while next_c[a] < max_pref_len: - p = pmat[a, next_c[a]] - next_c[a] += 1 + return matches.select(["applicant", "reviewer"]) - if p == -1: - break - # else - arank = ranking[p, a] - - if arank == worst_rank: - continue - - heap = pos_heaps[p] - - if len(heap) < cap[p]: - heapq.heappush(heap, (-arank, a)) - matched_pos[a] = a - break - - worst_neg_rank, worst_app = heap[0] - worst_rank_current = -worst_neg_rank - - if arank < worst_rank_current: - heapq.heapreplace(heap, (-arank, a)) - matched_pos[a] = a - matched_pos[worst_app] = -1 - - free.append(worst_app) - break - - matches = ( - pl.DataFrame({"app_idx": np.arange(n_apps), "pos_idx": matched_pos}) - .filter(pl.col("pos_idx") != -1) - .join(app_idxs, on="app_idx") - .join(pos_idxs, on="pos_idx") - .select([app_col, pos_col]) - .sort(app_col) +def deferred_acceptance_match(applicant_rankings, reviewer_rankings): + """Find Gale-Shapley deferred-acceptance stable matching for rankings A, R.""" + reviewer_rankings = reviewer_rankings.rename( + {reviewer_rankings.columns[0]: "applicant"} ) - return matches + app_prefs = rank_to_pref(applicant_rankings) + offers = app_prefs.transpose( + include_header=True, + header_name="applicant", + column_names=["pref" + str(i + 1) for i in range(app_prefs.width)], + ).with_columns(pl.coalesce(pl.all().exclude("applicant")).alias("offer")) + + # offers = pl.concat(pl.align_frames(offers, reviewer_rankings, on="applicant"), how="horizontal") + offers = pl.concat([offers, reviewer_rankings], how="align_left") + + match = pl.DataFrame( + { + r: offers.select(pl.col("applicant", "offer").sort_by(r)) + .select( + pl.when(pl.col("offer").eq(r)).then(pl.col("applicant")).otherwise(None) + ) + .select(pl.all().fill_null(strategy="backward").first()) + .to_series() + for r in reviewer_rankings.columns[1:] + } + ) # 
.select(pl.all().fill_null(strategy="backward").first()) + + # while check_unstable(match, applicant_rankings, reviewer_rankings): + while match.select(pl.any_horizontal(pl.all().has_nulls())).item(): + # null applicant preferences that rejected + offers = offers.with_columns( + pl.when( + pl.col("applicant").is_in(match.row(0)).not_(), + pl.col(c).is_null().not_(), + ) + .then(pl.lit(None)) + .otherwise(c) + .alias(c) + for c in offers.select(pl.col("pref*")).columns + ) + + return match + + offers = offers.with_columns(pl.col("pref")) + + offers = offers.with_columns( + pl.coalesce( + # TODO: select prefn columns using a regex + ).alias("offer") + ) + + # TODO update match + + # else if stable + return match diff --git a/src/carousel/util.py b/src/carousel/util.py new file mode 100644 index 0000000..bce8d14 --- /dev/null +++ b/src/carousel/util.py @@ -0,0 +1,152 @@ +import itertools as it +import numpy as np + +import polars as pl +import polars.selectors as pls + +# Stability + + +def get_rank(ranking, ranker, ranked): + idx = ranking.select(pl.arg_where(pl.col("") == ranked)).item() + return ranking[ranker][idx] + + +def check_match_unstable(match, applicant_ranking, reviewer_ranking): + applicants = applicant_ranking.columns[1:] # assume unique applicants + for a, b in it.permutations(applicants, 2): + A = ( + match.select(c for c in match.iter_columns() if a in c).to_series().name + ) # the reviewer a is matched to + B = ( + match.select(c for c in match.iter_columns() if b in c).to_series().name + ) # the reviewer b is matched to + + b_prefers_A = get_rank(applicant_ranking, b, A) < get_rank( + applicant_ranking, b, B + ) + A_prefers_b = get_rank(reviewer_ranking, A, b) < get_rank( + reviewer_ranking, A, a + ) + if b_prefers_A and A_prefers_b: + return True + # else + return False + + +def check_match_stable(*args, **kwargs): + return not check_match_unstable(*args, **kwargs) + + +# Conversions + + +def rank_to_pref(ranking): + """Converts a ranking to a 
preference.""" + id_col_name = ranking.select(pls.by_index(0)).to_series().name + preferences = ranking.select( + [ + pl.col(id_col_name).sort_by(c).alias(c) + for c in ranking.columns + if c != id_col_name + ] + ) + return preferences + + +def pref_to_rank(preferences): + """Converts a preference to a ranking.""" + o = preferences.select( + pl.concat_list(preferences.columns).explode().unique().sort().alias("") + ) # .with_row_index(offset=1) + + ranking = pl.concat( + [ + o.join( + preferences.with_row_index(offset=1), + how="full", + left_on="", + right_on=c, + maintain_order="left", + ).select(pl.col("index").alias(c)) + for c in preferences.columns + ], + how="horizontal", + ) + return pl.concat([o, ranking], how="horizontal") + + +def ranks_to_mat(rankA, rankB): + return prefs_to_mat(rank_to_pref(rankA), rank_to_pref(rankB)) + + +def prefs_to_mat(prefA, prefB): + return rank_to_mat(pref_to_rank(preferences), pref_to_rank(preferences[0])) # FIXME(review): `rank_to_mat` and `preferences` are not defined in this module; presumably this should build the matrix from prefA/prefB - confirm intent + + +def mat_to_ranks(matrix): + pass # TODO + + +def mat_to_prefs(matrix): + rankA, rankB = mat_to_ranks(matrix) + return rank_to_pref(rankA), rank_to_pref(rankB) + + +def pref_to_prio(pref, prioritizer="prioritizer", priority="subject"): + return pref.with_row_index("rank").unpivot( + index="rank", variable_name=prioritizer, value_name=priority + ) + + +def rank_to_prio(rank, **kwargs): + return pref_to_prio(rank_to_pref(rank), **kwargs) + + +"""" +def ranking_matrix(A, B): + T = pl.concat([A, B], how="horizontal") + + TT = T.with_columns(pl.concat_list(A.columns[0], B.columns[0])) + for ab in zip(A.columns[1:], B.columns[1:]): + TT = TT.with_columns(pl.concat_list(*ab)) + TTT = TT.select(pl.col(A.columns)) + + return TTT.insert_column(0, pl.Series("names", B.columns)) +""" + +## Output + + +def match_to_assgn(matching): + pass + + +def assgn_to_match(assignment): + pass + + +## Internal + + +# Validity + + +def check_pref_allunique(preferences): + """A valid set of preferences has all unique entries in each column.""" + 
repeats = preferences.select( + (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( + "repeats" + ) + ).get_column("repeats")[0] + return not repeats + + +def check_rank_noties(ranking): + """A valid ranking has no ties.""" + ties = ranking.select( + (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( + "ties" + ) + ).get_column("ties")[0] + return not ties diff --git a/src/carouselcmd.py b/src/carouselcmd.py index 945c578..1ae15b8 100644 --- a/src/carouselcmd.py +++ b/src/carouselcmd.py @@ -31,8 +31,8 @@ def cli(preferences: str, show_ballots=False, pretty=False) -> None: """ Compute the Gale-Shapley stable match of some preferences -- .csv or .xls(x). - A stable matching (of the "college admissions" problem) is one in which there is no - pair of, say, TA and course which would prefer each other over their respective + A stable matching (of the "college admissions" problem) is one in which there is no + pair of, say, TA and course which would prefer each other over their respective assignment given by the matching. 
""" diff --git a/test/galeshapley_test.py b/test/galeshapley_test.py index f9e8e84..d95fe56 100644 --- a/test/galeshapley_test.py +++ b/test/galeshapley_test.py @@ -1,80 +1,135 @@ import polars as pl +import polars.selectors as pls +import numpy as np + from polars.testing import assert_frame_equal -from carousel import GS_deferred_acceptance + +import pytest, rich +from hypothesis import given, strategies as st + +import carousel as crsl + +rng = np.random.default_rng() -def test_prefs(): - people_prefs = pl.DataFrame( +@st.composite +def rankings(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): + h = pl.DataFrame({"": choices}) + r = pl.DataFrame( + {n: draw(st.just(rng.permutation(len(choices)) + 1)) for n in names} + ) # should add None option in generation of valid rankings + return pl.concat([h, r], how="horizontal") + + +@st.composite +def preferences(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): + p = pl.DataFrame( { - "people": [ - "Alice", - "Alice", - "Alice", - "Bob", - "Bob", - "Bob", - "Charlie", - "Charlie", - "Charlie", - ], - "fruit": [ - "apple", - "banana", - "cherry", - "banana", - "cherry", - "apple", - "cherry", - "apple", - "banana", - ], - "rank": [1, 2, 3, 1, 2, 3, 1, 2, 3], + n: draw(st.just(rng.choice(choices, size=len(choices), replace=False))) + for n in names } ) + return p - fruit_prefs = pl.DataFrame( + +p = pl.DataFrame({"a": ["A", "C", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]}) +r = pl.DataFrame({"": ["A", "B", "C"], "a": [1, 3, 2], "b": [2, 1, 3], "c": [3, 2, 1]}) + + +def test_invalid_pref(): + pp = pl.DataFrame( + {"a": ["A", "A", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]} + ) + assert crsl.check_pref_allunique(pp) is False + + +def test_pref_to_rank(): + assert_frame_equal(crsl.pref_to_rank(p), r, check_dtypes=False) + + +def test_invalid_rank(): + rr = pl.DataFrame( + {"": ["A", "B", "C"], "a": [1, 1, 2], "b": [2, 1, 3], "c": [3, 2, 1]} + ) + assert crsl.check_pref_allunique(rr) is False + + +def 
test_rank_to_pref(): + assert_frame_equal(crsl.rank_to_pref(r), p, check_dtypes=False) + + +@given(rankings()) +def test_valid_rank(R): + assert crsl.check_rank_noties(R) + + +@given(rankings()) +def test_ranks_tofrom_prefs(R): + assert_frame_equal(crsl.pref_to_rank(crsl.rank_to_pref(R)), R, check_dtypes=False) + + +@given(preferences()) +def test_valid_pref(P): + assert crsl.check_pref_allunique(P) + + +@given(preferences()) +def test_prefs_tofrom_ranks(P): + assert_frame_equal(crsl.rank_to_pref(crsl.pref_to_rank(P)), P, check_dtypes=False) + + +def test_eg2_unstable(): + ar = pl.DataFrame( { - "fruit": [ - "apple", - "apple", - "apple", - "banana", - "banana", - "banana", - "cherry", - "cherry", - "cherry", - ], - "people": [ - "Alice", - "Bob", - "Charlie", - "Alice", - "Bob", - "Charlie", - "Alice", - "Bob", - "Charlie", - ], - "rank": [1, 1, 1, 1, 1, 1, 1, 1, 1], # fruits have no preferences + "": ["A", "B", "C", "D"], + "a": [1, 2, 3, 4], + "b": [1, 4, 3, 2], + "c": [2, 1, 3, 4], + "d": [4, 2, 3, 1], } ) - - capacities = pl.DataFrame( + rr = pl.DataFrame( { - "fruit": ["apple", "cherry", "banana"], - "capacity": [1, 1, 1], # have one of each + "": ["a", "b", "c", "d"], + "A": [3, 4, 2, 1], + "B": [3, 1, 4, 2], + "C": [2, 3, 4, 1], + "D": [3, 2, 1, 4], } ) + match = pl.DataFrame({"A": ["a"], "B": ["b"], "C": ["c"], "D": ["d"]}) - assert_frame_equal( - GS_deferred_acceptance( - people_prefs, fruit_prefs, capacities, app_col="people", pos_col="fruit" - ).sort(["people", "fruit"]), - pl.DataFrame( - { - "people": ["Alice", "Bob", "Charlie"], - "fruit": ["apple", "banana", "cherry"], - } - ).sort(["people", "fruit"]), + assert crsl.check_match_unstable(match, ar, rr) + + +def test_eg2_isstable(): + ar = pl.DataFrame( + { + "": ["A", "B", "C", "D"], + "a": [1, 2, 3, 4], + "b": [1, 4, 3, 2], + "c": [2, 1, 3, 4], + "d": [4, 2, 3, 1], + } ) + rr = pl.DataFrame( + { + "": ["a", "b", "c", "d"], + "A": [3, 4, 2, 1], + "B": [3, 1, 4, 2], + "C": [2, 3, 4, 1], + "D": [3, 2, 
1, 4], + } + ) + match = pl.DataFrame({"A": ["c"], "B": ["d"], "C": ["a"], "D": ["b"]}) + + assert crsl.check_match_stable(match, ar, rr) + + +@given( + rankings(names=["a", "b", "c", "d"], choices=["A", "B", "C", "D"]), + rankings(names=["A", "B", "C", "D"], choices=["a", "b", "c", "d"]), +) +def test_defacc_isstable(applicant_rankings, reviewer_rankings): + match = crsl.matchby_deferred_acceptance(applicant_rankings, reviewer_rankings) + assert crsl.check_match_stable(match, applicant_rankings, reviewer_rankings) diff --git a/test/nbs/__marimo__/session/hovses.py.json b/test/nbs/__marimo__/session/hovses.py.json new file mode 100644 index 0000000..517c3e3 --- /dev/null +++ b/test/nbs/__marimo__/session/hovses.py.json @@ -0,0 +1,124 @@ +{ + "version": "1", + "metadata": { + "marimo_version": "0.18.3" + }, + "cells": [ + { + "id": "Hbol", + "code_hash": "bc65c35c6fad59890b50c502bb8affa4", + "outputs": [ + { + "type": "data", + "data": { + "text/markdown": "

Caltech Hovse Rotation Example

" + } + } + ], + "console": [] + }, + { + "id": "MJUe", + "code_hash": "ccb62cc29f2cec640b063832a90adfec", + "outputs": [ + { + "type": "data", + "data": { + "text/markdown": "

Imports

" + } + } + ], + "console": [] + }, + { + "id": "vblA", + "code_hash": "9588e2b4004a32c73b7ecc0a968b666f", + "outputs": [ + { + "type": "data", + "data": { + "text/plain": "" + } + } + ], + "console": [] + }, + { + "id": "bkHC", + "code_hash": "4821038400db1f5dc63daf7ca5b26279", + "outputs": [ + { + "type": "data", + "data": { + "text/markdown": "

Generating Rotation Rankings

" + } + } + ], + "console": [] + }, + { + "id": "lEQa", + "code_hash": "ecc9ae14c8c8f3875656fc620665c6b4", + "outputs": [ + { + "type": "data", + "data": { + "text/html": "
True
" + } + } + ], + "console": [] + }, + { + "id": "NdeS", + "code_hash": "c582dec943ff7b743aa0691df291cea6", + "outputs": [ + { + "type": "data", + "data": { + "text/html": "" + } + } + ], + "console": [] + }, + { + "id": "PKri", + "code_hash": "b9f0f3a28a20d94d4b173bbbd49db37f", + "outputs": [], + "console": [] + }, + { + "id": "Xref", + "code_hash": "ccc8305d08e563d4fb1c9df31e9b7b69", + "outputs": [ + { + "type": "data", + "data": { + "text/html": "" + } + } + ], + "console": [] + }, + { + "id": "taaO", + "code_hash": "b712cc6f3d50763308dd5bc1c6703f77", + "outputs": [ + { + "type": "data", + "data": { + "text/html": "" + } + } + ], + "console": [] + }, + { + "id": "wJzy", + "code_hash": null, + "outputs": [], + "console": [] + } + ] +} \ No newline at end of file diff --git a/test/nbs/hovses.py b/test/nbs/hovses.py new file mode 100644 index 0000000..6846191 --- /dev/null +++ b/test/nbs/hovses.py @@ -0,0 +1,105 @@ +import marimo + +__generated_with = "0.18.3" +app = marimo.App(width="medium") + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + # Caltech Hovse Rotation Example + """) + return + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Imports + """) + return + + +@app.cell +def _(): + import marimo as mo + import polars as pl, polars.selectors as pls + import numpy as np, faker as fk + import carousel as crsl + + return crsl, mo, pl + + +@app.cell(hide_code=True) +def _(mo): + mo.md(r""" + ## Generating Rotation Rankings + """) + return + + +@app.cell +def _(crsl, pl): + ar = pl.DataFrame( + { + "": ["A", "B", "C", "D"], + "a": [1, 2, 3, 4], + "b": [1, 4, 3, 2], + "c": [2, 1, 3, 4], + "d": [4, 2, 3, 1], + } + ) + rr = pl.DataFrame( + { + "": ["a", "b", "c", "d"], + "A": [3, 4, 2, 1], + "B": [3, 1, 4, 2], + "C": [2, 3, 4, 1], + "D": [3, 2, 1, 4], + } + ) + m = pl.DataFrame({"A": ["c"], "B": ["d"], "C": ["a"], "D": ["b"]}) + crsl.check_match_stable(m, ar, rr) + return ar, rr + + +@app.cell +def _(ar): + ar + return + + +@app.cell +def _(): + 
hovses = [ + "Blacker", + "Dabney", + "Ricketts", + "Fleming", + "Page", + "Lloyd", + "Venerable", + "Avery", + ] + return + + +@app.cell +def _(ar, crsl): + crsl.rank_to_pref(ar) + return + + +@app.cell +def _(crsl, rr): + crsl.pref_to_prio(crsl.rank_to_pref(rr)) + return + + +@app.cell +def _(): + return + + +if __name__ == "__main__": + app.run() diff --git a/uv.lock b/uv.lock index e60a3d9..2c4a045 100644 --- a/uv.lock +++ b/uv.lock @@ -64,6 +64,7 @@ source = { editable = "." } dependencies = [ { name = "numpy" }, { name = "polars" }, + { name = "rustworkx" }, ] [package.dev-dependencies] @@ -84,6 +85,7 @@ dev = [ requires-dist = [ { name = "numpy", specifier = ">=2.2.4" }, { name = "polars", specifier = ">=1.26.0" }, + { name = "rustworkx", specifier = ">=0.17.1" }, ] [package.metadata.requires-dev] @@ -1532,6 +1534,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/d5/bc97ff895ec35cf3925d4bd60f3b39d822f377a446906ec9bcc87405e59b/ruff-0.15.14-py3-none-win_arm64.whl", hash = "sha256:ff47b90a9ef6a40c9e2f3b479c1fb78531adf055b94c1eba0a7ba04b31951826", size = 11208607, upload-time = "2026-05-21T14:34:26.525Z" }, ] +[[package]] +name = "rustworkx" +version = "0.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/b0/66d96f02120f79eeed86b5c5be04029b6821155f31ed4907a4e9f1460671/rustworkx-0.17.1.tar.gz", hash = "sha256:59ea01b4e603daffa4e8827316c1641eef18ae9032f0b1b14aa0181687e3108e", size = 399407, upload-time = "2025-09-15T16:29:46.429Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/24/8972ed631fa05fdec05a7bb7f1fc0f8e78ee761ab37e8a93d1ed396ba060/rustworkx-0.17.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c08fb8db041db052da404839b064ebfb47dcce04ba9a3e2eb79d0c65ab011da4", size = 2257491, upload-time = "2025-08-13T01:43:31.466Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/ae/7b6bbae5e0487ee42072dc6a46edf5db9731a0701ed648db22121fb7490c/rustworkx-0.17.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4ef8e327dadf6500edd76fedb83f6d888b9266c58bcdbffd5a40c33835c9dd26", size = 2040175, upload-time = "2025-08-13T01:43:33.762Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ea/c17fb9428c8f0dcc605596f9561627a5b9ef629d356204ee5088cfcf52c6/rustworkx-0.17.1-cp39-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b809e0aa2927c68574b196f993233e269980918101b0dd235289c4f3ddb2115", size = 2324771, upload-time = "2025-08-13T01:43:35.553Z" }, + { url = "https://files.pythonhosted.org/packages/d7/40/ec8b3b8b0f8c0b768690c454b8dcc2781b4f2c767f9f1215539c7909e35b/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e82c46a92fb0fd478b7372e15ca524c287485fdecaed37b8bb68f4df2720f2", size = 2068584, upload-time = "2025-08-13T01:43:37.261Z" }, + { url = "https://files.pythonhosted.org/packages/d9/22/713b900d320d06ce8677e71bba0ec5df0037f1d83270bff5db3b271c10d7/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42170075d8a7319e89ff63062c2f1d1116ced37b6f044f3bf36d10b60a107aa4", size = 2380949, upload-time = "2025-08-13T01:52:17.435Z" }, + { url = "https://files.pythonhosted.org/packages/20/4b/54be84b3b41a19caf0718a2b6bb280dde98c8626c809c969f16aad17458f/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65cba97fa95470239e2d65eb4db1613f78e4396af9f790ff771b0e5476bfd887", size = 2562069, upload-time = "2025-08-13T02:09:27.222Z" }, + { url = "https://files.pythonhosted.org/packages/39/5b/281bb21d091ab4e36cf377088366d55d0875fa2347b3189c580ec62b44c7/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246cc252053f89e36209535b9c58755960197e6ae08d48d3973760141c62ac95", size = 2221186, upload-time = 
"2025-08-13T01:43:38.598Z" }, + { url = "https://files.pythonhosted.org/packages/cc/2d/30a941a21b81e9db50c4c3ef8a64c5ee1c8eea3a90506ca0326ce39d021f/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c10d25e9f0e87d6a273d1ea390b636b4fb3fede2094bf0cb3fe565d696a91b48", size = 2123510, upload-time = "2025-08-13T01:43:40.288Z" }, + { url = "https://files.pythonhosted.org/packages/4f/ef/c9199e4b6336ee5a9f1979c11b5779c5cf9ab6f8386e0b9a96c8ffba7009/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:48784a673cf8d04f3cd246fa6b53fd1ccc4d83304503463bd561c153517bccc1", size = 2302783, upload-time = "2025-08-13T01:43:42.073Z" }, + { url = "https://files.pythonhosted.org/packages/30/3d/a49ab633e99fca4ccbb9c9f4bd41904186c175ebc25c530435529f71c480/rustworkx-0.17.1-cp39-abi3-win32.whl", hash = "sha256:5dbc567833ff0a8ad4580a4fe4bde92c186d36b4c45fca755fb1792e4fafe9b5", size = 1931541, upload-time = "2025-08-13T01:43:43.415Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ec/cee878c1879b91ab8dc7d564535d011307839a2fea79d2a650413edf53be/rustworkx-0.17.1-cp39-abi3-win_amd64.whl", hash = "sha256:d0a48fb62adabd549f9f02927c3a159b51bf654c7388a12fc16d45452d5703ea", size = 2055049, upload-time = "2025-08-13T01:43:44.926Z" }, +] + [[package]] name = "setuptools" version = "82.0.1"