diff --git a/.envrc b/.envrc deleted file mode 100644 index 1d953f4..0000000 --- a/.envrc +++ /dev/null @@ -1 +0,0 @@ -use nix diff --git a/.gitignore b/.gitignore deleted file mode 100644 index e4b21d0..0000000 --- a/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ - -__pycache__/ - -.venv -.venv/ -venv/ - -cli.spec -gui.spec -build/ -dist/ - -.ipynb_checkpoints -.ruff_cache/ -.pytest_cache/ -.benchmarks/ diff --git a/.python-version b/.python-version deleted file mode 100644 index 24ee5b1..0000000 --- a/.python-version +++ /dev/null @@ -1 +0,0 @@ -3.13 diff --git a/README.md b/README.md index 7e45d05..b29a419 100644 --- a/README.md +++ b/README.md @@ -1,172 +1,5 @@ ---- -bibliography: REFERENCES.bib -... - # Carousel -*A simple Stable Matching solver.* - -`carousel` is a solver for the -[Envy-free](https://en.wikipedia.org/wiki/Envy-free_matching) -[Stable matching problem](https://en.wikipedia.org/wiki/Stable_marriage_problem) based on some naive modifications to the -[Gale-Shapley Algorithm](https://en.wikipedia.org/wiki/Gale%E2%80%93Shapley_algorithm), written in Python. - -## Algorithms - -### Gale-Shapley Deferred Acceptance - -- Gives a solution to the basic problem. - -### GS + Rotation Enumeration of Solutions - -- Can implement post-selection measures/constraints/criteria. - -### Integer Programming (Huang++) - -### Polytope Solution Sampling (Large Problems) - -### Brute-Force Combinatoric Search -A benchmark/testbed implementation. Beware. - -### More? -Contribute! - -## Data - Input/Output -All [input table formats supported by `polars`](https://docs.pola.rs/user-guide/io/) are supported by `carousel` (`csv`, `excel`, `json` to name a few), -which accepts a few inter-related tabular schemes for the input/output data. - -### Input -Input describes the preferences/rankings of the "applicants" of "reviewers" to which they will be matched (possibly many-to-one, as in the "College Admission Problem"), as well as the preferences/rankings for the reviwers of applicants. -Input should be in one of two forms: - -#### Preferences -Preferences enumerate by-name some preferences in descending order, -e.g. the fruit preferences of Alice, Bob and Charlie are: - -| Alice | Bob | Charlie | -|--------|--------|---------| -| apple | banana | cherry | -| cherry | apple | banana | -| banana | cherry | apple | - -where for e.g. Alice prefers apples to cherries (so they appear higher in her preferences). - -#### Rankings -Rankings are like preferences, but are numerically ordered against a list of things; -e.g. Alice, Bob and Charlie rank the fruit apples, bananas and cherries as: - -| fruit | Alice | Bob | Charlie | -|--------|-------|-----|---------| -| apple | 1 | 2 | 3 | -| banana | 3 | 1 | 2 | -| cherry | 2 | 3 | 1 | - -### Intermediate - -#### Ranking Matrix -In order to perform a matching, `carousel` either needs a pair of preferences -(e.g. a set of doctor's preferences for residencies, and a set of residencies' preferences for doctors), -a pair of corresponding rankings. A ranking matrix is a concise way to express -a pair of rankings - used to display rankings: - -| names | Alice | Bob | Charlie | -|---------|------------|---------|-----------| -| Baylor | (1, 3) | (2, 2) | (3, 1) | -| CaseMed | (3, 2) | (1, 1) | (2, 3) | -| Emory | (2, 1) | (3, 3) | (1, 2) | - - -#### Priorities -Internally, preferences/rankings are often serialized to rows as a priority listing of the form: - -| App. | Rank | Rev | -|-------|---|--------| -| Alice | 1 | apple | -| Alice | 2 | cherry | -| Alice | 3 | banana | -| Bob | 1 | banana | -| Bob | 2 | apple | -| Bob | 3 | cherry | -| Charlie | 1 | cherry | -| Charlie | 2 | banana | -| Charlie | 3 | apple | - -(in no specific row order). This is not a supported input format in the frontends, but may be relevant -when interacting with the package programmatically - priorities can be followed as edges in a graph. - -### Output - -#### Matching -A matching is a table whose rows list the applicants matched to each reviewer -e.g. a matching from the med-school ranking matrix in the previous section might look like - -| Baylor | CaseMed | Emory | -|--------|---------|---------| -| Alice | Bob | Charlie | -| `None` | Daina | `None` | - -#### Assignments -An assignment is a table whose row lists which reviewer each applicant was matched to. -e.g. - -| Alice | Bob | Charlie | Daina | -|--------|---------|---------|---------| -| Baylor | CaseMed | Emory | CaseMed | - -TODO check/make stable. - -TODO matching more people per school e.g. - - - - -## Usage - -There are 4 main ways to use Carousel: - -### UO Pages Server - CGI Form -An HTML form submission interface is hosted at - -> [`https://pages.uoregon.edu/tgorordo/forms/carousel.html`](https://pages.uoregon.edu/tgorordo/forms/carousel.html) - -using the [pages.uoregon.edu CGI feature](https://service.uoregon.edu/TDClient/2030/Portal/KB/ArticleDet?ID=43069). - -Submit your applicant and reviewer preferences in tabular form, -or as excel uploads and the server will return a table of matches for you to choose from. - -This resource has *very* limited compute, so excessive usage might result in limitations or restricted access. -Try not to ruin a good thing! A moderate number of problems on the scale of those in the Examples section below should be sustainable. - -### Command Line Binary - -TODO - -### GUI Binary - -TODO - -### Python Library/Development -If you prefer to invoke `carousel` directly (or incorporate it as a library into another script) -in a python environment instead of using any of the bundled/released versions of the program described above (or wish to -reproduce those bundles), you can do so using the [`uv` environment/package/project manager](https://github.com/astral-sh/uv) -or a raw python virtual environment using the [`venv` module](https://docs.python.org/3/library/venv.html) -(if you need an intro to python `venv`s see [this page](https://pages.uoregon.edu/tgorordo/uoph410-510a_Image-Analysis/venvs.html)). - -Some extra command-line development conveniences are available if you use the tools: - -- [`just`](https://github.com/casey/just) is a taskrunner that can execute the provided `justfile` of some common useful commands. -- [`direnv`](https://github.com/direnv/direnv) with [`nix` (shell)](https://github.com/NixOS/nix) can guarantee minimal development tooling without polluting your broader environment. i.e. they can auto-install and run all of carousel's tooling in an environment specific to your development directory. - -but everything provided by these tools can also be done using more standard/default shell tooling. -[`uv`](https://github.com/astral-sh/uv) as your package/environment manager is highly recommended, however. - -TODO - -## Post-Selection -It's often desirable to enforce additional criteria on solutions -that are not well-posed within the core optimization problem. -Since the solver itself is stochastic to some extent, these are often most easily implemented -by a post-selection on a sampling of solutions. - +*A simple gale-shapley stable matcher for applicants to positions/assignments.* ## Examples Here are some usage examples: @@ -176,10 +9,3 @@ TODO ### Caltech Housing Rotation TODO - -## References -*See [`REFERENCES.bib`](REFERENCES.bib)* - -[1]: - -TODO diff --git a/REFERENCES.bib b/REFERENCES.bib deleted file mode 100644 index 940ba25..0000000 --- a/REFERENCES.bib +++ /dev/null @@ -1,75 +0,0 @@ -@article{gale&shapley1962, - ISSN = {0002989, 19300972}, - URL = {https://www.jstor.org/stable/2312726}, - author = {D. Gale and L. S. Shapley}, - journal = {The American Mathematical Monthly}, - number = {1}, - pages = {9--15}, - publisher = {[Taylor & Francis, Ltd., Mathematical Association of America]}, - title = {College Admissions and the Stability of Marriage}, - urldate = {2025-04-01}, - volume = {69}, - year = {1962}, -} - -@article{doi:10.1137/0215048, -author = {Irving, Robert W. and Leather, Paul}, -title = {The Complexity of Counting Stable Marriages}, -journal = {SIAM Journal on Computing}, -volume = {15}, -number = {3}, -pages = {655-667}, -year = {1986}, -doi = {10.1137/0215048}, -URL = {https://doi.org/10.1137/0215048}, -eprint = {https://doi.org/10.1137/0215048} -} - -@article{doi:10.1137/0216010, -author = {Gusfield, Dan}, -title = {Three Fast Algorithms for Four Problems in Stable Marriage}, -journal = {SIAM Journal on Computing}, -volume = {16}, -number = {1}, -pages = {111-128}, -year = {1987}, -doi = {10.1137/0216010}, -URL = { https://doi.org/10.1137/0216010}, -eprint = { https://doi.org/10.1137/0216010 } -} - -@article{https://doi.org/10.3982/TE4830, -author = {Huang, Chao}, -title = {Stable matching: An integer programming approach}, -journal = {Theoretical Economics}, -volume = {18}, -number = {1}, -pages = {37-63}, -keywords = {Two-sided matching, stability, integer programming, many-to-one matching, complementarity, total unimodularity, demand type, C61, C78, D47, D63}, -doi = {https://doi.org/10.3982/TE4830}, -url = {https://onlinelibrary.wiley.com/doi/abs/10.3982/TE4830}, -eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.3982/TE4830} -} - -@article{DELORME2019426, -title = {Mathematical models for stable matching problems with ties and incomplete lists}, -journal = {European Journal of Operational Research}, -volume = {277}, -number = {2}, -pages = {426-441}, -year = {2019}, -issn = {0377-2217}, -doi = {https://doi.org/10.1016/j.ejor.2019.03.017}, -url = {https://www.sciencedirect.com/science/article/pii/S0377221719302565}, -author = {Maxence Delorme and Sergio García and Jacek Gondzio and Jörg Kalcsics and David Manlove and William Pettersson}, -} - -@misc{gutin2024findingstablematchingsassignment, - title={Finding all stable matchings with assignment constraints}, - author={Gregory Gutin and Philip R. Neary and Anders Yeo}, - year={2024}, - eprint={2204.03989}, - archivePrefix={arXiv}, - primaryClass={econ.TH}, - url={https://arxiv.org/abs/2204.03989}, -} diff --git a/pyproject.toml b/pyproject.toml index 5beda68..a464690 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,6 @@ requires-python = ">=3.13" dependencies = [ "numpy>=2.2.4", "polars>=1.26.0", - "rustworkx>=0.17.1", ] #[project.scripts] diff --git a/requirements.txt b/requirements.txt index bfb8bbc..2beb6d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -85,9 +85,7 @@ nbformat==5.10.4 nodeenv==1.9.1 # via pyright numpy==2.2.4 - # via - # carousel (pyproject.toml) - # rustworkx + # via carousel (pyproject.toml) openai==1.106.1 # via marimo packaging==24.2 @@ -151,8 +149,6 @@ ruff==0.11.6 # via # carousel (pyproject.toml:dev) # marimo -rustworkx==0.17.1 - # via carousel (pyproject.toml) setuptools==78.1.0 # via # pyinstaller diff --git a/src/carousel/__init__.py b/src/carousel/__init__.py index 4e2d375..7530edc 100644 --- a/src/carousel/__init__.py +++ b/src/carousel/__init__.py @@ -1,37 +1,57 @@ -import logging, rich -from rich.logging import RichHandler - -import itertools as it -import numpy as np - -import polars as pl -import polars.selectors as pls - -rich.traceback.install() - -logging.basicConfig( - level=logging.INFO, - format="%(message)s", - datefmt="[%X]", - handlers=[ - RichHandler( - show_time=True, - markup=True, - rich_tracebacks=True, - tracebacks_suppress=[pl, pls, np], - ) - ], -) -log = logging.getLogger(__name__) - -from .util import * -from .brute import * from .def_acc import * -def main() -> None: - rich.print("Hello from [italic red]carousel[/italic red]!") +def check_match_unstable( + match, + applicant_prefs, + position_prefs, + capacities, + *, + app_col: str = "applicant", + pos_col: str = "position", + rank_col: str = "rank", +): + """ + Check match stability between applicants and positions. + + parameters + --- + match: pl.DataFrame + | applicant | position | + + applicant_prefs: pl.DataFrame + | applicant | position | rank | + + position_prefs: pl.DataFrame + | position | applicant | rank | + + """ + pass # TODO -if __name__ == "__main__": - main() +def check_match_stable( + match, + applicant_prefs, + position_prefs, + capacities, + *, + app_col: str = "applicant", + pos_col: str = "position", + rank_col: str = "rank", +) -> bool: + """ + Check match stability between applicants and positions. + + parameters + --- + match: pl.DataFrame + | applicant | position | + + applicant_prefs: pl.DataFrame + | applicant | position | rank | + + position_prefs: pl.DataFrame + | position | applicant | rank | + + """ + pass # TODO \ No newline at end of file diff --git a/src/carousel/brute.py b/src/carousel/brute.py deleted file mode 100644 index d14d8e9..0000000 --- a/src/carousel/brute.py +++ /dev/null @@ -1,3 +0,0 @@ -def brute_match(applicant_rankings, reviewer_rankings): - """Brute force combinatoric search for stable matches.""" - pass diff --git a/src/carousel/def_acc.py b/src/carousel/def_acc.py index 4933bfc..7b9d07f 100644 --- a/src/carousel/def_acc.py +++ b/src/carousel/def_acc.py @@ -1,177 +1,137 @@ -from .util import * +from collections import deque +import heapq + +import numpy as np +import polars as pl -def preparefor_def_acc(applicant_rankings, reviewer_rankings): - """Sanitize/Format applicant and reviewer rankings for the deferred acceptance solver.""" - pass - # return app_ranks, rev_ranks +def GS_deferred_acceptance( + applicant_prefs: pl.DataFrame, + position_prefs: pl.DataFrame, + capacities: pl.DataFrame, + *, + app_col: str = "applicant", + pos_col: str = "position", + rank_col: str = "rank", +) -> pl.DataFrame: + """ + Compute the proposer-optimal Gale-Shapley deferred acceptance stable matching for a + college-admissions problem between "applicants" and "positions" with specified capacities. + + parameters + --- + applicant_prefs: pl.DataFrame + A 3-column ranking of positions by applicants. | applicant | position | rank | + (lower rank is more preferred). -def matchby_deferred_acceptance(applicant_rankings, reviewer_rankings, revfirst=False): - """Find the Gale-Shapley deferred-acceptance stable matching for rankings A, R. Default to A-first unless `revfirst=true`.""" + position_prefs: pl.DataFrame + A 3-column ranking of applicants by positions. | position | applicant | rank | + (lower rank is more preferred). - app_prio = rank_to_prio( - applicant_rankings, prioritizer="applicant", priority="reviewer" - ) - rev_prio = rank_to_prio( - reviewer_rankings, prioritizer="reviewer", priority="applicant" + capacities: pl.DataFrame + A listing of position capacities. | position | capacity | + + returns + --- + matches: pl.DataFrame + A two-column match between applicants and positions (e.g. students and colleges). + | applicant | position | + """ + + app_idxs = ( + applicant_prefs.select(app_col).unique().sort(app_col).with_row_index("app_idx") ) - state = app_prio.select( - [ - pl.col("applicant").unique().alias("applicant"), - ] - ).with_columns([pl.lit(0).alias("next_rank"), pl.lit(True).alias("is_free")]) - - matches = pl.DataFrame( - { - "applicant": pl.Series([]), - "reviewer": pl.Series([]), - "current": pl.Series([], dtype=pl.Int64), - } + pos_idxs = ( + position_prefs.select(pos_col).unique().sort(pos_col).with_row_index("pos_idx") ) - max_iters = len(state) * len(rev_prio.select("applicant").unique()) + n_apps = app_idxs.height + n_poss = pos_idxs.height - for _ in range(max_iters): - props = ( - state.filter(pl.col("is_free")) - .join(app_prio, on="applicant") - .filter(pl.col("rank") == pl.col("next_rank")) - .select(["applicant", "reviewer"]) - ) + ap = ( + applicant_prefs.join(app_idxs, on=app_col) + .join(pos_idxs, on=pos_col) + .sort(["app_idx", rank_col]) + ) + al = ( + ap.group_by("app_idx", maintain_order=True) + .agg(pl.col("pos_idx")) + .sort("app_idx") + ) + app_prefs = al["pos_idx"].to_list() - if len(props) == 0: - break + max_pref_len = max((len(x) for x in app_prefs), default=0) - props = props.join(rev_prio, on=["reviewer", "applicant"], how="left") + pmat = np.full((n_apps, max_pref_len), -1, dtype=np.int32) + for i, r in enumerate(app_prefs): + pmat[i, : len(r)] = r - props = props.join( - matches.select( - ["reviewer", pl.col("applicant").alias("proposer"), pl.col("current")] - ), - on="reviewer", - how="left", - ) + pp = ( + position_prefs.join(app_idxs, on=app_col) + .join(pos_idxs, on=pos_col) + .sort(["pos_idx", rank_col]) + ) + worst_rank = np.iinfo(np.int32).max - props = props.with_columns( - [ - ( - pl.col("proposer").is_null() | (pl.col("rank") < pl.col("current")) - ).alias("accepted") - ] - ) + ranking = np.full((n_poss, n_apps), worst_rank, dtype=np.int32) + for r in pp.iter_rows(named=True): + ranking[r["pos_idx"], r["app_idx"]] = r[rank_col] - accepted = proposals.filter(pl.col("accepted")) - rejected = proposals.filter(~pl.col("accepted")) + caps = capacities.join(pos_idxs, on=pos_col).sort("pos_idx") + cap = caps["capacity"].to_numpy().astype(np.int32) - displaced = accepted.filter(pl.col("proposer").is_not_null()).select( - pl.col("proposer").alias("applicant") - ) + # --- - matches = matches.join(accepted.select("reviewer"), on="reviewer", how="anti") + next_c = np.zeros(n_apps, dtype=np.int32) + matched_pos = np.full(n_apps, -1, dtype=np.int32) - matches = pl.concat( - [ - matches, - accepted.select( - ["reviewer", "applicant", pl.col("rank").alias("current")] - ), - ] - ) + free = deque(np.arange(n_apps, dtype=np.int32)) - rejected_apps = rejected.select("applicant") - accepted_apps = accepted.select("applicant") + pos_heaps: list[list[tuple[int, int]]] = [[] for _ in range(n_poss)] - state = ( - state.join( - rejected_apps.with_columns(pl.lit(True).alias("was_rejected")), - on="applicant", - how="left", - ) - .join( - accepted_apps.with_columns(pl.lit(True).alias("was_accepted")), - on="applicant", - how="left", - ) - .join( - displaced.with_columns(pl.lit(True).alias("was_displaced")), - on="applicant", - how="left", - ) - .with_columns( - [ - pl.when(pl.col("was_rejected").fill_null(False)) - .then(pl.col("next_rank") + 1) - .otherwise(pl.col("next_rank")) - .alias("next_rank"), - pl.when(pl.col("was_accepted").fill_null(False)) - .then(False) - .when(pl.col("was_displaced").full_null(False)) - .then(True) - .otherwise(pl.col("is_free")) - .alias("is_free"), - ] - ) - .select(["applicant", "next_rank", "is_free"]) - ) + while free: + a = free.popleft() - return matches.select(["applicant", "reviewer"]) + while next_c[a] < max_pref_len: + p = pmat[a, next_c[a]] + next_c[a] += 1 + if p == -1: + break + # else -def deferred_acceptance_match(applicant_rankings, reviewer_rankings): - """Find Gale-Shapley deferred-acceptance stable matching for rankings A, R.""" - reviewer_rankings = reviewer_rankings.rename( - {reviewer_rankings.columns[0]: "applicant"} + arank = ranking[p, a] + + if arank == worst_rank: + continue + + heap = pos_heaps[p] + + if len(heap) < cap[p]: + heapq.heappush(heap, (-arank, a)) + matched_pos[a] = a + break + + worst_neg_rank, worst_app = heap[0] + worst_rank_current = -worst_neg_rank + + if arank < worst_rank_current: + heapq.heapreplace(heap, (-arank, a)) + matched_pos[a] = a + matched_pos[worst_app] = -1 + + free.append(worst_app) + break + + matches = ( + pl.DataFrame({"app_idx": np.arange(n_apps), "pos_idx": matched_pos}) + .filter(pl.col("pos_idx") != -1) + .join(app_idxs, on="app_idx") + .join(pos_idxs, on="pos_idx") + .select([app_col, pos_col]) + .sort(app_col) ) - app_prefs = rank_to_pref(applicant_rankings) - offers = app_prefs.transpose( - include_header=True, - header_name="applicant", - column_names=["pref" + str(i + 1) for i in range(app_prefs.width)], - ).with_columns(pl.coalesce(pl.all().exclude("applicant")).alias("offer")) - - # offers = pl.concat(pl.align_frames(offers, reviewer_rankings, on="applicant"), how="horizontal") - offers = pl.concat([offers, reviewer_rankings], how="align_left") - - match = pl.DataFrame( - { - r: offers.select(pl.col("applicant", "offer").sort_by(r)) - .select( - pl.when(pl.col("offer").eq(r)).then(pl.col("applicant")).otherwise(None) - ) - .select(pl.all().fill_null(strategy="backward").first()) - .to_series() - for r in reviewer_rankings.columns[1:] - } - ) # .select(pl.all().fill_null(strategy="backward").first()) - - # while check_unstable(match, applicant_rankings, reviewer_rankings): - while match.select(pl.any_horizontal(pl.all().has_nulls())).item(): - # null applicant preferences that rejected - offers = offers.with_columns( - pl.when( - pl.col("applicant").is_in(match.row(0)).not_(), - pl.col(c).is_null().not_(), - ) - .then(pl.lit(None)) - .otherwise(c) - .alias(c) - for c in offers.select(pl.col("pref*")).columns - ) - - return match - - offers = offers.with_columns(pl.col("pref")) - - offers = offers.with_columns( - pl.coalesce( - # TODO: select prefn columns using a regex - ).alias("offer") - ) - - # TODO update match - - # else if stable - return match + return matches diff --git a/src/carousel/util.py b/src/carousel/util.py deleted file mode 100644 index bce8d14..0000000 --- a/src/carousel/util.py +++ /dev/null @@ -1,152 +0,0 @@ -import itertools as it -import numpy as np - -import polars as pl -import polars.selectors as pls - -# Stability - - -def get_rank(ranking, ranker, ranked): - idx = ranking.select(pl.arg_where(pl.col("") == ranked)).item() - return ranking[ranker][idx] - - -def check_match_unstable(match, applicant_ranking, reviewer_ranking): - applicants = applicant_ranking.columns[1:] # assume unique applicants - for a, b in it.permutations(applicants, 2): - A = ( - match.select(c for c in match.iter_columns() if a in c).to_series().name - ) # the reviewer a is matched to - B = ( - match.select(c for c in match.iter_columns() if b in c).to_series().name - ) # the reviewer b is matched to - - b_prefers_A = get_rank(applicant_ranking, b, A) < get_rank( - applicant_ranking, b, B - ) - A_prefers_b = get_rank(reviewer_ranking, A, b) < get_rank( - reviewer_ranking, A, a - ) - if b_prefers_A and A_prefers_b: - return True - # else - return False - - -def check_match_stable(*args, **kwargs): - return not check_match_unstable(*args, **kwargs) - - -# Conversions - - -def rank_to_pref(ranking): - """Converts a ranking to a preference.""" - id_col_name = ranking.select(pls.by_index(0)).to_series().name - preferences = ranking.select( - [ - pl.col(id_col_name).sort_by(c).alias(c) - for c in ranking.columns - if c != id_col_name - ] - ) - return preferences - - -def pref_to_rank(preferences): - """Converts a preference to a ranking.""" - o = preferences.select( - pl.concat_list(preferences.columns).explode().unique().sort().alias("") - ) # .with_row_index(offset=1) - - ranking = pl.concat( - [ - o.join( - preferences.with_row_index(offset=1), - how="full", - left_on="", - right_on=c, - maintain_order="left", - ).select(pl.col("index").alias(c)) - for c in preferences.columns - ], - how="horizontal", - ) - return pl.concat([o, ranking], how="horizontal") - - -def ranks_to_mat(rankA, rankB): - return prefs_to_mat(rank_to_pref(rankA), rank_to_pref(rankB)) - - -def prefs_to_mat(prefA, prefB): - return rank_to_mat(pref_to_rank(preferences), pref_to_rank(preferences[0])) - - -def mat_to_ranks(matrix): - pass # TODO - - -def mat_to_prefs(matrix): - rankA, rankB = mat_to_ranks(matrix) - return rank_to_pref(rankA), rank_to_preft(rankB) - - -def pref_to_prio(pref, prioritizer="prioritizer", priority="subject"): - return pref.with_row_index("rank").unpivot( - index="rank", variable_name=prioritizer, value_name=priority - ) - - -def rank_to_prio(rank, **kwargs): - return pref_to_prio(rank_to_pref(rank), **kwargs) - - -"""" -def ranking_matrix(A, B): - T = pl.concat([A, B], how="horizontal") - - TT = T.with_columns(pl.concat_list(A.columns[0], B.columns[0])) - for ab in zip(A.columns[1:], B.columns[1:]): - TT = TT.with_columns(pl.concat_list(*ab)) - TTT = TT.select(pl.col(A.columns)) - - return TTT.insert_column(0, pl.Series("names", B.columns)) -""" - -## Output - - -def match_to_assgn(matching): - pass - - -def assgn_to_match(assignment): - pass - - -## Internal - - -# Validity - - -def check_pref_allunique(preferences): - """A valid set of preferences has all unique entries in each column.""" - repeats = preferences.select( - (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( - "repeats" - ) - ).get_column("repeats")[0] - return not repeats - - -def check_rank_noties(ranking): - """A valid ranking has no ties.""" - ties = ranking.select( - (~pl.all_horizontal((pl.all().is_unique() | pl.all().is_null()).all())).alias( - "ties" - ) - ).get_column("ties")[0] - return not ties diff --git a/src/carouselcmd.py b/src/carouselcmd.py index 1ae15b8..945c578 100644 --- a/src/carouselcmd.py +++ b/src/carouselcmd.py @@ -31,8 +31,8 @@ def cli(preferences: str, show_ballots=False, pretty=False) -> None: """ Compute the Gale-Shapley stable match of some preferences -- .csv or .xls(x). - A stable matching (of the "college admissions" problem) is one in which there is no - pair of, say, TA and course which would prefer each other over their respective + A stable matching (of the "college admissions" problem) is one in which there is no + pair of, say, TA and course which would prefer each other over their respective assignment given by the matching. """ diff --git a/test/galeshapley_test.py b/test/galeshapley_test.py index d95fe56..f9e8e84 100644 --- a/test/galeshapley_test.py +++ b/test/galeshapley_test.py @@ -1,135 +1,80 @@ import polars as pl -import polars.selectors as pls -import numpy as np - from polars.testing import assert_frame_equal - -import pytest, rich -from hypothesis import given, strategies as st - -import carousel as crsl - -rng = np.random.default_rng() +from carousel import GS_deferred_acceptance -@st.composite -def rankings(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): - h = pl.DataFrame({"": choices}) - r = pl.DataFrame( - {n: draw(st.just(rng.permutation(len(choices)) + 1)) for n in names} - ) # should add None option in generation of valid rankings - return pl.concat([h, r], how="horizontal") - - -@st.composite -def preferences(draw, names=["a", "b", "c"], choices=["A", "B", "C"]): - p = pl.DataFrame( +def test_prefs(): + people_prefs = pl.DataFrame( { - n: draw(st.just(rng.choice(choices, size=len(choices), replace=False))) - for n in names + "people": [ + "Alice", + "Alice", + "Alice", + "Bob", + "Bob", + "Bob", + "Charlie", + "Charlie", + "Charlie", + ], + "fruit": [ + "apple", + "banana", + "cherry", + "banana", + "cherry", + "apple", + "cherry", + "apple", + "banana", + ], + "rank": [1, 2, 3, 1, 2, 3, 1, 2, 3], } ) - return p - -p = pl.DataFrame({"a": ["A", "C", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]}) -r = pl.DataFrame({"": ["A", "B", "C"], "a": [1, 3, 2], "b": [2, 1, 3], "c": [3, 2, 1]}) - - -def test_invalid_pref(): - pp = pl.DataFrame( - {"a": ["A", "A", "B"], "b": ["B", "A", "C"], "c": ["C", "B", "A"]} - ) - assert crsl.check_pref_allunique(pp) is False - - -def test_pref_to_rank(): - assert_frame_equal(crsl.pref_to_rank(p), r, check_dtypes=False) - - -def test_invalid_rank(): - rr = pl.DataFrame( - {"": ["A", "B", "C"], "a": [1, 1, 2], "b": [2, 1, 3], "c": [3, 2, 1]} - ) - assert crsl.check_pref_allunique(rr) is False - - -def test_rank_to_pref(): - assert_frame_equal(crsl.rank_to_pref(r), p, check_dtypes=False) - - -@given(rankings()) -def test_valid_rank(R): - assert crsl.check_rank_noties(R) - - -@given(rankings()) -def test_ranks_tofrom_prefs(R): - assert_frame_equal(crsl.pref_to_rank(crsl.rank_to_pref(R)), R, check_dtypes=False) - - -@given(preferences()) -def test_valid_pref(P): - assert crsl.check_pref_allunique(P) - - -@given(preferences()) -def test_prefs_tofrom_ranks(P): - assert_frame_equal(crsl.rank_to_pref(crsl.pref_to_rank(P)), P, check_dtypes=False) - - -def test_eg2_unstable(): - ar = pl.DataFrame( + fruit_prefs = pl.DataFrame( { - "": ["A", "B", "C", "D"], - "a": [1, 2, 3, 4], - "b": [1, 4, 3, 2], - "c": [2, 1, 3, 4], - "d": [4, 2, 3, 1], + "fruit": [ + "apple", + "apple", + "apple", + "banana", + "banana", + "banana", + "cherry", + "cherry", + "cherry", + ], + "people": [ + "Alice", + "Bob", + "Charlie", + "Alice", + "Bob", + "Charlie", + "Alice", + "Bob", + "Charlie", + ], + "rank": [1, 1, 1, 1, 1, 1, 1, 1, 1], # fruits have no preferences } ) - rr = pl.DataFrame( + + capacities = pl.DataFrame( { - "": ["a", "b", "c", "d"], - "A": [3, 4, 2, 1], - "B": [3, 1, 4, 2], - "C": [2, 3, 4, 1], - "D": [3, 2, 1, 4], + "fruit": ["apple", "cherry", "banana"], + "capacity": [1, 1, 1], # have one of each } ) - match = pl.DataFrame({"A": ["a"], "B": ["b"], "C": ["c"], "D": ["d"]}) - assert crsl.check_match_unstable(match, ar, rr) - - -def test_eg2_isstable(): - ar = pl.DataFrame( - { - "": ["A", "B", "C", "D"], - "a": [1, 2, 3, 4], - "b": [1, 4, 3, 2], - "c": [2, 1, 3, 4], - "d": [4, 2, 3, 1], - } + assert_frame_equal( + GS_deferred_acceptance( + people_prefs, fruit_prefs, capacities, app_col="people", pos_col="fruit" + ).sort(["people", "fruit"]), + pl.DataFrame( + { + "people": ["Alice", "Bob", "Charlie"], + "fruit": ["apple", "banana", "cherry"], + } + ).sort(["people", "fruit"]), ) - rr = pl.DataFrame( - { - "": ["a", "b", "c", "d"], - "A": [3, 4, 2, 1], - "B": [3, 1, 4, 2], - "C": [2, 3, 4, 1], - "D": [3, 2, 1, 4], - } - ) - match = pl.DataFrame({"A": ["c"], "B": ["d"], "C": ["a"], "D": ["b"]}) - - assert crsl.check_match_stable(match, ar, rr) - - -@given( - rankings(names=["a", "b", "c", "d"], choices=["A", "B", "C", "D"]), - rankings(names=["A", "B", "C", "D"], choices=["a", "b", "c", "d"]), -) -def test_defacc_isstable(applicant_rankings, reviewer_rankings): - match = crsl.matchby_deferred_acceptance(applicant_rankings, reviewer_rankings) - assert crsl.check_match_stable(match, applicant_rankings, reviewer_rankings) diff --git a/test/nbs/__marimo__/session/hovses.py.json b/test/nbs/__marimo__/session/hovses.py.json deleted file mode 100644 index 517c3e3..0000000 --- a/test/nbs/__marimo__/session/hovses.py.json +++ /dev/null @@ -1,124 +0,0 @@ -{ - "version": "1", - "metadata": { - "marimo_version": "0.18.3" - }, - "cells": [ - { - "id": "Hbol", - "code_hash": "bc65c35c6fad59890b50c502bb8affa4", - "outputs": [ - { - "type": "data", - "data": { - "text/markdown": "

Caltech Hovse Rotation Example

" - } - } - ], - "console": [] - }, - { - "id": "MJUe", - "code_hash": "ccb62cc29f2cec640b063832a90adfec", - "outputs": [ - { - "type": "data", - "data": { - "text/markdown": "

Imports

" - } - } - ], - "console": [] - }, - { - "id": "vblA", - "code_hash": "9588e2b4004a32c73b7ecc0a968b666f", - "outputs": [ - { - "type": "data", - "data": { - "text/plain": "" - } - } - ], - "console": [] - }, - { - "id": "bkHC", - "code_hash": "4821038400db1f5dc63daf7ca5b26279", - "outputs": [ - { - "type": "data", - "data": { - "text/markdown": "

Generating Rotation Rankings

" - } - } - ], - "console": [] - }, - { - "id": "lEQa", - "code_hash": "ecc9ae14c8c8f3875656fc620665c6b4", - "outputs": [ - { - "type": "data", - "data": { - "text/html": "
True
" - } - } - ], - "console": [] - }, - { - "id": "NdeS", - "code_hash": "c582dec943ff7b743aa0691df291cea6", - "outputs": [ - { - "type": "data", - "data": { - "text/html": "" - } - } - ], - "console": [] - }, - { - "id": "PKri", - "code_hash": "b9f0f3a28a20d94d4b173bbbd49db37f", - "outputs": [], - "console": [] - }, - { - "id": "Xref", - "code_hash": "ccc8305d08e563d4fb1c9df31e9b7b69", - "outputs": [ - { - "type": "data", - "data": { - "text/html": "" - } - } - ], - "console": [] - }, - { - "id": "taaO", - "code_hash": "b712cc6f3d50763308dd5bc1c6703f77", - "outputs": [ - { - "type": "data", - "data": { - "text/html": "" - } - } - ], - "console": [] - }, - { - "id": "wJzy", - "code_hash": null, - "outputs": [], - "console": [] - } - ] -} \ No newline at end of file diff --git a/test/nbs/hovses.py b/test/nbs/hovses.py deleted file mode 100644 index 6846191..0000000 --- a/test/nbs/hovses.py +++ /dev/null @@ -1,105 +0,0 @@ -import marimo - -__generated_with = "0.18.3" -app = marimo.App(width="medium") - - -@app.cell(hide_code=True) -def _(mo): - mo.md(r""" - # Caltech Hovse Rotation Example - """) - return - - -@app.cell(hide_code=True) -def _(mo): - mo.md(r""" - ## Imports - """) - return - - -@app.cell -def _(): - import marimo as mo - import polars as pl, polars.selectors as pls - import numpy as np, faker as fk - import carousel as crsl - - return crsl, mo, pl - - -@app.cell(hide_code=True) -def _(mo): - mo.md(r""" - ## Generating Rotation Rankings - """) - return - - -@app.cell -def _(crsl, pl): - ar = pl.DataFrame( - { - "": ["A", "B", "C", "D"], - "a": [1, 2, 3, 4], - "b": [1, 4, 3, 2], - "c": [2, 1, 3, 4], - "d": [4, 2, 3, 1], - } - ) - rr = pl.DataFrame( - { - "": ["a", "b", "c", "d"], - "A": [3, 4, 2, 1], - "B": [3, 1, 4, 2], - "C": [2, 3, 4, 1], - "D": [3, 2, 1, 4], - } - ) - m = pl.DataFrame({"A": ["c"], "B": ["d"], "C": ["a"], "D": ["b"]}) - crsl.check_match_stable(m, ar, rr) - return ar, rr - - -@app.cell -def _(ar): - ar - return - - -@app.cell -def _(): - hovses = [ - "Blacker", - "Dabney", - "Ricketts", - "Fleming", - "Page", - "Lloyd", - "Venerable", - "Avery", - ] - return - - -@app.cell -def _(ar, crsl): - crsl.rank_to_pref(ar) - return - - -@app.cell -def _(crsl, rr): - crsl.pref_to_prio(crsl.rank_to_pref(rr)) - return - - -@app.cell -def _(): - return - - -if __name__ == "__main__": - app.run() diff --git a/uv.lock b/uv.lock index 2c4a045..e60a3d9 100644 --- a/uv.lock +++ b/uv.lock @@ -64,7 +64,6 @@ source = { editable = "." } dependencies = [ { name = "numpy" }, { name = "polars" }, - { name = "rustworkx" }, ] [package.dev-dependencies] @@ -85,7 +84,6 @@ dev = [ requires-dist = [ { name = "numpy", specifier = ">=2.2.4" }, { name = "polars", specifier = ">=1.26.0" }, - { name = "rustworkx", specifier = ">=0.17.1" }, ] [package.metadata.requires-dev] @@ -1534,28 +1532,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/d5/bc97ff895ec35cf3925d4bd60f3b39d822f377a446906ec9bcc87405e59b/ruff-0.15.14-py3-none-win_arm64.whl", hash = "sha256:ff47b90a9ef6a40c9e2f3b479c1fb78531adf055b94c1eba0a7ba04b31951826", size = 11208607, upload-time = "2026-05-21T14:34:26.525Z" }, ] -[[package]] -name = "rustworkx" -version = "0.17.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e7/b0/66d96f02120f79eeed86b5c5be04029b6821155f31ed4907a4e9f1460671/rustworkx-0.17.1.tar.gz", hash = "sha256:59ea01b4e603daffa4e8827316c1641eef18ae9032f0b1b14aa0181687e3108e", size = 399407, upload-time = "2025-09-15T16:29:46.429Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/24/8972ed631fa05fdec05a7bb7f1fc0f8e78ee761ab37e8a93d1ed396ba060/rustworkx-0.17.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c08fb8db041db052da404839b064ebfb47dcce04ba9a3e2eb79d0c65ab011da4", size = 2257491, upload-time = "2025-08-13T01:43:31.466Z" }, - { url = "https://files.pythonhosted.org/packages/23/ae/7b6bbae5e0487ee42072dc6a46edf5db9731a0701ed648db22121fb7490c/rustworkx-0.17.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:4ef8e327dadf6500edd76fedb83f6d888b9266c58bcdbffd5a40c33835c9dd26", size = 2040175, upload-time = "2025-08-13T01:43:33.762Z" }, - { url = "https://files.pythonhosted.org/packages/cd/ea/c17fb9428c8f0dcc605596f9561627a5b9ef629d356204ee5088cfcf52c6/rustworkx-0.17.1-cp39-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b809e0aa2927c68574b196f993233e269980918101b0dd235289c4f3ddb2115", size = 2324771, upload-time = "2025-08-13T01:43:35.553Z" }, - { url = "https://files.pythonhosted.org/packages/d7/40/ec8b3b8b0f8c0b768690c454b8dcc2781b4f2c767f9f1215539c7909e35b/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e82c46a92fb0fd478b7372e15ca524c287485fdecaed37b8bb68f4df2720f2", size = 2068584, upload-time = "2025-08-13T01:43:37.261Z" }, - { url = "https://files.pythonhosted.org/packages/d9/22/713b900d320d06ce8677e71bba0ec5df0037f1d83270bff5db3b271c10d7/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42170075d8a7319e89ff63062c2f1d1116ced37b6f044f3bf36d10b60a107aa4", size = 2380949, upload-time = "2025-08-13T01:52:17.435Z" }, - { url = "https://files.pythonhosted.org/packages/20/4b/54be84b3b41a19caf0718a2b6bb280dde98c8626c809c969f16aad17458f/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65cba97fa95470239e2d65eb4db1613f78e4396af9f790ff771b0e5476bfd887", size = 2562069, upload-time = "2025-08-13T02:09:27.222Z" }, - { url = "https://files.pythonhosted.org/packages/39/5b/281bb21d091ab4e36cf377088366d55d0875fa2347b3189c580ec62b44c7/rustworkx-0.17.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246cc252053f89e36209535b9c58755960197e6ae08d48d3973760141c62ac95", size = 2221186, upload-time = "2025-08-13T01:43:38.598Z" }, - { url = "https://files.pythonhosted.org/packages/cc/2d/30a941a21b81e9db50c4c3ef8a64c5ee1c8eea3a90506ca0326ce39d021f/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c10d25e9f0e87d6a273d1ea390b636b4fb3fede2094bf0cb3fe565d696a91b48", size = 2123510, upload-time = "2025-08-13T01:43:40.288Z" }, - { url = "https://files.pythonhosted.org/packages/4f/ef/c9199e4b6336ee5a9f1979c11b5779c5cf9ab6f8386e0b9a96c8ffba7009/rustworkx-0.17.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:48784a673cf8d04f3cd246fa6b53fd1ccc4d83304503463bd561c153517bccc1", size = 2302783, upload-time = "2025-08-13T01:43:42.073Z" }, - { url = "https://files.pythonhosted.org/packages/30/3d/a49ab633e99fca4ccbb9c9f4bd41904186c175ebc25c530435529f71c480/rustworkx-0.17.1-cp39-abi3-win32.whl", hash = "sha256:5dbc567833ff0a8ad4580a4fe4bde92c186d36b4c45fca755fb1792e4fafe9b5", size = 1931541, upload-time = "2025-08-13T01:43:43.415Z" }, - { url = "https://files.pythonhosted.org/packages/a9/ec/cee878c1879b91ab8dc7d564535d011307839a2fea79d2a650413edf53be/rustworkx-0.17.1-cp39-abi3-win_amd64.whl", hash = "sha256:d0a48fb62adabd549f9f02927c3a159b51bf654c7388a12fc16d45452d5703ea", size = 2055049, upload-time = "2025-08-13T01:43:44.926Z" }, -] - [[package]] name = "setuptools" version = "82.0.1"