From b760d6adb84ba2cb942b3d2d33b7a102c845057b Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Fri, 9 Jul 2021 06:57:18 +0200 Subject: [PATCH 1/3] Make ParallelClustering picklable, fixing memory caching in Mapper pipelines (#597) * Remove lambdas from ParallelClustering so it can be pickled * Add regression test --- gtda/mapper/cluster.py | 26 ++++++++++++++++++++------ gtda/mapper/tests/test_cluster.py | 15 ++++++++++++++- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py index 9f9aa248d..f8ce09440 100644 --- a/gtda/mapper/cluster.py +++ b/gtda/mapper/cluster.py @@ -16,6 +16,22 @@ from ..utils.validation import validate_params +def _sample_weight_computer(rel_indices, sample_weight): + return {"sample_weight": sample_weight[rel_indices]} + + +def _empty_dict(*args): + return {} + + +def _indices_computer_precomputed(rel_indices): + return np.ix_(rel_indices, rel_indices) + + +def _indices_computer_not_precomputed(rel_indices): + return rel_indices + + class ParallelClustering(BaseEstimator): """Employ joblib parallelism to cluster different portions of a dataset. 
@@ -129,16 +145,14 @@ def fit(self, X, y=None, sample_weight=None): fit_params = signature(self.clusterer.fit).parameters if sample_weight is not None and "sample_weight" in fit_params: - self._sample_weight_computer = lambda rel_indices, sample_weight: \ - {"sample_weight": sample_weight[rel_indices]} + self._sample_weight_computer = _sample_weight_computer else: - self._sample_weight_computer = lambda *args: {} + self._sample_weight_computer = _empty_dict if self._precomputed: - self._indices_computer = lambda rel_indices: \ - np.ix_(rel_indices, rel_indices) + self._indices_computer = _indices_computer_precomputed else: - self._indices_computer = lambda rel_indices: rel_indices + self._indices_computer = _indices_computer_not_precomputed # This seems necessary to avoid large overheads when running fit a # second time. Probably due to refcounts. NOTE: Only works if done diff --git a/gtda/mapper/tests/test_cluster.py b/gtda/mapper/tests/test_cluster.py index b457c543e..9c6dde068 100644 --- a/gtda/mapper/tests/test_cluster.py +++ b/gtda/mapper/tests/test_cluster.py @@ -2,6 +2,9 @@ for ParallelClustering.""" # License: GNU AGPLv3 +from shutil import rmtree +from tempfile import mkdtemp + import numpy as np import pytest import sklearn as sk @@ -11,7 +14,8 @@ from numpy.testing import assert_almost_equal from scipy.spatial import distance_matrix -from gtda.mapper import ParallelClustering, FirstHistogramGap, FirstSimpleGap +from gtda.mapper import ParallelClustering, FirstHistogramGap, \ + FirstSimpleGap, make_mapper_pipeline def test_parallel_clustering_bad_input(): @@ -233,3 +237,12 @@ def get_partition_from_preds(preds): assert get_partition_from_preds(preds) == \ get_partition_from_preds(preds_mat) + + +def test_mapper_pipeline_picklable(): + # Regression test for issue #596 + X = np.random.random((100, 2)) + cachedir = mkdtemp() + pipe = make_mapper_pipeline(memory=cachedir) + pipe.fit_transform(X) + rmtree(cachedir) From ccba800d24356308768beb78240737232ab29e69 
Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Fri, 9 Jul 2021 13:44:16 +0200 Subject: [PATCH 2/3] Prepare v0.5.1 (#598) * Bump version to 0.5.1, create release notes --- doc/library.rst | 4 ++-- doc/release.rst | 30 ++++++++++++++++++++++++++++++ doc/versions | 1 + gtda/_version.py | 2 +- setup.py | 2 +- 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/doc/library.rst b/doc/library.rst index dbed41623..fe4ae000b 100644 --- a/doc/library.rst +++ b/doc/library.rst @@ -124,5 +124,5 @@ What's new .. include:: release.rst - :start-after: Release 0.5.0 - :end-before: Release 0.4.0 + :start-after: Release 0.5.1 + :end-before: Release 0.5.0 diff --git a/doc/release.rst b/doc/release.rst index 4d6e6c111..dfadafaf6 100644 --- a/doc/release.rst +++ b/doc/release.rst @@ -4,6 +4,36 @@ Release Notes .. _stable: +************* +Release 0.5.1 +************* + +This release was made shortly after the release of version 0.5.0, to resolve an important bug. Please refer to `the release notes for 0.5.0 <https://giotto-ai.github.io/gtda-docs/0.5.0/release.html>`_ to see the major improvements and backwards-incompatible changes to the Mapper subpackage which were introduced there. + +Major Features and Improvements +=============================== + +None. + +Bug Fixes +========= + +A bug preventing Mapper pipelines from working with memory caching has been fixed (`#597 <https://github.com/giotto-ai/giotto-tda/pull/597>`_). + +Backwards-Incompatible Changes +============================== + +None. + +Thanks to our Contributors +========================== + +This release contains contributions from: + +Umberto Lupo + +We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions. 
+ ************* Release 0.5.0 ************* diff --git a/doc/versions b/doc/versions index 095c0fbba..9d8bd6684 100644 --- a/doc/versions +++ b/doc/versions @@ -5,4 +5,5 @@ ./0.3.1 ./0.4.0 ./0.5.0 +./0.5.1 ./latest diff --git a/gtda/_version.py b/gtda/_version.py index a5caaa708..32e32355f 100644 --- a/gtda/_version.py +++ b/gtda/_version.py @@ -19,4 +19,4 @@ # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "0.5.0" +__version__ = "0.5.1" diff --git a/setup.py b/setup.py index 1fedb33db..700e83732 100755 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ MAINTAINER_EMAIL = "maintainers@giotto.ai" URL = "https://github.com/giotto-ai/giotto-tda" LICENSE = "GNU AGPLv3" -DOWNLOAD_URL = "https://github.com/giotto-ai/giotto-tda/tarball/v0.5.0" +DOWNLOAD_URL = "https://github.com/giotto-ai/giotto-tda/tarball/v0.5.1" VERSION = __version__ # noqa CLASSIFIERS = ["Intended Audience :: Science/Research", "Intended Audience :: Developers", From 4c881b2992c8f55eb8bc4b909cf04f2629ea642e Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Fri, 9 Jul 2021 15:21:02 +0200 Subject: [PATCH 3/3] [CI] Update ccache --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 6cb7e1762..71c38b1f4 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -46,7 +46,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-wheels-v2021.07.08" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-wheels-v2021.07.09" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache @@ -146,7 +146,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-v2021.07.08" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-v2021.07.09" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache