ModalScoutEnsemble
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sheshe import ModalScoutEnsemble
X, y = load_iris(return_X_y=True)
mse = ModalScoutEnsemble().fit(X, y)
mse.plot_classes(X, y)
plt.show()
Ensemble that applies ModalBoundaryClustering on the most promising
subspaces discovered by SubspaceScout. Each submodel is weighted by scout
score, cross-validation and feature importance, and the ensemble can delegate
optimisation to ShuShu when ensemble_method="shushu".
Example
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression())
mse.fit(X, y)
labels = mse.predict(X)
Usage examples
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0)
mse.fit(X, y) # fit
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0)
mse.fit_predict(X, y) # fit_predict
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0)
mse.fit_transform(X, y) # fit_transform
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.transform(X) # transform
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.predict(X) # predict
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.predict_proba(X) # predict_proba
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.decision_function(X) # decision_function
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.predict_regions(X) # predict_regions
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.score(X, y) # score
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble(base_estimator=LogisticRegression(), random_state=0).fit(X, y)
mse.save("mse.joblib") # save
from sheshe import ModalScoutEnsemble
from sklearn.linear_model import LogisticRegression
mse = ModalScoutEnsemble.load("mse.joblib")
Additional examples
from sheshe import ModalScoutEnsemble
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
iris = load_iris()
X, y = iris.data, iris.target
mse = ModalScoutEnsemble(
base_estimator=LogisticRegression(max_iter=200),
task="classification",
random_state=0,
scout_kwargs={"max_order": 2, "top_m": 4, "sample_size": None},
cv=2,
# ensemble_method="shushu" would use the ShuShu optimizer
)
mse.fit(X, y)
print(mse.predict(X[:5]))
print(mse.predict_proba(X[:5]))
Parameters
- `base_estimator` (`BaseEstimator`): model used to compute probabilities or predictions in each subspace.
- `task` (`str`, optional): `"classification"` or `"regression"`. Inferred from the base estimator if `None`.
- `ensemble_method` (`str`, default `"modal_scout"`): either `"modal_scout"` to use the internal subspace ensemble or `"shushu"` to delegate to `ShuShu`.
- `top_k` (`int`, default `8`): maximum number of subspaces kept.
- `min_score` (`float` or `None`): minimum score required for a subspace to be used.
- `max_order` (`int` or `None`): maximum order of subspaces evaluated.
- `metric` (`str` or `None`, default `"mi_synergy"`): criterion used to rank subspaces.
- `jaccard_threshold` (`float`, default `0.55`): minimum Jaccard similarity to consider two subspaces redundant.
- `alpha` (`float`, default `0.5`): exponent for the scout score in the final weighting.
- `beta` (`float`, default `0.5`): exponent for cross-validation performance.
- `gamma` (`float`, default `0.5`): exponent for global feature importance.
- `cv` (`int` or `None`, default `3`): number of CV folds; `0` or `None` uses a holdout split.
- `cv_metric_cls` (`Callable`, default `balanced_accuracy_score`): metric for classification CV.
- `cv_metric_reg` (`Callable`, default `r2_score`): metric for regression CV.
- `cv_floor` (`float` or `None`): discard subspaces with CV below this value.
- `n_jobs` (`int`, default `1`): number of parallel jobs for CV.
- `random_state` (`int` or `None`, default `0`): RNG seed.
- `base_2d_rays` (`int`, default `8`): base number of rays for MBC fits in each subspace.
- `ray_cap` (`int`, default `48`): maximum rays allowed per subspace.
- `time_budget_s` (`float` or `None`): optional global time budget for fitting.
- `use_importances` (`bool`, default `True`): include global feature importances in the weighting.
- `importance_sample_size` (`int` or `None`, default `4096`): sample size for computing global importances.
- `scout_kwargs` (`dict` or `None`): parameters forwarded to `SubspaceScout`.
- `shushu_kwargs` (`dict` or `None`): parameters forwarded to `ShuShu` when `ensemble_method="shushu"`.
- `mbc_kwargs` (`dict` or `None`): additional arguments passed to each `ModalBoundaryClustering` instance.
- `verbose` (`int`, default `0`): logging level.
- `prediction_within_region` (`bool`, default `False`): evaluate the base estimator only within each region during prediction.
Methods
- `fit(X, y)` – train the ensemble on `X` and `y`.
- `predict(X)` – predict labels or cluster ids.
- `predict_proba(X)` – class probabilities aggregated across subspaces.
- `decision_function(X)` – decision scores averaged across submodels.
- `predict_regions(X)` – DataFrame with region assignments.
- `plot_pairs(X, y=None, show_histograms=False, **kwargs)` – delegate to `ModalBoundaryClustering.plot_pairs` for a selected submodel, including optional marginal histograms.
- `plot_pair_3d(X, pair, **kwargs)` – 3D surface for a feature pair of a submodel.