Source code for metasklearn.core.problem

#!/usr/bin/env python
# Created by "Thieu" at 07:31, 08/05/2025 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from mealpy import Problem


class HyperparameterProblem(Problem):
    """
    A class to define a hyperparameter optimization problem for machine learning models.

    Inherits from the `Problem` class in the `mealpy` library and provides functionality
    to evaluate hyperparameter configurations using cross-validation.

    Attributes:
        estimator: The machine learning model to optimize.
        X: The feature matrix.
        y: The target vector.
        metric_class: A custom metric class for evaluation.
        obj_name: The name of the objective metric.
        cv: The number of cross-validation folds (never less than 2).
        n_jobs: The number of parallel jobs for cross-validation.
        shuffle: Whether to shuffle the data before splitting into folds.
        kf: The KFold cross-validator instance shared by both scoring paths.
        get_obj_score_: The scoring function to use (either sklearn or custom).
    """

    def __init__(self, bounds=None, minmax="max", X=None, y=None, estimator=None, metric_class=None,
                 obj_name=None, sklearn_score=None, cv=None, n_jobs=None, shuffle=True, seed=None, **kwargs):
        """
        Initializes the HyperparameterProblem instance.

        Args:
            bounds: The bounds for the hyperparameters.
            minmax: The optimization direction ("max" for maximization, "min" for minimization).
            X: The feature matrix.
            y: The target vector.
            estimator: The machine learning model to optimize.
            metric_class: A custom metric class for evaluation. Only used when
                `sklearn_score` is falsy.
            obj_name: The name of the objective metric.
            sklearn_score: If truthy, score with sklearn's `cross_val_score`;
                otherwise score with `metric_class` in a manual cross-validation loop.
            cv: The number of cross-validation folds. `None` or any value below 2
                is replaced by 2.
            n_jobs: The number of parallel jobs for cross-validation (sklearn path only).
            shuffle: Whether to shuffle the data before splitting into folds.
            seed: The random seed for reproducibility. It is forwarded to the parent
                class, and to the fold splitter only when `shuffle` is enabled.
            **kwargs: Additional arguments for the parent class.
        """
        self.estimator = estimator
        self.X = X
        self.y = y
        self.metric_class = metric_class
        self.obj_name = obj_name

        # Bind the scoring strategy once so obj_func does not branch on every evaluation.
        if sklearn_score:
            self.get_obj_score_ = self._get_sklearn_score
        else:
            self.get_obj_score_ = self._get_custom_score

        # Cross-validation needs at least two folds.
        self.cv = 2 if cv is None or cv < 2 else cv
        self.n_jobs = n_jobs
        self.shuffle = shuffle
        # KFold rejects a random_state when shuffle is disabled (it raises ValueError),
        # so the seed is only handed over when it can actually have an effect.
        self.kf = KFold(n_splits=self.cv, shuffle=shuffle, random_state=seed if shuffle else None)
        # NOTE(review): the parent constructor is deliberately called last, after all
        # attributes used by obj_func exist - presumably it may evaluate obj_func; confirm.
        super().__init__(bounds, minmax, **{**kwargs, "seed": seed})

    @staticmethod
    def _take_rows(data, indices):
        """
        Selects rows of `data` by integer position.

        Args:
            data: An indexable container of samples. Objects exposing `.iloc`
                (e.g. pandas DataFrame/Series) are indexed positionally through it;
                lists and tuples are converted to a NumPy array first; anything else
                is indexed directly with `data[indices]`.
            indices: Integer positions of the rows to select.

        Returns:
            The selected rows, in the container type produced by the indexing above.
        """
        if hasattr(data, "iloc"):
            # Plain `data[indices]` on a DataFrame would select columns, not rows.
            return data.iloc[indices]
        if isinstance(data, (list, tuple)):
            data = np.asarray(data)
        return data[indices]

    def _get_sklearn_score(self):
        """
        Computes the cross-validation score using sklearn's scoring function.

        The shared `self.kf` splitter is used so that the `shuffle` and `seed`
        settings are honoured and both scoring paths evaluate on the same folds.

        Returns:
            float: The mean cross-validation score.
        """
        scores = cross_val_score(self.estimator, self.X, self.y,
                                 cv=self.kf, scoring=self.obj_name, n_jobs=self.n_jobs)
        return np.mean(scores)

    def _get_custom_score(self):
        """
        Computes the cross-validation score using a custom scoring function from PerMetrics library

        For every fold produced by `self.kf`, the estimator is fitted on the training
        rows, predictions are made for the held-out rows, and the metric named
        `self.obj_name` is computed through `self.metric_class`.

        Returns:
            float: The mean cross-validation score.
        """
        scores = []
        # Perform custom cross-validation
        for train_idx, test_idx in self.kf.split(self.X):
            # Split the data into training and test sets (row-wise, by position)
            X_train = self._take_rows(self.X, train_idx)
            X_test = self._take_rows(self.X, test_idx)
            y_train = self._take_rows(self.y, train_idx)
            y_test = self._take_rows(self.y, test_idx)
            # Train the model on the training set
            self.estimator.fit(X_train, y_train)
            # Make predictions on the test set
            y_pred = self.estimator.predict(X_test)
            # Evaluate the requested metric for the current fold; the targets are
            # handed over as a plain array (a no-op when they already are one).
            mt = self.metric_class(np.asarray(y_test), y_pred)
            score = mt.get_metric_by_name(self.obj_name)[self.obj_name]
            # Collect the per-fold metric values
            scores.append(score)
        return np.mean(scores)

    def obj_func(self, x):
        """
        Objective function to evaluate a hyperparameter configuration.

        Args:
            x: The encoded hyperparameter configuration.

        Returns:
            float: The evaluation score for the given configuration.
        """
        # Translate the optimizer's encoded vector into estimator keyword parameters.
        x_decoded = self.decode_solution(x)
        self.estimator.set_params(**x_decoded)
        score = self.get_obj_score_()
        return score