Source code for metasklearn.core.problem

#!/usr/bin/env python
# Created by "Thieu" at 07:31, 08/05/2025 ----------%                                                                               
#       Email: nguyenthieu2102@gmail.com            %                                                    
#       Github: https://github.com/thieu1995        %                         
# --------------------------------------------------%

import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from mealpy import Problem


class HyperparameterProblem(Problem):
    """
    Hyperparameter optimization problem for a scikit-learn style estimator.

    Inherits from the `Problem` class in the `mealpy` library. Each candidate
    solution is decoded into estimator parameters, applied with
    ``set_params`` and scored by cross-validation.

    Attributes:
        estimator: The machine learning model to optimize.
        X: The feature matrix.
        y: The target vector.
        metric_class: A custom metric class used by the custom scoring path.
        obj_name: The name of the objective metric.
        cv: The number of cross-validation folds (never less than 2).
        n_jobs: The number of parallel jobs passed to ``cross_val_score``.
        shuffle: Whether to shuffle the data before splitting into folds.
        kf: The KFold cross-validator used by the custom scoring path.
        get_obj_score_: The bound scoring method (sklearn or custom).
    """

    def __init__(self, bounds=None, minmax="max", X=None, y=None, estimator=None,
                 metric_class=None, obj_name=None, sklearn_score=None,
                 cv=None, n_jobs=None, shuffle=True, seed=None, **kwargs):
        """
        Initializes the HyperparameterProblem instance.

        Args:
            bounds: The bounds for the hyperparameters.
            minmax: The optimization direction ("max" or "min").
            X: The feature matrix.
            y: The target vector.
            estimator: The machine learning model to optimize.
            metric_class: A custom metric class for evaluation.
            obj_name: The name of the objective metric.
            sklearn_score: If truthy, score with ``cross_val_score`` using
                ``obj_name`` as the ``scoring`` argument; otherwise score
                with ``metric_class``.
            cv: The number of cross-validation folds. ``None`` or any value
                below 2 falls back to 2.
            n_jobs: The number of parallel jobs for cross-validation.
            shuffle: Whether to shuffle the data before splitting into folds.
            seed: The random seed. It is forwarded to the parent class and,
                when ``shuffle`` is enabled, to the KFold splitter.
            **kwargs: Additional arguments for the parent class.
        """
        self.estimator = estimator
        self.X = X
        self.y = y
        self.metric_class = metric_class
        self.obj_name = obj_name

        # Pick the scoring strategy once so obj_func stays branch-free.
        if sklearn_score:
            self.get_obj_score_ = self._get_sklearn_score
        else:
            self.get_obj_score_ = self._get_custom_score

        self.cv = 2 if cv is None or cv < 2 else cv
        self.n_jobs = n_jobs
        self.shuffle = shuffle

        # KFold rejects a non-None random_state when shuffle is False, so the
        # seed is only handed over when it can actually have an effect.
        self.kf = KFold(n_splits=self.cv, shuffle=shuffle,
                        random_state=seed if shuffle else None)

        # NOTE(review): the attributes above are assigned before the parent
        # initializer runs, as in the original ordering; presumably the parent
        # may evaluate obj_func during setup - confirm against mealpy.
        super().__init__(bounds, minmax, **{**kwargs, "seed": seed})

    @staticmethod
    def _take_rows(data, indices):
        """
        Selects rows of ``data`` by integer position.

        Objects exposing ``iloc`` (pandas DataFrame/Series) are sliced
        positionally through it; anything else is indexed directly.
        """
        if hasattr(data, "iloc"):
            return data.iloc[indices]
        return data[indices]

    def _get_sklearn_score(self):
        """
        Computes the cross-validation score using sklearn's scoring function.

        The integer ``self.cv`` is passed to ``cross_val_score``, so the
        ``self.kf`` splitter (and its shuffle/seed settings) is not used on
        this path.

        Returns:
            float: The mean cross-validation score.
        """
        scores = cross_val_score(self.estimator, self.X, self.y, cv=self.cv,
                                 scoring=self.obj_name, n_jobs=self.n_jobs)
        return np.mean(scores)

    def _get_custom_score(self):
        """
        Computes the cross-validation score using a custom scoring function
        from the PerMetrics library.

        The estimator is fitted in place on each training fold, and the
        metric named ``obj_name`` is evaluated on the held-out fold.

        Returns:
            float: The mean cross-validation score.
        """
        scores = []
        for train_idx, test_idx in self.kf.split(self.X):
            # Positional row selection works for numpy arrays and pandas objects.
            X_train = self._take_rows(self.X, train_idx)
            X_test = self._take_rows(self.X, test_idx)
            y_train = self._take_rows(self.y, train_idx)
            y_test = self._take_rows(self.y, test_idx)

            self.estimator.fit(X_train, y_train)
            y_pred = self.estimator.predict(X_test)

            # The metric object returns a dict keyed by metric name.
            mt = self.metric_class(y_test, y_pred)
            scores.append(mt.get_metric_by_name(self.obj_name)[self.obj_name])
        return np.mean(scores)

    def obj_func(self, x):
        """
        Objective function to evaluate a hyperparameter configuration.

        Args:
            x: The encoded hyperparameter configuration.

        Returns:
            float: The evaluation score for the given configuration.
        """
        x_decoded = self.decode_solution(x)
        self.estimator.set_params(**x_decoded)
        return self.get_obj_score_()