Module `water_security.classification.classifier`

Expand source code

from typing import Dict
from sklearn.base import BaseEstimator, RegressorMixin
import pandas as pd
import xgboost as xgb


class Classifier(BaseEstimator, RegressorMixin):
    """
    Scikit-learn style estimator that wraps an ``xgboost.XGBRegressor``.

    Despite its name the class behaves as a regressor: it mixes in
    ``RegressorMixin`` and ``predict`` returns the raw regressor output.

    risk: key used to look up the XGBoost keyword arguments in
        ``data.model.MODEL_BEST_PARAMS``
    """

    def __init__(self, risk):
        # Set by fit(); None marks the estimator as not fitted yet.
        self.regressor = None
        self.risk = risk
        # Lazily filled cache behind the ``parameters`` property.
        self._parameters = None

    @property
    def parameters(self) -> Dict:
        """
        Keyword arguments passed to ``xgb.XGBRegressor`` for this risk.

        Looked up in ``MODEL_BEST_PARAMS`` on first access and cached; the
        cached value is not refreshed if ``risk`` is changed afterwards.
        """
        if self._parameters is None:
            # Imported on first use instead of at module import time.
            from data.model import MODEL_BEST_PARAMS

            self._parameters = MODEL_BEST_PARAMS[self.risk]
        return self._parameters

    def fit(self, x_data: pd.DataFrame, y_data):
        """
        Train a fresh XGBoost regressor and record its feature importances.

        x_data: the nxm features
        y_data: the n labels, with values 0,1,2 or 3

        Returns self. After the call ``feature_importances_`` is a Series
        indexed by the column names of x_data that holds only the strictly
        positive importances, largest first.
        """
        self.regressor = xgb.XGBRegressor(**self.parameters).fit(x_data, y_data)
        self.feature_importances_ = pd.Series(
            self.regressor.feature_importances_, index=x_data.columns.tolist()
        )
        # Drop features the model never used, then rank the remainder.
        self.feature_importances_ = self.feature_importances_[
            self.feature_importances_ > 0
        ].sort_values(ascending=False)
        return self

    def predict(self, x_data):
        """
        Predict with the regressor trained by ``fit``.

        x_data: the nxm features
        Returns n predictions, which have values 0 to 3, they can be floats

        Raises sklearn.exceptions.NotFittedError if ``fit`` has not been
        called yet.
        """
        if self.regressor is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the former AttributeError keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        return self.regressor.predict(x_data)

Classes

class Classifier (risk)

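Scikit-learn compatible estimator that wraps an `xgboost.XGBRegressor` whose hyper-parameters are looked up by `risk`. It inherits from `BaseEstimator`, the base class for all estimators in scikit-learn.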

Notes

All estimators should specify all the parameters that can be set at the class level in their __init__ as explicit keyword arguments (no *args or **kwargs).

Expand source code

class Classifier(BaseEstimator, RegressorMixin):
    """
    Scikit-learn style estimator that wraps an ``xgboost.XGBRegressor``.

    Despite its name the class behaves as a regressor: it mixes in
    ``RegressorMixin`` and ``predict`` returns the raw regressor output.

    risk: key used to look up the XGBoost keyword arguments in
        ``data.model.MODEL_BEST_PARAMS``
    """

    def __init__(self, risk):
        # Set by fit(); None marks the estimator as not fitted yet.
        self.regressor = None
        self.risk = risk
        # Lazily filled cache behind the ``parameters`` property.
        self._parameters = None

    @property
    def parameters(self) -> Dict:
        """
        Keyword arguments passed to ``xgb.XGBRegressor`` for this risk.

        Looked up in ``MODEL_BEST_PARAMS`` on first access and cached; the
        cached value is not refreshed if ``risk`` is changed afterwards.
        """
        if self._parameters is None:
            # Imported on first use instead of at module import time.
            from data.model import MODEL_BEST_PARAMS

            self._parameters = MODEL_BEST_PARAMS[self.risk]
        return self._parameters

    def fit(self, x_data: pd.DataFrame, y_data):
        """
        Train a fresh XGBoost regressor and record its feature importances.

        x_data: the nxm features
        y_data: the n labels, with values 0,1,2 or 3

        Returns self. After the call ``feature_importances_`` is a Series
        indexed by the column names of x_data that holds only the strictly
        positive importances, largest first.
        """
        self.regressor = xgb.XGBRegressor(**self.parameters).fit(x_data, y_data)
        self.feature_importances_ = pd.Series(
            self.regressor.feature_importances_, index=x_data.columns.tolist()
        )
        # Drop features the model never used, then rank the remainder.
        self.feature_importances_ = self.feature_importances_[
            self.feature_importances_ > 0
        ].sort_values(ascending=False)
        return self

    def predict(self, x_data):
        """
        Predict with the regressor trained by ``fit``.

        x_data: the nxm features
        Returns n predictions, which have values 0 to 3, they can be floats

        Raises sklearn.exceptions.NotFittedError if ``fit`` has not been
        called yet.
        """
        if self.regressor is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the former AttributeError keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        return self.regressor.predict(x_data)

Ancestors

sklearn.base.BaseEstimator
sklearn.base.RegressorMixin

Instance variables

var parameters : Dict

Expand source code

@property
def parameters(self) -> Dict:
    """
    Keyword arguments for the underlying regressor, resolved once per instance.

    The first access reads the entry for ``self.risk`` from
    ``MODEL_BEST_PARAMS`` and stores it; later accesses return the stored value.
    """
    cached = self._parameters
    if cached is not None:
        return cached

    # Deferred import: only needed when nothing has been cached yet.
    from data.model import MODEL_BEST_PARAMS

    self._parameters = MODEL_BEST_PARAMS[self.risk]
    return self._parameters

Methods

def fit(self, x_data: pandas.core.frame.DataFrame, y_data)

`x_data`: the n×m feature matrix. `y_data`: the n labels, with values 0, 1, 2 or 3.

Expand source code

def fit(self, x_data: pd.DataFrame, y_data):
    """
    Fit an XGBoost regressor and keep a ranking of the features it used.

    x_data: the nxm features
    y_data: the n labels, with values 0,1,2 or 3

    Returns self, with ``regressor`` and ``feature_importances_`` set.
    """
    model = xgb.XGBRegressor(**self.parameters)
    self.regressor = model.fit(x_data, y_data)

    column_names = x_data.columns.tolist()
    importances = pd.Series(self.regressor.feature_importances_, index=column_names)

    # Keep strictly positive importances only, highest first.
    used_only = importances[importances > 0]
    self.feature_importances_ = used_only.sort_values(ascending=False)
    return self

def predict(self, x_data)

`x_data`: the n×m feature matrix. Returns n predictions with values from 0 to 3; they can be floats.

Expand source code

def predict(self, x_data):
    """
    Predict with the regressor stored on ``self.regressor``.

    x_data: the nxm features
    Returns n predictions, which have values 0 to 3, they can be floats

    Raises sklearn.exceptions.NotFittedError if ``self.regressor`` is still
    None, i.e. the estimator has not been fitted.
    """
    if self.regressor is None:
        # NotFittedError subclasses both ValueError and AttributeError, so
        # callers that caught the former AttributeError keep working.
        from sklearn.exceptions import NotFittedError

        raise NotFittedError(
            f"This {type(self).__name__} instance is not fitted yet. "
            "Call 'fit' before using 'predict'."
        )
    return self.regressor.predict(x_data)