Module water_security.classification.classifier
Expand source code
from typing import Dict
from sklearn.base import BaseEstimator, RegressorMixin
import pandas as pd
import xgboost as xgb
class Classifier(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around ``xgb.XGBRegressor`` for one risk.

    The hyper-parameters passed to the underlying regressor are looked up in
    ``data.model.MODEL_BEST_PARAMS`` using ``risk`` as the key. After
    :meth:`fit`, ``feature_importances_`` is a ``pd.Series`` holding only the
    strictly positive importances, sorted in descending order.
    """

    def __init__(self, risk):
        """Store ``risk``; the regressor and its parameters are created lazily."""
        self.regressor = None
        self.risk = risk
        self._parameters = None

    @property
    def parameters(self) -> Dict:
        """Return the regressor keyword arguments for ``self.risk``.

        The lookup runs on first access and the result is cached in
        ``self._parameters``.

        NOTE(review): the cache is not refreshed when ``self.risk`` is changed
        afterwards (e.g. through ``set_params``); reset ``_parameters`` to
        ``None`` if that is ever needed.
        """
        if self._parameters is None:
            # Imported on first use rather than at module import time.
            from data.model import MODEL_BEST_PARAMS

            self._parameters = MODEL_BEST_PARAMS[self.risk]
        return self._parameters

    def fit(self, x_data: pd.DataFrame, y_data):
        """Train the regressor and record the non-zero feature importances.

        x_data: the nxm features; a DataFrame gives named importances, any
            other array-like accepted by xgboost gives positional ones
        y_data: the n labels, with values 0,1,2 or 3

        Returns ``self`` so calls can be chained.
        """
        self.regressor = xgb.XGBRegressor(**self.parameters).fit(x_data, y_data)

        # Inputs without ``.columns`` (e.g. numpy arrays) previously crashed
        # here after training; index=None makes pandas use 0..m-1 instead.
        columns = getattr(x_data, "columns", None)
        labels = None if columns is None else columns.tolist()

        importances = pd.Series(self.regressor.feature_importances_, index=labels)
        # Keep only the features the model actually used, most important first.
        self.feature_importances_ = importances[importances > 0].sort_values(
            ascending=False
        )
        return self

    def predict(self, x_data):
        """Predict with the fitted regressor.

        x_data: the nxm features
        Returns n predictions, which have values 0 to 3, they can be floats

        Raises ``sklearn.exceptions.NotFittedError`` when called before
        :meth:`fit`.
        """
        if self.regressor is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the former AttributeError keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This Classifier instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        return self.regressor.predict(x_data)
Classes
class Classifier (risk)
-
Base class for all estimators in scikit-learn.
Notes
All estimators should specify all the parameters that can be set at the class level in their
`__init__` as explicit keyword arguments (no `*args` or `**kwargs`).
Expand source code
class Classifier(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around ``xgb.XGBRegressor`` for one risk.

    The hyper-parameters passed to the underlying regressor are looked up in
    ``data.model.MODEL_BEST_PARAMS`` using ``risk`` as the key. After
    :meth:`fit`, ``feature_importances_`` is a ``pd.Series`` holding only the
    strictly positive importances, sorted in descending order.
    """

    def __init__(self, risk):
        """Store ``risk``; the regressor and its parameters are created lazily."""
        self.regressor = None
        self.risk = risk
        self._parameters = None

    @property
    def parameters(self) -> Dict:
        """Return the regressor keyword arguments for ``self.risk``.

        The lookup runs on first access and the result is cached in
        ``self._parameters``.

        NOTE(review): the cache is not refreshed when ``self.risk`` is changed
        afterwards (e.g. through ``set_params``); reset ``_parameters`` to
        ``None`` if that is ever needed.
        """
        if self._parameters is None:
            # Imported on first use rather than at module import time.
            from data.model import MODEL_BEST_PARAMS

            self._parameters = MODEL_BEST_PARAMS[self.risk]
        return self._parameters

    def fit(self, x_data: pd.DataFrame, y_data):
        """Train the regressor and record the non-zero feature importances.

        x_data: the nxm features; a DataFrame gives named importances, any
            other array-like accepted by xgboost gives positional ones
        y_data: the n labels, with values 0,1,2 or 3

        Returns ``self`` so calls can be chained.
        """
        self.regressor = xgb.XGBRegressor(**self.parameters).fit(x_data, y_data)

        # Inputs without ``.columns`` (e.g. numpy arrays) previously crashed
        # here after training; index=None makes pandas use 0..m-1 instead.
        columns = getattr(x_data, "columns", None)
        labels = None if columns is None else columns.tolist()

        importances = pd.Series(self.regressor.feature_importances_, index=labels)
        # Keep only the features the model actually used, most important first.
        self.feature_importances_ = importances[importances > 0].sort_values(
            ascending=False
        )
        return self

    def predict(self, x_data):
        """Predict with the fitted regressor.

        x_data: the nxm features
        Returns n predictions, which have values 0 to 3, they can be floats

        Raises ``sklearn.exceptions.NotFittedError`` when called before
        :meth:`fit`.
        """
        if self.regressor is None:
            # NotFittedError subclasses both ValueError and AttributeError, so
            # callers that caught the former AttributeError keep working.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This Classifier instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        return self.regressor.predict(x_data)
Ancestors
- sklearn.base.BaseEstimator
- sklearn.base.RegressorMixin
Instance variables
var parameters : Dict
-
Expand source code
@property
def parameters(self) -> Dict:
    """Return the regressor keyword arguments registered for ``self.risk``.

    The value is read from ``data.model.MODEL_BEST_PARAMS`` on first access
    and then served from ``self._parameters`` on every later access.
    """
    cached = self._parameters
    if cached is not None:
        return cached

    # First access: resolve the table lazily and remember the entry.
    from data.model import MODEL_BEST_PARAMS

    self._parameters = MODEL_BEST_PARAMS[self.risk]
    return self._parameters
Methods
def fit(self, x_data: pandas.core.frame.DataFrame, y_data)
-
x_data: the n x m feature matrix. y_data: the n labels, with values 0, 1, 2 or 3.
Expand source code
def fit(self, x_data: pd.DataFrame, y_data):
    """Train the regressor and record the non-zero feature importances.

    x_data: the nxm features; a DataFrame gives named importances, any
        other array-like accepted by xgboost gives positional ones
    y_data: the n labels, with values 0,1,2 or 3

    Returns ``self`` so calls can be chained.
    """
    self.regressor = xgb.XGBRegressor(**self.parameters).fit(x_data, y_data)

    # Inputs without ``.columns`` (e.g. numpy arrays) previously crashed here
    # after training; index=None makes pandas use 0..m-1 instead.
    columns = getattr(x_data, "columns", None)
    labels = None if columns is None else columns.tolist()

    importances = pd.Series(self.regressor.feature_importances_, index=labels)
    # Keep only the features the model actually used, most important first.
    self.feature_importances_ = importances[importances > 0].sort_values(
        ascending=False
    )
    return self
def predict(self, x_data)
-
x_data: the n x m feature matrix. Returns n predictions, which have values 0 to 3; they can be floats.
Expand source code
def predict(self, x_data):
    """Predict with the fitted regressor.

    x_data: the nxm features
    Returns n predictions, which have values 0 to 3, they can be floats

    Raises ``sklearn.exceptions.NotFittedError`` when called before ``fit``.
    """
    if self.regressor is None:
        # NotFittedError subclasses both ValueError and AttributeError, so
        # callers that caught the former AttributeError keep working.
        from sklearn.exceptions import NotFittedError

        raise NotFittedError(
            "This Classifier instance is not fitted yet. "
            "Call 'fit' before using 'predict'."
        )
    return self.regressor.predict(x_data)