xgboost_experiment.py
import xgboost as xgb
from hyperopt import hp
from experiment import Experiment


class XGBExperiment(Experiment):
def __init__(self, learning_task, n_estimators=5000, max_hyperopt_evals=50,
counters_sort_col=None, holdout_size=0,
train_path=None, test_path=None, cd_path=None, output_folder_path='./'):
Experiment.__init__(self, learning_task, 'xgb', n_estimators, max_hyperopt_evals,
True, counters_sort_col, holdout_size,
train_path, test_path, cd_path, output_folder_path)
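        # Hyperopt search space: hp.loguniform bounds are natural-log exponents,
        # so 'eta' is sampled log-uniformly between exp(-7) ~ 1e-3 and exp(0) = 1,
        # and hp.choice lets the regularization terms collapse to exactly 0.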
self.space = {
'eta': hp.loguniform('eta', -7, 0),
'max_depth' : hp.quniform('max_depth', 2, 10, 1),
'subsample': hp.uniform('subsample', 0.5, 1),
'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),
'colsample_bylevel': hp.uniform('colsample_bylevel', 0.5, 1),
'min_child_weight': hp.loguniform('min_child_weight', -16, 5),
'alpha': hp.choice('alpha', [0, hp.loguniform('alpha_positive', -16, 2)]),
'lambda': hp.choice('lambda', [0, hp.loguniform('lambda_positive', -16, 2)]),
'gamma': hp.choice('gamma', [0, hp.loguniform('gamma_positive', -16, 2)])
}
self.default_params = {'eta': 0.3, 'max_depth': 6, 'subsample': 1.0,
'colsample_bytree': 1.0, 'colsample_bylevel': 1.0,
'min_child_weight': 1, 'alpha': 0, 'lambda': 1, 'gamma': 0}
self.default_params = self.preprocess_params(self.default_params)
        self.title = 'XGBoost'

    def preprocess_params(self, params):
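        # Note: 'silent' and 'reg:linear' match the older XGBoost API this
        # benchmark targets; newer releases use 'verbosity' and 'reg:squarederror'.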
if self.learning_task == "classification":
params.update({'objective': 'binary:logistic', 'eval_metric': 'logloss', 'silent': 1})
elif self.learning_task == "regression":
params.update({'objective': 'reg:linear', 'eval_metric': 'rmse', 'silent': 1})
params['max_depth'] = int(params['max_depth'])
        return params

    def convert_to_dataset(self, data, label, cat_cols=None):
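        # cat_cols is accepted for interface compatibility with the other
        # Experiment subclasses but ignored: this wrapper expects categorical
        # features to be numerically encoded before they reach XGBoost.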
        return xgb.DMatrix(data, label)

    def fit(self, params, dtrain, dtest, n_estimators, seed=0):
params.update({"seed": seed})
evals_result = {}
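        # xgb.train fills evals_result with the per-iteration metric values for
        # each named DMatrix in `evals`, keyed as evals_result['test'][metric].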
bst = xgb.train(params, dtrain, evals=[(dtest, 'test')], evals_result=evals_result,
num_boost_round=n_estimators, verbose_eval=False)
results = evals_result['test']['rmse'] if self.learning_task == 'regression' \
else evals_result['test']['logloss']
        return bst, results

    def predict(self, bst, dtest, X_test):
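        # X_test is unused: prediction only needs the DMatrix, but the argument
        # is kept so every Experiment subclass exposes the same signature.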
preds = bst.predict(dtest)
return preds
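
A minimal usage sketch, assuming the Experiment base class in experiment.py accepts these arguments without further setup at construction time; the .tsv/.cd paths are placeholders, not files from this repository:

exp = XGBExperiment('classification', n_estimators=500, max_hyperopt_evals=10,
                    train_path='train.tsv', test_path='test.tsv', cd_path='train.cd')
print(exp.title)           # 'XGBoost'
print(exp.default_params)  # includes 'objective': 'binary:logistic' after preprocessing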