Objectives example

Import modules and prepare data

[1]:
from hypernets.core.random_state import set_random_state
set_random_state(1234)

from hypernets.utils import logging as hyn_logging
from hypergbm import make_experiment

from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils


hyn_logging.set_level(hyn_logging.WARN)

df = dsutils.load_bank().head(10000)
tb = get_tool_box(df)
df_train, df_test = tb.train_test_split(df, test_size=0.2, random_state=9527)

Number Of Features objective example

This example shows how to use the NumberOfFeatures objective ('nf') to reduce model complexity. With a multi-objective searcher, the experiment returns one estimator per non-dominated (Pareto-optimal) trial.

[2]:
import numpy as np

experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['nf'],  # use NumberOfFeatures as objective
                             drift_detection=False)

estimators = experiment.run(max_trials=30)
[3]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True][['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_nf', 'model_index']]
[3]:
    trial_no  succeeded  non_dominated  reward_logloss  reward_nf  model_index
9         10       True           True        0.283389     0.0000          0.0
17        18       True           True        0.258480     0.5000          1.0
18        19       True           True        0.199278     0.8125          2.0
25        26       True           True        0.125533     0.9375          3.0
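Each non-dominated trial corresponds to one entry in the returned estimators list, ordered by model_index. As a minimal sketch (assuming the returned estimators expose a scikit-learn style predict_proba and that the list is ordered by model_index), the model from trial 26 can be re-scored on the hold-out set:

[ ]:
from sklearn.metrics import log_loss

# Pick the Pareto-optimal model from trial 26 (model_index 3 above);
# assumes estimators is ordered by model_index.
model = estimators[3]

X_test = df_test.drop('y', axis=1)
y_test = df_test['y']

# The hold-out log loss should roughly match reward_logloss above.
print(log_loss(y_test, model.predict_proba(X_test)))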

Prediction performance objective example

[4]:
experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['pred_perf'],  # use PredictionPerformanceObjective as objective
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[5]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True][['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_pred_perf', 'model_index']]
[5]:
   trial_no  succeeded  non_dominated  reward_logloss  reward_pred_perf  model_index
3         4       True           True        0.199202          0.020022          0.0
5         6       True           True        0.121710          0.022027          1.0
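reward_pred_perf reflects each model's prediction cost. As a rough cross-check, one can time a batch prediction directly; a minimal sketch with time.perf_counter (exactly what the objective measures, e.g. elapsed time per call, is an assumption here):

[ ]:
import time

# Time one batch prediction as a crude proxy for the
# PredictionPerformanceObjective score (assumption: the objective
# is based on prediction elapsed time).
model = estimators[0]
X_test = df_test.drop('y', axis=1)

start = time.perf_counter()
model.predict(X_test)
print(f'prediction took {time.perf_counter() - start:.4f}s')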

Feature usage objective example

An example of automatic feature selection with the FeatureUsage objective ('feature_usage').

[6]:
experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['feature_usage'],
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[7]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True].drop(['reward', 'scores'], axis=1)
[7]:
    trial_no  succeeded   elapsed  non_dominated  model_index  reward_logloss  reward_feature_usage
4          5       True  0.491640           True          0.0        0.285834                0.1875
5          6       True  0.912090           True          1.0        0.094966                0.6875
7          8       True  0.493440           True          2.0        0.152629                0.3750
13        14       True  0.387586           True          3.0        0.230497                0.3125

View the input features used by a model

[8]:
model_index = 0
# Drill into the fitted pipeline to list the features (with importances)
# kept by the feature-selection step for this model.
(experiment.hyper_model_.history.get_best()[model_index]
 .get_model().data_pipeline[0].features[0][1].steps[0][1]
 .important_features)
[8]:
[('duration', 1712.328210838139),
 ('month', 429.7665938436985),
 ('age', 313.9162983652204)]
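The reward_feature_usage values above look like the fraction of input features each model consumes: the bank dataset has 16 input columns besides the target, and the three features listed for model_index 0 give 3/16 = 0.1875, matching the table. A sanity-check sketch (assuming important_features lists exactly the retained features):

[ ]:
# Re-fetch the retained features for the selected model.
selected = (experiment.hyper_model_.history.get_best()[model_index]
            .get_model().data_pipeline[0].features[0][1].steps[0][1]
            .important_features)

n_inputs = df_train.shape[1] - 1  # all columns except the target 'y'
print(len(selected) / n_inputs)   # expected: 3 / 16 = 0.1875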

PSI objective example

[9]:
experiment = make_experiment(df_train,
                             test_data=df_test.copy().drop('y', axis=1),
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['psi'],
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[10]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True].drop(['reward', 'scores'], axis=1)
[10]:
  trial_no  succeeded   elapsed  non_dominated  model_index  reward_logloss  reward_psi
3        4       True  0.474756           True          0.0        0.107476         0.0
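The Population Stability Index measures distribution shift between two samples over shared bins: PSI = sum_i (p_i - q_i) * ln(p_i / q_i). A minimal NumPy sketch of the formula on a single numeric column, for illustration only (the binning strategy and the exact quantity HyperGBM's 'psi' objective computes are assumptions):

[ ]:
import numpy as np

def psi(expected, actual, n_bins=10, eps=1e-6):
    # Bin edges taken from the 'expected' (training) sample.
    edges = np.histogram_bin_edges(expected, bins=n_bins)
    p = np.histogram(expected, bins=edges)[0] / len(expected)
    q = np.histogram(actual, bins=edges)[0] / len(actual)
    p, q = p + eps, q + eps  # guard against empty bins / log(0)
    return np.sum((p - q) * np.log(p / q))

print(psi(df_train['age'].values, df_test['age'].values))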