Objectives example

Import modules and prepare data

[1]:
from hypernets.core.random_state import set_random_state
set_random_state(1234)

from hypernets.utils import logging as hyn_logging
from hypergbm import make_experiment

from hypernets.tabular import get_tool_box
from hypernets.tabular.datasets import dsutils


hyn_logging.set_level(hyn_logging.WARN)

df = dsutils.load_bank().head(10000)
tb = get_tool_box(df)
df_train, df_test = tb.train_test_split(df, test_size=0.2, random_state=9527)

Number Of Features objective example

This example shows how to use the NumberOfFeatures objective ('nf') to reduce model complexity. With a multi-objective searcher, the experiment returns one estimator per non-dominated (Pareto-optimal) trial.

[2]:
import numpy as np

experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['nf'],  # use NumberOfFeatures as objective
                             drift_detection=False)

estimators = experiment.run(max_trials=30)
[3]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True][['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_nf', 'model_index']]
[3]:
    trial_no  succeeded  non_dominated  reward_logloss  reward_nf  model_index
9         10       True           True        0.283389     0.0000          0.0
17        18       True           True        0.258480     0.5000          1.0
18        19       True           True        0.199278     0.8125          2.0
25        26       True           True        0.125533     0.9375          3.0
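Each non-dominated trial corresponds to one entry in the returned estimators list, ordered by model_index. As a minimal sketch (assuming the returned estimators expose a scikit-learn style predict_proba and that the list is ordered by model_index), the model from trial 26 can be re-scored on the hold-out set:

[ ]:
from sklearn.metrics import log_loss

# Pick the Pareto-optimal model from trial 26 (model_index 3 above);
# assumes estimators is ordered by model_index.
model = estimators[3]

X_test = df_test.drop('y', axis=1)
y_test = df_test['y']

# The hold-out log loss should roughly match reward_logloss above.
print(log_loss(y_test, model.predict_proba(X_test)))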

Prediction performance objective example

[4]:
experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['pred_perf'],  # use PredictionPerformanceObjective as objective
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[5]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True][['trial_no', 'succeeded', 'non_dominated', 'reward_logloss', 'reward_pred_perf', 'model_index']]
[5]:
   trial_no  succeeded  non_dominated  reward_logloss  reward_pred_perf  model_index
3         4       True           True        0.199202          0.020022          0.0
5         6       True           True        0.121710          0.022027          1.0
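reward_pred_perf reflects each model's prediction cost. As a rough cross-check, one can time a batch prediction directly; a minimal sketch with time.perf_counter (exactly what the objective measures, e.g. elapsed time per call, is an assumption here):

[ ]:
import time

# Time one batch prediction as a crude proxy for the
# PredictionPerformanceObjective score (assumption: the objective
# is based on prediction elapsed time).
model = estimators[0]
X_test = df_test.drop('y', axis=1)

start = time.perf_counter()
model.predict(X_test)
print(f'prediction took {time.perf_counter() - start:.4f}s')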

Feature usage objective example

An example of automatic feature selection with the FeatureUsage objective ('feature_usage').

[6]:
experiment = make_experiment(df_train,
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['feature_usage'],
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[7]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True].drop(['reward', 'scores'], axis=1)
[7]:
    trial_no  succeeded   elapsed  non_dominated  model_index  reward_logloss  reward_feature_usage
4          5       True  0.491640           True          0.0        0.285834                0.1875
5          6       True  0.912090           True          1.0        0.094966                0.6875
7          8       True  0.493440           True          2.0        0.152629                0.3750
13        14       True  0.387586           True          3.0        0.230497                0.3125

View the input features used by a model

[8]:
model_index = 0
# Drill into the fitted pipeline to list the features (with importances)
# kept by the feature-selection step for this model.
(experiment.hyper_model_.history.get_best()[model_index]
 .get_model().data_pipeline[0].features[0][1].steps[0][1]
 .important_features)
[8]:
[('duration', 1712.328210838139),
 ('month', 429.7665938436985),
 ('age', 313.9162983652204)]
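The reward_feature_usage values above look like the fraction of input features each model consumes: the bank dataset has 16 input columns besides the target, and the three features listed for model_index 0 give 3/16 = 0.1875, matching the table. A sanity-check sketch (assuming important_features lists exactly the retained features):

[ ]:
# Re-fetch the retained features for the selected model.
selected = (experiment.hyper_model_.history.get_best()[model_index]
            .get_model().data_pipeline[0].features[0][1].steps[0][1]
            .important_features)

n_inputs = df_train.shape[1] - 1  # all columns except the target 'y'
print(len(selected) / n_inputs)   # expected: 3 / 16 = 0.1875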

PSI objective example

[9]:
experiment = make_experiment(df_train,
                             test_data=df_test.copy().drop('y', axis=1),
                             eval_data=df_test.copy(),
                             callbacks=[],
                             search_callbacks=[],
                             target='y',
                             searcher='nsga2',  # available MOO searchers: moead, nsga2, rnsga2
                             reward_metric='logloss',
                             objectives=['psi'],
                             drift_detection=False)
estimators = experiment.run(max_trials=30)
[10]:
df_trials = experiment.hyper_model_.history.to_df()
df_trials[df_trials['non_dominated'] == True].drop(['reward', 'scores'], axis=1)
[10]:
  trial_no  succeeded   elapsed  non_dominated  model_index  reward_logloss  reward_psi
3        4       True  0.474756           True          0.0        0.107476         0.0
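The Population Stability Index measures distribution shift between two samples over shared bins: PSI = sum_i (p_i - q_i) * ln(p_i / q_i). A minimal NumPy sketch of the formula on a single numeric column, for illustration only (the binning strategy and the exact quantity HyperGBM's 'psi' objective computes are assumptions):

[ ]:
import numpy as np

def psi(expected, actual, n_bins=10, eps=1e-6):
    # Bin edges taken from the 'expected' (training) sample.
    edges = np.histogram_bin_edges(expected, bins=n_bins)
    p = np.histogram(expected, bins=edges)[0] / len(expected)
    q = np.histogram(actual, bins=edges)[0] / len(actual)
    p, q = p + eps, q + eps  # guard against empty bins / log(0)
    return np.sum((p - q) * np.log(p / q))

print(psi(df_train['age'].values, df_test['age'].values))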