13.3. GTSDA¶
Examples
13.3.1. example_gtsda_checker_simple.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import gtsda
import numpy as np
def run_example():
"""Example of correlation analysis for simple linear data
"""
# prepare data
number_points = 50
number_dimensions = 2
x = np.random.rand(number_points, number_dimensions)
y = -0.3 * x[:, 0] + x[:, 1] + 0.05 * np.random.rand(number_points)
print('Original dependency is: y = -0.3 * x1 + x2 + 0.05 * random()')
print('The number of points is %d' % number_points)
print('')
# create GTSDA Analyzer object
analyzer = gtsda.Analyzer()
# perform checking procedure with default options
result_default = analyzer.check(x=x, y=y)
print('Results of correlation analysis with default options:')
print('=====================================================')
print('scores: %s' % result_default.scores)
print('p_values: %s' % result_default.p_values)
print('decisions: %s' % result_default.decisions)
print('\n')
# run checking procedure with Person correlation and asymptotic estimation of the p-value
options = {'GTSDA/Checker/Technique': 'PearsonCorrelation', 'GTSDA/Checker/PValues/Method': 'Asymptotic'}
result_asymp = analyzer.check(x=x, y=y, options=options)
print('Results of correlation analysis with Pearson correlation\ncoefficient and "Asymptotic" estimation of the p-value:')
print('========================================================')
print('scores: %s' % result_asymp.scores)
print('p_values: %s' % result_asymp.p_values)
print('decisions: %s' % result_asymp.decisions)
print('\n')
# run checking procedure with Person correlation and permutations estimation of the p-value
options = {'GTSDA/Checker/Technique': 'PearsonCorrelation', 'GTSDA/Checker/PValues/Method': 'Permutations'}
result_permut = analyzer.check(x=x, y=y, options=options)
print('Results of correlation analysis with Pearson correlation\ncoefficient and "Permutations" estimation of the p-value:')
print('========================================================')
print('scores: %s' % result_permut.scores)
print('p_values: %s' % result_permut.p_values)
print('decisions: %s' % result_permut.decisions)
print('\n')
# compute checking procedure with partial correlation coefficient
# Note partial correlations require explicit explanatory variable
options = {'GTSDA/Checker/Technique': 'PearsonPartialCorrelation'}
# Let us calculate correlation between components of x and y while the other components of x are used as explanatory variables
partial_scores = np.empty((1, number_dimensions)) # 1 is the number of outputs
partial_p_values = np.empty((1, number_dimensions))
partial_decisions = np.empty((1, number_dimensions))
for input_index in range(number_dimensions):
z = np.hstack((x[:,:input_index], x[:, (input_index + 1):]))
result_partial_i = analyzer.check(x=x[:, input_index], y=y, z=z, options=options)
partial_scores[:, input_index] = result_partial_i.scores
partial_p_values[:, input_index] = result_partial_i.p_values
partial_decisions[:, input_index] = result_partial_i.decisions
print('Results of correlation analysis with partial correlation coefficient:')
print('=====================================================================')
print('scores: %s' % partial_scores)
print('p_values: %s' % partial_p_values)
print('decisions: %s' % partial_decisions)
print('\n')
def main():
"""
Example of GTSDA Checker usage.
"""
print('=' * 80)
run_example()
print('=' * 80)
if __name__ == "__main__":
main()
|
13.3.2. example_gtsda_checker_plots.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""
This example illustrates the usage of check dependence functionality for various types of dependences.
You can change the technique and compare results.
"""
import os
import numpy as np
from matplotlib import pyplot as plt
from da.p7core import gtsda
def benchmark_generate_samples(n, key):
x = np.random.random(size=n)-0.5
if key == 0:
y = x
elif key == 1:
y = 0.7 * x + 0.3 * (np.random.random(n) - 0.5)
elif key == 2:
y = 0.3 * x + 0.7 * (np.random.random(n) - 0.5)
elif key == 3:
y = 0. * x + 0.9 * (np.random.random(n) - 0.5)
elif key == 4:
y = -0.3 * x + 0.7 * (np.random.random(n) - 0.5)
elif key == 5:
y = -0.8 * x + 0.2 * (np.random.random(n) - 0.5)
elif key == 6:
y = -x
elif key == 7:
y = x
elif key == 8:
y = 0.5 * x
elif key == 9:
y = 0.2 * x
elif key == 10:
y = np.zeros(n)
elif key == 11:
y = -0.2 * x
elif key == 12:
y = -0.5 * x
elif key == 13:
y = -x
elif key == 14:
y = 0.7 * (0.7 * np.cos(x * 3 * np.pi) + 1. * (np.random.random(n) - 0.5))
elif key == 15:
x_ = x
y_ = np.random.random(n) - 0.5
alpha = np.arctan(0.3)
x = x_ * np.cos(alpha) + y_ * np.sin(alpha)
y = x_ * np.sin(alpha) - y_ * np.cos(alpha)
elif key == 16:
x_ = x
y_ = np.random.random(n) - 0.5
alpha = np.pi / 4.
x = x_ * np.cos(alpha) + y_ * np.sin(alpha)
y = x_ * np.sin(alpha) - y_ * np.cos(alpha)
elif key == 17:
y = 3 * (0.9 * x**2 + 0.3 * (np.random.random(n))) - 0.5
elif key == 18:
y = 3 * (0.9 * x**2 + 0.15 * (np.random.random(n)))
for s in range(n):
if np.random.randint(2) == 0:
y[s] *= -1
elif key == 19:
phi = x * 2 * np.pi
r = 0.8 * (1. + 0.3 * (np.random.random(n) - 0.5))
x = r * np.cos(phi)
y = r * np.sin(phi)
elif key == 20:
phi = x * 2. * np.pi
r = np.random.random(n)*0.3
x = r * np.cos(phi)
y = r * np.sin(phi)
for s in range(n):
a = np.random.randint(4)
if a == 0 or a == 1:
y[s] = y[s] + 0.5
else:
y[s] = y[s] - 0.5
if a == 0 or a == 2:
x[s] = x[s] - 0.5
else:
x[s] = x[s] + 0.5
elif key == 21:
y = 0.7 * (0.9 * np.cos(x * 3. * np.pi) + 0.1 * (np.random.random(n) - 0.5))
elif key == 22:
x_ = x
y_ = np.random.random(n) - 0.5
alpha = np.arctan(0.3)
x = x_ * np.cos(alpha) + y_ * np.sin(alpha)
y = (x_ * np.sin(alpha) - y_ * np.cos(alpha))/2.
elif key == 23:
x_ = x
y_ = np.random.random(n) - 0.5
alpha = np.pi / 4.
x = x_ * np.cos(alpha) + y_ * np.sin(alpha)
y = (x_ * np.sin(alpha) - y_ * np.cos(alpha)) / 4.
elif key == 24:
y = 3 * (0.95 * x**2 + 0.05 * (np.random.random(n))) - 0.5
elif key == 25:
y = 3 * (0.95 * x**2 + 0.01 * (np.random.random(n)))
for s in range(n):
if np.random.randint(2) == 0:
y[s] *= -1
elif key == 26:
phi = x * 2 * np.pi
r = 0.8 * (1. + 0.1 * (np.random.random(n) - 0.5))
x = r * np.cos(phi)
y = r * np.sin(phi)
elif key == 27:
phi = x * 2. * np.pi
r = np.random.random(n)*0.2
x = r * np.cos(phi)
y = r * np.sin(phi)
for s in range(n):
a = np.random.randint(4)
if a == 0 or a == 1:
y[s] = y[s] + 0.5
else:
y[s] = y[s] - 0.5
if a == 0 or a == 2:
x[s] = x[s] - 0.5
else:
x[s] = x[s] + 0.5
return x.reshape(-1, 1), y.reshape(-1, 1)
def calc_correlation(x, y, corrtype):
analyzer = gtsda.Analyzer()
options = {'gtsda/checker/technique':corrtype, 'gtsda/checker/pvalues/enable':False}
analyzer.options.set(options)
return analyzer.check(x, y).scores[0][0]
def make_big_plot(corrtype='distancecorrelation', save=False):
fig = plt.figure(figsize=(21, 12))
fig.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05, wspace=0.4, hspace=0.4)
for k in range(28):
x, y = benchmark_generate_samples(1000, k)
plt.subplot(4, 7, 1 + k)
plt.axis([-1, 1, -1, 1])
plt.scatter(x, y, marker='.', color='b')
printedvalue = calc_correlation(x, y, corrtype)
print("Sample #%d, score: %s" % (k, printedvalue))
plt.text(-0.8, 0.8, str(round(printedvalue, 15)), color='r')
if save == True:
plt.savefig('example_gtsda_checker_'+corrtype + '.png')
if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
plt.show()
if __name__ == "__main__":
make_big_plot(corrtype="distancecorrelation", save=True)
|
13.3.3. example_gtsda_checker_partial.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""
This example illustrates the fundamental usage of correlation analysis in treating high-dimensional
problems. Commonly used Pearson correlation fails to correctly detect dependency in this case.
This difficulty is solved by using partial Pearson correlation. The difference is due to the fact
that the method excludes the possible influence of other inputs when calculating correlation between
the considered input and output.
"""
#[0]
from da.p7core import gtsda
import os
import numpy as np
#[m1]
def run():
dirpath = os.path.dirname(__file__)
filepath = os.path.join(dirpath, 'TAXI2000.csv')
print("Load data from %s" % filepath)
data = np.loadtxt(filepath, delimiter=",")
x = data[:, :-1]
y = data[:, -1:]
#[m2]
print('Create analyzer object.')
print('The Pearson partial correlation will be used in current case.')
analyzer = gtsda.Analyzer()
print('Compute score values...')
scores_partial = []
scores_pearson = []
for input_index in range(x.shape[1]):
z = np.hstack((x[:,:input_index], x[:, (input_index + 1):])) # let other x columns be the explanatory variables matrix
# Calculate scores with partial Pearson correlation coefficient
result = analyzer.check(x=x[:, input_index], y=y, z=z, options={'GTSDA/Checker/Technique': 'PearsonPartialCorrelation'})
# use only statistically significant correlations
if result.decisions[0, 0]:
scores_partial.append(np.fabs(result.scores[0, 0]))
print(' feature #%-3d: partial Pearson score=%-15.5g p-value=%-15.5g decision: %d' % (1 + input_index, result.scores[0, 0], result.p_values[0, 0], result.decisions[0, 0]))
# Calculate scores with Pearson correlation coefficient
result = analyzer.check(x=x[:, input_index], y=y, z=z, options={'GTSDA/Checker/Technique': 'PearsonCorrelation'})
# use only statistically significant correlations
if result.decisions[0, 0]:
scores_pearson.append(np.fabs(result.scores[0, 0]))
print(' feature #%-3d: Pearson score=%-15.5g p-value=%-15.5g decision: %d' % (1 + input_index, result.scores[0, 0], result.p_values[0, 0], result.decisions[0, 0]))
print('')
#[m3]
# Convert correlation coefficients to sorted list of scores
scores_partial = sorted(scores_partial, reverse=True)
scores_pearson = sorted(scores_pearson, reverse=True)
print('\nTotal features number: %d' % x.shape[1])
print('The number of statistically significant scores based on the partial Pearson correlation coefficients: %d' % len(scores_partial))
print('The number of statistically significant scores based on the Pearson correlation coefficients: %d\n' % len(scores_pearson))
print('Statistically significant scores based on the partial Pearson correlation coefficients: %s\n' % scores_partial)
print('Statistically significant scores based on the Pearson correlation coefficients: %s\n' % scores_pearson)
plot(scores_partial, scores_pearson)
#[m4]
def plot(scores_partial, scores_pearson):
try:
import matplotlib.pyplot as plt
print('Plotting...')
# GTSDA scores
plt.subplot(111)
features_number = max(len(scores_partial), len(scores_pearson))
plt.scatter(np.arange(1, 1 + len(scores_partial)), scores_partial, s=10, c='r', label='Pearson partial correlation')
plt.scatter(np.arange(1, 1 + len(scores_pearson)), scores_pearson, s=10, c='b', label='Pearson correlation')
plt.xlabel('Feature number')
plt.ylabel('Statistically significant score')
plt.grid(True)
plt.legend(loc='best')
# save and show plots
name = 'gtsda_example_checker_partial'
plt.savefig(name)
print('Plots are saved to %s.png' % os.path.join(os.getcwd(), name))
print('On the plot we see that relative score values closely resemble the index of variability.')
print('From this plot one may conclude that there are only 12 important variables in the considered region and the rest 151 may be dropped in the analysis.')
if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
plt.show()
except ImportError:
print('Plotting is not available due to the matplotlib library absence.')
#[m0]
if __name__ == "__main__":
# run GTSDA example
run()
|
13.3.4. example_gtsda_ranker_simple.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import blackbox, gtsda
from da.p7core.loggers import StreamLogger
import numpy as np
class ExampleBlackbox(blackbox.Blackbox):
"""
Problem representation for GTSDA ranker in blackbox mode
"""
def prepare_blackbox(self):
# add new variable in problem
self.add_variable((0, 1))
self.add_variable((0, 1))
self.add_variable((0, 1))
# add new response in problem
self.add_response()
def evaluate(self, queryx):
result = []
for x in queryx:
result.append(sum(x))
return result
def blackbox():
"""
Example for estimate variable scores for a blackbox
"""
# create ranker
ranker = gtsda.Analyzer()
# set options
ranker.options.set("GTSDA/Seed", 100)
# set logger, by default StreamLogger output to sys.stdout
ranker.set_logger(StreamLogger())
# create problem
bbox = ExampleBlackbox()
budget = 350
# get result
result = ranker.rank(blackbox=bbox, budget=budget)
# print some info about result
print(str(result))
print("\nResults with default options (screening indices are selected):")
print('-' * 60)
for i, s in enumerate(result.scores):
print('score for blackbox response[%d]: %s' % (i, s))
print('-' * 60)
result = ranker.rank(blackbox=bbox, budget=budget, options={'GTSDA/Ranker/Technique':'sobol'})
# print some info about result
print(str(result))
print("\nResults with Sobol indices:")
print('-' * 60)
for i, s in enumerate(result.scores):
print('score for blackbox response[%d]: %s' % (i, s))
print('-' * 60)
def sample():
"""
Example for estimate variable scores for input variables with respect to each output variable based on a "solid" sample given by user
"""
# prepare data
# note, than tool need big training sample
number = 1000
input_dimension = 4
output_dimension = 2
np.random.seed(100)
# input part of sample
x = np.random.rand(number, input_dimension)
# output part of sample
y = np.hstack((x[:, [0]] + 2 * x[:, [1]], x[:, [3]]))
# create ranker
ranker = gtsda.Analyzer()
# set Logger
ranker.set_logger(StreamLogger())
# get result
result = ranker.rank(x=x, y=y)
# print info about results:
print(str(result))
print("\nResults:")
print('-' * 60)
for i, s in enumerate(result.scores):
print('score for output[%d]: %s' % (i, s))
print('-' * 60)
def main():
"""
Example of GTSDA Ranker usage.
"""
print('=' * 60)
# example for Sample-based type of algorithm
sample()
print('=' * 60)
# example for Blackbox-based type of algorithm
blackbox()
print('=' * 60)
if __name__ == "__main__":
main()
|
13.3.5. example_gtsda_ranker_screening.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import gtsda
import numpy as np
def main():
"""
Example of screening indices computation with GTSDA Ranker.
"""
# prepare data
number_points = 100
input_dimension = 4
np.random.seed(100)
# input part of sample
x = np.random.rand(number_points, input_dimension) * 2 - 1
# output part of sample
y = x[:, 0] + 2 * x[:, 1] + x[:, 2]**2 + x[:, 3]**3
# doing analysis...
analyzer = gtsda.Analyzer()
rank_result = analyzer.rank(x=x, y=y, options={'GTSDA/Ranker/Technique': 'screening'})
# or just rank_result = analyzer.rank(x=x, y=y) as 'screening' is the default index type
# and reading results...
mu_star = rank_result.info['Ranker']['Detailed info']['mu_star']
mu = rank_result.info['Ranker']['Detailed info']['mu']
sigma = rank_result.info['Ranker']['Detailed info']['sigma']
print("mu_star: %s" % mu_star)
print("mu: %s" % mu)
print("sigma: %s" % sigma)
if __name__ == "__main__":
main()
|
13.3.6. example_gtsda_ranker_sobol.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import gtsda
import numpy as np
def main():
"""
Example of Sobol indices computation with GTSDA Ranker.
"""
# prepare data
number_points = 2000
input_dimension = 4
np.random.seed(100)
# input part of sample
x = np.random.rand(number_points, input_dimension) * 2 - 1
# output part of sample
y = x[:, 0]**2 + 2 * x[:, 0] * x[:, 1] + x[:, 2]**2
# doing analysis...
analyzer = gtsda.Analyzer()
rank_result = analyzer.rank(x=x, y=y, options={'GTSDA/Ranker/Technique': 'sobol'})
# and reading results...
total_indices = rank_result.info['Ranker']['Detailed info']['Total indices']
main_indices = rank_result.info['Ranker']['Detailed info']['Main indices']
interact_indices = rank_result.info['Ranker']['Detailed info']['Interaction indices']
print("Total indices: %s" % total_indices)
print("Main indices: %s" % main_indices)
print("Intearaction indices: %s" % interact_indices)
if __name__ == "__main__":
main()
|
13.3.7. example_gtsda_ranker_taguchi.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import gtsda
import numpy as np
def main():
"""
Example of Taguchi indices computation with GTSDA Ranker.
"""
# define sample
x = np.array([[100, 2, 4, 0.1],
[100, 5, 6, 0.2],
[100, 8, 8, 0.3],
[150, 2, 6, 0.3],
[150, 5, 8, 0.1],
[150, 8, 4, 0.2],
[200, 2, 8, 0.2],
[200, 5, 4, 0.3],
[200, 8, 6, 0.1]])
y = np.array([[87.3, 82.3, 70.7],
[74.8, 70.7, 63.2],
[56.5, 54.0, 45.7],
[79.8, 78.2, 62.3],
[77.3, 76.5, 54.0],
[89.0, 87.3, 83.2],
[64.8, 62.3, 55.7],
[99.0, 93.2, 87.3],
[75.7, 74.0, 63.2]])
x = np.tile(x, (3, 1))
y = y.reshape(27, 1, order='F')
# set options
options = {"GTSDA/Ranker/Technique": "Taguchi",
"GTSDA/Ranker/Taguchi/Method": "signal_to_noise"}
# rank
ranker = gtsda.Analyzer()
result = ranker.rank(x=x, y=y, options=options)
# print result
print(str(result.scores))
if __name__ == "__main__":
main()
|
13.3.8. example_gtsda_selector_simple.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
from da.p7core import gtsda
from da.p7core.loggers import StreamLogger
import numpy as np
import sys
def _make_sample(sample_size, input_dim, func):
x = np.random.rand(sample_size, input_dim)
# output part of sample
y = func(x)
return x, y
def mystery_function(x):
"""Example function.
Args:
x: 2D point or points batch (a list of two NumPy arrays).
Returns:
Single function value, or an array of values. Array shape is the same
as input shape.
"""
term1 = x[:, 1] - 5 * x[:, 0] * x[:, 0]
term2 = 1 - 5 * x[:, 0]
term3 = 2 - 5 * x[:, 1]
term4 = np.sin(2.5 * x[:, 0]) * np.sin(17.5 * x[:, 0] * x[:, 1])
result = 2 + 0.25 * term1 * term1 + term2 * term2 + 2 * term3 * term3 + 7 * term4 + 10 * x[:, 2]**2
return result
def get_ranking(x, y, analyzer, direction='dec'):
try:
scores = analyzer.rank(x=x, y=y).scores[0]
error = None
except:
error = "Error occurred in the ranking procedure: %s." % sys.exc_info()[1]
scores = None
if error is not None:
raise Exception(error)
# Get ranks based on scores
if direction == 'inc':
ranks = np.argsort(scores)
elif direction == 'dec':
ranks = np.argsort(-scores)
else:
raise ValueError("Direction of the ranking should be 'inc' or 'dec'")
print('SCORES: %s' % str(scores))
print('RANKS: %s' % str(ranks))
return ranks, scores
def run_example():
"""Example for estimate variable scores for input variables with respect to each output variable based on a "solid" sample given by user
"""
# prepare data
sample_size = 100
input_dim = 4
np.random.seed(100)
x, y = _make_sample(sample_size, input_dim, mystery_function)
optimal_feature_list = [0, 1, 2]
# create analyzer
analyzer = gtsda.Analyzer()
# set Logger
analyzer.set_logger(StreamLogger())
ranking, _ = get_ranking(x, y, analyzer)
# get results with internal validation error computation (IV)
options = {'GTSDA/Selector/ValidationType': 'internal'}
result_internal = analyzer.select(x=x, y=y, ranking=ranking, options=options)
# get results with train sample error computation
options = {'GTSDA/Selector/ValidationType': 'trainsample'}
result_test = analyzer.select(x=x, y=y, ranking=ranking, options=options)
print("\nOptimal features: %s" % optimal_feature_list)
print("Selected features with IV: %s" % result_internal.feature_list[:, 0])
print("Selected features with train sample validation: %s" % result_test.feature_list[:, 0])
def main():
"""
Example of GTSDA Selector usage.
"""
print('=' * 60)
run_example()
print('=' * 60)
if __name__ == "__main__":
main()
|