13.2. GTApprox¶
Examples
13.2.1. example_adaptive_doe.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
import numpy as np
"""Using adaptive DOE for approximation example."""
from da.p7core import gtapprox
from da.p7core.loggers import LogLevel, StreamLogger
from da.p7core import blackbox as bb
class TestProblem(bb.Blackbox):
def prepare_blackbox(self):
self.add_variable((0, 1))
self.add_variable((0, 1))
self.add_response()
def evaluate(self, queryx):
x1 = queryx[:,0]
x2 = queryx[:,1]
f = 2.0 +0.25 *(x2 - 5.0 *x1**2)**2 + (1.0 -5.0 *x1)**2 +2.0 *(2.0 -5.0*x2)**2 +7.0 *np.sin(2.5 *x1) *np.sin(17.5 *x1 *x2)
return f.reshape((queryx.shape[0], 1))
initial_x = np.array([[0.65, 0.15],
[0.55, 0.45],
[0.75, 0.85],
[0.35, 0.25],
[0.05, 0.05],
[0.15, 0.55],
[0.95, 0.65],
[0.45, 0.75],
[0.85, 0.35],
[0.25, 0.95]])
def get_sample(problem, points, x_sample=None, f_sample=None):
"""Data set generation."""
from da.p7core import gtdoe
doegenerator = gtdoe.Generator()
doegenerator.options.set({'GTDoE/Technique': 'LHS'})
result = doegenerator.build_doe(problem, count=points, sample_x=x_sample, sample_f=f_sample)
xSample = result.solutions(["x"], filter_type="new")
fSample = result.solutions(["f"], filter_type="new")
return xSample, fSample
def main():
"""Example to demonstrate AE and adaptive sampling."""
# create builder
builder = gtapprox.Builder()
options = {
'GTApprox/InternalValidation': 'off',
'GTApprox/AccuracyEvaluation': 'on',
'GTApprox/ExactFitRequired': 'on',
'GTApprox/Technique': 'GP',
'GTApprox/LogLevel': 'Info'
}
builder.options.set(options)
builder.set_logger(StreamLogger(log_level=LogLevel.ERROR))
# prepare initial training sample
problem = TestProblem()
initial_f = problem.evaluate(initial_x)
print("Training initial model...")
model = builder.build(initial_x, initial_f)
print("Done!")
# improve model
iterations = 6
additionalSize = 10
testSize = 500
x_sample = initial_x
f_sample = initial_f
for _ in range(iterations):
# generate new big test sample
x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)
# calculate actual and predicted errors on test sample
errors = model.validate(x_test, f_test)
print("Training sample size: %d, RMSE: %s" % (len(x_sample), errors["RRMS"]))
# find max predicted error in test sample points
ae_predictions = model.calc_ae(x_test).reshape((len(x_test,)))
idx = np.argsort(ae_predictions)
# add additionalSize points with maximal AE values to the training samples
x_sample = np.vstack((x_sample, x_test[idx[-additionalSize:]]))
f_sample = np.vstack((f_sample, f_test[idx[-additionalSize:]]))
# build next improved model
model = builder.build(x_sample, f_sample)
# calculate final error
x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)
errors = model.validate(x_test, f_test)
print("Training sample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))
print("Final result:\n--------------\nSample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))
if __name__ == "__main__":
main()
|
13.2.2. example_gtapprox.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""Basic GTApprox Usage."""
from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger
import math, random
def main():
"""
Example of GTApprox usage.
"""
# prepare data
sSize = 7
dim = 2
random.seed(10)
# generate random hypercube and evaluate function
x_sample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]
y_sample = [[x[i]**(i+1) for i in range(len(x))] for x in x_sample]
# create approximation builder
builder = gtapprox.Builder()
# setup options
options = {
'GTApprox/AccuracyEvaluation': 'on',
'GTApprox/Technique': 'GP',
'GTApprox/InternalValidation': 'on',
'GTApprox/LogLevel': 'Info'
}
builder.options.set(options)
# setup logger function (optional)
logger = StreamLogger()
builder.set_logger(logger)
# train model
model = builder.build(x_sample, y_sample)
# save trained model
model.save('approxModel.gta')
# load trained model and use it
loaded_model = gtapprox.Model('approxModel.gta')
# print model information
print('----------- Model -----------')
print('SizeX: %d' % loaded_model.size_x)
print('SizeF: %d' % loaded_model.size_f)
print('Model has AE: %s' % loaded_model.has_ae)
print('----------- Info -----------')
print(str(loaded_model))
# create test sample
test_xsample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]
# calculate and display approximated values
for x in test_xsample:
y = loaded_model.calc(x)
print('Model Y: %s' % y)
# calculate and display gradients
for x in test_xsample:
dy = loaded_model.grad(x, gtapprox.GradMatrixOrder.F_MAJOR)
print('Model gradient: %s' % dy)
# calculate and display AE
for x in test_xsample:
ae = loaded_model.calc_ae(x)
print('Model AE: %s' % ae)
if __name__ == "__main__":
main()
|
13.2.3. example_internal_validation.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""Internal Validation Example"""
from da.p7core import gtapprox, gtdoe
from da.p7core.loggers import StreamLogger
import numpy as np
def rosenbrock(x):
"""
Rosenbrock function (simple one)
"""
x1 = x[:,0]
x2 = x[:,1]
return (1.0-x1)**2 +100*(x2-x1**2)**2
def rastrigin(x):
"""
Rastrigin function (very complex one)
"""
x1 = x[:,0]
x2 = x[:,1]
return 20. + (x1**2 -10 *np.cos(2.0*np.pi*x1)) +(x2**2 - 10* np.cos(2.0*np.pi*x2))
def main():
"""
Example to demonstrate IV
"""
# create generator
generator = gtdoe.Generator()
# create builder
builder = gtapprox.Builder()
builder.options.set('GTApprox/LogLevel', 'Info')
builder.options.set('GTApprox/InternalValidation', 'on')
#set logger
logger = StreamLogger()
builder.set_logger(logger)
# data description
dim_x = 2
dim_f = 1
bounds = ([-1]*dim_x, [1]*dim_x)
training_sample_size = 20
test_sample_size = 25**dim_x
# generating training input sample
result = generator.build_doe(bounds, training_sample_size, options={'GTDoE/Technique': 'LHS'})
x_sample = result.solutions(["x"])
print("IV example\n---------------")
# generating training output sample for rosenbrock model
y_sample = rosenbrock(x_sample)
print("Training model 1...")
model_rosenbrock = builder.build(x_sample, y_sample)
print("Done!\n")
# generating training output sample for rastrigin model
y_sample = rastrigin(x_sample)
print("Training model 2...")
model_rastrigin = builder.build(x_sample, y_sample)
print("Done!\n\n")
# generating test sample
result = generator.build_doe(bounds, test_sample_size, options={'GTDoE/Technique': 'FullFactorial'})
x_test = result.solutions(["x"])
f_rosenbrock = rosenbrock(x_test)
f_rastrigin = rastrigin(x_test)
# calculating error on test sample
err_rosenbrock = model_rosenbrock.validate(x_test, f_rosenbrock)
err_rastrigin = model_rastrigin.validate(x_test, f_rastrigin)
print("Actual RMS error on Rosenbrock function:")
print(str(err_rosenbrock["RMS"]))
print("Internal validation results for Rosenbrock function:")
print(str(model_rosenbrock.iv_info["Componentwise"]["RMS"]))
print("\n\nActual RMS error on Rastrigin function:")
print(str(err_rastrigin["RMS"]))
print("Internal validation results for Rastrigin function:")
print(str(model_rastrigin.iv_info["Componentwise"]["RMS"]))
if __name__ == "__main__":
main()
|
13.2.4. example_axis_rotations.py¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 | #
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""
Example illustrates benefits of the GTApprox/MaxAxisRotations option usage.
"""
#[0] Required imports
import os
import sys
import numpy as np
from datetime import datetime
from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger
#[0] imports end
#[1] Function to be approximated: normalized Rana's function with diagonal wrap
def rana(x):
# Evaluate n-dimensional normalized Rana function.
# Note 2D case can be reduced to the following formula:
# x1 * sin(sqrt(abs(x2 - x1 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
# + (x1 + 1) * cos(sqrt(abs(x1 - x2 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))
# + x2 * sin(sqrt(abs(x1 - x2 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
# + (x2 + 1) * cos(sqrt(abs(x2 - x1 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))
x = np.array(x, dtype=float)
xj = x[:, range(-1, x.shape[1] - 1)]
t1 = np.sqrt(np.abs(xj - x + 1))
t2 = np.sqrt(np.abs(xj + x + 1))
return (x * np.sin(t1) * np.cos(t2) + (xj + 1.) * np.cos(t1) * np.sin(t2)).sum(axis=1).reshape((-1,1)) / x.shape[1]
#[1]
#[2] main workflow
def main():
#[2-1] Generate training and validation data
print("Generating train and validation datasets...")
x_min, x_max = -450., -520. # both input coordinates have the same range
train_x = square_grid(x_min, x_max, 8) # generate train input as full factorial 8-by-8 grid within [x_min, x_max] range
train_y = rana(train_x) # evaluate true function at train input points
validation_x = square_grid(x_min, x_max, 100) # generate validation input as full factorial 100-by-100 grid within [x_min, x_max] range
validation_y = rana(validation_x) # evaluate true function at validation input points
print("Done. Train data contain %d vectors while validation data contain %d vectors." % (train_x.shape[0], validation_x.shape[0]))
#[2-1]
#[2-2] Initialize model builder
print('Creating GTApprox Builder...')
gtapprox_builder = gtapprox.Builder() # create the model builder object
gtapprox_builder.set_logger(StreamLogger()) # attach console logger to the model builder
# Manually select HDA technique. It is neither necessary nor optimal for this particular
# simplified case, but usually it is the optimal choice for a huge multidimensional dataset.
gtapprox_builder.options.set("GTApprox/Technique", "HDA")
#[2-2]
#[2-3] Create HDA approximation using default options.
print("\n\nTraining HDA model with default options (note 'GTApprox/MaxAxisRotations'=0 by default)...")
print("-" * 74)
time_start = datetime.now()
model_default = gtapprox_builder.build(train_x, train_y) # create model using default options (implies no axis rotations)
time_default = datetime.now() - time_start
print("-" * 74)
#[2-3]
#[2-4] Create HDA approximation using axis rotations.
print("\n\nTraining HDA model with auto selection of the axis rotations number ('GTApprox/MaxAxisRotations'=-1)...")
print("-" * 74)
time_start = datetime.now()
model_rotations = gtapprox_builder.build(train_x, train_y, options={"GTApprox/MaxAxisRotations": -1}) # create model with 'auto' selected number of axis rotations
time_rotations = datetime.now() - time_start
print("-" * 74)
#[2-4]
#[2-5] Comparing models built with and without axis rotations
print("\n\nCalculating validation errors...")
# The validate() method returns dictionary that maps the error metric name to the vector of outputwise errors values.
errors_default = model_default.validate(validation_x, validation_y)
errors_rotations = model_rotations.validate(validation_x, validation_y)
print("-" * 74)
print("| %-25s| %-20s | %-20s |" % ("", "Default options", "Use axis rotations"))
print("-" * 74)
# Print training time.
print("| %-25s| %-20s | %-20s |" % ("Training time", time_default, time_rotations))
# Print approximation technique selected by SmartSelection.
print("| %-25s| %-20s | %-20s |" % ("Technique selected", model_default.details["Technique"], model_rotations.details["Technique"]))
print("-" * 74)
for error_name in sorted(errors_default.keys()):
# Print error metric and its value. Note output of the model is 1-dimensional.
print("| %-25s| %-20.5g | %-20.5g |" % (error_name, errors_default[error_name][0], errors_rotations[error_name][0]))
print("-" * 74)
#[2-5]
#[2-7] Display 3D plots of models. The first argument of the helper function is dictionary: model title -> callable object evaluating function value.
# So we can seamlessly mix models calc() method and pure Python rana() function.
plot_3D({"Model built with default options": model_default.calc,
"Model built with axis rotations": model_rotations.calc,
"True normalized Rana's function with diagonal wrap": rana}, \
train_x, train_y)
#[2-7]
#[2-8] Display contour plots.
plot_2D({"Model built with default options": model_default.calc,
"Model built with axis rotations": model_rotations.calc,
"True normalized Rana's function with diagonal wrap": rana}, \
train_x, train_y, [-400., -200., 0., 200., 400.])
#[2-8]
#[2]
#[3] Helper function for 2D grid generation
def square_grid(x_min, x_max, n_x):
x_factor = np.linspace(x_min, x_max, n_x).reshape(-1, 1)
return np.hstack((np.repeat(x_factor, x_factor.shape[0], axis=0),
np.tile(x_factor, (x_factor.shape[0], 1))))
#[3]
#[4] Helper function for 3D plotting
def plot_3D(named_models, train_x, train_y):
print('Plotting 3D charts...')
try:
# Try to import required matplotlib library
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib import cm
fontsize = 10 # titles and labels font size
train_dataset_color = (0., .5, 0., 1.0) # color of the trainign dataset points
model_prediction_color = (0., 0., 1., 0.8) # color of the model values mesh
background_color = (1., 1., 1., 1.) # plot background color
# Create mesh grid for plotting model predictions
x_mesh = np.meshgrid(np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 100),
np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 100))
# Convert mesh grid to the gtapprox.Model.calc()-compatible representation
model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))
figures_list = [] # special list of objects that should be stored until plot is removed
# Enumerate model names. The model index is only needed for PNG file name generation.
for model_index, model_name in enumerate(named_models):
# Create new plot window.
figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)
# Configure 3D plot.
subplot = figure.add_subplot(111, projection='3d')
subplot.tick_params(labelsize=fontsize)
subplot.set_xlabel("x1", fontsize=fontsize)
subplot.set_ylabel("x2", fontsize=fontsize)
subplot.set_zlabel("f(x1, x2)", fontsize=fontsize)
# draw scatter plot of the training dataset
plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], train_y[:, 0], color=train_dataset_color, s=4)
# Calculate model predictions. Note it returns N-by-1 dimensional matrix while wireframe mesh requires
# mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately order of points in flatten matrix
# is the same for x_mesh and model predictions, so we can reshape to the x_mesh[0].shape.
model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)
# draw wireframe mesh of models predictions
plot_predictions = subplot.plot_wireframe(x_mesh[0], x_mesh[1], model_predictions, color=model_prediction_color, linewidth=0.2)
# Setup legend and title.
figure.legend(handles=[plot_dataset, plot_predictions], \
labels=['Train dataset', 'Model predictions'], prop={'size': fontsize})
figure.suptitle(model_name, fontsize=(fontsize+2))
# Make some layout cosmetics.
figure.tight_layout()
figure.get_axes()[0].view_init(azim=-135., elev=60.)
# Generate file name.
filename = 'gtapprox_example_axis_rotations_contour_%d.png' % model_index
# Python 2/3 compatibility workarond
try:
filename = os.path.join(os.getcwdu(), filename)
except AttributeError:
filename = os.path.join(os.getcwd(), filename)
# Save figure as PNG file.
figure.savefig(filename)
print('Plot of the "%s" is saved to %s' % (model_name, filename))
# Store figure object until plot is shown.
figures_list.append(figure)
if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
plt.show()
except ImportError:
print("Plotting is disabled due to absence of the matplotlib library.")
except:
print("Failed to plot figure: %s" % sys.exc_info[1])
#[4]
#[5] Helper function for contour plotting
def plot_2D(named_models, train_x, train_y, levels):
print('Plotting contour charts...')
try:
# Try to import required matplotlib library
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from matplotlib import cm
fontsize = 10 # titles and labels font size
train_dataset_color = (0., .5, 0., 1.0) # color of the trainign dataset points
background_color = (1., 1., 1., 1.) # plot background color
# Create mesh grid for plotting model predictions. We need a lot of points to generate accurate contour plot.
x_mesh = np.meshgrid(np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 400),
np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 400))
# Convert mesh grid to the gtapprox.Model.calc()-compatible representation
model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))
figures_list = [] # special list of objects that should be stored until plot is removed
# Enumerate model names. The model index is only needed for PNG file name generation.
for model_index, model_name in enumerate(named_models):
# Create new plot window.
figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)
# Configure 2D plot.
subplot = figure.add_subplot(111)
subplot.tick_params(labelsize=fontsize)
subplot.set_xlabel("x1", fontsize=fontsize)
subplot.set_ylabel("x2", fontsize=fontsize)
# Draw scatter plot of the training dataset.
plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], color=train_dataset_color, s=4)
# Calculate model predictions. Note it returns N-by-1 dimensional matrix while contour plot requires
# mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately order of points in flatten matrix
# is the same for x_mesh and model predictions, so we can reshape to the x_mesh[0].shape.
model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)
# Draw models predictions as contour plot.
levels = sorted(levels)
plot_predictions = subplot.contour(x_mesh[0], x_mesh[1], model_predictions, cmap=cm.coolwarm, levels=levels)
plot_predictions.clabel(inline=1, fontsize=(fontsize-2))
# Setup legend and title.
legend_labels = ['Train dataset']
legend_colors = [plot_dataset]
for level, contour_level in zip(levels, plot_predictions.collections):
legend_labels.append('f(x1, x2) = %g' % level)
figure.legend(handles=legend_colors, labels=legend_labels, prop={'size': fontsize})
figure.suptitle(model_name, fontsize=(fontsize+2))
# Generate file name.
filename = 'gtapprox_example_axis_rotations_contour_%d.png' % model_index
# Python 2/3 compatibility workarond
try:
filename = os.path.join(os.getcwdu(), filename)
except AttributeError:
filename = os.path.join(os.getcwd(), filename)
# Save figure as PNG file.
figure.savefig(filename)
print('Plot of the "%s" is saved to %s' % (model_name, filename))
# Store figure object until plot is shown.
figures_list.append(figure)
if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
plt.show()
except ImportError:
print("Plotting is disabled due to absence of the matplotlib library.")
except:
print("Failed to plot figure: %s" % sys.exc_info[1])
#[5]
if __name__ == "__main__":
main()
|
13.2.5. example_gtmodel.c¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 | /*
*Copyright (C) pSeven SAS, 2010-present
*
* Example of using the C interface to GTApprox models (GTModel for C).
*
*/
#include "GTApproxModel.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
void handleError(GTModelError** error, const char* prefix, int fatalError) {
if (*error) {
/* report and free error */
fprintf(stderr, "%s: %s\n", prefix, GTModelErrorDescription(*error));
GTModelErrorFree(*error);
if (fatalError) {
/* exit if error marked as 'fatal' */
exit(1);
} else {
/* reset error variable otherwise */
*error = 0;
}
}
}
int main(int argc, char* argv[]) {
int calculateAE = 0; /* indicates whether AE calculation is requested */
char* modelFileName = 0; /* path to the model file */
char* inputFileName = 0; /* path to the input CSV file or '-' to use stdin */
FILE* inputFile = stdin; /* pointer to input file (stdin by default) */
int inputSize = 0; /* model input vector size */
int outputSize = 0; /* model output vector size */
int hasAE = 0; /* indicates whether the model supports AE */
double* input; /* input buffer */
double* output; /* output buffer */
int linebufSize = 256; /* actual size of the input line buffer */
char* linebuf = (char*)malloc(linebufSize); /* input line buffer */
int i; /* counter */
GTApproxModel* model = 0;
GTModelError* error = 0;
if (argc > 1 && !strcmp(argv[1], "-e")) {
calculateAE = 1;
}
if (argc > (calculateAE+1)) {
modelFileName = argv[1 + calculateAE];
}
if (argc > (calculateAE+2)) {
inputFileName = argv[2 + calculateAE];
}
if (!modelFileName) {
/* print help if no model path given */
printf(
"Usage:\n"
" %s\n"
" Print this message and exit.\n"
"\n"
" %s model_file\n"
" Load a GTApprox model from model_file and print model information.\n"
"\n"
" %s [-e] model_file input.csv\n"
" Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.\n"
"\n"
" %s [-e] model -\n"
" Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the input given by stdin.\n"
"\n"
"Model output is printed to stdout. Each line is a result for another input point.\n"
"Print order:\n"
" model (function) values,\n"
" gradient (partial derivative) values for the first output component with respect to all input components,\n"
" ...\n"
" gradient (partial derivative) values for the last output component with respect to all input components.\n"
"\n",
argv[0], argv[0], argv[0], argv[0]);
return 0;
}
/* load model */
if (!(model = GTApproxModelLoad(modelFileName, &error))) {
fprintf(stderr, "Failed to load model %s: %s\n", modelFileName, GTModelErrorDescription(error));
GTModelErrorFree(error);
return 1;
}
/* get model parameters */
inputSize = GTApproxModelInputSize(model, 0);
outputSize = GTApproxModelOutputSize(model, 0);
hasAE = GTApproxModelHasAE(model, 0);
if (!inputFileName) {
/* print model info and exit */
printf("Model file: %s\n", modelFileName);
printf("Input dimension: %d\n", inputSize);
printf("Output dimension: %d\n", outputSize);
printf("Accuracy evaluation support: %s\n", (hasAE ? "Yes" : "No"));
printf("Extended model information: %s\n", GTApproxModelDescription(model, 0));
GTApproxModelFree(model);
return 0;
}
/* check AE */
if (calculateAE && !hasAE) {
printf("The model from %s does not support accuracy evaluation.", modelFileName);
GTApproxModelFree(model);
return 0;
}
/* open input file if exists */
if (strcmp(inputFileName, "-") && !(inputFile = fopen(inputFileName, "r"))) {
fprintf(stderr, "Failed to open file %s for reading: %s\n", inputFileName, strerror(errno));
GTApproxModelFree(model);
return 2;
}
/* allocate input/output buffers */
input = (double*)malloc((inputSize + 1) * sizeof(double));
output = (double*)malloc(outputSize * (inputSize + 1) * sizeof(double));
while(!feof(inputFile) && !ferror(inputFile)) {
int valuesRead, linebufFill = 0;
char *tokens;
/* read line from the input stream */
while (fgets(linebuf + linebufFill, linebufSize - linebufFill, inputFile)) {
linebufFill += strlen(linebuf + linebufFill);
if ('\n' == linebuf[linebufFill - 1]) {
/* EOL detected */
break;
} else if (!(linebuf = (char*)realloc(linebuf, (linebufSize += 256)))){
/* can not increment buffer - out of memory */
fprintf(stderr, "Cannot allocate line buffer of size %d: out of memory!\n", linebufSize);
return 1;
}
}
/* tokenize it and read input floats */
valuesRead = 0;
if (linebufFill > 0) {
for (tokens = strtok(linebuf, ","); tokens; (tokens = strtok(0, ","))) {
char* tailptr = tokens;
input[(valuesRead < inputSize) ? valuesRead : inputSize] = strtod(tokens, &tailptr);
if (tailptr > tokens) {
++ valuesRead;
}
}
}
if (inputSize == valuesRead) {
if (!calculateAE) {
GTApproxModelCalc(model, input, 1, output, 1, &error);
handleError(&error, "Failed to calculate model value", 0);
GTApproxModelGrad(model, input, 1, output + outputSize, inputSize, 1, &error);
handleError(&error, "Failed to calculate model gradient", 0);
} else {
GTApproxModelCalcAE(model, input, 1, output, 1, &error);
handleError(&error, "Failed to estimate model AE", 0);
GTApproxModelGradAE(model, input, 1, output + outputSize, inputSize, 1, &error);
handleError(&error, "Failed to estimate gradient of model AE", 0);
}
/* print output to stdout */
printf("%lf", output[0]);
for (i = 1; i < outputSize * (inputSize + 1); ++ i) {
printf(", %lf", output[i]);
}
printf("\n");
} else if (valuesRead > 0) {
/* report input error */
fprintf(stderr, "Invalid input: %d out of %d required float values read from line.\n", valuesRead, inputSize);
} else if (inputFile == stdin) {
/* break on stdin, skip empty line in file */
break;
}
}
/* release resources */
if (inputFile != stdin) {
fclose(inputFile);
}
free(input);
free(output);
free(linebuf);
GTApproxModelFree(model);
return 0;
}
|
13.2.6. EvaluateGTModel.java¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 | /*
*Copyright (C) pSeven SAS, 2010-present
*
*/
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.nio.charset.Charset;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.File;
import java.lang.NumberFormatException;
import net.datadvance.gtmodel.GTApproxModel;
/**
* Sample code that loads a binary GTApprox model and evaluates model responses.
* To compile this class use the following command:
*
* javac -cp gtmodel.jar EvaluateGTModel.java
*
*/
public class EvaluateGTModel {
static void usage() {
System.err.println("Usage:");
System.err.format (" java -cp gtmodel.jar%s. EvaluateGTModel%n", File.pathSeparator);
System.err.println(" Print this message and exit.");
System.err.println("");
System.err.format (" java -cp gtmodel.jar%s. EvaluateGTModel <model_file>%n", File.pathSeparator);
System.err.println(" Load a GTApprox model from model_file and print model information.");
System.err.println("");
System.err.format (" java -cp gtmodel.jar%s. EvaluateGTModel [-e] [-s] [-i] <model_file> <input_csv_1> ... <input_csv_N>%n", File.pathSeparator);
System.err.println(" Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.");
System.err.println(" Use -s switch to skip CSV header row.");
System.err.println(" Use -i switch to print input values prior to model output.");
System.err.println("");
System.err.println("Model output is printed to stdout. Each line is a result for another input point.");
System.err.println("");
System.err.println("Print order:");
System.err.println(" [optional input values (if -i switch is provided),]");
System.err.println(" model (function) values,");
System.err.println(" gradient (partial derivative) values for the first output component with respect to all input components,");
System.err.println(" ...");
System.err.println(" gradient (partial derivative) values for the last output component with respect to all input components.");
System.exit(-1);
}
static double[] parseLine(String line, int expectedTokensNumber, Path fileName, int lineIndex) {
if (null == line || 0 == line.length()) {
return null;
}
// Disclaimer: never use this code for the real CSV parsing. It is inefficient and incomplete (does not ignore quoted commas).
String[] stringInput = line.split(",");
if (stringInput.length != expectedTokensNumber) {
System.err.format("Line #%d of the file %s has invalid number of tokens: %d (%d expected)%n",
lineIndex, fileName, stringInput.length, expectedTokensNumber);
return null;
}
double[] modelInput = new double[stringInput.length];
try {
for (int i = 0; i < stringInput.length; i ++) {
if (0 == stringInput[i].length()) {
System.err.format("Line #%d of the file %s has empty field #%d. Using NaN as input value.", lineIndex, fileName, (i + 1));
modelInput[i] = Double.NaN;
} else {
modelInput[i] = Double.parseDouble(stringInput[i]);
}
}
} catch (NumberFormatException e) {
System.err.format("Failed to parse line #%d of the file %s: %s%n", lineIndex, fileName, e);
return null;
}
return modelInput;
}
public static void main(String[] args) throws IOException {
boolean calculateAE = false;
boolean skipHeaderRow = false;
boolean printModelInput = false;
// process options
int argi = 0;
while (argi < args.length) {
String arg = args[argi];
if (!arg.startsWith("-"))
break;
if (arg.length() < 2)
usage();
for (int i = 1; i < arg.length(); i ++) {
char c = arg.charAt(i);
switch (c) {
case 'e':
calculateAE = true;
break;
case 's':
skipHeaderRow = true;
break;
case 'i':
printModelInput = true;
break;
default:
System.err.format("Invalid or unrecognized option given: %s%n%n", arg);
usage();
}
}
argi++;
}
// remaining arguments are the model data and CSV files to evaluate
if (argi == args.length)
usage();
// load model
Path modelPath = Paths.get(args[argi++]);
GTApproxModel model = new GTApproxModel(Files.readAllBytes(modelPath));
if (model == null) {
System.err.format("Failed to load model %s%n", modelPath.toString());
usage();
}
if (argi == args.length) {
// model only is given: just print model info
System.out.format("Model file: %s%n", modelPath.toString());
System.out.format("Input dimension: %d%n", model.inputSize());
System.out.format("Output dimension: %d%n", model.outputSize());
System.out.format("Accuracy evaluation support: %s%n", (model.hasAE() ? "Yes" : "No"));
System.out.format("Extended model information: %s%n", model.description());
} else {
if (calculateAE && !model.hasAE()) {
System.err.format("AE requested but %s model does not support it.%n", modelPath.toString());
System.exit(-2);
}
int modelInputSize = model.inputSize();
// read input files
while (argi < args.length) {
Path dataPath = Paths.get(args[argi++]);
Charset charset = Charset.forName("UTF-8");
try (BufferedReader reader = Files.newBufferedReader(dataPath, charset)) {
String line = null;
int lineIndex = 0;
if (skipHeaderRow) {
reader.readLine();
++ lineIndex;
}
while ((line = reader.readLine()) != null) {
double[] modelInput = parseLine(line.trim(), modelInputSize, dataPath, ++ lineIndex);
if (null != modelInput) {
if (printModelInput) {
for (int i = 0; i < modelInput.length; i ++) {
System.out.format("%.17g,", modelInput[i]);
}
}
double[] modelResponse;
double[][] modelGradient;
if (calculateAE) {
modelResponse = model.calcAE(modelInput);
modelGradient = model.calcGradAE(modelInput, model.GRADIENT_F_MAJOR);
} else {
modelResponse = model.calc(modelInput);
modelGradient = model.calcGrad(modelInput, model.GRADIENT_F_MAJOR);
}
// print model response (always at least 1-dimensional)
System.out.format("%.17g", modelResponse[0]);
for (int i = 1; i < modelResponse.length; i ++) {
System.out.format(",%.17g", modelResponse[i]);
}
// print model gradient
for (int i = 0; i < modelGradient.length; i ++) {
for (int j = 0; j < modelGradient[i].length; j ++) {
System.out.format(",%.17g", modelGradient[i][j]);
}
}
System.out.format("%n");
}
}
} catch (IOException e) {
System.err.format("Failed to read file %s: %s%n", dataPath.toString(), e);
}
}
}
}
}
|