13.2. GTApprox

13.2.1. example_adaptive_doe.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""Using adaptive DOE for approximation example."""

import numpy as np

from da.p7core import gtapprox
from da.p7core.loggers import LogLevel, StreamLogger
from da.p7core import blackbox as bb

class TestProblem(bb.Blackbox):
  def prepare_blackbox(self):
    self.add_variable((0, 1))
    self.add_variable((0, 1))
    self.add_response()

  def evaluate(self, queryx):
    x1 = queryx[:,0]
    x2 = queryx[:,1]
    f = 2.0 + 0.25 * (x2 - 5.0 * x1**2)**2 + (1.0 - 5.0 * x1)**2 + 2.0 * (2.0 - 5.0 * x2)**2 + 7.0 * np.sin(2.5 * x1) * np.sin(17.5 * x1 * x2)
    return f.reshape((queryx.shape[0], 1))

initial_x = np.array([[0.65, 0.15],
                      [0.55, 0.45],
                      [0.75, 0.85],
                      [0.35, 0.25],
                      [0.05, 0.05],
                      [0.15, 0.55],
                      [0.95, 0.65],
                      [0.45, 0.75],
                      [0.85, 0.35],
                      [0.25, 0.95]])

def get_sample(problem, points, x_sample=None, f_sample=None):
  """Data set generation."""
  from da.p7core import gtdoe
  doegenerator = gtdoe.Generator()
  doegenerator.options.set({'GTDoE/Technique': 'LHS'})
  result = doegenerator.build_doe(problem, count=points, sample_x=x_sample, sample_f=f_sample)

  xSample = result.solutions(["x"], filter_type="new")
  fSample = result.solutions(["f"], filter_type="new")

  return xSample, fSample

def main():
  """Example to demonstrate AE and adaptive sampling."""
  # create builder
  builder = gtapprox.Builder()
  options = {
    'GTApprox/InternalValidation': 'off',
    'GTApprox/AccuracyEvaluation': 'on',
    'GTApprox/ExactFitRequired': 'on',
    'GTApprox/Technique': 'GP',
    'GTApprox/LogLevel': 'Info'
  }
  builder.options.set(options)
  builder.set_logger(StreamLogger(log_level=LogLevel.ERROR))

  # prepare initial training sample
  problem = TestProblem()
  initial_f = problem.evaluate(initial_x)

  print("Training initial model...")
  model = builder.build(initial_x, initial_f)
  print("Done!")

  # improve model
  iterations = 6
  additionalSize = 10
  testSize = 500
  x_sample = initial_x
  f_sample = initial_f

  for _ in range(iterations):
    # generate new big test sample
    x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)

    # calculate actual and predicted errors on test sample
    errors = model.validate(x_test, f_test)
    print("Training sample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

    # find max predicted error in test sample points
    ae_predictions = model.calc_ae(x_test).reshape((len(x_test),))
    idx = np.argsort(ae_predictions)

    # add additionalSize points with maximal AE values to the training samples
    x_sample = np.vstack((x_sample, x_test[idx[-additionalSize:]]))
    f_sample = np.vstack((f_sample, f_test[idx[-additionalSize:]]))

    # build next improved model
    model = builder.build(x_sample, f_sample)

  # calculate final error
  x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)
  errors = model.validate(x_test, f_test)
  print("Training sample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

  print("Final result:\n--------------\nSample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

if __name__ == "__main__":
  main()
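
If you want to keep the improved model from the last iteration, it can be saved and reloaded in the same way as in example_gtapprox.py below. A minimal sketch, to be added at the end of main() (the file name adaptive_model.gta is arbitrary):

# Sketch: persist the adaptively improved model and reload it without retraining.
model.save('adaptive_model.gta')
reloaded_model = gtapprox.Model('adaptive_model.gta')
print(reloaded_model)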

13.2.2. example_gtapprox.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""Basic GTApprox Usage."""

from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger

import random

def main():
  """
  Example of GTApprox usage.
  """

  # prepare data
  sSize = 7
  dim = 2
  random.seed(10)
  # generate a random sample in the unit hypercube and evaluate the test function
  x_sample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]
  y_sample = [[x[i]**(i+1) for i in range(len(x))] for x in x_sample]

  # create approximation builder
  builder = gtapprox.Builder()
  # setup options
  options = {
    'GTApprox/AccuracyEvaluation': 'on',
    'GTApprox/Technique': 'GP',
    'GTApprox/InternalValidation': 'on',
    'GTApprox/LogLevel': 'Info'
  }
  builder.options.set(options)
  # setup logger function (optional)
  logger = StreamLogger()
  builder.set_logger(logger)
  # train model
  model = builder.build(x_sample, y_sample)
  # save trained model
  model.save('approxModel.gta')

  # load trained model and use it
  loaded_model = gtapprox.Model('approxModel.gta')

  # print model information
  print('----------- Model -----------')
  print('SizeX: %d' % loaded_model.size_x)
  print('SizeF: %d' % loaded_model.size_f)
  print('Model has AE: %s' % loaded_model.has_ae)
  print('----------- Info -----------')
  print(str(loaded_model))

  # create test sample
  test_xsample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]

  # calculate and display approximated values
  for x in test_xsample:
    y = loaded_model.calc(x)
    print('Model Y: %s' % y)

  # calculate and display gradients
  for x in test_xsample:
    dy = loaded_model.grad(x, gtapprox.GradMatrixOrder.F_MAJOR)
    print('Model gradient: %s' % dy)

  # calculate and display AE
  for x in test_xsample:
    ae = loaded_model.calc_ae(x)
    print('Model AE: %s' % ae)

if __name__ == "__main__":
  main()
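
Note that the evaluation methods also accept a whole sample at once: in example_axis_rotations.py below, Model.calc() is called on an N-by-2 matrix of input points. A minimal sketch reusing loaded_model and test_xsample from the listing above:

# Sketch: evaluate all test points in a single call instead of looping point by point.
batch_y = loaded_model.calc(test_xsample)
print('Model Y (batch): %s' % batch_y)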

13.2.3. example_internal_validation.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""Internal Validation Example"""

from da.p7core import gtapprox, gtdoe
from da.p7core.loggers import StreamLogger

import numpy as np

def rosenbrock(x):
  """
  Rosenbrock function (a relatively simple function to approximate)
  """
  x1 = x[:,0]
  x2 = x[:,1]
  return (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2

def rastrigin(x):
  """
  Rastrigin function (a highly multimodal, hard-to-approximate function)
  """
  x1 = x[:,0]
  x2 = x[:,1]
  return 20.0 + (x1**2 - 10.0 * np.cos(2.0 * np.pi * x1)) + (x2**2 - 10.0 * np.cos(2.0 * np.pi * x2))

def main():
  """
  Example to demonstrate IV
  """
  # create generator
  generator = gtdoe.Generator()

  # create builder
  builder = gtapprox.Builder()
  builder.options.set('GTApprox/LogLevel', 'Info')
  builder.options.set('GTApprox/InternalValidation', 'on')
  # set logger
  logger = StreamLogger()
  builder.set_logger(logger)

  # data description
  dim_x = 2
  dim_f = 1
  bounds = ([-1]*dim_x, [1]*dim_x)
  training_sample_size = 20
  test_sample_size = 25**dim_x

  # generating training input sample
  result = generator.build_doe(bounds, training_sample_size, options={'GTDoE/Technique': 'LHS'})
  x_sample = result.solutions(["x"])

  print("IV example\n---------------")
  # generating training output sample for rosenbrock model
  y_sample = rosenbrock(x_sample)
  print("Training model 1...")
  model_rosenbrock = builder.build(x_sample, y_sample)
  print("Done!\n")

  # generating training output sample for rastrigin model
  y_sample = rastrigin(x_sample)
  print("Training model 2...")
  model_rastrigin = builder.build(x_sample, y_sample)
  print("Done!\n\n")

  # generating test sample
  result = generator.build_doe(bounds, test_sample_size, options={'GTDoE/Technique': 'FullFactorial'})
  x_test = result.solutions(["x"])
  f_rosenbrock = rosenbrock(x_test)
  f_rastrigin = rastrigin(x_test)

  # calculating error on test sample
  err_rosenbrock = model_rosenbrock.validate(x_test, f_rosenbrock)
  err_rastrigin = model_rastrigin.validate(x_test, f_rastrigin)

  print("Actual RMS error on Rosenbrock function:")
  print(str(err_rosenbrock["RMS"]))
  print("Internal validation results for Rosenbrock function:")
  print(str(model_rosenbrock.iv_info["Componentwise"]["RMS"]))

  print("\n\nActual RMS error on Rastrigin function:")
  print(str(err_rastrigin["RMS"]))
  print("Internal validation results for Rastrigin function:")
  print(str(model_rastrigin.iv_info["Componentwise"]["RMS"]))

if __name__ == "__main__":
  main()
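
The iv_info attribute used above holds the full internal validation report, not only the componentwise RMS values printed in this example. A minimal sketch for inspecting everything it stores, to be run after building a model as in main() above:

# Sketch: dump the complete internal validation report of a model.
import pprint
pprint.pprint(model_rosenbrock.iv_info)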

13.2.4. example_axis_rotations.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""
This example illustrates the benefits of using the GTApprox/MaxAxisRotations option.
"""

#[0] Required imports
import os
import sys
import numpy as np
from datetime import datetime

from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger
#[0] imports end

#[1] Function to be approximated: normalized Rana's function with diagonal wrap
def rana(x):
  # Evaluate n-dimensional normalized Rana function.
  # Note 2D case can be reduced to the following formula:
  #          x1 * sin(sqrt(abs(x2 - x1 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
  #  + (x1 + 1) * cos(sqrt(abs(x1 - x2 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))
  #  +       x2 * sin(sqrt(abs(x1 - x2 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
  #  + (x2 + 1) * cos(sqrt(abs(x2 - x1 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))

  x  = np.array(x, dtype=float)
  xj = x[:, range(-1, x.shape[1] - 1)]
  t1 = np.sqrt(np.abs(xj - x + 1))
  t2 = np.sqrt(np.abs(xj + x + 1))
  return (x * np.sin(t1) * np.cos(t2) + (xj + 1.) * np.cos(t1) * np.sin(t2)).sum(axis=1).reshape((-1,1)) / x.shape[1]
#[1]

#[2] main workflow
def main():
  #[2-1] Generate training and validation data
  print("Generating train and validation datasets...")
  x_min, x_max = -520., -450. # both input coordinates have the same range
  train_x = square_grid(x_min, x_max, 8) # generate train input as full factorial 8-by-8 grid within [x_min, x_max] range
  train_y = rana(train_x) # evaluate true function at train input points
  validation_x = square_grid(x_min, x_max, 100) # generate validation input as full factorial 100-by-100 grid within [x_min, x_max] range
  validation_y = rana(validation_x) # evaluate true function at validation input points
  print("Done. Train data contain %d vectors while validation data contain %d vectors." % (train_x.shape[0], validation_x.shape[0]))
  #[2-1]

  #[2-2] Initialize model builder
  print('Creating GTApprox Builder...')
  gtapprox_builder = gtapprox.Builder() # create the model builder object
  gtapprox_builder.set_logger(StreamLogger()) # attach console logger to the model builder

  # Manually select HDA technique. It is neither necessary nor optimal for this particular
  # simplified case, but usually it is the optimal choice for a huge multidimensional dataset.
  gtapprox_builder.options.set("GTApprox/Technique", "HDA")
  #[2-2]

  #[2-3] Create HDA approximation using default options.
  print("\n\nTraining HDA model with default options (note 'GTApprox/MaxAxisRotations'=0 by default)...")
  print("-" * 74)
  time_start = datetime.now()
  model_default = gtapprox_builder.build(train_x, train_y) # create model using default options (implies no axis rotations)
  time_default = datetime.now() - time_start
  print("-" * 74)
  #[2-3]

  #[2-4] Create HDA approximation using axis rotations.
  print("\n\nTraining HDA model with auto selection of the axis rotations number ('GTApprox/MaxAxisRotations'=-1)...")
  print("-" * 74)
  time_start = datetime.now()
  model_rotations = gtapprox_builder.build(train_x, train_y, options={"GTApprox/MaxAxisRotations": -1}) # create model with 'auto' selected number of axis rotations
  time_rotations = datetime.now() - time_start
  print("-" * 74)
  #[2-4]

  #[2-5] Comparing models built with and without axis rotations
  print("\n\nCalculating validation errors...")
  # The validate() method returns a dictionary that maps each error metric name to a vector of per-output error values.
  errors_default = model_default.validate(validation_x, validation_y)
  errors_rotations = model_rotations.validate(validation_x, validation_y)
  print("-" * 74)
  print("| %-25s| %-20s | %-20s |" % ("", "Default options", "Use axis rotations"))
  print("-" * 74)
  # Print training time.
  print("| %-25s| %-20s | %-20s |" % ("Training time", time_default, time_rotations))
  # Print the approximation technique used by each model.
  print("| %-25s| %-20s | %-20s |" % ("Technique selected", model_default.details["Technique"], model_rotations.details["Technique"]))
  print("-" * 74)
  for error_name in sorted(errors_default.keys()):
    # Print error metric and its value. Note output of the model is 1-dimensional.
    print("| %-25s| %-20.5g | %-20.5g |" % (error_name, errors_default[error_name][0], errors_rotations[error_name][0]))
  print("-" * 74)
  #[2-5]

  #[2-7] Display 3D plots of the models. The first argument of the helper function is a dictionary mapping a model title
  # to a callable that evaluates function values, so we can seamlessly mix the models' calc() methods and the pure Python rana() function.
  plot_3D({"Model built with default options": model_default.calc,
           "Model built with axis rotations": model_rotations.calc,
           "True normalized Rana's function with diagonal wrap": rana}, \
          train_x, train_y)
  #[2-7]

  #[2-8] Display contour plots.
  plot_2D({"Model built with default options": model_default.calc,
           "Model built with axis rotations": model_rotations.calc,
           "True normalized Rana's function with diagonal wrap": rana}, \
          train_x, train_y, [-400., -200., 0., 200., 400.])
  #[2-8]
#[2]

#[3] Helper function for 2D grid generation
def square_grid(x_min, x_max, n_x):
  x_factor = np.linspace(x_min, x_max, n_x).reshape(-1, 1)
  return np.hstack((np.repeat(x_factor, x_factor.shape[0], axis=0),
                    np.tile(x_factor, (x_factor.shape[0], 1))))
#[3]

#[4] Helper function for 3D plotting
def plot_3D(named_models, train_x, train_y):
  print('Plotting 3D charts...')
  try:
    # Try to import required matplotlib library
    from mpl_toolkits.mplot3d import Axes3D
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fontsize = 10 # titles and labels font size
    train_dataset_color = (0., .5, 0., 1.0) # color of the training dataset points
    model_prediction_color = (0., 0., 1., 0.8) # color of the model values mesh
    background_color = (1., 1., 1., 1.) # plot background color

    # Create mesh grid for plotting model predictions
    x_mesh = np.meshgrid(np.linspace(train_x[:, 0].min(), train_x[:, 0].max(), 100),
                         np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 100))

    # Convert mesh grid to the gtapprox.Model.calc()-compatible representation
    model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))

    figures_list = [] # special list of objects that should be stored until plot is removed

    # Enumerate model names. The model index is only needed for PNG file name generation.
    for model_index, model_name in enumerate(named_models):
      # Create new plot window.
      figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)

      # Configure 3D plot.
      subplot = figure.add_subplot(111, projection='3d')
      subplot.tick_params(labelsize=fontsize)
      subplot.set_xlabel("x1", fontsize=fontsize)
      subplot.set_ylabel("x2", fontsize=fontsize)
      subplot.set_zlabel("f(x1, x2)", fontsize=fontsize)

      # draw scatter plot of the training dataset
      plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], train_y[:, 0], color=train_dataset_color, s=4)

      # Calculate model predictions. Note that the callable returns an N-by-1 matrix, while the wireframe plot requires
      # a mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately, the order of points in the flattened matrix
      # is the same for x_mesh and the model predictions, so we can simply reshape to x_mesh[0].shape.
      model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)

      # draw wireframe mesh of models predictions
      plot_predictions = subplot.plot_wireframe(x_mesh[0], x_mesh[1], model_predictions, color=model_prediction_color, linewidth=0.2)

      # Setup legend and title.
      figure.legend(handles=[plot_dataset, plot_predictions], \
                    labels=['Train dataset', 'Model predictions'], prop={'size': fontsize})
      figure.suptitle(model_name, fontsize=(fontsize+2))

      # Make some layout cosmetics.
      figure.tight_layout()
      figure.get_axes()[0].view_init(azim=-135., elev=60.)

      # Generate file name.
      filename = 'gtapprox_example_axis_rotations_surface_%d.png' % model_index  # distinct from the contour plot file names

      # Python 2/3 compatibility workaround
      try:
        filename = os.path.join(os.getcwdu(), filename)
      except AttributeError:
        filename = os.path.join(os.getcwd(), filename)

      # Save figure as PNG file.
      figure.savefig(filename)
      print('Plot of the "%s" is saved to %s' % (model_name, filename))

      # Store figure object until plot is shown.
      figures_list.append(figure)

    if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
      plt.show()
  except ImportError:
    print("Plotting is disabled due to absence of the matplotlib library.")
  except:
    print("Failed to plot figure: %s" % sys.exc_info()[1])
#[4]

#[5] Helper function for contour plotting
def plot_2D(named_models, train_x, train_y, levels):
  print('Plotting contour charts...')
  try:
    # Try to import required matplotlib library
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fontsize = 10 # titles and labels font size
    train_dataset_color = (0., .5, 0., 1.0) # color of the training dataset points
    background_color = (1., 1., 1., 1.) # plot background color

    # Create mesh grid for plotting model predictions. We need a lot of points to generate accurate contour plot.
    x_mesh = np.meshgrid(np.linspace(train_x[:, 0].min(), train_x[:, 0].max(), 400),
                         np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 400))

    # Convert mesh grid to the gtapprox.Model.calc()-compatible representation
    model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))

    figures_list = [] # special list of objects that should be stored until plot is removed

    # Enumerate model names. The model index is only needed for PNG file name generation.
    for model_index, model_name in enumerate(named_models):
      # Create new plot window.
      figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)

      # Configure 2D plot.
      subplot = figure.add_subplot(111)
      subplot.tick_params(labelsize=fontsize)
      subplot.set_xlabel("x1", fontsize=fontsize)
      subplot.set_ylabel("x2", fontsize=fontsize)

      # Draw scatter plot of the training dataset.
      plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], color=train_dataset_color, s=4)

      # Calculate model predictions. Note that the callable returns an N-by-1 matrix, while the contour plot requires
      # a mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately, the order of points in the flattened matrix
      # is the same for x_mesh and the model predictions, so we can simply reshape to x_mesh[0].shape.
      model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)

      # Draw models predictions as contour plot.
      levels = sorted(levels)
      plot_predictions = subplot.contour(x_mesh[0], x_mesh[1], model_predictions, cmap=cm.coolwarm, levels=levels)
      plot_predictions.clabel(inline=1, fontsize=(fontsize-2))

      # Setup legend and title.
      legend_labels = ['Train dataset']
      legend_colors = [plot_dataset]

      for level, contour_level in zip(levels, plot_predictions.collections):
        legend_labels.append('f(x1, x2) = %g' % level)
        legend_colors.append(contour_level)  # keep legend handles and labels in sync

      figure.legend(handles=legend_colors, labels=legend_labels, prop={'size': fontsize})
      figure.suptitle(model_name, fontsize=(fontsize+2))

      # Generate file name.
      filename = 'gtapprox_example_axis_rotations_contour_%d.png' % model_index

      # Python 2/3 compatibility workaround
      try:
        filename = os.path.join(os.getcwdu(), filename)
      except AttributeError:
        filename = os.path.join(os.getcwd(), filename)

      # Save figure as PNG file.
      figure.savefig(filename)
      print('Plot of the "%s" is saved to %s' % (model_name, filename))

      # Store figure object until plot is shown.
      figures_list.append(figure)

    if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
      plt.show()
  except ImportError:
    print("Plotting is disabled due to absence of the matplotlib library.")
  except:
    print("Failed to plot figure: %s" % sys.exc_info()[1])
#[5]

if __name__ == "__main__":
  main()
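
To run this example in a non-interactive environment, note that the plotting helpers above check the SUPPRESS_SHOW_PLOTS environment variable: when it is set, plt.show() is skipped and only the PNG files are written. A minimal sketch of setting it from Python before calling main():

# Sketch: disable interactive plot windows; the PNG files are still saved.
import os
os.environ['SUPPRESS_SHOW_PLOTS'] = '1'  # the helpers only check that the variable is present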

13.2.5. example_gtmodel.c

/*
 * Copyright (C) pSeven SAS, 2010-present
 *
 * Example of using the C interface to GTApprox models (GTModel for C).
 *
 */

#include "GTApproxModel.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void handleError(GTModelError** error, const char* prefix, int fatalError) {
  if (*error) {
    /* report and free error */
    fprintf(stderr, "%s: %s\n", prefix, GTModelErrorDescription(*error));
    GTModelErrorFree(*error);
    if (fatalError) {
      /* exit if error marked as 'fatal' */
      exit(1);
    } else {
      /* reset error variable otherwise */
      *error = 0;
    }
  }
}

int main(int argc, char* argv[]) {
  int calculateAE = 0; /* indicates whether AE calculation is requested */
  char* modelFileName = 0; /* path to the model file */
  char* inputFileName = 0; /* path to the input CSV file or '-' to use stdin */
  FILE* inputFile = stdin; /* pointer to input file (stdin by default) */

  int inputSize = 0; /* model input vector size */
  int outputSize = 0; /* model output vector size */
  int hasAE = 0; /* indicates whether the model supports AE */

  double* input; /* input buffer */
  double* output; /* output buffer */

  int linebufSize = 256; /* actual size of the input line buffer  */
  char* linebuf = (char*)malloc(linebufSize); /* input line buffer */
  int i; /* counter */

  GTApproxModel* model = 0;
  GTModelError* error = 0;

  if (argc > 1 && !strcmp(argv[1], "-e")) {
    calculateAE = 1;
  }

  if (argc > (calculateAE+1)) {
    modelFileName = argv[1 + calculateAE];
  }

  if (argc > (calculateAE+2)) {
    inputFileName = argv[2 + calculateAE];
  }

  if (!modelFileName) {
    /* print help if no model path given */
    printf(
      "Usage:\n"
      "    %s\n"
      "  Print this message and exit.\n"
      "\n"
      "    %s model_file\n"
      "  Load a GTApprox model from model_file and print model information.\n"
      "\n"
      "    %s [-e] model_file input.csv\n"
      "  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.\n"
      "\n"
      "    %s [-e] model -\n"
      "  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the input given by stdin.\n"
      "\n"
      "Model output is printed to stdout. Each line is a result for another input point.\n"
      "Print order:\n"
      "  model (function) values,\n"
      "  gradient (partial derivative) values for the first output component with respect to all input components,\n"
      "  ...\n"
      "  gradient (partial derivative) values for the last output component with respect to all input components.\n"
      "\n",
      argv[0], argv[0], argv[0], argv[0]);
    return 0;
  }

  /* load model */
  if (!(model = GTApproxModelLoad(modelFileName, &error))) {
    fprintf(stderr, "Failed to load model %s: %s\n", modelFileName, GTModelErrorDescription(error));
    GTModelErrorFree(error);
    return 1;
  }

  /* get model parameters */
  inputSize = GTApproxModelInputSize(model, 0);
  outputSize = GTApproxModelOutputSize(model, 0);
  hasAE = GTApproxModelHasAE(model, 0);

  if (!inputFileName) {
    /* print model info and exit */
    printf("Model file: %s\n", modelFileName);
    printf("Input dimension: %d\n", inputSize);
    printf("Output dimension: %d\n", outputSize);
    printf("Accuracy evaluation support: %s\n", (hasAE ? "Yes" : "No"));
    printf("Extended model information: %s\n", GTApproxModelDescription(model, 0));
    GTApproxModelFree(model);
    return 0;
  }

  /* check AE */
  if (calculateAE && !hasAE) {
    printf("The model from %s does not support accuracy evaluation.\n", modelFileName);
    GTApproxModelFree(model);
    return 0;
  }

  /* open input file if exists */
  if (strcmp(inputFileName, "-") && !(inputFile = fopen(inputFileName, "r"))) {
    fprintf(stderr, "Failed to open file %s for reading: %s\n", inputFileName, strerror(errno));
    GTApproxModelFree(model);
    return 2;
  }

  /* allocate input/output buffers */
  input = (double*)malloc((inputSize + 1) * sizeof(double));
  output = (double*)malloc(outputSize * (inputSize + 1) * sizeof(double));

  while(!feof(inputFile) && !ferror(inputFile)) {
    int valuesRead, linebufFill = 0;
    char *tokens;

    /* read line from the input stream */
    while (fgets(linebuf + linebufFill, linebufSize - linebufFill, inputFile)) {
      linebufFill += strlen(linebuf + linebufFill);
      if ('\n' == linebuf[linebufFill - 1]) {
        /* EOL detected */
        break;
      } else if (!(linebuf = (char*)realloc(linebuf, (linebufSize += 256)))){
        /* can not increment buffer - out of memory */
        fprintf(stderr, "Cannot allocate line buffer of size %d: out of memory!\n", linebufSize);
        return 1;
      }
    }

    /* tokenize it and read input floats */
    valuesRead = 0;
    if (linebufFill > 0) {
      for (tokens = strtok(linebuf, ","); tokens; (tokens = strtok(0, ","))) {
        char* tailptr = tokens;
        input[(valuesRead < inputSize) ? valuesRead : inputSize] = strtod(tokens, &tailptr);
        if (tailptr > tokens) {
          ++ valuesRead;
        }
      }
    }

    if (inputSize == valuesRead) {
      if (!calculateAE) {
        GTApproxModelCalc(model, input, 1, output, 1, &error);
        handleError(&error, "Failed to calculate model value", 0);
        GTApproxModelGrad(model, input, 1, output + outputSize, inputSize, 1, &error);
        handleError(&error, "Failed to calculate model gradient", 0);
      } else {
        GTApproxModelCalcAE(model, input, 1, output, 1, &error);
        handleError(&error, "Failed to estimate model AE", 0);
        GTApproxModelGradAE(model, input, 1, output + outputSize, inputSize, 1, &error);
        handleError(&error, "Failed to estimate gradient of model AE", 0);
      }

      /* print output to stdout */
      printf("%lf", output[0]);
      for (i = 1; i < outputSize * (inputSize + 1); ++ i) {
        printf(", %lf", output[i]);
      }
      printf("\n");
    } else if (valuesRead > 0) {
      /* report input error */
      fprintf(stderr, "Invalid input: %d out of %d required float values read from line.\n", valuesRead, inputSize);
    } else if (inputFile == stdin) {
      /* break on stdin, skip empty line in file */
      break;
    }
  }

  /* release resources */

  if (inputFile != stdin) {
    fclose(inputFile);
  }

  free(input);
  free(output);
  free(linebuf);

  GTApproxModelFree(model);

  return 0;
}

13.2.6. EvaluateGTModel.java

/*
 * Copyright (C) pSeven SAS, 2010-present
 *
 */
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.nio.charset.Charset;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.File;
import java.lang.NumberFormatException;

import net.datadvance.gtmodel.GTApproxModel;

/**
 * Sample code that loads a binary GTApprox model and evaluates model responses.
 * To compile this class use the following command:
 *
 *   javac -cp gtmodel.jar EvaluateGTModel.java
 *
 */
public class EvaluateGTModel {
  static void usage() {
    System.err.println("Usage:");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel%n", File.pathSeparator);
    System.err.println("  Print this message and exit.");
    System.err.println("");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel <model_file>%n", File.pathSeparator);
    System.err.println("  Load a GTApprox model from model_file and print model information.");
    System.err.println("");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel [-e] [-s] [-i] <model_file> <input_csv_1> ... <input_csv_N>%n", File.pathSeparator);
    System.err.println("  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.");
    System.err.println("  Use -s switch to skip CSV header row.");
    System.err.println("  Use -i switch to print input values prior to model output.");
    System.err.println("");
    System.err.println("Model output is printed to stdout. Each line is a result for another input point.");
    System.err.println("");
    System.err.println("Print order:");
    System.err.println("  [optional input values (if -i switch is provided),]");
    System.err.println("  model (function) values,");
    System.err.println("  gradient (partial derivative) values for the first output component with respect to all input components,");
    System.err.println("  ...");
    System.err.println("  gradient (partial derivative) values for the last output component with respect to all input components.");
    System.exit(-1);
  }

  static double[] parseLine(String line, int expectedTokensNumber, Path fileName, int lineIndex) {
    if (null == line || 0 == line.length()) {
      return null;
    }

    // Disclaimer: never use this code for the real CSV parsing. It is inefficient and incomplete (does not ignore quoted commas).
    String[] stringInput = line.split(",");

    if (stringInput.length != expectedTokensNumber) {
      System.err.format("Line #%d of the file %s has invalid number of tokens: %d (%d expected)%n",
                lineIndex, fileName, stringInput.length, expectedTokensNumber);
      return null;
    }

    double[] modelInput = new double[stringInput.length];

    try {
      for (int i = 0; i < stringInput.length; i ++) {
        if (0 == stringInput[i].length()) {
          System.err.format("Line #%d of the file %s has empty field #%d. Using NaN as input value.%n", lineIndex, fileName, (i + 1));
          modelInput[i] = Double.NaN;
        } else {
          modelInput[i] = Double.parseDouble(stringInput[i]);
        }
      }
    } catch (NumberFormatException e) {
      System.err.format("Failed to parse line #%d of the file %s: %s%n", lineIndex, fileName, e);
      return null;
    }

    return modelInput;
  }

  public static void main(String[] args) throws IOException {
    boolean calculateAE = false;
    boolean skipHeaderRow = false;
    boolean printModelInput = false;

    // process options
    int argi = 0;
    while (argi < args.length) {
      String arg = args[argi];
      if (!arg.startsWith("-"))
        break;
      if (arg.length() < 2)
        usage();
      for (int i = 1; i < arg.length(); i ++) {
        char c = arg.charAt(i);
        switch (c) {
        case 'e':
          calculateAE = true;
          break;
        case 's':
          skipHeaderRow = true;
          break;
        case 'i':
          printModelInput = true;
          break;
        default:
          System.err.format("Invalid or unrecognized option given: %s%n%n", arg);
          usage();
        }
      }
      argi++;
    }

    // remaining arguments are the model data and CSV files to evaluate
    if (argi == args.length)
      usage();

    // load model
    Path modelPath = Paths.get(args[argi++]);
    GTApproxModel model = new GTApproxModel(Files.readAllBytes(modelPath));

    if (model == null) {
      System.err.format("Failed to load model %s%n", modelPath.toString());
      usage();
    }

    if (argi == args.length) {
      // model only is given: just print model info
      System.out.format("Model file: %s%n", modelPath.toString());
      System.out.format("Input dimension: %d%n", model.inputSize());
      System.out.format("Output dimension: %d%n", model.outputSize());
      System.out.format("Accuracy evaluation support: %s%n", (model.hasAE() ? "Yes" : "No"));
      System.out.format("Extended model information: %s%n", model.description());
    } else {
      if (calculateAE && !model.hasAE()) {
        System.err.format("AE requested but %s model does not support it.%n", modelPath.toString());
        System.exit(-2);
      }

      int modelInputSize = model.inputSize();

      // read input files
      while (argi < args.length) {
        Path dataPath = Paths.get(args[argi++]);
        Charset charset = Charset.forName("UTF-8");
        try (BufferedReader reader = Files.newBufferedReader(dataPath, charset)) {
          String line = null;
          int lineIndex = 0;

          if (skipHeaderRow) {
            reader.readLine();
            ++ lineIndex;
          }

          while ((line = reader.readLine()) != null) {
            double[] modelInput = parseLine(line.trim(), modelInputSize, dataPath, ++ lineIndex);

            if (null != modelInput) {
              if (printModelInput) {
                for (int i = 0; i < modelInput.length; i ++) {
                  System.out.format("%.17g,", modelInput[i]);
                }

              }

              double[] modelResponse;
              double[][] modelGradient;

              if (calculateAE) {
                modelResponse = model.calcAE(modelInput);
                modelGradient = model.calcGradAE(modelInput, model.GRADIENT_F_MAJOR);
              } else {
                modelResponse = model.calc(modelInput);
                modelGradient = model.calcGrad(modelInput, model.GRADIENT_F_MAJOR);
              }

              // print model response (always at least 1-dimensional)
              System.out.format("%.17g", modelResponse[0]);
              for (int i = 1; i < modelResponse.length; i ++) {
                System.out.format(",%.17g", modelResponse[i]);
              }

              // print model gradient
              for (int i = 0; i < modelGradient.length; i ++) {
                for (int j = 0; j < modelGradient[i].length; j ++) {
                  System.out.format(",%.17g", modelGradient[i][j]);
                }
              }

              System.out.format("%n");
            }
          }
        } catch (IOException e) {
          System.err.format("Failed to read file %s: %s%n", dataPath.toString(), e);
        }
      }
    }
  }
}