13.2. GTApprox

13.2.1. example_adaptive_doe.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#
"""Using adaptive DOE for approximation example."""

import numpy as np

from da.p7core import gtapprox
from da.p7core.loggers import LogLevel, StreamLogger
from da.p7core import blackbox as bb

class TestProblem(bb.Blackbox):
  def prepare_blackbox(self):
    self.add_variable((0, 1))
    self.add_variable((0, 1))
    self.add_response()

  def evaluate(self, queryx):
    x1 = queryx[:,0]
    x2 = queryx[:,1]
    f = 2.0 + 0.25 * (x2 - 5.0 * x1**2)**2 + (1.0 - 5.0 * x1)**2 + 2.0 * (2.0 - 5.0 * x2)**2 + 7.0 * np.sin(2.5 * x1) * np.sin(17.5 * x1 * x2)
    return f.reshape((queryx.shape[0], 1))

initial_x = np.array([[0.65, 0.15],
                      [0.55, 0.45],
                      [0.75, 0.85],
                      [0.35, 0.25],
                      [0.05, 0.05],
                      [0.15, 0.55],
                      [0.95, 0.65],
                      [0.45, 0.75],
                      [0.85, 0.35],
                      [0.25, 0.95]])

def get_sample(problem, points, x_sample=None, f_sample=None):
  """Data set generation."""
  from da.p7core import gtdoe
  doegenerator = gtdoe.Generator()
  doegenerator.options.set({'GTDoE/Technique': 'LHS'})
  result = doegenerator.build_doe(problem, count=points, sample_x=x_sample, sample_f=f_sample)

  xSample = result.solutions(["x"], filter_type="new")
  fSample = result.solutions(["f"], filter_type="new")

  return xSample, fSample

def main():
  """Example to demonstrate AE and adaptive sampling."""
  # create builder
  builder = gtapprox.Builder()
  options = {
    'GTApprox/InternalValidation': 'off',
    'GTApprox/AccuracyEvaluation': 'on',
    'GTApprox/ExactFitRequired': 'on',
    'GTApprox/Technique': 'GP',
    'GTApprox/LogLevel': 'Info'
  }
  builder.options.set(options)
  builder.set_logger(StreamLogger(log_level=LogLevel.ERROR))

  # prepare initial training sample
  problem = TestProblem()
  initial_f = problem.evaluate(initial_x)

  print("Training initial model...")
  model = builder.build(initial_x, initial_f)
  print("Done!")

  # improve model
  iterations = 6
  additionalSize = 10
  testSize = 500
  x_sample = initial_x
  f_sample = initial_f

  for _ in range(iterations):
    # generate new big test sample
    x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)

    # calculate actual and predicted errors on test sample
    errors = model.validate(x_test, f_test)
    print("Training sample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

    # find max predicted error in test sample points
    ae_predictions = model.calc_ae(x_test).reshape((len(x_test),))
    idx = np.argsort(ae_predictions)

    # add additionalSize points with maximal AE values to the training samples
    x_sample = np.vstack((x_sample, x_test[idx[-additionalSize:]]))
    f_sample = np.vstack((f_sample, f_test[idx[-additionalSize:]]))

    # build next improved model
    model = builder.build(x_sample, f_sample)

  # calculate final error
  x_test, f_test = get_sample(problem, testSize, x_sample, f_sample)
  errors = model.validate(x_test, f_test)
  print("Training sample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

  print("Final result:\n--------------\nSample size: %d, RRMS: %s" % (len(x_sample), errors["RRMS"]))

if __name__ == "__main__":
  main()
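
If you want to keep the improved model from the last iteration, it can be saved and reloaded in the same way as in example_gtapprox.py below. A minimal sketch, to be added at the end of main() (the file name adaptive_model.gta is arbitrary):

# Sketch: persist the adaptively improved model and reload it without retraining.
model.save('adaptive_model.gta')
reloaded_model = gtapprox.Model('adaptive_model.gta')
print(reloaded_model)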

13.2.2. example_gtapprox.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""Basic GTApprox Usage."""

from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger

import random

def main():
  """
  Example of GTApprox usage.
  """

  # prepare data
  sSize = 7
  dim = 2
  random.seed(10)
  # generate a random sample in the unit hypercube and evaluate the test function
  x_sample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]
  y_sample = [[x[i]**(i+1) for i in range(len(x))] for x in x_sample]

  # create approximation builder
  builder = gtapprox.Builder()
  # setup options
  options = {
    'GTApprox/AccuracyEvaluation': 'on',
    'GTApprox/Technique': 'GP',
    'GTApprox/InternalValidation': 'on',
    'GTApprox/LogLevel': 'Info'
  }
  builder.options.set(options)
  # setup logger function (optional)
  logger = StreamLogger()
  builder.set_logger(logger)
  # train model
  model = builder.build(x_sample, y_sample)
  # save trained model
  model.save('approxModel.gta')

  # load trained model and use it
  loaded_model = gtapprox.Model('approxModel.gta')

  # print model information
  print('----------- Model -----------')
  print('SizeX: %d' % loaded_model.size_x)
  print('SizeF: %d' % loaded_model.size_f)
  print('Model has AE: %s' % loaded_model.has_ae)
  print('----------- Info -----------')
  print(str(loaded_model))

  # create test sample
  test_xsample = [[random.uniform(0., 1.) for i in range(dim)] for j in range(sSize)]

  # calculate and display approximated values
  for x in test_xsample:
    y = loaded_model.calc(x)
    print('Model Y: %s' % y)

  # calculate and display gradients
  for x in test_xsample:
    dy = loaded_model.grad(x, gtapprox.GradMatrixOrder.F_MAJOR)
    print('Model gradient: %s' % dy)

  # calculate and display AE
  for x in test_xsample:
    ae = loaded_model.calc_ae(x)
    print('Model AE: %s' % ae)

if __name__ == "__main__":
  main()
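
Note that the evaluation methods also accept a whole sample at once: in example_axis_rotations.py below, Model.calc() is called on an N-by-2 matrix of input points. A minimal sketch reusing loaded_model and test_xsample from the listing above:

# Sketch: evaluate all test points in a single call instead of looping point by point.
batch_y = loaded_model.calc(test_xsample)
print('Model Y (batch): %s' % batch_y)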

13.2.3. example_internal_validation.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""Internal Validation Example"""

from da.p7core import gtapprox, gtdoe
from da.p7core.loggers import StreamLogger

import numpy as np

def rosenbrock(x):
  """
  Rosenbrock function (a relatively simple function to approximate)
  """
  x1 = x[:,0]
  x2 = x[:,1]
  return (1.0 - x1)**2 + 100.0 * (x2 - x1**2)**2

def rastrigin(x):
  """
  Rastrigin function (a highly multimodal, hard-to-approximate function)
  """
  x1 = x[:,0]
  x2 = x[:,1]
  return 20.0 + (x1**2 - 10.0 * np.cos(2.0 * np.pi * x1)) + (x2**2 - 10.0 * np.cos(2.0 * np.pi * x2))

def main():
  """
  Example to demonstrate IV
  """
  # create generator
  generator = gtdoe.Generator()

  # create builder
  builder = gtapprox.Builder()
  builder.options.set('GTApprox/LogLevel', 'Info')
  builder.options.set('GTApprox/InternalValidation', 'on')
  # set logger
  logger = StreamLogger()
  builder.set_logger(logger)

  # data description
  dim_x = 2
  dim_f = 1
  bounds = ([-1]*dim_x, [1]*dim_x)
  training_sample_size = 20
  test_sample_size = 25**dim_x

  # generating training input sample
  result = generator.build_doe(bounds, training_sample_size, options={'GTDoE/Technique': 'LHS'})
  x_sample = result.solutions(["x"])

  print("IV example\n---------------")
  # generating training output sample for rosenbrock model
  y_sample = rosenbrock(x_sample)
  print("Training model 1...")
  model_rosenbrock = builder.build(x_sample, y_sample)
  print("Done!\n")

  # generating training output sample for rastrigin model
  y_sample = rastrigin(x_sample)
  print("Training model 2...")
  model_rastrigin = builder.build(x_sample, y_sample)
  print("Done!\n\n")

  # generating test sample
  result = generator.build_doe(bounds, test_sample_size, options={'GTDoE/Technique': 'FullFactorial'})
  x_test = result.solutions(["x"])
  f_rosenbrock = rosenbrock(x_test)
  f_rastrigin = rastrigin(x_test)

  # calculating error on test sample
  err_rosenbrock = model_rosenbrock.validate(x_test, f_rosenbrock)
  err_rastrigin = model_rastrigin.validate(x_test, f_rastrigin)

  print("Actual RMS error on Rosenbrock function:")
  print(str(err_rosenbrock["RMS"]))
  print("Internal validation results for Rosenbrock function:")
  print(str(model_rosenbrock.iv_info["Componentwise"]["RMS"]))

  print("\n\nActual RMS error on Rastrigin function:")
  print(str(err_rastrigin["RMS"]))
  print("Internal validation results for Rastrigin function:")
  print(str(model_rastrigin.iv_info["Componentwise"]["RMS"]))

if __name__ == "__main__":
  main()
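
The iv_info attribute used above holds the full internal validation report, not only the componentwise RMS values printed in this example. A minimal sketch for inspecting everything it stores, to be run after building a model as in main() above:

# Sketch: dump the complete internal validation report of a model.
import pprint
pprint.pprint(model_rosenbrock.iv_info)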

13.2.4. example_axis_rotations.py

#
# coding: utf-8
# Copyright (C) pSeven SAS, 2010-present
#

"""
This example illustrates the benefits of using the GTApprox/MaxAxisRotations option.
"""

#[0] Required imports
import os
import sys
import numpy as np
from datetime import datetime

from da.p7core import gtapprox
from da.p7core.loggers import StreamLogger
#[0] imports end

#[1] Function to be approximated: normalized Rana's function with diagonal wrap
def rana(x):
  # Evaluate n-dimensional normalized Rana function.
  # Note 2D case can be reduced to the following formula:
  #          x1 * sin(sqrt(abs(x2 - x1 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
  #  + (x1 + 1) * cos(sqrt(abs(x1 - x2 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))
  #  +       x2 * sin(sqrt(abs(x1 - x2 + 1))) * cos(sqrt(abs(x2 + x1 + 1)))
  #  + (x2 + 1) * cos(sqrt(abs(x2 - x1 + 1))) * sin(sqrt(abs(x2 + x1 + 1)))

  x  = np.array(x, dtype=float)
  xj = x[:, range(-1, x.shape[1] - 1)]
  t1 = np.sqrt(np.abs(xj - x + 1))
  t2 = np.sqrt(np.abs(xj + x + 1))
  return (x * np.sin(t1) * np.cos(t2) + (xj + 1.) * np.cos(t1) * np.sin(t2)).sum(axis=1).reshape((-1,1)) / x.shape[1]
#[1]

#[2] main workflow
def main():
  #[2-1] Generate training and validation data
  print("Generating train and validation datasets...")
  x_min, x_max = -520., -450. # both input coordinates have the same range
  train_x = square_grid(x_min, x_max, 8) # generate train input as full factorial 8-by-8 grid within [x_min, x_max] range
  train_y = rana(train_x) # evaluate true function at train input points
  validation_x = square_grid(x_min, x_max, 100) # generate validation input as full factorial 100-by-100 grid within [x_min, x_max] range
  validation_y = rana(validation_x) # evaluate true function at validation input points
  print("Done. Train data contain %d vectors while validation data contain %d vectors." % (train_x.shape[0], validation_x.shape[0]))
  #[2-1]

  #[2-2] Initialize model builder
  print('Creating GTApprox Builder...')
  gtapprox_builder = gtapprox.Builder() # create the model builder object
  gtapprox_builder.set_logger(StreamLogger()) # attach console logger to the model builder

  # Manually select HDA technique. It is neither necessary nor optimal for this particular
  # simplified case, but usually it is the optimal choice for a huge multidimensional dataset.
  gtapprox_builder.options.set("GTApprox/Technique", "HDA")
  #[2-2]

  #[2-3] Create HDA approximation using default options.
  print("\n\nTraining HDA model with default options (note 'GTApprox/MaxAxisRotations'=0 by default)...")
  print("-" * 74)
  time_start = datetime.now()
  model_default = gtapprox_builder.build(train_x, train_y) # create model using default options (implies no axis rotations)
  time_default = datetime.now() - time_start
  print("-" * 74)
  #[2-3]

  #[2-4] Create HDA approximation using axis rotations.
  print("\n\nTraining HDA model with auto selection of the axis rotations number ('GTApprox/MaxAxisRotations'=-1)...")
  print("-" * 74)
  time_start = datetime.now()
  model_rotations = gtapprox_builder.build(train_x, train_y, options={"GTApprox/MaxAxisRotations": -1}) # create model with 'auto' selected number of axis rotations
  time_rotations = datetime.now() - time_start
  print("-" * 74)
  #[2-4]

  #[2-5] Comparing models built with and without axis rotations
  print("\n\nCalculating validation errors...")
  # The validate() method returns a dictionary that maps each error metric name to a vector of per-output error values.
  errors_default = model_default.validate(validation_x, validation_y)
  errors_rotations = model_rotations.validate(validation_x, validation_y)
  print("-" * 74)
  print("| %-25s| %-20s | %-20s |" % ("", "Default options", "Use axis rotations"))
  print("-" * 74)
  # Print training time.
  print("| %-25s| %-20s | %-20s |" % ("Training time", time_default, time_rotations))
  # Print the approximation technique used by each model.
  print("| %-25s| %-20s | %-20s |" % ("Technique selected", model_default.details["Technique"], model_rotations.details["Technique"]))
  print("-" * 74)
  for error_name in sorted(errors_default.keys()):
    # Print error metric and its value. Note output of the model is 1-dimensional.
    print("| %-25s| %-20.5g | %-20.5g |" % (error_name, errors_default[error_name][0], errors_rotations[error_name][0]))
  print("-" * 74)
  #[2-5]

  #[2-7] Display 3D plots of the models. The first argument of the helper function is a dictionary mapping a model title
  # to a callable that evaluates function values, so we can seamlessly mix the models' calc() methods and the pure Python rana() function.
  plot_3D({"Model built with default options": model_default.calc,
           "Model built with axis rotations": model_rotations.calc,
           "True normalized Rana's function with diagonal wrap": rana}, \
          train_x, train_y)
  #[2-7]

  #[2-8] Display contour plots.
  plot_2D({"Model built with default options": model_default.calc,
           "Model built with axis rotations": model_rotations.calc,
           "True normalized Rana's function with diagonal wrap": rana}, \
          train_x, train_y, [-400., -200., 0., 200., 400.])
  #[2-8]
#[2]

#[3] Helper function for 2D grid generation
def square_grid(x_min, x_max, n_x):
  x_factor = np.linspace(x_min, x_max, n_x).reshape(-1, 1)
  return np.hstack((np.repeat(x_factor, x_factor.shape[0], axis=0),
                    np.tile(x_factor, (x_factor.shape[0], 1))))
#[3]

#[4] Helper function for 3D plotting
def plot_3D(named_models, train_x, train_y):
  print('Plotting 3D charts...')
  try:
    # Try to import required matplotlib library
    from mpl_toolkits.mplot3d import Axes3D
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fontsize = 10 # titles and labels font size
    train_dataset_color = (0., .5, 0., 1.0) # color of the training dataset points
    model_prediction_color = (0., 0., 1., 0.8) # color of the model values mesh
    background_color = (1., 1., 1., 1.) # plot background color

    # Create mesh grid for plotting model predictions
    x_mesh = np.meshgrid(np.linspace(train_x[:, 0].min(), train_x[:, 0].max(), 100),
                         np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 100))

    # Convert mesh grid to the gtapprox.Model.calc()-compatible representation
    model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))

    figures_list = [] # special list of objects that should be stored until plot is removed

    # Enumerate model names. The model index is only needed for PNG file name generation.
    for model_index, model_name in enumerate(named_models):
      # Create new plot window.
      figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)

      # Configure 3D plot.
      subplot = figure.add_subplot(111, projection='3d')
      subplot.tick_params(labelsize=fontsize)
      subplot.set_xlabel("x1", fontsize=fontsize)
      subplot.set_ylabel("x2", fontsize=fontsize)
      subplot.set_zlabel("f(x1, x2)", fontsize=fontsize)

      # draw scatter plot of the training dataset
      plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], train_y[:, 0], color=train_dataset_color, s=4)

      # Calculate model predictions. Note that the callable returns an N-by-1 matrix, while the wireframe plot requires
      # a mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately, the order of points in the flattened matrix
      # is the same for x_mesh and the model predictions, so we can simply reshape to x_mesh[0].shape.
      model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)

      # draw wireframe mesh of models predictions
      plot_predictions = subplot.plot_wireframe(x_mesh[0], x_mesh[1], model_predictions, color=model_prediction_color, linewidth=0.2)

      # Setup legend and title.
      figure.legend(handles=[plot_dataset, plot_predictions], \
                    labels=['Train dataset', 'Model predictions'], prop={'size': fontsize})
      figure.suptitle(model_name, fontsize=(fontsize+2))

      # Make some layout cosmetics.
      figure.tight_layout()
      figure.get_axes()[0].view_init(azim=-135., elev=60.)

      # Generate file name.
      filename = 'gtapprox_example_axis_rotations_surface_%d.png' % model_index  # distinct from the contour plot file names

      # Python 2/3 compatibility workaround
      try:
        filename = os.path.join(os.getcwdu(), filename)
      except AttributeError:
        filename = os.path.join(os.getcwd(), filename)

      # Save figure as PNG file.
      figure.savefig(filename)
      print('Plot of the "%s" is saved to %s' % (model_name, filename))

      # Store figure object until plot is shown.
      figures_list.append(figure)

    if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
      plt.show()
  except ImportError:
    print("Plotting is disabled due to absence of the matplotlib library.")
  except:
    print("Failed to plot figure: %s" % sys.exc_info()[1])
#[4]

#[5] Helper function for contour plotting
def plot_2D(named_models, train_x, train_y, levels):
  print('Plotting contour charts...')
  try:
    # Try to import required matplotlib library
    import matplotlib.patches as mpatches
    import matplotlib.pyplot as plt
    from matplotlib import cm

    fontsize = 10 # titles and labels font size
    train_dataset_color = (0., .5, 0., 1.0) # color of the training dataset points
    background_color = (1., 1., 1., 1.) # plot background color

    # Create mesh grid for plotting model predictions. We need a lot of points to generate accurate contour plot.
    x_mesh = np.meshgrid(np.linspace(train_x[:, 0].min(), train_x[:, 0].max(), 400),
                         np.linspace(train_x[:, 1].min(), train_x[:, 1].max(), 400))

    # Convert mesh grid to the gtapprox.Model.calc()-compatible representation
    model_x = np.hstack((x_mesh[0].flatten().reshape(-1, 1), x_mesh[1].flatten().reshape(-1, 1)))

    figures_list = [] # special list of objects that should be stored until plot is removed

    # Enumerate model names. The model index is only needed for PNG file name generation.
    for model_index, model_name in enumerate(named_models):
      # Create new plot window.
      figure = plt.figure(figsize=(10., 7.), facecolor=background_color, edgecolor=background_color)

      # Configure 2D plot.
      subplot = figure.add_subplot(111)
      subplot.tick_params(labelsize=fontsize)
      subplot.set_xlabel("x1", fontsize=fontsize)
      subplot.set_ylabel("x2", fontsize=fontsize)

      # Draw scatter plot of the training dataset.
      plot_dataset = subplot.scatter(train_x[:, 0], train_x[:, 1], color=train_dataset_color, s=4)

      # Calculate model predictions. Note that the callable returns an N-by-1 matrix, while the contour plot requires
      # a mesh with the same shape as x_mesh[0] and x_mesh[1]. Fortunately, the order of points in the flattened matrix
      # is the same for x_mesh and the model predictions, so we can simply reshape to x_mesh[0].shape.
      model_predictions = named_models[model_name](model_x).reshape(x_mesh[0].shape)

      # Draw models predictions as contour plot.
      levels = sorted(levels)
      plot_predictions = subplot.contour(x_mesh[0], x_mesh[1], model_predictions, cmap=cm.coolwarm, levels=levels)
      plot_predictions.clabel(inline=1, fontsize=(fontsize-2))

      # Setup legend and title.
      legend_labels = ['Train dataset']
      legend_colors = [plot_dataset]

      for level, contour_level in zip(levels, plot_predictions.collections):
        legend_labels.append('f(x1, x2) = %g' % level)
        legend_colors.append(contour_level)  # keep legend handles and labels in sync

      figure.legend(handles=legend_colors, labels=legend_labels, prop={'size': fontsize})
      figure.suptitle(model_name, fontsize=(fontsize+2))

      # Generate file name.
      filename = 'gtapprox_example_axis_rotations_contour_%d.png' % model_index

      # Python 2/3 compatibility workaround
      try:
        filename = os.path.join(os.getcwdu(), filename)
      except AttributeError:
        filename = os.path.join(os.getcwd(), filename)

      # Save figure as PNG file.
      figure.savefig(filename)
      print('Plot of the "%s" is saved to %s' % (model_name, filename))

      # Store figure object until plot is shown.
      figures_list.append(figure)

    if 'SUPPRESS_SHOW_PLOTS' not in os.environ:
      plt.show()
  except ImportError:
    print("Plotting is disabled due to absence of the matplotlib library.")
  except:
    print("Failed to plot figure: %s" % sys.exc_info()[1])
#[5]

if __name__ == "__main__":
  main()
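
To run this example in a non-interactive environment, note that the plotting helpers above check the SUPPRESS_SHOW_PLOTS environment variable: when it is set, plt.show() is skipped and only the PNG files are written. A minimal sketch of setting it from Python before calling main():

# Sketch: disable interactive plot windows; the PNG files are still saved.
import os
os.environ['SUPPRESS_SHOW_PLOTS'] = '1'  # the helpers only check that the variable is present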

13.2.5. example_gtmodel.c

/*
 * Copyright (C) pSeven SAS, 2010-present
 *
 * Example of using the C interface to GTApprox models (GTModel for C).
 *
 */

#include "GTApproxModel.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void handleError(GTModelError** error, const char* prefix, int fatalError) {
  if (*error) {
    /* report and free error */
    fprintf(stderr, "%s: %s\n", prefix, GTModelErrorDescription(*error));
    GTModelErrorFree(*error);
    if (fatalError) {
      /* exit if error marked as 'fatal' */
      exit(1);
    } else {
      /* reset error variable otherwise */
      *error = 0;
    }
  }
}

int main(int argc, char* argv[]) {
  int calculateAE = 0; /* indicates whether AE calculation is requested */
  char* modelFileName = 0; /* path to the model file */
  char* inputFileName = 0; /* path to the input CSV file or '-' to use stdin */
  FILE* inputFile = stdin; /* pointer to input file (stdin by default) */

  int inputSize = 0; /* model input vector size */
  int outputSize = 0; /* model output vector size */
  int hasAE = 0; /* indicates whether the model supports AE */

  double* input; /* input buffer */
  double* output; /* output buffer */

  int linebufSize = 256; /* actual size of the input line buffer  */
  char* linebuf = (char*)malloc(linebufSize); /* input line buffer */
  int i; /* counter */

  GTApproxModel* model = 0;
  GTModelError* error = 0;

  if (argc > 1 && !strcmp(argv[1], "-e")) {
    calculateAE = 1;
  }

  if (argc > (calculateAE+1)) {
    modelFileName = argv[1 + calculateAE];
  }

  if (argc > (calculateAE+2)) {
    inputFileName = argv[2 + calculateAE];
  }

  if (!modelFileName) {
    /* print help if no model path given */
    printf(
      "Usage:\n"
      "    %s\n"
      "  Print this message and exit.\n"
      "\n"
      "    %s model_file\n"
      "  Load a GTApprox model from model_file and print model information.\n"
      "\n"
      "    %s [-e] model_file input.csv\n"
      "  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.\n"
      "\n"
      "    %s [-e] model -\n"
      "  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the input given by stdin.\n"
      "\n"
      "Model output is printed to stdout. Each line is a result for another input point.\n"
      "Print order:\n"
      "  model (function) values,\n"
      "  gradient (partial derivative) values for the first output component with respect to all input components,\n"
      "  ...\n"
      "  gradient (partial derivative) values for the last output component with respect to all input components.\n"
      "\n",
      argv[0], argv[0], argv[0], argv[0]);
    return 0;
  }

  /* load model */
  if (!(model = GTApproxModelLoad(modelFileName, &error))) {
    fprintf(stderr, "Failed to load model %s: %s\n", modelFileName, GTModelErrorDescription(error));
    GTModelErrorFree(error);
    return 1;
  }

  /* get model parameters */
  inputSize = GTApproxModelInputSize(model, 0);
  outputSize = GTApproxModelOutputSize(model, 0);
  hasAE = GTApproxModelHasAE(model, 0);

  if (!inputFileName) {
    /* print model info and exit */
    printf("Model file: %s\n", modelFileName);
    printf("Input dimension: %d\n", inputSize);
    printf("Output dimension: %d\n", outputSize);
    printf("Accuracy evaluation support: %s\n", (hasAE ? "Yes" : "No"));
    printf("Extended model information: %s\n", GTApproxModelDescription(model, 0));
    GTApproxModelFree(model);
    return 0;
  }

  /* check AE */
  if (calculateAE && !hasAE) {
    printf("The model from %s does not support accuracy evaluation.\n", modelFileName);
    GTApproxModelFree(model);
    return 0;
  }

  /* open input file if exists */
  if (strcmp(inputFileName, "-") && !(inputFile = fopen(inputFileName, "r"))) {
    fprintf(stderr, "Failed to open file %s for reading: %s\n", inputFileName, strerror(errno));
    GTApproxModelFree(model);
    return 2;
  }

  /* allocate input/output buffers */
  input = (double*)malloc((inputSize + 1) * sizeof(double));
  output = (double*)malloc(outputSize * (inputSize + 1) * sizeof(double));

  while(!feof(inputFile) && !ferror(inputFile)) {
    int valuesRead, linebufFill = 0;
    char *tokens;

    /* read line from the input stream */
    while (fgets(linebuf + linebufFill, linebufSize - linebufFill, inputFile)) {
      linebufFill += strlen(linebuf + linebufFill);
      if ('\n' == linebuf[linebufFill - 1]) {
        /* EOL detected */
        break;
      } else if (!(linebuf = (char*)realloc(linebuf, (linebufSize += 256)))){
        /* can not increment buffer - out of memory */
        fprintf(stderr, "Cannot allocate line buffer of size %d: out of memory!\n", linebufSize);
        return 1;
      }
    }

    /* tokenize it and read input floats */
    valuesRead = 0;
    if (linebufFill > 0) {
      for (tokens = strtok(linebuf, ","); tokens; (tokens = strtok(0, ","))) {
        char* tailptr = tokens;
        input[(valuesRead < inputSize) ? valuesRead : inputSize] = strtod(tokens, &tailptr);
        if (tailptr > tokens) {
          ++ valuesRead;
        }
      }
    }

    if (inputSize == valuesRead) {
      if (!calculateAE) {
        GTApproxModelCalc(model, input, 1, output, 1, &error);
        handleError(&error, "Failed to calculate model value", 0);
        GTApproxModelGrad(model, input, 1, output + outputSize, inputSize, 1, &error);
        handleError(&error, "Failed to calculate model gradient", 0);
      } else {
        GTApproxModelCalcAE(model, input, 1, output, 1, &error);
        handleError(&error, "Failed to estimate model AE", 0);
        GTApproxModelGradAE(model, input, 1, output + outputSize, inputSize, 1, &error);
        handleError(&error, "Failed to estimate gradient of model AE", 0);
      }

      /* print output to stdout */
      printf("%lf", output[0]);
      for (i = 1; i < outputSize * (inputSize + 1); ++ i) {
        printf(", %lf", output[i]);
      }
      printf("\n");
    } else if (valuesRead > 0) {
      /* report input error */
      fprintf(stderr, "Invalid input: %d out of %d required float values read from line.\n", valuesRead, inputSize);
    } else if (inputFile == stdin) {
      /* break on stdin, skip empty line in file */
      break;
    }
  }

  /* release resources */

  if (inputFile != stdin) {
    fclose(inputFile);
  }

  free(input);
  free(output);
  free(linebuf);

  GTApproxModelFree(model);

  return 0;
}

13.2.6. EvaluateGTModel.java

/*
 * Copyright (C) pSeven SAS, 2010-present
 *
 */
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.Files;
import java.nio.charset.Charset;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.File;
import java.lang.NumberFormatException;

import net.datadvance.gtmodel.GTApproxModel;

/**
 * Sample code that loads a binary GTApprox model and evaluates model responses.
 * To compile this class use the following command:
 *
 *   javac -cp gtmodel.jar EvaluateGTModel.java
 *
 */
public class EvaluateGTModel {
  static void usage() {
    System.err.println("Usage:");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel%n", File.pathSeparator);
    System.err.println("  Print this message and exit.");
    System.err.println("");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel <model_file>%n", File.pathSeparator);
    System.err.println("  Load a GTApprox model from model_file and print model information.");
    System.err.println("");
    System.err.format ("    java -cp gtmodel.jar%s. EvaluateGTModel [-e] [-s] [-i] <model_file> <input_csv_1> ... <input_csv_N>%n", File.pathSeparator);
    System.err.println("  Load a GTApprox model from model_file and calculate model output (AE, if -e switch specified) for the inputs given in a comma-separated input file.");
    System.err.println("  Use -s switch to skip CSV header row.");
    System.err.println("  Use -i switch to print input values prior to model output.");
    System.err.println("");
    System.err.println("Model output is printed to stdout. Each line is a result for another input point.");
    System.err.println("");
    System.err.println("Print order:");
    System.err.println("  [optional input values (if -i switch is provided),]");
    System.err.println("  model (function) values,");
    System.err.println("  gradient (partial derivative) values for the first output component with respect to all input components,");
    System.err.println("  ...");
    System.err.println("  gradient (partial derivative) values for the last output component with respect to all input components.");
    System.exit(-1);
  }

  static double[] parseLine(String line, int expectedTokensNumber, Path fileName, int lineIndex) {
    if (null == line || 0 == line.length()) {
      return null;
    }

    // Disclaimer: never use this code for the real CSV parsing. It is inefficient and incomplete (does not ignore quoted commas).
    String[] stringInput = line.split(",");

    if (stringInput.length != expectedTokensNumber) {
      System.err.format("Line #%d of the file %s has invalid number of tokens: %d (%d expected)%n",
                lineIndex, fileName, stringInput.length, expectedTokensNumber);
      return null;
    }

    double[] modelInput = new double[stringInput.length];

    try {
      for (int i = 0; i < stringInput.length; i ++) {
        if (0 == stringInput[i].length()) {
          System.err.format("Line #%d of the file %s has empty field #%d. Using NaN as input value.%n", lineIndex, fileName, (i + 1));
          modelInput[i] = Double.NaN;
        } else {
          modelInput[i] = Double.parseDouble(stringInput[i]);
        }
      }
    } catch (NumberFormatException e) {
      System.err.format("Failed to parse line #%d of the file %s: %s%n", lineIndex, fileName, e);
      return null;
    }

    return modelInput;
  }

  public static void main(String[] args) throws IOException {
    boolean calculateAE = false;
    boolean skipHeaderRow = false;
    boolean printModelInput = false;

    // process options
    int argi = 0;
    while (argi < args.length) {
      String arg = args[argi];
      if (!arg.startsWith("-"))
        break;
      if (arg.length() < 2)
        usage();
      for (int i = 1; i < arg.length(); i ++) {
        char c = arg.charAt(i);
        switch (c) {
        case 'e':
          calculateAE = true;
          break;
        case 's':
          skipHeaderRow = true;
          break;
        case 'i':
          printModelInput = true;
          break;
        default:
          System.err.format("Invalid or unrecognized option given: %s%n%n", arg);
          usage();
        }
      }
      argi++;
    }

    // remaining arguments are the model data and CSV files to evaluate
    if (argi == args.length)
      usage();

    // load model
    Path modelPath = Paths.get(args[argi++]);
    GTApproxModel model = new GTApproxModel(Files.readAllBytes(modelPath));

    if (model == null) {
      System.err.format("Failed to load model %s%n", modelPath.toString());
      usage();
    }

    if (argi == args.length) {
      // model only is given: just print model info
      System.out.format("Model file: %s%n", modelPath.toString());
      System.out.format("Input dimension: %d%n", model.inputSize());
      System.out.format("Output dimension: %d%n", model.outputSize());
      System.out.format("Accuracy evaluation support: %s%n", (model.hasAE() ? "Yes" : "No"));
      System.out.format("Extended model information: %s%n", model.description());
    } else {
      if (calculateAE && !model.hasAE()) {
        System.err.format("AE requested but %s model does not support it.%n", modelPath.toString());
        System.exit(-2);
      }

      int modelInputSize = model.inputSize();

      // read input files
      while (argi < args.length) {
        Path dataPath = Paths.get(args[argi++]);
        Charset charset = Charset.forName("UTF-8");
        try (BufferedReader reader = Files.newBufferedReader(dataPath, charset)) {
          String line = null;
          int lineIndex = 0;

          if (skipHeaderRow) {
            reader.readLine();
            ++ lineIndex;
          }

          while ((line = reader.readLine()) != null) {
            double[] modelInput = parseLine(line.trim(), modelInputSize, dataPath, ++ lineIndex);

            if (null != modelInput) {
              if (printModelInput) {
                for (int i = 0; i < modelInput.length; i ++) {
                  System.out.format("%.17g,", modelInput[i]);
                }

              }

              double[] modelResponse;
              double[][] modelGradient;

              if (calculateAE) {
                modelResponse = model.calcAE(modelInput);
                modelGradient = model.calcGradAE(modelInput, model.GRADIENT_F_MAJOR);
              } else {
                modelResponse = model.calc(modelInput);
                modelGradient = model.calcGrad(modelInput, model.GRADIENT_F_MAJOR);
              }

              // print model response (always at least 1-dimensional)
              System.out.format("%.17g", modelResponse[0]);
              for (int i = 1; i < modelResponse.length; i ++) {
                System.out.format(",%.17g", modelResponse[i]);
              }

              // print model gradient
              for (int i = 0; i < modelGradient.length; i ++) {
                for (int j = 0; j < modelGradient[i].length; j ++) {
                  System.out.format(",%.17g", modelGradient[i][j]);
                }
              }

              System.out.format("%n");
            }
          }
        } catch (IOException e) {
          System.err.format("Failed to read file %s: %s%n", dataPath.toString(), e);
        }
      }
    }
  }
}