single-multivariate.c

     
   1  //! @file single-multivariate.c
   2  //! @author J. Marcel van der Veer
   3  //!
   4  //! @section Copyright
   5  //!
   6  //! This file is part of Algol68G - an Algol 68 compiler-interpreter.
   7  //! Copyright 2001-2023 J. Marcel van der Veer [algol68g@xs4all.nl].
   8  //!
   9  //! @section License
  10  //!
  11  //! This program is free software; you can redistribute it and/or modify it 
  12  //! under the terms of the GNU General Public License as published by the 
  13  //! Free Software Foundation; either version 3 of the License, or 
  14  //! (at your option) any later version.
  15  //!
  16  //! This program is distributed in the hope that it will be useful, but 
  17  //! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 
  18  //! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 
  19  //! more details. You should have received a copy of the GNU General Public 
  20  //! License along with this program. If not, see [http://www.gnu.org/licenses/].
  21  
  22  //! @section Synopsis
  23  //!
  24  //! REAL multivariate regression.
  25  
  26  #include "a68g.h"
  27  
  28  #if defined (HAVE_GSL)
  29  
  30  #include "a68g-torrix.h"
  31  #include "a68g-prelude-gsl.h"
  32  
  33  //! @brief Compute pseudo_inverse := pseudo inverse (X) 
  34  
  35  void compute_pseudo_inverse (NODE_T *p, gsl_matrix **pseudo_inverse, gsl_matrix *X, REAL_T lim)
  36  {
  37  //
  38  // The pseudo inverse gives a least-square approximate solution for a 
  39  // system of linear equations not having an exact solution.
  40  // Multivariate statistics is a well known application.
  41  //
  42    MATH_RTE (p, X == NO_REAL_MATRIX, M_ROW_ROW_REAL, "empty data matrix");
  43    MATH_RTE (p, lim < 0, M_REAL, "invalid limit");
  44    unt M = SIZE1 (X), N = SIZE2 (X);  
  45  // GSL only handles M >= N, transpose commutes with pseudo inverse.
  46    gsl_matrix *U;
  47    BOOL_T transpose = (M < N);
  48    if (transpose) {
  49      M = SIZE2 (X); N = SIZE1 (X);  
  50      U = gsl_matrix_calloc (M, N);
  51      gsl_matrix_transpose_memcpy (U, X);
  52    } else {
  53      U = gsl_matrix_calloc (M, N);
  54      gsl_matrix_memcpy (U, X);
  55    }
  56  // A = USVᵀ by Jacobi, more precise than Golub-Reinsch.
  57  // The GSL routine yields V, not Vᵀ. U is decomposed in place.
  58    gsl_matrix *V = gsl_matrix_calloc (N, N);
  59    gsl_vector *Sv = gsl_vector_calloc (N);
  60    ASSERT_GSL (gsl_linalg_SV_decomp_jacobi (U, V, Sv));
  61  // Compute S⁻¹. 
  62  // Diagonal elements < 'lim' * max (singular values) are set to zero.
  63  // Very small eigenvalues wreak havoc on a pseudo inverse.
  64  // Python NumPy default is 1e-15, but assumes a Hermitian matrix.
  65  // SVD gives singular values sorted in descending order.
  66    REAL_T lwb = gsl_vector_get (Sv, 0) * (lim > 0 ? lim : 1e-9);
  67    gsl_matrix *S_inv = gsl_matrix_calloc (N, M); // calloc sets matrix to 0.
  68    for (unt i = 0; i < N; i++) {
  69      REAL_T x = gsl_vector_get (Sv, i);
  70      if (x > lwb) {
  71        gsl_matrix_set (S_inv, i, i, 1 / x);
  72      } else {
  73        gsl_matrix_set (S_inv, i, i, 0);
  74      }
  75    }
  76    gsl_vector_free (Sv);
  77  // GSL SVD yields thin SVD - pad with zeros.
  78    gsl_matrix *Uf = gsl_matrix_calloc (M, M);
  79    for (unt i = 0; i < M; i++) {
  80      for (unt j = 0; j < N; j++) {
  81        gsl_matrix_set (Uf, i, j, gsl_matrix_get (U, i, j));
  82      }
  83    }
  84  // Compute pseudo inverse A⁻¹ = VS⁻¹Uᵀ. 
  85    gsl_matrix *VS_inv = gsl_matrix_calloc (N, M);
  86    gsl_matrix *X_inv = gsl_matrix_calloc (N, M);
  87    ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1, V, S_inv, 0, VS_inv));
  88    ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasTrans, 1, VS_inv, Uf, 0, X_inv));
  89  // Compose result.
  90    if (transpose) {
  91      (*pseudo_inverse) = gsl_matrix_calloc (M, N);
  92      gsl_matrix_transpose_memcpy ((*pseudo_inverse), X_inv);
  93    } else {
  94      (*pseudo_inverse) = gsl_matrix_calloc (N, M);
  95      gsl_matrix_memcpy ((*pseudo_inverse), X_inv);
  96    }
  97  // Clean up.
  98    gsl_matrix_free (S_inv);
  99    gsl_matrix_free (U);
 100    gsl_matrix_free (Uf);
 101    gsl_matrix_free (V);
 102    gsl_matrix_free (VS_inv);
 103    gsl_matrix_free (X_inv);
 104  }
 105  
 106  //! @brief PROC pseudo inv = ([, ] REAL, REAL) [, ] REAL
 107  
 108  void genie_matrix_pinv_lim (NODE_T * p)
 109  {
 110  // Compute the Moore-Penrose pseudo inverse of a MxN matrix.
 111  // G. Strang. "Linear Algebra and its applications", 2nd edition.
 112  // Academic Press [1980].
 113    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 114    torrix_error_node = p;
 115    A68_REAL lim;
 116    POP_OBJECT (p, &lim, A68_REAL);
 117    gsl_matrix *X = pop_matrix (p, A68_TRUE), *pseudo_inverse = NO_REAL_MATRIX;
 118    compute_pseudo_inverse (p, &pseudo_inverse, X, VALUE (&lim));
 119    push_matrix (p, pseudo_inverse);
 120    gsl_matrix_free (pseudo_inverse);
 121    gsl_matrix_free (X);
 122    (void) gsl_set_error_handler (save_handler);
 123  }
 124  
 125  //! @brief OP PINV = ([, ] REAL) [, ] REAL
 126  
 127  void genie_matrix_pinv (NODE_T * p)
 128  {
 129    PUSH_VALUE (p, 0, A68_REAL);
 130    genie_matrix_pinv_lim (p);
 131  }
 132  
 133  //! @brief column-centered data matrix.
 134  
 135  gsl_matrix *column_mean (gsl_matrix *DATA)
 136  {
 137  // M samples, N features.
 138    unt M = SIZE1 (DATA), N = SIZE2 (DATA);
 139    gsl_matrix *C = gsl_matrix_calloc (M, N);
 140    for (unt i = 0; i < N; i++) {
 141      DOUBLE_T sum = 0;
 142      for (unt j = 0; j < M; j++) {
 143        sum += gsl_matrix_get (DATA, j, i);
 144      }
 145      REAL_T mean = sum / M;
 146      for (unt j = 0; j < M; j++) {
 147        gsl_matrix_set (C, j, i, gsl_matrix_get (DATA, j, i) - mean);
 148      }
 149    }
 150    return C;
 151  }
 152  
 153  //! @brief take left columns from matrix.
 154  
 155  void genie_left_columns (NODE_T *p)
 156  {
 157    A68_INT k;
 158    POP_OBJECT (p, &k, A68_INT);
 159    gsl_matrix *X = pop_matrix (p, A68_TRUE); 
 160    unt M = SIZE1 (X), N = SIZE2 (X), cols = VALUE (&k);
 161    MATH_RTE (p, cols < 0, M_INT, "invalid number of columns");
 162    if (cols == 0 || cols > N) {
 163      cols = N;
 164    }
 165    gsl_matrix *Y = gsl_matrix_calloc (M, cols);
 166    for (unt i = 0; i < M; i++) {
 167      for (unt j = 0; j < cols; j++) {
 168        gsl_matrix_set (Y, i, j, gsl_matrix_get (X, i, j));
 169      }
 170    }
 171    push_matrix (p, Y);
 172    gsl_matrix_free (X);
 173    gsl_matrix_free (Y);
 174  }
 175  
 176  //! @brief OP MEAN = ([, ] REAL) [, ] REAL
 177  
 178  void genie_matrix_column_mean (NODE_T *p)
 179  {
 180    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 181    torrix_error_node = p;
 182    gsl_matrix *Z = pop_matrix (p, A68_TRUE); 
 183    unt M = SIZE1 (Z), N = SIZE2 (Z);
 184    gsl_matrix *A = gsl_matrix_calloc (M, N);
 185    for (unt i = 0; i < N; i++) {
 186      DOUBLE_T sum = 0;
 187      for (unt j = 0; j < M; j++) {
 188        sum += gsl_matrix_get (Z, j, i);
 189      }
 190      DOUBLE_T mean = sum / M;
 191      for (unt j = 0; j < M; j++) {
 192        gsl_matrix_set (A, j, i, mean);
 193      }
 194    }
 195    push_matrix (p, A);
 196    gsl_matrix_free (A);
 197    gsl_matrix_free (Z);
 198    (void) gsl_set_error_handler (save_handler);
 199  }
 200  
 201  //! @brief compute PCA of MxN matrix from covariance matrix
 202  //! @param eigen_values vector with eigen values on output
 203  //! @param X data matrix, samples in rows, features in columns
 204  
 205  gsl_matrix *compute_pca_cv (NODE_T *p, gsl_vector **eigen_values, gsl_matrix *X)
 206  {
 207  //
 208  // Forming the covariance matrix squares the condition number.
 209  // Hence this routine might loose more significant digits than SVD.
 210  // On the other hand, using PCA one looks for dominant eigenvectors.
 211  //
 212    MATH_RTE (p, X == NO_REAL_MATRIX, M_ROW_ROW_REAL, "empty data matrix");
 213  // M samples, N features.
 214    unt M = SIZE1 (X), N = SIZE2 (X);
 215  // Keep X column-mean-centered.
 216    gsl_matrix *C = column_mean (X);
 217  // Covariance matrix, M samples: Cov = XᵀX.
 218    M = MAX (M, N);
 219    gsl_matrix *CV = gsl_matrix_calloc (M, M);
 220    gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1, C, C, 0, CV);
 221  // Compute and sort eigenvectors.
 222    gsl_vector *Ev = gsl_vector_calloc (M);
 223    gsl_matrix *eigen_vectors = gsl_matrix_calloc (M, M);
 224    gsl_eigen_symmv_workspace *W = gsl_eigen_symmv_alloc (M);
 225    ASSERT_GSL (gsl_eigen_symmv (CV, Ev, eigen_vectors, W));
 226    ASSERT_GSL (gsl_eigen_symmv_sort (Ev, eigen_vectors, GSL_EIGEN_SORT_ABS_DESC));
 227  // Return eigen values, if required.
 228    if (eigen_values != NO_REF_VECTOR) {
 229      (*eigen_values) = gsl_vector_calloc (N);
 230      ASSERT_GSL (gsl_vector_memcpy ((*eigen_values), Ev));
 231    }
 232  // Clean up.
 233    gsl_eigen_symmv_free (W);
 234    gsl_matrix_free (C);
 235    gsl_matrix_free (CV);
 236    gsl_vector_free (Ev);
 237    return eigen_vectors;
 238  }
 239  
 240  //! @brief PROC pca cv = ([, ] REAL) [, ] REAL
 241  
 242  void genie_matrix_pca_cv (NODE_T * p)
 243  {
 244  // Principal component analysis of a MxN matrix.
 245    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 246    torrix_error_node = p;
 247    A68_REF ref_row;
 248    POP_REF (p, &ref_row);
 249    gsl_matrix *X = pop_matrix (p, A68_TRUE);
 250    gsl_vector *Ev;
 251    gsl_matrix *eigen_vectors = compute_pca_cv (p, &Ev, X);
 252    *DEREF (A68_REF, &ref_row) = vector_to_row (p, Ev);
 253    push_matrix (p, eigen_vectors);
 254    gsl_matrix_free (eigen_vectors);
 255    gsl_matrix_free (X);
 256    (void) gsl_set_error_handler (save_handler);
 257  }
 258  
 259  //! @brief compute PCA of MxN matrix by Jacobi SVD
 260  //! @param singular_values vector with singular values on output
 261  //! @param X data matrix, samples in rows, features in columns
 262  
 263  gsl_matrix *compute_pca_svd (NODE_T *p, gsl_vector **singular_values, gsl_matrix *X)
 264  {
 265  //
 266  // In PCA, SVD is closely related to eigenvector decomposition of the covariance matrix:
 267  // If Cov = XᵀX = VLVᵀ and X = USVᵀ then XᵀX = VSUᵀUSVᵀ = VS²Vᵀ, hence L = S².
 268  //
 269    MATH_RTE (p, X == NO_REAL_MATRIX, M_ROW_ROW_REAL, "empty data matrix");
 270  // Keep X column-mean-centered.
 271    gsl_matrix *C = column_mean (X), *U;
 272  // M samples, N features.
 273    unt M = SIZE1 (X), N = SIZE2 (X);
 274  // GSL does thin SVD, only handles M >= N, overdetermined systems.    
 275    BOOL_T transpose = (M < N);
 276    if (transpose) {
 277  // Xᵀ = VSUᵀ
 278      M = SIZE2 (X); N = SIZE1 (X);  
 279      U = gsl_matrix_calloc (M, N);
 280      gsl_matrix_transpose_memcpy (U, C);
 281    } else {
 282  // X = USVᵀ
 283      U = gsl_matrix_calloc (M, N);
 284      gsl_matrix_memcpy (U, C);
 285    }
 286  // X = USVᵀ by Jacobi, more precise than Golub-Reinsch.
 287  // GSL routine yields V, not Vᵀ, U develops in place.
 288    gsl_matrix *V = gsl_matrix_calloc (N, N);
 289    gsl_vector *Sv = gsl_vector_calloc (N);
 290    ASSERT_GSL (gsl_linalg_SV_decomp_jacobi (U, V, Sv));
 291  // Return singular values, if required.
 292    if (singular_values != NO_REF_VECTOR) {
 293      (*singular_values) = gsl_vector_calloc (N);
 294      ASSERT_GSL (gsl_vector_memcpy ((*singular_values), Sv));
 295    }
 296    gsl_matrix *eigen_vectors = gsl_matrix_calloc (N, M);
 297    if (transpose) {
 298      eigen_vectors = gsl_matrix_calloc (M, N);
 299      ASSERT_GSL (gsl_matrix_memcpy (eigen_vectors, U));
 300    } else {
 301      eigen_vectors = gsl_matrix_calloc (M, N);
 302      ASSERT_GSL (gsl_matrix_memcpy (eigen_vectors, V));
 303    }
 304  // Clean up.
 305    gsl_matrix_free (C);
 306    gsl_matrix_free (U);
 307    gsl_matrix_free (V);
 308    gsl_vector_free (Sv);
 309  // Done.
 310    return eigen_vectors;
 311  }
 312  
 313  //! @brief PROC pca svd = ([, ] REAL, REF [] REAL) [, ] REAL
 314  
 315  void genie_matrix_pca_svd (NODE_T * p)
 316  {
 317  // Principal component analysis of a MxN matrix.
 318    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 319    torrix_error_node = p;
 320    A68_REF ref_row;
 321    POP_REF (p, &ref_row);
 322    gsl_matrix *X = pop_matrix (p, A68_TRUE);
 323    gsl_vector *Sv;
 324    gsl_matrix *eigen_vectors = compute_pca_svd (p, &Sv, X);
 325    if (!IS_NIL (ref_row)) {
 326      *DEREF (A68_REF, &ref_row) = vector_to_row (p, Sv);
 327    }
 328    push_matrix (p, eigen_vectors);
 329    gsl_matrix_free (eigen_vectors);
 330    gsl_matrix_free (X);
 331    (void) gsl_set_error_handler (save_handler);
 332  }
 333  
 334  
 335  static gsl_matrix *mat_before_ab (NODE_T *p, gsl_matrix *u, gsl_matrix *v)
 336  {
 337  // Form A := A BEFORE B. Deallocate A, otherwise PLS1 leaks memory.
 338    gsl_matrix *w = matrix_hcat (p, u, v);
 339    if (u != NO_REAL_MATRIX) {
 340      gsl_matrix_free (u);
 341    }
 342    return w;
 343  }
 344  
 345  static gsl_matrix *mat_over_ab (NODE_T *p, gsl_matrix *u, gsl_matrix *v)
 346  {
 347  // Form A := A OVER B. Deallocate A, otherwise PLS1 leaks memory.
 348    gsl_matrix *w = matrix_vcat (p, u, v);
 349    if (u != NO_REAL_MATRIX) {
 350      gsl_matrix_free (u);
 351    }
 352    return w;
 353  }
 354  
 355  //! @brief PROC pls1 = ([, ] REAL, [, ] REAL, INT, REF [] REAL) [, ] REAL
 356  
 357  void genie_matrix_pls1 (NODE_T *p)
 358  {
 359  // PLS decomposes X and Y data concurrently as
 360  //
 361  // X = T Pᵀ + E
 362  // Y = U Qᵀ + F
 363  //
 364  // PLS1 is a widely used algorithm appropriate for the vector Y case.
 365  //
 366  // PLS1 with SVD solver for beta.
 367  // NIPALS algorithm with orthonormal scores and loadings.
 368  // See Ulf Indahl, Journal of Chemometrics 28(3) 168-180 [2014].
 369  //
 370  // Note that E is an MxN matrix and f, beta are Nx1 column vectors.
 371  // This for consistency with PLS2.
 372  //
 373  // Decompose Nc eigenvectors.
 374  // 
 375    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 376    torrix_error_node = p;
 377  // Pop arguments.
 378    A68_REF ref_eigenv;
 379    POP_REF (p, &ref_eigenv);
 380    A68_INT Nc;
 381    POP_OBJECT (p, &Nc, A68_INT); 
 382    gsl_matrix *F = pop_matrix (p, A68_TRUE);
 383    gsl_matrix *E = pop_matrix (p, A68_TRUE);
 384  // Catch wrong calls.
 385    unt Me = SIZE1 (E), Ne = SIZE2 (E), Mf = SIZE1 (F), Nf = SIZE2 (F);
 386    MATH_RTE (p, Mf == 0 || Nf == 0, M_ROW_ROW_REAL, "invalid column vector F");
 387    MATH_RTE (p, Me == 0 || Ne == 0, M_ROW_ROW_REAL, "invalid data matrix E");
 388    MATH_RTE (p, Me != Mf, M_ROW_ROW_REAL, "rows in F must match columns in E");
 389    MATH_RTE (p, Nf != 1, M_ROW_ROW_REAL, "F must be a column vector");
 390    MATH_RTE (p, VALUE (&Nc) < 0, M_INT, "invalid number of components");
 391    CHECK_INT_SHORTEN(p, VALUE (&Nc));
 392  // Sensible defaults.
 393    int Nk = VALUE (&Nc);
 394    if (Nk > Ne) {
 395      Nk = Ne;
 396    } else if (Nk == 0) { // All eigenvectors.
 397      Nk = Me;
 398    }
 399  // Decompose E and F.
 400    gsl_matrix *EIGEN = NO_REAL_MATRIX, *nE = NO_REAL_MATRIX, *nF = NO_REAL_MATRIX; 
 401    gsl_matrix *eig = gsl_matrix_calloc (Ne, 1);
 402    gsl_matrix *lat = gsl_matrix_calloc (Me, 1);
 403    gsl_matrix *pl = gsl_matrix_calloc (Ne, 1);
 404    gsl_matrix *ql = gsl_matrix_calloc (Nf, 1); // 1x1 in PLS1
 405    gsl_vector *Ev = gsl_vector_calloc (Nk);
 406  // Latent variable deflation.
 407    for (int k = 0; k < Nk; k ++) {
 408  // E weight from E, F covariance.
 409  // eig = Eᵀ * f / | Eᵀ * f |
 410      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, E, F, 0.0, eig));
 411      REAL_T norm = matrix_norm (eig);
 412      ASSERT_GSL (gsl_matrix_scale (eig, 1.0 / norm));
 413      gsl_vector_set (Ev, k, norm);
 414  // Compute latent variable.
 415  // lat = E * eig / | E * eig |
 416      ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, E, eig, 0.0, lat));
 417      norm = matrix_norm (lat);
 418      ASSERT_GSL (gsl_matrix_scale (lat, 1.0 / norm));
 419  // Deflate E and F, remove latent variable from both.
 420  // pl = Eᵀ * lat; E -= lat * plᵀ and ql = Fᵀ * lat; F -= lat * qlᵀ
 421      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, E, lat, 0.0, pl));
 422      ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasTrans, -1.0, lat, pl, 1.0, E));
 423      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, lat, F, 0.0, ql));
 424  // Build matrices.
 425      EIGEN = mat_before_ab (p, EIGEN, eig);
 426      nE = mat_before_ab (p, nE, pl); // P
 427      nF = mat_over_ab (p, nF, ql);   // Qᵀ
 428    }
 429  // Intermediate garbage collector.
 430    gsl_matrix_free (E);
 431    gsl_matrix_free (F);
 432    gsl_matrix_free (eig);
 433    gsl_matrix_free (lat);
 434    gsl_matrix_free (pl);
 435    gsl_matrix_free (ql);
 436  // Projection of original data = Eᵀ * EIGEN
 437    gsl_matrix *Pr = gsl_matrix_calloc (SIZE2 (nE), SIZE2 (EIGEN));
 438    ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, nE, EIGEN, 0.0, Pr));
 439  // 
 440  // Now beta = EIGEN * Pr⁻¹ * nF
 441  // Compute with SVD to avoid matrix inversion.
 442  // Matrix inversion is a source of numerical noise!
 443  // 
 444    gsl_matrix *u, *v; gsl_vector *s, *w;
 445    unt M = SIZE1 (Pr), N = SIZE2 (Pr);
 446  // GSL computes thin SVD, M >= N only, returning V, not Vᵀ.
 447    if (M >= N) {
 448  // X = USVᵀ
 449      s = gsl_vector_calloc (N);
 450      u = gsl_matrix_calloc (M, N);
 451      v = gsl_matrix_calloc (N, N);
 452      w = gsl_vector_calloc (N);
 453      gsl_matrix_memcpy (u, Pr);
 454      ASSERT_GSL (gsl_linalg_SV_decomp (u, v, s, w));
 455    } else {
 456  // Xᵀ = VSUᵀ
 457      s = gsl_vector_calloc (M);
 458      u = gsl_matrix_calloc (N, M);
 459      v = gsl_matrix_calloc (M, M);
 460      w = gsl_vector_calloc (M);
 461      gsl_matrix_transpose_memcpy (u, Pr);
 462      ASSERT_GSL (gsl_linalg_SV_decomp (u, v, s, w));
 463    }
 464  // Kill short eigenvectors cf. Python numpy.
 465    for (int k = 1; k < SIZE (s); k++) {
 466      if (gsl_vector_get (s, k) / gsl_vector_get (s, 0) < 1e-15) {
 467        gsl_vector_set (s, k, 0.0);
 468      }
 469    }
 470  // Solve for beta.
 471    gsl_vector *nFv = gsl_vector_calloc (Nk), *x = gsl_vector_calloc (Nk);
 472    gsl_matrix_get_col (nFv, nF, 0);
 473    if (M >= N) {
 474      ASSERT_GSL (gsl_linalg_SV_solve (u, v, s, nFv, x));
 475    } else {
 476      ASSERT_GSL (gsl_linalg_SV_solve (v, u, s, nFv, x));
 477    }
 478    gsl_matrix *beta = gsl_matrix_calloc (Ne, 1), *xmat = gsl_matrix_calloc (Nk, 1);
 479    ASSERT_GSL (gsl_matrix_set_col (xmat, 0, x));
 480    ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, EIGEN, xmat, 0.0, beta));
 481  // Yield results.
 482    if (!IS_NIL (ref_eigenv)) {
 483      *DEREF (A68_REF, &ref_eigenv) = vector_to_row (p, Ev);
 484    }
 485    push_matrix (p, beta);
 486  // Garbage collector.
 487    gsl_matrix_free (beta);
 488    gsl_matrix_free (EIGEN);
 489    gsl_matrix_free (nE);
 490    gsl_matrix_free (nF);
 491    gsl_matrix_free (Pr);
 492    gsl_matrix_free (u);
 493    gsl_matrix_free (v);
 494    gsl_matrix_free (xmat);
 495    gsl_vector_free (Ev);
 496    gsl_vector_free (nFv);
 497    gsl_vector_free (s);
 498    gsl_vector_free (w);
 499    gsl_vector_free (x);
 500  //
 501    (void) gsl_set_error_handler (save_handler);
 502  }
 503  
 504  //! @brief PROC pls1 lim = ([, ] REAL, [, ] REAL, REAL, REF [] REAL) [, ] REAL
 505  
 506  void genie_matrix_pls1_lim (NODE_T *p)
 507  {
 508  //
 509  // PLS1 with SVD solver for beta.
 510  // NIPALS algorithm with orthonormal scores and loadings.
 511  // See Ulf Indahl, Journal of Chemometrics 28(3) 168-180 [2014].
 512  //
 513  // Note that E is an MxN matrix and f, beta are Nx1 column vectors.
 514  // This for consistency with PLS2.
 515  //
 516  // Decompose eigenvectors until relative size is too short.
 517  //
 518    gsl_error_handler_t *save_handler = gsl_set_error_handler (torrix_error_handler);
 519    torrix_error_node = p;
 520  // Pop arguments.
 521    A68_REF ref_eigenv;
 522    POP_REF (p, &ref_eigenv);
 523  // 'lim' is minimum relative length to first eigenvector.
 524    A68_REAL lim;
 525    POP_OBJECT (p, &lim, A68_REAL); 
 526    gsl_matrix *F = pop_matrix (p, A68_TRUE);
 527    gsl_matrix *E = pop_matrix (p, A68_TRUE);
 528  // Catch wrong calls.
 529    unt Me = SIZE1 (E), Ne = SIZE2 (E), Mf = SIZE1 (F), Nf = SIZE2 (F);
 530    MATH_RTE (p, Mf == 0 || Nf == 0, M_ROW_ROW_REAL, "invalid column vector F");
 531    MATH_RTE (p, Me == 0 || Ne == 0, M_ROW_ROW_REAL, "invalid data matrix E");
 532    MATH_RTE (p, Me != Mf, M_ROW_ROW_REAL, "rows in F must match columns in E");
 533    MATH_RTE (p, Nf != 1, M_ROW_ROW_REAL, "F must be a column vector");
 534    MATH_RTE (p, VALUE (&lim) < 0 || VALUE (&lim) > 1.0, M_REAL, "invalid relative length");
 535  // Sensible defaults.
 536    int Nk = MIN (Ne, Mf);
 537  // Decompose E and F.
 538    gsl_matrix *EIGEN = NO_REAL_MATRIX, *nE = NO_REAL_MATRIX, *nF = NO_REAL_MATRIX; 
 539    gsl_matrix *eig = gsl_matrix_calloc (Ne, 1);
 540    gsl_matrix *lat = gsl_matrix_calloc (Me, 1);
 541    gsl_matrix *pl = gsl_matrix_calloc (Ne, 1);
 542    gsl_matrix *ql = gsl_matrix_calloc (Nf, 1); // 1x1 in PLS1
 543    gsl_vector *Ev = gsl_vector_calloc (Nk);
 544  // Latent variable deflation.
 545    int go_on = A68_TRUE;
 546    for (int k = 0; k < Nk && go_on; k ++) {
 547  // E weight from E, F covariance.
 548  // eig = Eᵀ * f / | Eᵀ * f |
 549      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, E, F, 0.0, eig));
 550      REAL_T norm = matrix_norm (eig);
 551      ASSERT_GSL (gsl_matrix_scale (eig, 1.0 / norm));
 552      gsl_vector_set (Ev, k, norm);
 553  // Compute latent variable.
 554  // lat = E * eig / | E * eig |
 555      ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, E, eig, 0.0, lat));
 556      norm = matrix_norm (lat);
 557      ASSERT_GSL (gsl_matrix_scale (lat, 1.0 / norm));
 558  // Deflate E and F, remove latent variable from both.
 559  // pl = Eᵀ * lat; E -= lat * plᵀ and ql = Fᵀ * lat; F -= lat * qlᵀ
 560      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, E, lat, 0.0, pl));
 561      ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasTrans, -1.0, lat, pl, 1.0, E));
 562      ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, lat, F, 0.0, ql));
 563  // Build matrices.
 564      EIGEN = mat_before_ab (p, EIGEN, eig);
 565      nE = mat_before_ab (p, nE, pl); // P
 566      nF = mat_over_ab (p, nF, ql);   // Qᵀ
 567  // Another iteration? 
 568     if (k > 0 && gsl_vector_get (Ev, k) / gsl_vector_get (Ev, 0) < VALUE (&lim)) {
 569        Nk = k + 1;
 570        go_on = A68_FALSE;
 571      }
 572    }
 573  // Intermediate garbage collector.
 574    gsl_matrix_free (E);
 575    gsl_matrix_free (F);
 576    gsl_matrix_free (eig);
 577    gsl_matrix_free (lat);
 578    gsl_matrix_free (pl);
 579    gsl_matrix_free (ql);
 580  // Projection of original data = Eᵀ * EIGEN
 581    gsl_matrix *Pr = gsl_matrix_calloc (SIZE2 (nE), SIZE2 (EIGEN));
 582    ASSERT_GSL (gsl_blas_dgemm (CblasTrans, CblasNoTrans, 1.0, nE, EIGEN, 0.0, Pr));
 583  // 
 584  // Now beta = EIGEN * Pr⁻¹ * nF
 585  // Compute with SVD to avoid matrix inversion.
 586  // Matrix inversion is a source of numerical noise!
 587  // 
 588    gsl_matrix *u, *v; gsl_vector *s, *w;
 589    unt M = SIZE1 (Pr), N = SIZE2 (Pr);
 590  // GSL computes thin SVD, M >= N only, returning V, not Vᵀ.
 591    if (M >= N) {
 592  // X = USVᵀ
 593      s = gsl_vector_calloc (N);
 594      u = gsl_matrix_calloc (M, N);
 595      v = gsl_matrix_calloc (N, N);
 596      w = gsl_vector_calloc (N);
 597      gsl_matrix_memcpy (u, Pr);
 598      ASSERT_GSL (gsl_linalg_SV_decomp (u, v, s, w));
 599    } else {
 600  // Xᵀ = VSUᵀ
 601      s = gsl_vector_calloc (M);
 602      u = gsl_matrix_calloc (N, M);
 603      v = gsl_matrix_calloc (M, M);
 604      w = gsl_vector_calloc (M);
 605      gsl_matrix_transpose_memcpy (u, Pr);
 606      ASSERT_GSL (gsl_linalg_SV_decomp (u, v, s, w));
 607    }
 608  // Kill short eigenvectors cf. Python numpy.
 609    for (int k = 1; k < SIZE (s); k++) {
 610      if (gsl_vector_get (s, k) / gsl_vector_get (s, 0) < 1e-15) {
 611        gsl_vector_set (s, k, 0.0);
 612      }
 613    }
 614  // Solve for beta.
 615    gsl_vector *nFv = gsl_vector_calloc (Nk), *x = gsl_vector_calloc (Nk);
 616    gsl_matrix_get_col (nFv, nF, 0);
 617    if (M >= N) {
 618      ASSERT_GSL (gsl_linalg_SV_solve (u, v, s, nFv, x));
 619    } else {
 620      ASSERT_GSL (gsl_linalg_SV_solve (v, u, s, nFv, x));
 621    }
 622    gsl_matrix *beta = gsl_matrix_calloc (Ne, 1), *xmat = gsl_matrix_calloc (Nk, 1);
 623    ASSERT_GSL (gsl_matrix_set_col (xmat, 0, x));
 624    ASSERT_GSL (gsl_blas_dgemm (CblasNoTrans, CblasNoTrans, 1.0, EIGEN, xmat, 0.0, beta));
 625  // Yield results.
 626    if (!IS_NIL (ref_eigenv)) {
 627      gsl_vector *Evl = gsl_vector_calloc (Nk);
 628      for (unt k = 0; k < Nk; k++) {
 629        gsl_vector_set (Evl, k, gsl_vector_get (Evl, k));
 630      }
 631      *DEREF (A68_REF, &ref_eigenv) = vector_to_row (p, Evl);
 632      gsl_vector_free (Evl);
 633    }
 634    push_matrix (p, beta);
 635  // Garbage collector.
 636    gsl_matrix_free (beta);
 637    gsl_matrix_free (EIGEN);
 638    gsl_matrix_free (nE);
 639    gsl_matrix_free (nF);
 640    gsl_matrix_free (Pr);
 641    gsl_matrix_free (u);
 642    gsl_matrix_free (v);
 643    gsl_matrix_free (xmat);
 644    gsl_vector_free (Ev);
 645    gsl_vector_free (nFv);
 646    gsl_vector_free (s);
 647    gsl_vector_free (w);
 648    gsl_vector_free (x);
 649  //
 650    (void) gsl_set_error_handler (save_handler);
 651  }
 652  
 653  #endif