linear-regression.a68

     
COMMENT

@section Synopsis

Ordinary Least Squares, Principal Component and Partial Least Squares regression.

This program requires a68g to be built with the GNU Scientific Library.

COMMENT

PRAGMAT need gsl PRAGMAT

# Read a training set with (single-column) constituent values.
  Training set entries are in a CSV file formatted per record as:
  sample name, constituent value, feature values #
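
# A purely hypothetical record, to illustrate the layout only:
  sample 01, 12.5, 0.51, 0.48, 0.47, ... #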

INT samples = read int, features = read int; # Sample and feature counts come from standard input #

# Attach standin to the CSV file and make comma the string terminator #
VOID (open (standin, "training-set.csv", standin channel));
make term (standin, ",");
[samples, features] REAL training set, [samples, 1] REAL constituents;

# Each record: the sample name is read into an anonymous string and discarded,
  then the constituent value and the feature values are read. #
FOR i TO samples
DO read ((LOC STRING, space, constituents[i, 1], space));
   FOR j TO features
   DO read (training set[i, j]);
      IF j < features
      THEN read (space)
      FI
   OD;
   IF i < samples
   THEN read (newline)
   FI
OD;

# Centering columns is required for PCR and PLS, not for OLS. #

MODE MATRIX = FLEX [1 : 0, 1 : 0] REAL,
     VECTOR = FLEX [1 : 0] REAL,
     COLVEC = MATRIX; # Column vector #

MATRIX col mean set = MEAN training set, COLVEC col mean consts = MEAN constituents;
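
# Subtracting these column means below centers every feature column and the
  constituent column around zero before the decompositions. #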

MATRIX mean set = training set - col mean set,
COLVEC mean consts = constituents - col mean consts;

# Beta vectors and predictions for constituent values #

MATRIX betas, predictions;

BEGIN COMMENT 1. Ordinary Least Squares COMMENT

      COLVEC ols beta := PINV mean set * mean consts; # Column vector. #
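
      # The pseudo inverse gives the least-squares solution of
        mean set * beta = mean consts; for X of full column rank this
        equals (XᵀX)⁻¹ Xᵀ y with X = mean set and y = mean consts. #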
      betas := ols beta;
      predictions := mean set * ols beta + col mean consts

END;

BEGIN COMMENT 2. Principal Component Regression COMMENT

      # PCR, from covariance matrix #

      MATRIX eigens = pca cv (mean set, NIL) [, ];

      # Project original data and compute projected beta #

      MATRIX projected set = mean set * eigens;
      COLVEC pcr beta = eigens * PINV projected set * mean consts;
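
      # The constituents are regressed on the principal-component scores and
        the result is mapped back through eigens; with every component kept,
        PCR reproduces the OLS estimate. #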

      betas := betas BEFORE pcr beta;
      predictions := predictions BEFORE (mean set * pcr beta) + col mean consts

END;

BEGIN COMMENT 3. Principal Component Regression COMMENT

      # PCR, from SVD decomposition #

      MATRIX eigens = pca svd (mean set, NIL) [, ];

      # Project original data and compute projected beta #

      MATRIX projected set = mean set * eigens;
      COLVEC pcr beta = eigens * PINV projected set * mean consts;
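
      # The right singular vectors of the centered data are the eigenvectors of
        its covariance matrix, so up to sign and rounding this block should give
        the same beta as the previous one. #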

      betas := betas BEFORE pcr beta;
      predictions := predictions BEFORE (mean set * pcr beta) + col mean consts

END;

BEGIN COMMENT 4. Partial Least Squares Regression COMMENT

      # NIPALS PLS1 following Ulf Indahl,
        Journal of Chemometrics 28(3), 168-180 (2014)
      #

      MATRIX e := mean set, new set, new consts, eigens,
      COLVEC f := mean consts;

      TO samples # For this demo we compute all latent variables #
      DO
         # E weight (eigen component) Eᵀf. The norm is an eigenvalue #

         COLVEC eigen = (T e * f) / NORM (T e * f);
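
         # Scaled to unit length, Eᵀf is the direction in feature space whose
           scores have maximal covariance with f. #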

         # Compute latent variable t from E factor score #

         COLVEC t := (e * eigen) / NORM (e * eigen);

         # Use factor loadings p, q to partial out t from e, f (deflation) #

         COLVEC p := T e * t;
         e -:= t * T p;
         COLVEC q := T t * f;
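
         # Deflating e keeps later scores orthogonal to t; in PLS1 f itself need
           not be deflated, because the part of f explained by t is orthogonal
           to every later score. #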

         # Build matrices #

         eigens     := eigens BEFORE eigen;
         new set    := new set BEFORE p;    # P  #
         new consts := new consts ABOVE q   # Qᵀ #

      OD;

      MATRIX projected set = T new set * eigens;

      COLVEC pls beta = eigens * INV projected set * new consts; # Compare to PCR #
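
      # With weights W (eigens), loadings P (new set) and Qᵀ (new consts),
        this is the usual PLS1 regression vector W (PᵀW)⁻¹ Qᵀ expressed in
        the original feature space. #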
      betas := betas BEFORE pls beta;
      predictions := predictions BEFORE (mean set * pls beta) + col mean consts;

      # Rerun solving the linear equation for beta in eigen-space,
        which may be more accurate than INV #

      OP SOLVE = (MATRIX a, COLVEC b) COLVEC:
         BEGIN MATRIX u, v, VECTOR s;
               svd decomp (a, u, s, v);
               FOR k FROM 2 TO UPB s       # Zero out relatively small singular values #
               DO IF s[k] / s[1] < 1e-15   # cf. Python numpy package #
                  THEN s[k] := 0
                  FI
               OD;
               CV svd solve (u, s, v, b[:, 1])
         END;
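
      # Zeroing singular values below the relative tolerance 1e-15 (the default
        cutoff of numpy's pseudo inverse) makes svd solve a truncated, better
        conditioned least-squares solve. #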

      PRIO SOLVE = 9; # Binds more tightly than *, so a * b SOLVE c reads a * (b SOLVE c) #

      COLVEC alt pls beta := eigens * projected set SOLVE new consts; # Compare to PCR #
      betas := betas BEFORE alt pls beta;
      predictions := predictions BEFORE (mean set * alt pls beta) + col mean consts

END;

print matrix (betas, 0);
print matrix (predictions, 0)