linear-regression.a68

     
   1  COMMENT
   2    
   3  @section Synopsis
   4    
   5  Ordinary Least Squares, Principal Component, and Partial Least Squares regression.
   6  
   7  This program requires a68g to be built with the GNU Scientific Library.
   8   
   9  COMMENT
  10  
  11  PRAGMAT need gsl PRAGMAT
  12  
  13  # Read a training set with (single-column) constituent values.
  14    Training set entries are in a CSV file formatted per record as:
  15    sample name, constituent value, feature values #
  16  
  17  INT samples = read int, features = read int; # Dimensions are read from stand in before it is redirected #
  18   
  19  VOID (open (standin, "training-set.csv", standin channel)); # Redirect stand in to the CSV file #
  20  make term (standin, ","); # Let "," terminate a value on reading #
  21  [samples, features] REAL training set, [samples, 1] REAL constituents;
  22    
  23  FOR i TO samples
  24  DO read ((LOC STRING, space, constituents[i, 1], space)); # Discard the sample name, keep the constituent value #
  25     FOR j TO features
  26     DO read (training set[i, j]);
  27        IF j < features
  28        THEN read (space) # Skip the separator after every feature but the last #
  29        FI
  30     OD;
  31     IF i < samples
  32     THEN read (newline) # Advance to the next record #
  33     FI
  34  OD;
  35   
  36  # Centering columns is required for PCR and PLS, not for OLS. #
  37  
  38  MODE MATRIX = FLEX [1 : 0, 1 : 0] REAL,
  39       VECTOR = FLEX [1 : 0] REAL,
  40       COLVEC = MATRIX; # Column vector #
  41  
  42  MATRIX col mean set = MEAN training set, COLVEC col mean consts = MEAN constituents; # NOTE(review): MEAN presumably yields per-column means (a68g GSL extension) — confirm #
  43  
  44  MATRIX mean set = training set - col mean set, 
  45  COLVEC mean consts = constituents - col mean consts; # Column-centered data and constituents #
  46  
  47  # Beta vectors and predictions for constituent values #
  48  
  49  MATRIX betas, predictions;
  50  
  51  BEGIN COMMENT 1. Ordinary Least Squares COMMENT
  52    
  53        COLVEC ols beta := PINV mean set * mean consts; # Least-squares fit via the Moore-Penrose pseudo-inverse; a column vector. #
  54        betas := ols beta; # First beta column #
  55        predictions := mean set * ols beta + col mean consts # Fitted values, re-adding the constituent mean #
  56  END; 
  57  
  58  BEGIN COMMENT 2. Principal Component Regression (covariance matrix) COMMENT
  59  
  60        # PCR, taking principal components from the covariance matrix #
  61  
  62        MATRIX eigens = pca cv (mean set, NIL) [, ]; # NOTE(review): NIL second argument and [, ] trim per a68g GSL extension — confirm semantics #
  63        
  64        # Project original data and compute projected beta #
  65       
  66        MATRIX projected set = mean set * eigens;
  67        COLVEC pcr beta = eigens * PINV projected set * mean consts; # Rotate the projected solution back to feature space #
  68        
  69        betas := betas BEFORE pcr beta; # Append as a new column #
  70        predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
  71  END;
  72     
  73  BEGIN COMMENT 3. Principal Component Regression (SVD) COMMENT
  74  
  75        # PCR as in section 2, but with principal components from an SVD decomposition #
  76    
  77        MATRIX eigens = pca svd (mean set, NIL) [, ];
  78        
  79        # Project original data and compute projected beta #
  80  
  81        MATRIX projected set = mean set * eigens;
  82        COLVEC pcr beta = eigens * PINV projected set * mean consts; # Identity declaration: pcr beta is never reassigned (consistent with section 2) #
  83        
  84        betas := betas BEFORE pcr beta; # Append as a new column #
  85        predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
  86  END;
  87   
  88  BEGIN COMMENT 4. Partial Least Squares Regression COMMENT
  89    
  90        # NIPALS PLS1 following Ulf Indahl. #
  91        
  92        MATRIX e := mean set, new set, new consts, eigens, # E, F hold deflated residuals; W (eigens), P, Qᵀ grow one column/row per latent variable #
  93        COLVEC f := mean consts;
  94    
  95        TO samples # For this demo we compute all latent variables #
  96        DO 
  97           # E weight (eigen component) Eᵀf. The norm is an eigenvalue #
  98  
  99           IF REAL norm = NORM (T e * f); # Declaration inside the enquiry clause #
 100              norm > small real # Stop once the remaining weight is negligible #
 101           THEN COLVEC eigen = (T e * f) / norm; # Unit weight vector w #
 102  
 103                # Compute latent variable t from E factor score #
 104    
 105                COLVEC t := (e * eigen) / NORM (e * eigen); # Normalised score #
 106    
 107                # Use factor loadings p, q to partial out t from e, f (deflation) #
 108    
 109                COLVEC p := T e * t, q := T t * f;
 110                e -:= t * T p; # Deflate E ... #
 111                f -:= t * T q; # ... and F #
 112  
 113                # Build matrices #
 114    
 115                eigens     := eigens BEFORE eigen; # W  #
 116                new set    := new set BEFORE p;    # P  #
 117                new consts := new consts ABOVE q   # Qᵀ # 
 118           FI
 119        OD;
 120  
 121        MATRIX projected set = T new set * eigens; # PᵀW: square, one row and column per latent variable #
 122  
 123        COLVEC pls beta = eigens * INV projected set * new consts; # Compare to PCR # 
 124        betas := betas BEFORE pls beta;
 125        predictions := predictions BEFORE (mean set * pls beta) + col mean consts;
 126  
 127        # Rerun solving the linear equation for beta in eigen-space, 
 128          which may be more accurate than INV #
 129  
 130        OP SOLVE = (MATRIX a, COLVEC b) COLVEC: # Least-squares solve of a * x = b by SVD, zeroing tiny singular values #
 131           BEGIN MATRIX u, v, VECTOR s;
 132                 svd decomp (a, u, s, v);
 133                 FOR k FROM 2 TO UPB s       # Ignore short eigenvectors; GSL returns singular values in descending order, so s[1] is the largest #
 134                 DO IF s[k] / s[1] < 1e-15   # cf. Python numpy package  #
 135                    THEN s[k] := 0
 136                    FI
 137                 OD;
 138                 CV svd solve (u, s, v, b[:, 1]) # NOTE(review): CV presumably lifts the solution vector to a column vector — confirm #
 139           END;
 140  
 141        PRIO SOLVE = 9; # Highest dyadic priority: below, SOLVE binds tighter than "*" #
 142  
 143        COLVEC alt pls beta := eigens * projected set SOLVE new consts; # Compare to PCR # 
 144        betas := betas BEFORE alt pls beta;
 145        predictions := predictions BEFORE (mean set * alt pls beta) + col mean consts
 146  END;
 147  
 148  print matrix (betas, 0); # One beta column per method: OLS, PCR (cv), PCR (svd), PLS, PLS via SOLVE #
 149  print matrix (predictions, 0) # Corresponding prediction columns #