linear-regression.a68
1 COMMENT
2
3 @section Synopsis
4
5 Ordinary Least Squares, Principal Component and Partial Least Squares regression.
6
7 This program requires a68g to be built with the GNU Scientific Library.
8
9 COMMENT
10
11 PRAGMAT need gsl PRAGMAT
12
13 # Read a training set with (single-column) constituent values.
14 Training set entries are in a CSV file formatted per record as:
15 sample name, constituent value, feature values #
16
17 INT samples = read int, features = read int;
18
19 VOID (open (standin, "training-set.csv", standin channel));
20 make term (standin, ",");
21 [samples, features] REAL training set, [samples, 1] REAL constituents;
22
23 FOR i TO samples
24 DO read ((LOC STRING, space, constituents[i, 1], space));
25 FOR j TO features
26 DO read (training set[i, j]);
27 IF j < features
28 THEN read (space)
29 FI
30 OD;
31 IF i < samples
32 THEN read (newline)
33 FI
34 OD;
35
36 # Centering columns is required for PCR and PLS, not for OLS. #
37
38 MODE MATRIX = FLEX [1 : 0, 1 : 0] REAL,
39 VECTOR = FLEX [1 : 0] REAL,
40 COLVEC = MATRIX; # Column vector #
41
42 MATRIX col mean set = MEAN training set, COLVEC col mean consts = MEAN constituents;
43
44 MATRIX mean set = training set - col mean set,
45 COLVEC mean consts = constituents - col mean consts;
46
47 # Beta vectors and predictions for constituent values #
48
49 MATRIX betas, predictions;
50
51 BEGIN COMMENT 1. Ordinary Least Squares COMMENT
52
53 COLVEC ols beta := PINV mean set * mean consts; # Column vector. #
54 betas := ols beta;
55 predictions := mean set * ols beta + col mean consts
56
57 END;
58
59 BEGIN COMMENT 2. Principal Component Regression COMMENT
60
61 # PCR, from covariance matrix #
62
63 MATRIX eigens = pca cv (mean set, NIL) [, ];
64
65 # Project original data and compute projected beta #
66
67 MATRIX projected set = mean set * eigens;
68 COLVEC pcr beta = eigens * PINV projected set * mean consts;
69
70 betas := betas BEFORE pcr beta;
71 predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
72
73 END;
74
75 BEGIN COMMENT 3. Principal Component Regression COMMENT
76
77 # PCR, from SVD decomposition #
78
79 MATRIX eigens = pca svd (mean set, NIL) [, ];
80
81 # Project original data and compute projected beta #
82
83 MATRIX projected set = mean set * eigens;
84 COLVEC pcr beta = eigens * PINV projected set * mean consts;
85
86 betas := betas BEFORE pcr beta;
87 predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
88
89 END;
90
91 BEGIN COMMENT 4. Partial Least Squares Regression COMMENT
92
93 # NIPALS PLS1 following Ulf Indahl.
94 Journal of Chemometrics 28(3) 168-180 [2014]
95 #
96
97 MATRIX e := mean set, new set, new consts, eigens,
98 COLVEC f := mean consts;
99
100 TO samples # For this demo we compute all latent variables #
101 DO
102 # E weight (eigen component) Eᵀf. The norm is an eigenvalue #
103
104 COLVEC eigen = (T e * f) / NORM (T e * f);
105
106 # Compute latent variable t from E factor score #
107
108 COLVEC t := (e * eigen) / NORM (e * eigen);
109
110 # Use factor loadings p, q to partial out t from e, f (deflation) #
111
112 COLVEC p := T e * t;
113 e -:= t * T p;
114 COLVEC q := T t * f;
115
116 # Build matrices #
117
118 eigens := eigens BEFORE eigen;
119 new set := new set BEFORE p; # P #
120 new consts := new consts ABOVE q # Qᵀ #
121
122 OD;
123
124 MATRIX projected set = T new set * eigens;
125
126 COLVEC pls beta = eigens * INV projected set * new consts; # Compare to PCR #
127 betas := betas BEFORE pls beta;
128 predictions := predictions BEFORE (mean set * pls beta) + col mean consts;
129
130 # Rerun solving the linear equation for beta in eigen-space,
131 which may be more accurate than INV #
132
133 OP SOLVE = (MATRIX a, COLVEC b) COLVEC:
134 BEGIN MATRIX u, v, VECTOR s;
135 svd decomp (a, u, s, v);
136 FOR k FROM 2 TO UPB s # Ignore short eigenvectors #
137 DO IF s[k] / s[1] < 1e-15 # cf. Python numpy package #
138 THEN s[k] := 0
139 FI
140 OD;
141 CV svd solve (u, s, v, b[:, 1])
142 END;
143
144 PRIO SOLVE = 9;
145
146 COLVEC alt pls beta := eigens * projected set SOLVE new consts; # Compare to PCR #
147 betas := betas BEFORE alt pls beta;
148 predictions := predictions BEFORE (mean set * alt pls beta) + col mean consts
149
150 END;
151
152 print matrix (betas, 0);
153 print matrix (predictions, 0)
154