linear-regression.a68
1 COMMENT
2
3 @section Synopsis
4
5 Ordinary Least Squares, Principal Component and Partial Least Squares regression.
6
7 This program requires a68g to be built with the GNU Scientific Library.
8
9 COMMENT
10
11 PRAGMAT need gsl PRAGMAT
12
13 # Read a training set with (single-column) constituent values.
14 Training set entries are in a CSV file formatted per record as:
15 sample name, constituent value, feature values #
16
17 INT samples = read int, features = read int;
18
19 VOID (open (standin, "training-set.csv", standin channel));
20 make term (standin, ",");
21 [samples, features] REAL training set, [samples, 1] REAL constituents;
22
23 FOR i TO samples
24 DO read ((LOC STRING, space, constituents[i, 1], space));
25 FOR j TO features
26 DO read (training set[i, j]);
27 IF j < features
28 THEN read (space)
29 FI
30 OD;
31 IF i < samples
32 THEN read (newline)
33 FI
34 OD;
35
36 # Centering columns is required for PCR and PLS, not for OLS. #
37
38 MODE MATRIX = FLEX [1 : 0, 1 : 0] REAL,
39 VECTOR = FLEX [1 : 0] REAL,
40 COLVEC = MATRIX; # Column vector #
41
42 MATRIX col mean set = MEAN training set, COLVEC col mean consts = MEAN constituents;
43
44 MATRIX mean set = training set - col mean set,
45 COLVEC mean consts = constituents - col mean consts;
46
47 # Beta vectors and predictions for constituent values #
48
49 MATRIX betas, predictions;
50
51 BEGIN COMMENT 1. Ordinary Least Squares and Total Least Squares COMMENT
52
53 COLVEC ols beta := PINV mean set * mean consts; # Column vector. #
54 betas := ols beta;
55 predictions := mean set * ols beta + col mean consts
56 END;
57
58 BEGIN COMMENT 2. Principal Component Regression COMMENT
59
60 # PCR, from covariance matrix #
61
62 MATRIX eigens = pca cv (mean set, NIL) [, ];
63
64 # Project original data and compute projected beta #
65
66 MATRIX projected set = mean set * eigens;
67 COLVEC pcr beta = eigens * PINV projected set * mean consts;
68
69 betas := betas BEFORE pcr beta;
70 predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
71 END;
72
73 BEGIN COMMENT 3. Principal Component Regression COMMENT
74
75 # PCR, from SVD decomposition #
76
77 MATRIX eigens = pca svd (mean set, NIL) [, ];
78
79 # Project original data and compute projected beta #
80
81 MATRIX projected set = mean set * eigens;
82 COLVEC pcr beta := eigens * PINV projected set * mean consts;
83
84 betas := betas BEFORE pcr beta;
85 predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
86 END;
87
88 BEGIN COMMENT 4. Partial Least Squares Regression COMMENT
89
90 # NIPALS PLS1 following Ulf Indahl. #
91
92 MATRIX e := mean set, new set, new consts, eigens,
93 COLVEC f := mean consts;
94
95 TO samples # For this demo we compute all latent variables #
96 DO
97 # E weight (eigen component) Eᵀf. The norm is an eigenvalue #
98
99 IF REAL norm = NORM (T e * f);
100 norm > small real
101 THEN COLVEC eigen = (T e * f) / norm;
102
103 # Compute latent variable t from E factor score #
104
105 COLVEC t := (e * eigen) / NORM (e * eigen);
106
107 # Use factor loadings p, q to partial out t from e, f (deflation) #
108
109 COLVEC p := T e * t, q := T t * f;
110 e -:= t * T p;
111 f -:= t * T q;
112
113 # Build matrices #
114
115 eigens := eigens BEFORE eigen;
116 new set := new set BEFORE p; # P #
117 new consts := new consts ABOVE q # Qᵀ #
118 FI
119 OD;
120
121 MATRIX projected set = T new set * eigens;
122
123 COLVEC pls beta = eigens * INV projected set * new consts; # Compare to PCR #
124 betas := betas BEFORE pls beta;
125 predictions := predictions BEFORE (mean set * pls beta) + col mean consts;
126
127 # Rerun solving the linear equation for beta in eigen-space,
128 which may be more accurate than INV #
129
130 OP SOLVE = (MATRIX a, COLVEC b) COLVEC:
131 BEGIN MATRIX u, v, VECTOR s;
132 svd decomp (a, u, s, v);
133 FOR k FROM 2 TO UPB s # Ignore short eigenvectors #
134 DO IF s[k] / s[1] < 1e-15 # cf. Python numpy package #
135 THEN s[k] := 0
136 FI
137 OD;
138 CV svd solve (u, s, v, b[:, 1])
139 END;
140
141 PRIO SOLVE = 9;
142
143 COLVEC alt pls beta := eigens * projected set SOLVE new consts; # Compare to PCR #
144 betas := betas BEFORE alt pls beta;
145 predictions := predictions BEFORE (mean set * alt pls beta) + col mean consts
146 END;
147
148 print matrix (betas, 0);
149 print matrix (predictions, 0)
© 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)
|