## linear-regression.a68

```
1  COMMENT
2
3  @section Synopsis
4
5  Ordinary Least Squares, Principal Component and Partial Least Squares regression.
6
7  This program requires a68g to be built with the GNU Scientific Library.
8
9  COMMENT
10
11  PRAGMAT need gsl PRAGMAT
12
13  # Read a training set with (single-column) constituent values.
14    Training set entries are in a CSV file formatted per record as:
15    sample name, constituent value, feature values #
16
18
19  VOID (open (standin, "training-set.csv", standin channel));
20  make term (standin, ",");
21  [samples, features] REAL training set, [samples, 1] REAL constituents;
22
23  FOR i TO samples
24  DO read ((LOC STRING, space, constituents[i, 1], space));
25     FOR j TO features
26     DO read (training set[i, j]);
27        IF j < features
29        FI
30     OD;
31     IF i < samples
33     FI
34  OD;
35
36  # Centering columns is required for PCR and PLS, not for OLS. #
37
38  MODE MATRIX = FLEX [1 : 0, 1 : 0] REAL,
39       VECTOR = FLEX [1 : 0] REAL,
40       COLVEC = MATRIX; # Column vector #
41
42  MATRIX col mean set = MEAN training set, COLVEC col mean consts = MEAN constituents;
43
44  MATRIX mean set = training set - col mean set,
45  COLVEC mean consts = constituents - col mean consts;
46
47  # Beta vectors and predictions for constituent values #
48
49  MATRIX betas, predictions;
50
51  BEGIN COMMENT 1. Ordinary Least Squares and Total Least Squares COMMENT
52
53        COLVEC ols beta := PINV mean set * mean consts; # Column vector. #
54        betas := ols beta;
55        predictions := mean set * ols beta + col mean consts
56  END;
57
58  BEGIN COMMENT 2. Principal Component Regression COMMENT
59
60        # PCR, from covariance matrix #
61
62        MATRIX eigens = pca cv (mean set, NIL) [, ];
63
64        # Project original data and compute projected beta #
65
66        MATRIX projected set = mean set * eigens;
67        COLVEC pcr beta = eigens * PINV projected set * mean consts;
68
69        betas := betas BEFORE pcr beta;
70        predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
71  END;
72
73  BEGIN COMMENT 3. Principal Component Regression COMMENT
74
75        # PCR, from SVD decomposition #
76
77        MATRIX eigens = pca svd (mean set, NIL) [, ];
78
79        # Project original data and compute projected beta #
80
81        MATRIX projected set = mean set * eigens;
82        COLVEC pcr beta := eigens * PINV projected set * mean consts;
83
84        betas := betas BEFORE pcr beta;
85        predictions := predictions BEFORE (mean set * pcr beta) + col mean consts
86  END;
87
88  BEGIN COMMENT 4. Partial Least Squares Regression COMMENT
89
90        # NIPALS PLS1 following Ulf Indahl. #
91
92        MATRIX e := mean set, new set, new consts, eigens,
93        COLVEC f := mean consts;
94
95        TO samples # For this demo we compute all latent variables #
96        DO
97           # E weight (eigen component) Eᵀf. The norm is an eigenvalue #
98
99           IF REAL norm = NORM (T e * f);
100              norm > small real
101           THEN COLVEC eigen = (T e * f) / norm;
102
103                # Compute latent variable t from E factor score #
104
105                COLVEC t := (e * eigen) / NORM (e * eigen);
106
107                # Use factor loadings p, q to partial out t from e, f (deflation) #
108
109                COLVEC p := T e * t, q := T t * f;
110                e -:= t * T p;
111                f -:= t * T q;
112
113                # Build matrices #
114
115                eigens     := eigens BEFORE eigen;
116                new set    := new set BEFORE p;    # P  #
117                new consts := new consts ABOVE q   # Qᵀ #
118           FI
119        OD;
120
121        MATRIX projected set = T new set * eigens;
122
123        COLVEC pls beta = eigens * INV projected set * new consts; # Compare to PCR #
124        betas := betas BEFORE pls beta;
125        predictions := predictions BEFORE (mean set * pls beta) + col mean consts;
126
127        # Rerun solving the linear equation for beta in eigen-space,
128          which may be more accurate than INV #
129
130        OP SOLVE = (MATRIX a, COLVEC b) COLVEC:
131           BEGIN MATRIX u, v, VECTOR s;
132                 svd decomp (a, u, s, v);
133                 FOR k FROM 2 TO UPB s       # Ignore short eigenvectors #
134                 DO IF s[k] / s[1] < 1e-15   # cf. Python numpy package  #
135                    THEN s[k] := 0
136                    FI
137                 OD;
138                 CV svd solve (u, s, v, b[:, 1])
139           END;
140
141        PRIO SOLVE = 9;
142
143        COLVEC alt pls beta := eigens * projected set SOLVE new consts; # Compare to PCR #
144        betas := betas BEFORE alt pls beta;
145        predictions := predictions BEFORE (mean set * alt pls beta) + col mean consts
146  END;
147
148  print matrix (betas, 0);
149  print matrix (predictions, 0)

```

 © 2002-2024 J.M. van der Veer (jmvdveer@xs4all.nl)