SVD.pls <- function(x, y, n.components = NULL) {
# Step 1: Center and scale X and Y
X <- scale(x, center = TRUE, scale = TRUE)
Y <- scale(y, center = TRUE, scale = TRUE)
x.mean <- attr(X, "scaled:center")
x.sd <- attr(X, "scaled:scale")
y.mean <- attr(Y, "scaled:center")
y.sd <- attr(Y, "scaled:scale")
n <- nrow(X)
p <- ncol(X)
q <- ncol(Y)
# Determine number of components
rank_X <- qr(X)$rank
if (is.null(n.components)) {
n.components <- rank_X
} else {
n.components <- min(n.components, rank_X)
}
# Preallocate matrices
T <- matrix(0, n, n.components) # X scores
U <- matrix(0, n, n.components) # Y scores
P_loadings <- matrix(0, p, n.components) # X loadings
W <- matrix(0, p, n.components) # X weights
Q_loadings <- matrix(0, q, n.components) # Y loadings (reference)
B_vector <- numeric(n.components)
# Initial total sum of squares
SSX_total <- sum(X^2)
SSY_total <- sum(Y^2)
X_explained <- numeric(n.components)
Y_explained <- numeric(n.components)
# Store initial X and Y
E <- X
F <- Y
for (h in seq_len(n.components)) {
# Step 1: Cross-covariance matrix
R <- t(E) %*% F
# Step 2: SVD of R
svd_R <- svd(R)
w <- svd_R$u[, 1, drop = FALSE]
c <- svd_R$v[, 1, drop = FALSE]
# Step 3: Scores
t <- E %*% w
t <- t / sqrt(sum(t^2)) # normalize t
u <- F %*% c
# Step 4: Loadings
p <- t(E) %*% t
# Step 5: Regression scalar b
b <- drop(t(t) %*% u)
# Step 6: Deflation
E <- E - t %*% t(p)
F <- F - b * t %*% t(c)
# Store results
T[, h] <- t
U[, h] <- u
P_loadings[, h] <- p
W[, h] <- w
Q_loadings[, h] <- c
B_vector[h] <- b
# Explained variance
X_explained[h] <- sum(p^2) / SSX_total * 100
Y_explained[h] <- (b^2) / SSY_total * 100
}
# Cumulative variance explained
X_cum_explained <- cumsum(X_explained)
Y_cum_explained <- cumsum(Y_explained)
# Clean up effective components
effective_components <- sum(B_vector != 0)
P_loadings <- P_loadings[, seq_len(effective_components), drop = FALSE]
Q_loadings <- Q_loadings[, seq_len(effective_components), drop = FALSE]
W <- W[, seq_len(effective_components), drop = FALSE]
T <- T[, seq_len(effective_components), drop = FALSE]
U <- U[, seq_len(effective_components), drop = FALSE]
B_vector <- B_vector[seq_len(effective_components)]
X_explained <- X_explained[seq_len(effective_components)]
Y_explained <- Y_explained[seq_len(effective_components)]
X_cum_explained <- X_cum_explained[seq_len(effective_components)]
Y_cum_explained <- Y_cum_explained[seq_len(effective_components)]
# Normalize C (Y weights)
C <- apply(Q_loadings, 2, function(c) c / sqrt(sum(c^2)))
# Pseudo-inverse of P_loadings
svd_P <- svd(P_loadings)
d_inv <- ifelse(svd_P$d > .Machine$double.eps, 1 / svd_P$d, 0)
P_pinv <- t(svd_P$v %*% diag(d_inv) %*% t(svd_P$u))
P_pinv <- P_pinv[, seq_len(effective_components), drop = FALSE]
# Final scaled coefficients
B_scaled <- P_pinv %*% diag(B_vector) %*% t(Q_loadings)
# Rescale to original units
B_original <- sweep(B_scaled, 2, y.sd, "*")
B_original <- sweep(B_original, 1, x.sd, "/")
rownames(B_original) <- colnames(x)
colnames(B_original) <- colnames(y)
intercept <- rep(0, length(y.mean))
names(intercept) <- colnames(y)
list(
T = T, # X scores
U = U, # Y scores
W = W, # X weights
C = C, # Y weights (normalized)
P_loadings = P_loadings, # X loadings (reference)
Q_loadings = Q_loadings, # Y loadings (reference)
B_vector = B_vector,
coefficients = B_original,
intercept = intercept,
X_explained = X_explained,
Y_explained = Y_explained,
X_cum_explained = X_cum_explained,
Y_cum_explained = Y_cum_explained
)
}