gd.plot <- function(A=matrix(c(4,3,3,4), nrow=2), x = c(-10, 7), n.iter = 20, FUN=NULL, a=1.2){ print(eigen(A)) plot(a*x[1]*c(1,-1), a*x[2]*c(1,-1), type="n", xlab="X", ylab="Y", main = "Optimal Gradient Descent") points(x[1], x[2], pch=16) v <- u <- x i = 0 LEN <- NULL repeat { if(!is.null(FUN)) FUN(A, x) y <- as.numeric(A%*%x) gam <- sum(y*y)/sum(y*as.numeric(A%*%y)) B = diag(c(1,1))-gam * A v <- u u <- x x <- as.numeric(B%*%x) if(is.null(LEN)) LEN <- sqrt(sum((x-u)^2)) arrow.length = sqrt(sum((x-u)^2))/LEN*0.25 i = i + 1 arrows(u[1],u[2], x[1], x[2], length = arrow.length, angle = 20, col=i) # if(i >= 2) lines(c(v[1], x[1]), c(v[2], x[2]), col=i+1) if(i >= n.iter || sum(x*x) < 10^(-8)) return(x) } } eplot <- function(A, x){ c2 <- sum(x*as.numeric(A%*%x)) E <- eigen(A) U <- t(E$vectors) val <- E$values U[1,] <- sqrt(val[1]/c2) * U[1,] U[2,] <- sqrt(val[2]/c2) * U[2,] B <- solve(U) N <- 500 theta <- seq(0, 2*pi, length=N) X <- matrix(nrow=2, ncol= N) for(i in 1:N){ X[,i] <- as.numeric(B %*% c(cos(theta[i]), sin(theta[i]))) } lines(X[1,], X[2,], col="gray") } gd.plot(x=c(-10, 8), FUN = eplot, n.iter=20)