# Environment setup: this notebook runs in Google Colab with the HJ-Prox
# project sources stored on Google Drive.
import os
import sys
from google.colab import drive
# Mount Drive so the project source tree below is importable.
drive.mount('/content/drive')
sys.path.append('/content/drive/MyDrive/Projects/2023-HJ-Prox/src/')
# Star imports supply compute_prox and the test functions/proxes used below
# (l1_norm, quadratic, log_barrier, ... — exact split between the two
# modules not visible from here).
from hj_prox import *
from functions import *
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import time
# Double precision: the envelope/prox comparisons are sensitive to rounding.
torch.set_default_dtype(torch.float64)
torch.manual_seed(0)  # reproducible Monte Carlo sampling
device = 'cuda:0'     # NOTE(review): assumes a CUDA runtime is available — TODO confirm
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Approximate Proximals and Moreau Envelopes¶
The Proximal of $f$ is given by \begin{equation} \text{prox}_{tf}(x) \triangleq \text{argmin}_{z\in\mathbb{R}^n} f(z) + \dfrac{1}{2t}\|z-x\|^2, \end{equation} and the Moreau envelope of $f$ is given by \begin{equation} u(x,t) \triangleq \inf_{z\in \mathbb{R}^n} f(z) + \dfrac{1}{2t}\|z-x\|^2 \end{equation}
We leverage the fact that the Moreau envelope above satisfies the Hamilton-Jacobi equation \begin{equation} \begin{split} u_t^\delta + \frac{1}{2}\|Du^\delta \|^2 = \frac{\delta}{2} \Delta u^\delta \qquad &\text{ in } \mathbb{R}^n\times (0,T] \\ u^\delta = f \qquad &\text{ in } \mathbb{R}^n\times \{t = 0\} \end{split} \end{equation} when $\delta = 0$.
By adding a viscous term ($\delta > 0$), we are able to approximate the solution to the HJ equation using the Cole-Hopf formula to obtain \begin{equation} u^\delta(x,t) = - \delta \ln\Big(\Phi_t * \exp(-f/\delta)\Big)(x) = - \delta \ln \int_{\mathbb{R}^n} \Phi(x-y,t) \exp\left(\frac{-f(y)}{\delta}\right) dy \end{equation} where \begin{equation} \Phi(x,t) = \frac{1}{{(2\pi \delta t)}^{n/2}} \exp\left(\frac{-\|x\|^2}{2\delta t}\right). \end{equation} This allows us to write the Moreau Envelope (and the proximal) explicitly as an expectation. In particular, we obtain \begin{equation} \text{prox}_{tf}(x) = \dfrac{\mathbb{E}_{y \sim \mathcal{N}_{x, \delta t I}} \left[y \exp\left(-\delta^{-1}f(y)\right) \right]}{\mathbb{E}_{y \sim \mathcal{N}_{x, \delta t I}} \left[\exp\left(-\delta^{-1}f(y)\right) \right]} \end{equation} and \begin{equation} u(x,t) \approx - \delta \ln \mathbb{E}_{y \sim \mathcal{N}_{x, \delta t I}} \left[\exp\left(-\delta^{-1}f(y)\right) \right] \end{equation}
# plotting parameters shared by all figures below
title_fontsize = 22
fontsize = 20
my_blue = '#1f77b4'    # matplotlib default blue
my_orange = '#F97306'
# (removed a stray `fig1 = plt.figure()` that created an empty, never-used
# figure — every plotting cell below opens its own figure)
<Figure size 640x480 with 0 Axes>
Approximate Prox (Shrink) and Moreau Envelope for L1 Norm¶
Clean L1 Norm Proximal/Moreau Computation¶
# L1 norm: compare the analytic prox/envelope against the HJ-based estimates.
f = l1_norm
analytic_prox = l1_norm_prox
# grid of evaluation points (100 scalars as a column vector)
x = torch.linspace(-1, 1, 100).view(-1, 1).to(device)
# buffers filled point-by-point in the loop below
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
errs = torch.zeros(x.shape, device=device)
t = 2e-1      # prox/envelope time parameter
delta = 1e-2  # viscosity of the HJ approximation
alpha = 1.0
y_vals = f(x)
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e5)  # Monte Carlo samples per grid point
for i in range(x.shape[0]):
    # compute_prox returns (prox estimate, line-search iters, envelope estimate)
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
    # true envelope u(x,t) = f(prox(x)) + ||prox(x)-x||^2 / (2t)
    envelope_true[i] = f(prox_true[i].view(1,1)) + (1/(2*t))*torch.norm(prox_true[i] - x[i], p=2)**2
    # NOTE(review): mixes f(temp) with (temp-x)/t and is never used below —
    # looks like a leftover diagnostic; verify intent
    errs[i] = f(temp) + (temp - x[i])/t
# PLOT: f, true envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
# raw string: '\d' is an invalid escape sequence in a plain string literal
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'l1_norm_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-3-31371be98f0c>:32: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Generate data files for Howard: one "(x value)" row per grid point.
for filename, values in (('fig2a1_u.dat', envelope_true),
                         ('fig2a1_udelta.dat', envelope_HJ)):
    with open(filename, 'w') as csv_file:
        for xi, vi in zip(x, values):
            csv_file.write('%0.5e %0.5e\n' % (xi, vi))
# PLOT: analytic prox vs HJ prox estimate
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), prox_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), prox_HJ.cpu(), '--', linewidth=3, color='g')
ax.legend(['True Prox', 'HJ Prox'], fontsize=fontsize, loc=2)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'l1_norm_prox_comparison.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-5-c80673add8e2>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
Noisy L1 Norm Proximal/Moreau Computation¶
# Noisy L1: f evaluations carry noise; compare HJ envelopes for two viscosities.
f = l1_norm_noisy
analytic_prox = l1_norm_prox
# grid of evaluation points
x = torch.linspace(-1, 1, 100).view(-1, 1).to(device)
y_vals = f(x)
# buffers for the two HJ envelope estimates
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_HJ2 = torch.zeros(x.shape, device=device)
t = 0.1
delta = 5e-2   # larger viscosity -> smoother envelope
delta2 = 1e-2  # smaller viscosity -> closer to the true envelope
alpha = 1.0
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(prox_true.shape, device=device)
prox_HJ2 = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e4)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta2, alpha=alpha, device=device)
    prox_HJ2[i] = temp
    envelope_HJ2[i] = temp_envelope
# ------------------------ PLOT without noiseless y ------------------------
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_HJ.cpu(), linewidth=4, color='g')
ax.plot(x.cpu(), envelope_HJ2.cpu(), linewidth=4)
# analytic L1 envelope as a reference curve (used for the 'eye_candy' figure)
ax.plot(x.cpu(), envelope_l1_norm(x, t=t).cpu(), 'k', linewidth=3)
# raw strings: '\d' is an invalid escape sequence in a plain string literal
ax.legend(['noisy $f$', r'$u^{\delta_1}$', r'$u^{\delta_2}$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'eye_candy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-6-14052be998c3>:33: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
Approximate Prox and Moreau Envelope for Quadratic Function¶
# Quadratic example (1-D): f wraps the project's `quadratic(x, A, b)`;
# the analytic prox comes from `quadratic_prox`.
A = torch.ones(1, 1, device=device)   # already (1,1); the old A.view(1,1) was a no-op
b = torch.ones(1, device=device)
def f(x):
    return quadratic(x, A, b)
def analytic_prox(x, t=0.5):
    return quadratic_prox(x, A, b, t=t)
# grid of evaluation points
x = torch.linspace(-2.0, 0.5, 100, device=device).view(-1, 1)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
t = 5e-1
delta = 5e-2
alpha = 1.0
y_vals = f(x)
errs = torch.zeros(x.shape, device=device)
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(x.shape, device=device)
n_integral_samples = int(1e5)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    # NOTE(review): unused diagnostic — verify intent
    errs[i] = f(temp) + (temp - x[i])/t
    envelope_HJ[i] = temp_envelope
    # true envelope from the analytic prox
    envelope_true[i] = f(prox_true[i].view(1,1)) + (1/(2*t))*torch.norm(prox_true[i] - x[i], p=2)**2
# PLOT: f, true envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'quadratic_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-7-e9418ee90c3a>:36: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Figs for Howard: HJ envelope samples as "(x value)" rows
filename = 'fig2a2_udelta.dat'
with open(filename, 'w') as csv_file:
    for xi, vi in zip(x, envelope_HJ):
        csv_file.write('%0.5e %0.5e\n' % (xi, vi))
# PLOT: analytic prox vs HJ prox estimate
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), prox_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), prox_HJ.cpu(), '--', linewidth=3, color='g')
ax.legend(['True Prox', 'HJ Prox'], fontsize=fontsize, loc=2)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'quadratic_prox_comparison.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-9-f213e4ce4ac9>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Figs for Howard: HJ prox samples as "(x value)" rows
filename = 'fig2a2_hjprox.dat'
with open(filename, 'w') as csv_file:
    for xi, vi in zip(x, prox_HJ):
        csv_file.write('%0.5e %0.5e\n' % (xi, vi))
Noisy Quadratic Moreau and Prox¶
# Noisy quadratic: HJ prox/envelope from noisy f samples at two viscosities.
def f(x):
    return quadratic_noisy(x, A, b)
envelope_HJ = torch.zeros(x.shape, device=device)
# BUG FIX: envelope_HJ2 was never allocated in this cell — it silently reused
# the stale buffer left over from the L1 section (worked only because the
# grid shapes happened to match).
envelope_HJ2 = torch.zeros(x.shape, device=device)
t = 5e-1
delta = 5e-2   # larger viscosity
delta2 = 1e-2  # smaller viscosity
alpha = 1.0
y_vals = f(x)
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(prox_true.shape, device=device)
prox_HJ2 = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e4)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta2, alpha=alpha, device=device)
    prox_HJ2[i] = temp
    envelope_HJ2[i] = temp_envelope
# PLOT: noisy f with the two HJ envelopes
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_HJ.cpu(), linewidth=4, color='g')
ax.plot(x.cpu(), envelope_HJ2.cpu(), linewidth=4)
# raw strings: '\d' is an invalid escape sequence in a plain string literal
ax.legend(['noisy $f$', r'$u^{\delta_1}$', r'$u^{\delta_2}$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'quadratic_envelope_noisy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-11-754e1ba4d0d8>:31: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Figs for Howard: noisy f and the two HJ envelopes, "(x value)" rows
for filename, values in (('fig2d2_noisyf.dat', y_vals),
                         ('fig2d2_udelta1.dat', envelope_HJ),
                         ('fig2d2_udelta2.dat', envelope_HJ2)):
    with open(filename, 'w') as csv_file:
        for xi, vi in zip(x, values):
            csv_file.write('%0.5e %0.5e\n' % (xi, vi))
# PLOT: HJ prox recovered from noisy samples
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), prox_HJ.cpu(), linewidth=4, color='g')
ax.legend(['HJ Prox from noisy \n samples'], fontsize=fontsize+2, loc=2)
save_str = 'quadratic_prox_noisy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-13-dfba4c3aef84>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
Approximate Prox and Moreau Envelope for Log Barrier¶
# Log barrier: analytic prox supplied by the project (log_barrier_prox).
f = log_barrier
analytic_prox = log_barrier_prox
# grid of evaluation points (strictly positive domain)
x = torch.linspace(2, 10, 100).view(-1, 1).to(device)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
errs = torch.zeros(x.shape, device=device)
t = 2.0
delta = 1e-1
alpha = 1.0
y_vals = f(x)
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e4)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
    # true envelope from the analytic prox
    envelope_true[i] = f(prox_true[i].view(1,1)) + (1/(2*t))*torch.norm(prox_true[i] - x[i], p=2)**2
    # NOTE(review): unused diagnostic — verify intent
    errs[i] = f(temp) + (temp - x[i])/t
# PLOT: f, true envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'log_barrier_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-15-a788420ea3cb>:32: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# PLOT: analytic prox vs HJ prox estimate
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), prox_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), prox_HJ.cpu(), '--', linewidth=3, color='g')
ax.legend(['True Prox', 'HJ Prox'], fontsize=fontsize, loc=2)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'log_barrier_prox_comparison.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-16-cc2ac5705551>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
Noisy Log Barrier¶
# Noisy log barrier: HJ prox/envelope from noisy samples at two viscosities.
# Reuses the grid x from the clean log-barrier cell above.
f = log_barrier_noisy
analytic_prox = log_barrier_prox
envelope_HJ = torch.zeros(x.shape, device=device)
# BUG FIX: envelope_HJ2 was never allocated in this cell — it silently reused
# a stale buffer from an earlier section (worked only because shapes matched).
envelope_HJ2 = torch.zeros(x.shape, device=device)
t = 2.0
delta = 5e-2   # larger viscosity
delta2 = 1e-2  # smaller viscosity
alpha = 1.0
y_vals = f(x)
prox_true = analytic_prox(x, t=t)
prox_HJ = torch.zeros(prox_true.shape, device=device)
prox_HJ2 = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e4)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta2, alpha=alpha, device=device)
    prox_HJ2[i] = temp
    envelope_HJ2[i] = temp_envelope
# PLOT: noisy f with the two HJ envelopes
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_HJ.cpu(), linewidth=4, color='g')
ax.plot(x.cpu(), envelope_HJ2.cpu(), linewidth=4)
# raw strings: '\d' is an invalid escape sequence in a plain string literal
ax.legend(['noisy $f$', r'$u^{\delta_1}$', r'$u^{\delta_2}$'], fontsize=fontsize, loc=1)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'log_barrier_envelope_noisy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-17-65d0accfa9cf>:32: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Figs for Howard: noisy f and the two HJ envelopes, "(x value)" rows
for filename, values in (('fig2d3_noisyf.dat', y_vals),
                         ('fig2d3_udelta1.dat', envelope_HJ),
                         ('fig2d3_udelta2.dat', envelope_HJ2)):
    with open(filename, 'w') as csv_file:
        for xi, vi in zip(x, values):
            csv_file.write('%0.5e %0.5e\n' % (xi, vi))
Nonconvex Moreau Envelopes with Analytic Formulas¶
# Nonconvex example 1: f(x) = -|x| (1-D); the Moreau envelope is analytic:
# u(x,t) = -|x| - t/2, so the HJ estimate can be checked exactly.
def f(x):
    return -torch.norm(x, p=1, dim=1)
x = torch.linspace(-1.0, 1.0, 100, device=device).view(-1, 1)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
t = 1e-1
delta = 1e-2
alpha = 1.0
y_vals = f(x)
errs = torch.zeros(x.shape, device=device)
prox_true = torch.zeros(x.shape, device=device)  # no analytic prox used here
prox_HJ = torch.zeros(x.shape, device=device)
n_integral_samples = int(1e5)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    # NOTE(review): unused diagnostic — verify intent
    errs[i] = f(temp) + (temp - x[i])/t
    envelope_HJ[i] = temp_envelope
    # analytic Moreau envelope of -|x|
    envelope_true[i] = -torch.norm(x[i].view(1,1), p=1, dim=1) - t/2
# PLOT: f, analytic envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize+2, loc=8)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'nonconvex1_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-19-e38f0bd84404>:31: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Nonconvex example 2: f(x) = -(a/2)||x||^2; the envelope is analytic when
# a*t < 1: u(x,t) = -(a / (2(1 - a t))) ||x||^2.
def f(x, a=1):
    return -(a/2)* torch.norm(x, p=2, dim=1)**2
x = torch.linspace(-1.0, 1.0, 100, device=device).view(-1, 1)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
t = 2e-1
delta = 1e-2
alpha = 1.0
a = 1.0  # concavity parameter; must satisfy a*t < 1 for the formula below
y_vals = f(x)
errs = torch.zeros(x.shape, device=device)
prox_true = torch.zeros(x.shape, device=device)  # no analytic prox used here
prox_HJ = torch.zeros(x.shape, device=device)
n_integral_samples = int(1e5)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    # NOTE(review): unused diagnostic — verify intent
    errs[i] = f(temp) + (temp - x[i])/t
    envelope_HJ[i] = temp_envelope
    # analytic envelope of the concave quadratic
    envelope_true[i] = -(a/(2*(1-a*t)))* torch.norm(x[i].view(1,1), p=2, dim=1)**2
# PLOT: f, analytic envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize+2, loc=8)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'nonconvex2_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-20-7c5ebcd98e9d>:33: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
Convex Functions with Unknown Proxes¶
def proximal_objective(z, x):
    # Objective of the prox subproblem for f(z) = ||z||^2 - sum(log z).
    # Assumes a single sample; reads the module-level time parameter `t`.
    fit = torch.norm(z)**2 - torch.sum(torch.log(z))
    return fit + (1/(2*t)) * torch.norm(z - x)**2
def proximal_objective_gradient(z, x):
    # Gradient of proximal_objective w.r.t. z (single sample);
    # reads the module-level time parameter `t`.
    fit_grad = 2*z - (1/z)
    prox_grad = (1/t)*(z-x)
    return fit_grad + prox_grad
def steepest_descent(x, f, grad_f, max_iters=1000, tol=1e-2, step_size = 1e-1, verbose=True):
    """Minimize f from x with fixed-step gradient descent (single sample).

    Stops when ||grad_f(xk)|| / ||grad_f(x0)|| < tol or after max_iters
    steps; returns the final iterate xk.

    Fixes vs the original: the in-loop print was missing fk; the stopping
    test (and printed norm) used the PRE-step gradient, lagging the check by
    one iteration; the convergence message reported i instead of i+1; the
    initial print ignored `verbose`.
    """
    xk = x
    fk = f(xk)
    grad_fk = grad_f(xk)
    norm_grad0 = torch.norm(grad_fk)  # reference for the relative tolerance
    if verbose:
        print('iter = 0', ', fk = ', fk, ', |grad_fk| = ', 1)
    for i in range(max_iters):
        xk = xk - step_size*grad_fk
        fk = f(xk)
        grad_fk = grad_f(xk)
        # relative gradient norm at the NEW iterate
        rel_grad_norm = torch.norm(grad_fk)/norm_grad0
        if verbose:
            print('iter = ', i+1, ', fk = ', fk, ', |grad_fk| = ', rel_grad_norm)
        if rel_grad_norm < tol:
            if verbose:
                print('SD converged in ', i+1, ' iterations')
            break
    return xk
def evaluate_proximal(x):
    # Solve the prox subproblem at x numerically: run steepest descent on
    # proximal_objective, warm-started at x itself.
    objective = lambda z: proximal_objective(z, x)
    gradient = lambda z: proximal_objective_gradient(z, x)
    return steepest_descent(x, objective, gradient)
# Convex f with no closed-form prox: f(x) = ||x||^2 - sum(log x), x > 0.
# The reference prox is obtained numerically via steepest descent.
def f(x):
    return torch.norm(x, dim=1)**2 - torch.sum(torch.log(x), dim=1)
x = torch.linspace(1, 3, 100).view(-1, 1).to(device)
prox_true = torch.zeros(x.shape, device=device)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_true = torch.zeros(x.shape, device=device)
errs = torch.zeros(x.shape, device=device)
t = 0.1
delta = 1e-1
alpha = 1.0
y_vals = f(x)
prox_HJ = torch.zeros(prox_true.shape, device=device)
n_integral_samples = int(1e5)
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1,1), t, f, int_samples = n_integral_samples, delta = delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope
    # reference prox via steepest descent on the prox objective
    prox_true[i] = evaluate_proximal(x[i])
    envelope_true[i] = f(prox_true[i].view(1,1)) + (1/(2*t))*torch.norm(prox_true[i] - x[i], p=2)**2
    # NOTE(review): unused diagnostic — verify intent
    errs[i] = f(temp) + (temp - x[i])/t
# PLOT: f, numerically computed envelope u, and HJ estimate u^delta
fig1 = plt.figure()
# 'seaborn-whitegrid' is deprecated since matplotlib 3.6; use the v0_8 alias
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), envelope_HJ.cpu(), '--g', linewidth=3)
ax.legend(['$f$', 'u', r'$u^\delta$'], fontsize=fontsize, loc=9)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'non_analytic_convex_envelope.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
iter = 0 , fk = tensor(1., device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.3111, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0993, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0315, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0100, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.0208, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.3072, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0969, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0303, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0095, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.0428, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.3035, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0947, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0293, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0091, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.0660, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.3000, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0925, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0283, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0087, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.0904, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2967, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0905, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0274, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0083, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = 
tensor(1.1160, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2935, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0886, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0265, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0080, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.1427, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2904, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0868, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0257, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0076, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.1706, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2875, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0851, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0250, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0073, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.1995, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2848, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0835, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0243, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0071, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.2296, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2821, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0820, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0236, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0068, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.2608, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2796, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0805, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0230, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0066, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.2932, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2772, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0791, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0224, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0063, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.3266, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2749, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0778, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0218, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0061, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.3610, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2727, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0766, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0213, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0059, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.3966, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2706, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0754, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0208, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0058, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.4332, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2686, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0743, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0204, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0056, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.4709, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2666, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0732, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0199, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0054, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.5096, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2648, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0721, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0195, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0053, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.5493, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2630, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0712, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0191, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0051, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.5901, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2613, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0702, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0187, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0050, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.6320, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2596, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0693, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0184, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0049, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.6748, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2580, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0685, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0180, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0048, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.7187, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2565, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0677, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0177, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0046, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.7636, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2551, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0669, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0174, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0045, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.8095, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2537, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0661, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0171, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0044, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.8564, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2523, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0654, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0168, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0043, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.9042, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2510, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0647, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0166, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0042, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(1.9531, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2497, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0640, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0163, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0042, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.0030, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2485, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0634, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0161, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0041, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.0538, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2473, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0628, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0158, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0040, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.1056, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2462, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0622, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0156, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0039, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.1584, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2451, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0616, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0154, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0038, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.2122, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2441, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0611, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0152, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0038, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.2670, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2431, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0606, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0150, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0037, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.3227, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2421, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0601, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0148, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0037, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.3793, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2411, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0596, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0146, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0036, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.4369, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2402, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0591, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0144, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0035, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.4955, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2393, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0586, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0143, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0035, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.5550, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2385, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0582, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0141, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0034, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.6155, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2376, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0578, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0140, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0034, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.6769, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2368, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0574, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0138, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0033, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.7392, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2360, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0570, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0137, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0033, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.8025, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2353, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0566, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0135, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0032, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.8668, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2346, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0562, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0134, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0032, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.9319, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2338, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0559, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0133, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0032, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(2.9980, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2332, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0555, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0132, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0031, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.0650, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2325, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0552, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0130, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0031, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.1330, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2318, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0549, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0129, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0030, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.2018, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2312, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0546, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0128, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0030, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.2716, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2306, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0543, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0127, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0030, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.3423, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2300, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0540, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0126, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0029, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.4139, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2294, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0537, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0125, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0029, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.4865, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2289, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0534, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0124, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0029, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.5599, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2283, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0532, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0123, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0029, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.6343, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2278, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0529, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0122, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0028, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.7096, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2273, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0526, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0121, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0028, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.7858, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2268, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0524, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0120, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0028, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.8628, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2263, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0522, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0120, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0027, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(3.9408, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2258, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0519, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0119, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0027, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.0197, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2254, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0517, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0118, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0027, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.0995, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2249, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0515, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0117, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0027, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.1802, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2245, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0513, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0117, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0027, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.2618, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2240, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0511, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0116, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0026, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.3443, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2236, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0509, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0115, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0026, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.4277, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2232, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0507, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0115, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0026, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.5120, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2228, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0505, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0114, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0026, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.5971, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2224, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0503, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0113, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0026, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.6832, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2221, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0501, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0113, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0025, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.7702, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2217, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0499, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0112, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0025, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.8580, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2213, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0498, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0111, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0025, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(4.9467, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2210, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0496, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0111, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0025, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.0364, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2207, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0494, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0110, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0025, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.1269, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2203, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0493, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0110, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.2182, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2200, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0491, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0109, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.3105, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2197, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0490, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0109, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.4037, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2194, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0488, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0108, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.4977, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2191, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0487, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0108, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.5926, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2188, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0486, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0107, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.6884, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2185, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0484, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0107, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0024, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.7850, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2182, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0483, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0106, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.8826, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2179, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0482, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0106, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(5.9810, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2177, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0480, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0106, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.0803, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2174, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0479, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0105, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.1805, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2171, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0478, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0105, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.2815, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2169, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0477, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0104, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.3834, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2166, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0476, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0104, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.4862, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2164, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0474, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0104, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.5899, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2162, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0473, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0103, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0023, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.6944, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2159, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0472, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0103, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.7998, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2157, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0471, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0103, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(6.9061, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2155, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0470, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0102, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.0132, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2153, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0469, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0102, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.1212, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2151, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0468, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0102, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.2301, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2149, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0467, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0101, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.3398, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2147, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0466, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0101, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.4504, 
device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2145, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0465, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0101, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.5618, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2143, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0464, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0100, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.6742, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2141, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0464, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0100, device='cuda:0') iter = 5 , fk = , |grad_fk| = tensor(0.0022, device='cuda:0') SD converged in 4 iterations iter = 0 , fk = tensor(7.7873, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2139, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0463, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0100, device='cuda:0') SD converged in 3 iterations iter = 0 , fk = tensor(7.9014, device='cuda:0') |grad_fk| = 1 iter = 1 , fk = , |grad_fk| = tensor(1., device='cuda:0') iter = 2 , fk = , |grad_fk| = tensor(0.2137, device='cuda:0') iter = 3 , fk = , |grad_fk| = tensor(0.0462, device='cuda:0') iter = 4 , fk = , |grad_fk| = tensor(0.0100, device='cuda:0') SD converged in 3 iterations
<ipython-input-23-c7672a6b1f1e>:31: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# Compare the analytic proximal against the HJ-based approximation on the grid x.
fig1 = plt.figure()
# Fix: the bare 'seaborn-whitegrid' style name was deprecated in Matplotlib 3.6
# and removed in 3.8; the 'seaborn-v0_8-*' aliases are the supported names
# (see the deprecation warning captured in this notebook's output).
plt.style.use('seaborn-v0_8-whitegrid')
ax = plt.axes()
# Tensors live on the GPU; .cpu() moves them for plotting.
ax.plot(x.cpu(), prox_true.cpu(), linewidth=3, color=my_orange)
ax.plot(x.cpu(), prox_HJ.cpu(), '--', linewidth=3, color='g')
# ax.set_xlabel("x-axis", fontsize=title_fontsize)
ax.legend(['True Prox', 'HJ Prox'], fontsize=fontsize, loc=2)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'non_analytic_convex_prox_comparison.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-25-749aff71ffe3>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
def f_noisy(x):
    """Noisy objective: squared l2-norm minus a log-barrier term plus Gaussian noise.

    NOTE(review): assumes x is 2-D (rows are samples) with strictly positive
    entries, since ``torch.log(x)`` must stay finite -- confirm against callers.
    Returns one scalar per row of x.
    """
    quad_term = torch.norm(x, dim=1) ** 2
    barrier_term = torch.sum(torch.log(x), dim=1)
    noise = 5e-1 * torch.randn(x.shape[0], device=x.device)
    return quad_term - barrier_term + noise
f = f_noisy

# Prox / Moreau-envelope parameters.
t = 1e-1        # prox time step
delta = 5e-1    # large viscosity: heavy smoothing
delta2 = 1e-1   # small viscosity: light smoothing
alpha = 1.0
n_integral_samples = int(1e4)

# Noisy function values and the exact prox of the noiseless objective
# (analytic_prox ignores the additive noise term) for reference.
y_vals = f(x)
prox_true = analytic_prox(x, t=t)

# HJ estimates of prox and envelope for both viscosity levels.
prox_HJ = torch.zeros(prox_true.shape, device=device)
prox_HJ2 = torch.zeros(prox_true.shape, device=device)
envelope_HJ = torch.zeros(x.shape, device=device)
envelope_HJ2 = torch.zeros(x.shape, device=device)  # fix: was never initialized before use below
envelope_true = torch.zeros(x.shape, device=device)  # kept for downstream cells
errs = torch.zeros(x.shape, device=device)           # kept for downstream cells

# compute_prox handles one point at a time, so loop over the grid.
for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1, 1), t, f, int_samples=n_integral_samples, delta=delta, alpha=alpha, device=device)
    prox_HJ[i] = temp
    envelope_HJ[i] = temp_envelope

for i in range(x.shape[0]):
    temp, ls_iters, temp_envelope = compute_prox(x[i].view(1, 1), t, f, int_samples=n_integral_samples, delta=delta2, alpha=alpha, device=device)
    prox_HJ2[i] = temp
    envelope_HJ2[i] = temp_envelope
# PLOT: noisy samples of f vs. the two Moreau-envelope estimates.
fig1 = plt.figure()
# 'seaborn-whitegrid' was renamed in Matplotlib 3.6; fall back for older versions.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), y_vals.cpu(), linewidth=3)
ax.plot(x.cpu(), envelope_HJ.cpu(), linewidth=4, color='g')
ax.plot(x.cpu(), envelope_HJ2.cpu(), linewidth=4)
# Raw strings: '\d' is an invalid escape sequence in a plain string literal.
ax.legend(['noisy $f$', r'$u^{\delta_1}$', r'$u^{\delta_2}$'], fontsize=fontsize, loc=2)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'non_analytic_convex_envelope_noisy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-26-26e0534142f9>:35: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
# PLOT: HJ prox recovered from noisy function samples.
fig1 = plt.figure()
# 'seaborn-whitegrid' was renamed in Matplotlib 3.6; fall back for older versions.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
ax = plt.axes()
ax.plot(x.cpu(), prox_HJ.cpu(), linewidth=4, color='g')
ax.legend(['HJ Prox from noisy \n samples'], fontsize=fontsize + 2, loc=2)
save_str = 'non_analytic_convex_prox_noisy.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-27-29a46d037b0c>:2: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
High Dimensional Shrink Experiments¶
f = l1_norm
analytic_prox = l1_norm_prox

# Sweep problem dimension and Monte-Carlo sample count for the l1 (shrink) prox.
dim_array = [10, 25, 50, 75, 100]
n_samples = int(1000)
n_integral_samples_array = [int(1e1), int(1e3), int(1e4), int(1e5)]
rel_errs_array = torch.zeros(len(dim_array), len(n_integral_samples_array))
torch.manual_seed(2)
delta = 1e-1
t = 1e-2
for k, dim in enumerate(dim_array):
    print('\n DIMENSION = ', dim)
    for i, n_int in enumerate(n_integral_samples_array):
        x = torch.randn(n_samples, dim, device=device)
        prox_true = analytic_prox(x, t=t)
        # compute_prox handles one point at a time, so accumulate the
        # per-sample relative error and average once the loop finishes.
        for j in range(n_samples):
            prox_HJ, ls_iters, temp_envelope = compute_prox(x[j, :].unsqueeze(1), t, f, int_samples=n_int, delta=delta, alpha=alpha, device=device)
            rel_errs_array[k, i] = rel_errs_array[k, i] + torch.norm(prox_true[j, :].cpu() - prox_HJ.squeeze(1).cpu()) / torch.norm(prox_true[j, :].cpu())
        rel_errs_array[k, i] = rel_errs_array[k, i] / n_samples
        print('dim = ', dim, ', n_integral_samples = ', n_int, ', rel err = ', rel_errs_array[k, i], ', ls_iters = ', ls_iters)
DIMENSION = 10 dim = 10 , n_integral_samples = 10 , rel err = tensor(0.0153) , ls_iters = 1 dim = 10 , n_integral_samples = 1000 , rel err = tensor(0.0021) , ls_iters = 1 dim = 10 , n_integral_samples = 10000 , rel err = tensor(0.0010) , ls_iters = 1 dim = 10 , n_integral_samples = 100000 , rel err = tensor(0.0007) , ls_iters = 1 DIMENSION = 25 dim = 25 , n_integral_samples = 10 , rel err = tensor(0.0181) , ls_iters = 1 dim = 25 , n_integral_samples = 1000 , rel err = tensor(0.0034) , ls_iters = 1 dim = 25 , n_integral_samples = 10000 , rel err = tensor(0.0014) , ls_iters = 1 dim = 25 , n_integral_samples = 100000 , rel err = tensor(0.0009) , ls_iters = 1 DIMENSION = 50 dim = 50 , n_integral_samples = 10 , rel err = tensor(0.0209) , ls_iters = 1 dim = 50 , n_integral_samples = 1000 , rel err = tensor(0.0064) , ls_iters = 1 dim = 50 , n_integral_samples = 10000 , rel err = tensor(0.0029) , ls_iters = 1 dim = 50 , n_integral_samples = 100000 , rel err = tensor(0.0014) , ls_iters = 1 DIMENSION = 75 dim = 75 , n_integral_samples = 10 , rel err = tensor(0.0224) , ls_iters = 1 dim = 75 , n_integral_samples = 1000 , rel err = tensor(0.0091) , ls_iters = 1 dim = 75 , n_integral_samples = 10000 , rel err = tensor(0.0049) , ls_iters = 1 dim = 75 , n_integral_samples = 100000 , rel err = tensor(0.0025) , ls_iters = 1 DIMENSION = 100 dim = 100 , n_integral_samples = 10 , rel err = tensor(0.0241) , ls_iters = 1 dim = 100 , n_integral_samples = 1000 , rel err = tensor(0.0116) , ls_iters = 1 dim = 100 , n_integral_samples = 10000 , rel err = tensor(0.0073) , ls_iters = 1 dim = 100 , n_integral_samples = 100000 , rel err = tensor(0.0040) , ls_iters = 1
# PLOT: relative error vs. number of samples, one curve per dimension.
# Effective font sizes are 22 / 15 (the earlier 18 was dead code, overwritten
# before use, and has been removed).
title_fontsize = 22
fontsize = 15
my_blue = '#1f77b4'
fig1 = plt.figure()
# 'seaborn-whitegrid' was renamed in Matplotlib 3.6; fall back for older versions.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
ax = plt.axes()
for i in range(len(dim_array)):
    ax.semilogy(n_integral_samples_array, rel_errs_array[i, :], linewidth=3)
ax.set_xlabel("Number of Samples", fontsize=title_fontsize)
ax.legend(['dim 10', 'dim 25', 'dim 50', 'dim 75', 'dim 100'], fontsize=fontsize, loc=1)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'shrink_higher_dim_err_vs_samples.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-30-e40ab8e14fd5>:9: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
High Dimensional Quadratic Experiments¶
# Sweep problem dimension and Monte-Carlo sample count for a quadratic objective.
dim_array = [10, 25, 50, 75, 100]
n_samples = 1000
n_integral_samples_array = [int(1e0), int(1e2), int(1e3), int(1e4)]
rel_errs_array = torch.zeros(len(dim_array), len(n_integral_samples_array))
torch.manual_seed(2)
delta = 1e-1
t = 1e-2
for k in range(len(dim_array)):
    print('\n DIMENSION = ', dim_array[k])

    # The quadratic problem data depends only on the dimension; hoist it out of
    # the sampling loops instead of rebuilding it n_samples times per setting.
    # (torch.eye / torch.ones consume no RNG, so the random stream is unchanged.)
    A = torch.eye(dim_array[k], device=device)
    # A = torch.randn(dim_array[k], dim_array[k], device=device)
    # A = A + A.t() + torch.eye(dim_array[k], device=device)
    b = torch.ones(A.shape[0], device=device)

    def f(x, A=A, b=b):  # default-bind A, b to avoid late-binding surprises
        return quadratic(x, A, b)

    def analytic_prox(x, t=0.5, A=A, b=b):
        return quadratic_prox(x, A, b, t=t)

    for i in range(len(n_integral_samples_array)):
        for j in range(n_samples):  # compute_prox handles one point at a time
            x = torch.randn(dim_array[k], 1, device=device)
            prox_true = analytic_prox(x.permute(1, 0), t=t)
            prox_HJ, ls_iters, envelopes = compute_prox(x, t, f, int_samples=n_integral_samples_array[i], delta=delta, alpha=alpha, device=device)
            # prox_HJ is dim x 1; permute back to 1 x dim to match prox_true.
            rel_errs_array[k, i] = rel_errs_array[k, i] + torch.norm(prox_true.cpu() - prox_HJ.permute(1, 0).cpu()) / torch.norm(prox_true.cpu())
        rel_errs_array[k, i] = rel_errs_array[k, i] / n_samples
        print('dim = ', dim_array[k], ', n_integral_samples = ', n_integral_samples_array[i], ', rel err = ', rel_errs_array[k, i], ', ls_iters = ', ls_iters)
DIMENSION = 10 dim = 10 , n_integral_samples = 1 , rel err = tensor(0.0367) , ls_iters = 1 dim = 10 , n_integral_samples = 100 , rel err = tensor(0.0076) , ls_iters = 1 dim = 10 , n_integral_samples = 1000 , rel err = tensor(0.0029) , ls_iters = 1 dim = 10 , n_integral_samples = 10000 , rel err = tensor(0.0010) , ls_iters = 1 DIMENSION = 25 dim = 25 , n_integral_samples = 1 , rel err = tensor(0.0358) , ls_iters = 1 dim = 25 , n_integral_samples = 100 , rel err = tensor(0.0125) , ls_iters = 1 dim = 25 , n_integral_samples = 1000 , rel err = tensor(0.0063) , ls_iters = 1 dim = 25 , n_integral_samples = 10000 , rel err = tensor(0.0028) , ls_iters = 1 DIMENSION = 50 dim = 50 , n_integral_samples = 1 , rel err = tensor(0.0352) , ls_iters = 1 dim = 50 , n_integral_samples = 100 , rel err = tensor(0.0174) , ls_iters = 1 dim = 50 , n_integral_samples = 1000 , rel err = tensor(0.0119) , ls_iters = 1 dim = 50 , n_integral_samples = 10000 , rel err = tensor(0.0073) , ls_iters = 1 DIMENSION = 75 dim = 75 , n_integral_samples = 1 , rel err = tensor(0.0352) , ls_iters = 1 dim = 75 , n_integral_samples = 100 , rel err = tensor(0.0206) , ls_iters = 1 dim = 75 , n_integral_samples = 1000 , rel err = tensor(0.0158) , ls_iters = 1 dim = 75 , n_integral_samples = 10000 , rel err = tensor(0.0109) , ls_iters = 1 DIMENSION = 100 dim = 100 , n_integral_samples = 1 , rel err = tensor(0.0350) , ls_iters = 1 dim = 100 , n_integral_samples = 100 , rel err = tensor(0.0222) , ls_iters = 1 dim = 100 , n_integral_samples = 1000 , rel err = tensor(0.0179) , ls_iters = 1 dim = 100 , n_integral_samples = 10000 , rel err = tensor(0.0140) , ls_iters = 1
# PLOT: relative error vs. number of samples, one curve per dimension.
# Effective font sizes are 22 / 15 (the earlier 18 was dead code, overwritten
# before use, and has been removed).
title_fontsize = 22
fontsize = 15
my_blue = '#1f77b4'
fig1 = plt.figure()
# 'seaborn-whitegrid' was renamed in Matplotlib 3.6; fall back for older versions.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
ax = plt.axes()
for i in range(len(dim_array)):
    ax.semilogy(n_integral_samples_array, rel_errs_array[i, :], linewidth=3)
ax.set_xlabel("Number of Samples", fontsize=title_fontsize)
ax.legend(['dim 10', 'dim 25', 'dim 50', 'dim 75', 'dim 100'], fontsize=fontsize, loc=1)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'quadratic_higher_dim_err_vs_samples.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-32-ff10d27841a8>:9: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')
High Dimensional Log Barrier Experiments¶
f = log_barrier
analytic_prox = log_barrier_prox

# Sweep problem dimension and Monte-Carlo sample count for the log-barrier prox.
dim_array = [10, 25, 50, 75, 100]
n_samples = 1000
n_integral_samples_array = [int(1e0), int(1e2), int(1e3), int(1e4)]
rel_errs_array = torch.zeros(len(dim_array), len(n_integral_samples_array))
torch.manual_seed(2)
delta = 2e-1
t = 1e-2
for k, dim in enumerate(dim_array):
    print('\n DIMENSION = ', dim)
    for i, n_int in enumerate(n_integral_samples_array):
        for j in range(n_samples):  # compute_prox handles one point at a time
            # Sample strictly inside the barrier's domain: entries in [2, 3).
            x = torch.rand(dim, 1, device=device) + 2
            prox_true = analytic_prox(x.permute(1, 0), t=t)
            prox_HJ, ls_iters, envelopes = compute_prox(x, t, f, int_samples=n_int, delta=delta, alpha=alpha, device=device)
            # prox_HJ is dim x 1; permute back to 1 x dim to match prox_true.
            rel_errs_array[k, i] = rel_errs_array[k, i] + torch.norm(prox_true.cpu() - prox_HJ.permute(1, 0).cpu()) / torch.norm(prox_true.cpu())
        rel_errs_array[k, i] = rel_errs_array[k, i] / n_samples
        print('dim = ', dim, ', n_integral_samples = ', n_int, ', rel err = ', rel_errs_array[k, i], ', ls_iters = ', ls_iters)
DIMENSION = 10 dim = 10 , n_integral_samples = 1 , rel err = tensor(0.0173) , ls_iters = 1 dim = 10 , n_integral_samples = 100 , rel err = tensor(0.0018) , ls_iters = 1 dim = 10 , n_integral_samples = 1000 , rel err = tensor(0.0006) , ls_iters = 1 dim = 10 , n_integral_samples = 10000 , rel err = tensor(0.0002) , ls_iters = 1 DIMENSION = 25 dim = 25 , n_integral_samples = 1 , rel err = tensor(0.0176) , ls_iters = 1 dim = 25 , n_integral_samples = 100 , rel err = tensor(0.0019) , ls_iters = 1 dim = 25 , n_integral_samples = 1000 , rel err = tensor(0.0006) , ls_iters = 1 dim = 25 , n_integral_samples = 10000 , rel err = tensor(0.0002) , ls_iters = 1 DIMENSION = 50 dim = 50 , n_integral_samples = 1 , rel err = tensor(0.0177) , ls_iters = 1 dim = 50 , n_integral_samples = 100 , rel err = tensor(0.0022) , ls_iters = 1 dim = 50 , n_integral_samples = 1000 , rel err = tensor(0.0007) , ls_iters = 1 dim = 50 , n_integral_samples = 10000 , rel err = tensor(0.0002) , ls_iters = 1 DIMENSION = 75 dim = 75 , n_integral_samples = 1 , rel err = tensor(0.0177) , ls_iters = 1 dim = 75 , n_integral_samples = 100 , rel err = tensor(0.0024) , ls_iters = 1 dim = 75 , n_integral_samples = 1000 , rel err = tensor(0.0008) , ls_iters = 1 dim = 75 , n_integral_samples = 10000 , rel err = tensor(0.0002) , ls_iters = 1 DIMENSION = 100 dim = 100 , n_integral_samples = 1 , rel err = tensor(0.0177) , ls_iters = 1 dim = 100 , n_integral_samples = 100 , rel err = tensor(0.0026) , ls_iters = 1 dim = 100 , n_integral_samples = 1000 , rel err = tensor(0.0008) , ls_iters = 1 dim = 100 , n_integral_samples = 10000 , rel err = tensor(0.0003) , ls_iters = 1
# PLOT: relative error vs. number of samples, one curve per dimension.
# Effective font sizes are 22 / 15 (the earlier 18 was dead code, overwritten
# before use, and has been removed).
title_fontsize = 22
fontsize = 15
my_blue = '#1f77b4'
fig1 = plt.figure()
# 'seaborn-whitegrid' was renamed in Matplotlib 3.6; fall back for older versions.
try:
    plt.style.use('seaborn-v0_8-whitegrid')
except OSError:
    plt.style.use('seaborn-whitegrid')
ax = plt.axes()
for i in range(len(dim_array)):
    ax.semilogy(n_integral_samples_array, rel_errs_array[i, :], linewidth=3)
ax.set_xlabel("Number of Samples", fontsize=title_fontsize)
ax.legend(['dim 10', 'dim 25', 'dim 50', 'dim 75', 'dim 100'], fontsize=fontsize, loc=1)
ax.tick_params(labelsize=fontsize, which='both', direction='in')
save_str = 'log_barrier_higher_dim_err_vs_samples.pdf'
fig1.savefig(save_str, dpi=300, bbox_inches="tight", pad_inches=0.0)
<ipython-input-34-21601891ee5b>:9: MatplotlibDeprecationWarning: The seaborn styles shipped by Matplotlib are deprecated since 3.6, as they no longer correspond to the styles shipped by seaborn. However, they will remain available as 'seaborn-v0_8-<style>'. Alternatively, directly use the seaborn API instead. plt.style.use('seaborn-whitegrid')