w: {}".
format(z, w))#outputz: tensor([[2.
, 2.
], [2.
, 2.
], [2.
, 2.
]], grad_fn=<MulBackward0>),w: 2.
0#Computes the gradient of current tensorw.
backward()x.
grad#outputtensor([[-2.
, -2.
], [-2.
, -2.
], [-2.
, -2.
]])3.
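For reference, here is a minimal self-contained sketch of the same `backward()`/`grad` mechanics (hypothetical tensors of my own, not the `x`, `z`, `w` above):

```python
import torch

x = torch.ones(3, 2, requires_grad=True)
w = (-2 * x).sum()   #a scalar function of x
w.backward()         #populates x.grad with dw/dx
x.grad               #tensor of -2.0s, since d(-2x)/dx = -2
```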
3. LR With Stochastic Gradient Descent

The starting point for much of this notebook is based on notes I transcribed from Dr. Alvarez’s excellent ODSC presentation, “Understanding the PyTorch Framework with Applications to Deep Learning.”
Step #1: Basic Definitions of Data Space

```python
import numpy as np
import torch

n_data = 10_000
X = torch.ones(n_data, 2)

#populate random points into X
X[:, 0].uniform_(-1.15, 1.15)

#the "true" coefficients the model should recover
actual = torch.tensor([np.random.uniform(-14, 14),
                       np.random.uniform(-14, 14)], dtype=torch.float)
y = X @ actual + torch.randn(n_data) / 3
```
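Written out (notation mine), Step #1 draws data from a linear model with true coefficients $\theta^{*}$ (the `actual` tensor) and Gaussian noise with standard deviation 1/3:

$$
y = X\theta^{*} + \varepsilon, \qquad \varepsilon_i \sim \mathcal{N}\!\left(0, (1/3)^2\right)
$$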
Step #2: Train/Test Split

There is no autocovariance in this toy dataset, so a simple sequential train/test split will suffice.

```python
#Train test split
train_size = int(n_data * .75)
test_size = n_data - train_size
X_train, X_test = X[0:train_size, :], X[train_size:n_data, :]
y_train, y_test = y[0:train_size], y[train_size:n_data]

import matplotlib.pyplot as plt
plt.scatter(X_train[:, 0], y_train, s=.1);
```

Step #3: Define Scoring Function

```python
#despite the name, this computes the mean squared error (MSE)
def mean_standard_error(y, y_pred):
    return ((y - y_pred) ** 2).mean()
```
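Written out (notation mine), this is the familiar mean squared error:

$$
\mathrm{MSE}(y, \hat{y}) = \frac{1}{n}\sum_{i=1}^{n}\left(y_i - \hat{y}_i\right)^2
$$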
Step #4: Start with an arbitrary model to begin training

```python
#A random prediction
model = torch.tensor([np.random.uniform(-14, 14),
                      np.random.uniform(-14, 14)], dtype=torch.float)
y_hat = X_train @ model
mean_standard_error(y_hat, y_train)

plt.scatter(X_train[:, 0], y_train, s=.1)
plt.scatter(X_train[:, 0], y_hat, s=.1)
```
Step #5: Parameterize the model

```python
import torch.nn as nn

model = nn.Parameter(model)
model
#output
Parameter containing:
tensor([8.6251, 6.5764], requires_grad=True)
```
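What `nn.Parameter` buys us: it is a `Tensor` subclass with `requires_grad=True` by default, so autograd will track every operation on `model`. A quick check (a sketch, assuming the `model` defined above):

```python
print(model.requires_grad)  #True, so gradients will flow to model
```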
Bringing Steps 1–5 together

```python
#A glorified stochastic gradient descent machine
def gentle_linear_torch(data, target, learning_rate, iterations):
    def mean_standard_error(y_actual, y_pred):
        return ((y_actual - y_pred) ** 2).mean()

    #Random initial model
    model = nn.Parameter(torch.tensor([np.random.uniform(-14, 14),
                                       np.random.uniform(-14, 14)],
                                      dtype=torch.float))

    for time in range(iterations):
        y_hat = data @ model
        loss = mean_standard_error(target, y_hat)
        if time > 0 and last_loss == loss:
            print("convergence of SGD at step {}".format(time))
            return model
        if time > 0 and time % 5 == 0:
            #report the change in loss and plot progress every 5 steps
            print("step: {:4} MSE: {:5}".format(time, float(last_loss.data - loss.data)))
            plt.scatter(data[:, 0], target, s=.1, alpha=.05);
            plt.scatter(data[:, 0], y_hat.detach().numpy(), s=.1);
        loss.backward()
        with torch.no_grad():
            #These *_ methods set values in place, like inplace=True
            model.sub_(learning_rate * model.grad)
            model.grad.zero_()
        last_loss = loss
    return model
```
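Each iteration applies the gradient-descent update rule (notation mine), with learning rate $\eta$:

$$
\theta_{t+1} = \theta_t - \eta \, \nabla_{\theta}\,\mathrm{MSE}\!\left(y, X\theta_t\right)
$$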
```python
sgd = gentle_linear_torch(X_train, y_train, .1, 1000)
y_hat = X_test @ sgd
float(mean_standard_error(y_hat, y_test))
#MSE output
0.10991530865430832

plt.scatter(X_test[:, 0], y_test, s=.1, alpha=.5);
plt.scatter(X_test[:, 0], y_hat.detach().numpy(), s=.1);
```
[Plot on test set]
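For comparison, the same model can be trained with PyTorch's built-in optimizer. This is a minimal sketch (my code, not part of the original post), assuming `X_train` and `y_train` from above:

```python
import numpy as np
import torch
import torch.nn as nn

model = nn.Parameter(torch.tensor([np.random.uniform(-14, 14),
                                   np.random.uniform(-14, 14)],
                                  dtype=torch.float))
opt = torch.optim.SGD([model], lr=0.1)
loss_fn = nn.MSELoss()

for step in range(1000):
    opt.zero_grad()                           #clear old gradients
    loss = loss_fn(X_train @ model, y_train)  #MSE on the training set
    loss.backward()                           #compute d(loss)/d(model)
    opt.step()                                #model -= lr * model.grad
```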
Normal Equation Solution

Calculating the linear regression coefficients with the ordinary least squares (OLS) estimator:
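In matrix form (notation mine):

$$
\hat{\theta} = \left(X^{\top} X\right)^{-1} X^{\top} y
$$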
```python
theta = (torch.t(X_train) @ X_train).inverse() @ torch.t(X_train) @ y_train
print(theta)
print(sgd)
#output
tensor([0.8584, 1.9531])
Parameter containing:
tensor([0.8580, 1.9532], requires_grad=True)
```
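Side note: explicitly inverting $X^{\top}X$ can be numerically unstable; a least-squares solver is generally preferred. A sketch (my addition, assuming PyTorch 1.9+):

```python
#solves min ||X @ theta - y||^2 without forming the inverse
theta = torch.linalg.lstsq(X_train, y_train.unsqueeze(1)).solution.squeeze()
```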
Done!

And there you have it.
Let me know if you like these less verbose posts.