#!/usr/bin/env python
# coding: utf-8

# In[1]:


#example from slides
u = [2, 1, 6, 0, 2, 1]
v = [4, 1, 3, 1, 0, 14]
z = [11, 3, 26, 3, 10, 8]


# In[2]:


#example linear regression basis functions
f0 = lambda u, v: u
from numpy import sqrt
f1 = lambda u, v: sqrt(v)


# In[3]:


from numpy import array
A = array([f0(u, v), f1(u, v)]).transpose() #the design matrix
y = array(z)
n = A.shape[0] #number of data points
m = A.shape[1] #number of function parameters to fit


# In[5]:


#find least squares parameter values
#by solving the normal equations
from numpy.linalg import solve
beta_n = solve( A.transpose()@A, 
                A.transpose()@y.reshape((n, 1)))


# In[7]:


#computationally better approach
from numpy.linalg import lstsq
beta = lstsq(A, y, rcond=1)[0]


# In[8]:


beta


# In[9]:


#fitted function values corresponding to the given z
fx = A@beta
#or fx = beta[0]*array(f0(u, v)) + beta[1]*array(f1(u, v))


# In[10]:


#visualize the regression function fit
from matplotlib.pyplot import plot, xlabel, ylabel
zmin = min(z) - 1
zmax = max(z) + 1
plot(y, fx, 's')
plot([zmin, zmax], [zmin, zmax]) #1-1 line
xlabel('Given z values')
ylabel('Regression model with least-squares beta')


# In[11]:


#calculate regression diagnostics
from numpy import sum
r = y - fx #vector of residuals
RSS = sum(r ** 2)
from numpy import mean
TSS = sum((y - mean(y))**2)
R2 = 1 - RSS/TSS #coefficient of determination
#adjusted R^2 (larger is better)
R2a = 1 - ((n-1)/(n-m))*(RSS/TSS)
#Akaike information criterion (smaller is better)
from numpy import log
AIC = n * log(RSS/n) + 2*m*n/(n - m - 1)


# In[14]:


#nonlinear regression function example for the same data
from scipy.optimize import least_squares
f = lambda beta: beta[0]*array(u) + array(v)**beta[1]
r = lambda beta: y - f(beta) #residual vector as function of beta
s = least_squares(r, array([4, 1]))
r_ls = r(s.x)
RSS = sum(r_ls ** 2)
R2 = 1 - RSS/TSS #TSS is the same as before


# In[17]:


s.x #least-squares beta values


# In[10]:


# https://scikit-learn.org/stable/auto_examples/classification/plot_digits_classification.html
get_ipython().run_line_magic('pip', 'install scikit-learn')


# In[18]:


# Standard scientific Python imports
import matplotlib.pyplot as plt

# Import datasets, classifiers and performance metrics
from sklearn import datasets, metrics, svm
from sklearn.model_selection import train_test_split


# In[19]:


digits = datasets.load_digits()

_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for ax, image, label in zip(axes, digits.images, digits.target):
    ax.set_axis_off()
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title("Training: %i" % label)


# In[20]:


# flatten the images
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Create a classifier: a support vector classifier
clf = svm.SVC(gamma=0.001)

# Split data into 50% train and 50% test subsets
X_train, X_test, y_train, y_test = train_test_split(
    data, digits.target, test_size=0.5, shuffle=False
)

# Learn the digits on the train subset
clf.fit(X_train, y_train)

# Predict the value of the digit on the test subset
predicted = clf.predict(X_test)


# In[21]:


_, axes = plt.subplots(nrows=1, ncols=4, figsize=(10, 3))
for ax, image, prediction in zip(axes, X_test, predicted):
    ax.set_axis_off()
    image = image.reshape(8, 8)
    ax.imshow(image, cmap=plt.cm.gray_r, interpolation="nearest")
    ax.set_title(f"Prediction: {prediction}")


# In[22]:


print(
    f"Classification report for classifier {clf}:\n"
    f"{metrics.classification_report(y_test, predicted)}\n"
)


# In[23]:


disp = metrics.ConfusionMatrixDisplay.from_predictions(y_test, predicted)
disp.figure_.suptitle("Confusion Matrix")
print(f"Confusion matrix:\n{disp.confusion_matrix}")

plt.show()


# In[ ]:




