#!/usr/bin/env python
# coding: utf-8

# In[1]:


# Example numbers from the slides: two predictor lists (u, v) and the
# response values z, one entry per sample.
u = [2, 1, 6, 0, 2, 1]
v = [4, 1, 3, 1, 0, 14]
z = [11, 3, 26, 3, 10, 8]


# In[2]:


from numpy import sqrt


# Example basis functions for the linear model z ~ beta[0]*f0 + beta[1]*f1.
def f0(u, v):
    """Return the first basis function: u unchanged (v is not used)."""
    return u


def f1(u, v):
    """Return the second basis function: element-wise square root of v."""
    return sqrt(v)


# In[7]:


from numpy import array

# Design matrix: one row per sample, one column per basis function.
A = array([f0(u, v), f1(u, v)]).transpose()


# In[8]:


A


# In[10]:


n = A.shape[0]  # number of samples (rows of A)
m = A.shape[1]  # number of fitted parameters (columns of A)


# In[12]:


y = array(z)


# In[17]:


# Find the least-squares parameter values from the normal equations
# (A^T A) beta = A^T y.
from numpy.linalg import solve
solve(A.transpose() @ A, A.transpose() @ y.reshape((n, 1)))


# In[16]:


(A.transpose() @ A).shape


# In[23]:


# Same fit through lstsq, called once and unpacked.
# rcond is the cut-off ratio for small singular values; a value of 1 would,
# as documented, discard every singular value below the largest one.
# rcond=None asks for the machine-precision default instead.
from numpy.linalg import lstsq
beta, rss_lstsq = lstsq(A, y, rcond=None)[:2]


# In[21]:


rss_lstsq  # RSS from regression, as reported by lstsq


# In[22]:


# In[24]:


beta


# In[26]:


# Fitted values: the columns of A already hold f0 and f1 evaluated on the data.
fx = A @ beta


# In[27]:


fx


# In[29]:


# Visualize the regression function fit: fitted values against given values.
from matplotlib.pyplot import plot, xlabel, ylabel
plot(y, fx, 's')
# Identity line over the observed range of y; a perfect fit would put every
# marker on it. The end points are taken from the data rather than typed in.
diag = [y.min(), y.max()]
plot(diag, diag)
xlabel('Given z values')
ylabel('Regression model with least-squares beta')


# In[30]:


from numpy import log, mean, sum

# Residual sum of squares of the fitted linear model.
r = y - fx
RSS = sum(r * r)


# In[32]:


# Total sum of squares: squared deviations of y from its mean.
y_dev = y - mean(y)
TSS = sum(y_dev * y_dev)


# In[33]:


TSS


# In[34]:


# Coefficient of determination.
unexplained = RSS/TSS
R2 = 1 - unexplained


# In[37]:


# Adjusted R^2 (larger is better): rescales the unexplained fraction by
# (n-1)/(n-m), with n samples and m fitted parameters.
dof_ratio = (n-1)/(n-m)
R2a = 1 - dof_ratio*(RSS/TSS)
R2a


# In[38]:


# Akaike information criterion (smaller is better). The penalty term
# 2*m*n/(n-m-1) equals 2*m + 2*m*(m+1)/(n-m-1), i.e. this is the
# small-sample corrected form (AICc).
penalty = 2*m*n/(n - m - 1)
AIC = n * log(RSS/n) + penalty
AIC


# In[44]:


# Nonlinear regression function example: z ~ beta[0]*u + v**beta[1].
from scipy.optimize import least_squares


def f(beta):
    """Return the model values beta[0]*u + v**beta[1] for all samples."""
    slope = beta[0]
    exponent = beta[1]
    return slope*array(u) + array(v)**exponent


def r(beta):
    """Return the residual vector y - f(beta) as a function of beta."""
    return y - f(beta)


beta_start = array([4, 1])  # starting point handed to the optimizer
s = least_squares(r, beta_start)


# In[46]:


# In[47]:


s.x


# In[48]:


# Residual sum of squares at the optimizer's solution.
r_ls = r(s.x)
RSS = sum(r_ls * r_ls)
RSS


# In[49]:


# Coefficient of determination, using the TSS computed for the same y.
unexplained = RSS/TSS
R2 = 1 - unexplained
R2


# In[1]:


from sklearn.datasets import fetch_california_housing


# In[2]:


# Load the California housing data set object.
california = fetch_california_housing()


# In[3]:


# Print the description text shipped with the data set.
print(california.DESCR)


# In[6]:


# In[7]:


# Predictor matrix X and target vector y.
X, y = california.data, california.target


# In[10]:


from matplotlib.pyplot import plot, hist, xlabel, ylabel

# Histogram of the target values.
hist(y)


# In[11]:


# Target against column 0 of X (labelled as neighborhood income).
income = X[:, 0]
plot(income, y, 's')
xlabel('Neighborhood income [$10K/y ?]')
ylabel('Neighborhood house price [$100K]')


# In[12]:


# Target against column 1 of X (labelled as house age).
house_age = X[:, 1]
plot(house_age, y, 's')
xlabel('House age [y]')
ylabel('Neighborhood house price [$100K]')


# In[13]:


# Longitude (column 7) on the horizontal axis, latitude (column 6) on the
# vertical axis.
longitude = X[:, 7]
latitude = X[:, 6]
plot(longitude, latitude, 's')


# In[18]:


# Correlation matrix between the predictor columns (X.T puts one variable
# per row, which is what corrcoef expects by default).
from numpy import corrcoef
corrcoef(X.T)


# In[22]:


# Example linear regression of price vs. predictor values.
from numpy import zeros

n = len(y)
n_features = X.shape[1]
# Design matrix: every predictor column of X followed by a column of ones
# for the intercept. The width follows X instead of being hard-coded.
A = zeros((n, n_features + 1))
A[:, :n_features] = X
A[:, n_features] = 1


# In[23]:


A[0, :]


# In[24]:


# rcond is the cut-off ratio for small singular values; a value of 1 would,
# as documented, discard every singular value below the largest one.
# rcond=None asks for the machine-precision default instead.
from numpy.linalg import lstsq
beta = lstsq(A, y, rcond=None)[0]
beta


# In[25]:


# Fitted prices from the linear model, plotted against the given prices.
fx = A.dot(beta)
plot(y, fx, 's')
# Identity line from 0 to 5 for reference.
identity_line = [0, 5]
plot(identity_line, identity_line)
xlabel('Given prices')
ylabel('Regression model with least-squares beta')


# In[26]:


# Residual sum of squares of the housing-price fit.
r_ls = y - fx
RSS = sum(r_ls * r_ls)
RSS


# In[27]:


# Total sum of squares: squared deviations of y from its mean.
from numpy import mean
y_dev = y - mean(y)
TSS = sum(y_dev * y_dev)


# In[28]:


# Coefficient of determination.
unexplained = RSS/TSS
R2 = 1 - unexplained
R2


# In[29]:


# Number of fitted parameters (one entry of beta per column of A).
m = beta.shape[0]
m


# In[30]:


# Adjusted R^2 (larger is better): rescales the unexplained fraction by
# (n-1)/(n-m).
dof_ratio = (n-1)/(n-m)
R2a = 1 - dof_ratio*(RSS/TSS)
R2a


# In[ ]: