-
Notifications
You must be signed in to change notification settings - Fork 0
/
line_of_best_fit.py
59 lines (46 loc) · 1.57 KB
/
line_of_best_fit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
import matplotlib.pyplot as plt
# Load the data from a hard-coded path (no drive letter is given); the script
# cannot run unless the CSV exists at exactly this location.
dataset = pd.read_csv('\\Users\\hamza\\Documents\\VS Code\\Python\\Line of Best Fit\\data.csv')
# Every column except the last, as a 2-D array with one row per sample.
# Because of this, each X[i] is itself an array rather than a scalar.
X = dataset.iloc[:, :-1].values
# The last column only, as a 1-D array with one value per sample.
y = dataset.iloc[:, -1].values
def mean(d):
    """Return the arithmetic mean of the values in ``d``.

    ``d`` must work with both ``sum()`` and ``len()``.  An empty ``d``
    raises ``ZeroDivisionError`` because the total is divided by zero.
    """
    total = sum(d)
    count = len(d)
    return total / count
def std_dev(d):
    """Return the sample standard deviation of ``d``.

    The squared deviations from the mean are summed and divided by
    ``len(d) - 1`` before the square root is taken, so ``d`` needs at
    least two values for the divisor to be non-zero.
    """
    centre = mean(d)
    squared_deviations = sum((value - centre) ** 2 for value in d)
    degrees_of_freedom = len(d) - 1
    return (squared_deviations / degrees_of_freedom) ** 0.5
def corr_coeff(X, y):
    """Return the Pearson correlation coefficient between ``X`` and ``y``.

    Uses the raw-sums form

        r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx**2) * (n*Syy - Sy**2))

    where ``n`` is ``len(X)``.  The cross products are read by index for
    the first ``n`` positions of both inputs.  The denominator is not
    guarded, so it is zero when either input has no spread.
    """
    count = len(X)

    # Raw sums that feed the formula.
    sum_xy = sum([X[idx] * y[idx] for idx in range(count)])
    sum_xx = sum([x_val ** 2 for x_val in X])
    sum_yy = sum([y_val ** 2 for y_val in y])
    sum_x = sum(X)
    sum_y = sum(y)

    co_variation = count * sum_xy - sum_x * sum_y
    x_spread = count * sum_xx - sum_x ** 2
    y_spread = count * sum_yy - sum_y ** 2
    return co_variation / (x_spread * y_spread) ** 0.5
def slope(r, std_dev_X, std_dev_y):
    """Return the slope of the fitted line.

    Computed as the correlation coefficient ``r`` scaled by the ratio of
    the y spread to the X spread: ``r * (std_dev_y / std_dev_X)``.
    """
    spread_ratio = std_dev_y / std_dev_X
    return r * spread_ratio
def y_intercept(slope, X_mean, y_mean):
    """Return the intercept of a line with ``slope`` through the mean point.

    Computed as ``y_mean - slope * X_mean``.  The ``slope`` parameter
    shadows the module-level ``slope`` function inside this body.
    """
    rise_at_mean = slope * X_mean
    return y_mean - rise_at_mean
def equation_coeffs(X, y):
    """Return ``(b0, b1)``: the intercept and slope of the fitted line.

    The slope comes from the correlation coefficient and the two standard
    deviations; the intercept then follows from the two means.  Both
    results are indexed with ``[0]``, so they must be indexable, which is
    the case when each ``X[i]`` is a one-element row rather than a scalar.
    """
    r = corr_coeff(X, y)
    spread_X = std_dev(X)
    spread_y = std_dev(y)
    b1 = slope(r, spread_X, spread_y)[0]

    centre_X = mean(X)
    centre_y = mean(y)
    b0 = y_intercept(b1, centre_X, centre_y)[0]

    return b0, b1
def estimate(e_X):
    """Return the fitted line's value at ``e_X``.

    Reads the module-level coefficients ``b0`` (intercept) and ``b1``
    (slope), so they must be assigned before this is called.
    """
    slope_term = b1 * e_X
    return b0 + slope_term
def equation():
    """Return the fitted line as text in the form ``y=<b0>+<b1>x``.

    Reads the module-level ``b0`` and ``b1``.  A ``+`` is inserted only
    when ``b1 >= 0``; otherwise nothing is inserted between the two
    numbers and the sign comes from ``str(b1)`` itself.
    """
    if b1 >= 0:
        separator = '+'
    else:
        separator = ''
    return ''.join(['y=', str(b0), separator, str(b1), 'x'])
def r_squared():
    """Return the coefficient of determination, ``1 - SS_res / SS_tot``.

    Reads the module-level data ``X`` and ``y`` and predicts each point
    through ``estimate``:

    * ``SS_res`` is the sum of squared differences between each ``y``
      value and the estimate for the matching ``X`` entry.
    * ``SS_tot`` is the sum of squared differences between each ``y``
      value and the mean of ``y``.

    The final division is not guarded; ``SS_tot`` is zero when every
    ``y`` value equals the mean.
    """
    # Compute the mean once.  Evaluating it inside the summation repeated a
    # full pass over y for every sample, making the total quadratic.
    y_mean = mean(y)

    ss_res = sum((y_i - estimate(x_i)) ** 2 for x_i, y_i in zip(X, y))
    ss_tot = sum((y_i - y_mean) ** 2 for y_i in y)
    return 1 - (ss_res / ss_tot)
def plot(eq, r_sq):
    """Display the data points and the fitted line in one chart.

    The title is ``eq`` followed by `` : R^2=`` and ``r_sq``.  The
    module-level ``X`` and ``y`` are drawn as a red scatter, and the
    estimates for every ``X`` entry are drawn as a blue line.
    """
    heading = ''.join([str(eq), ' : R^2=', str(r_sq)])
    plt.title(heading)
    plt.xlabel('INDEPENDANT')
    plt.ylabel('DEPENDANT')

    # Observed samples.
    plt.scatter(X, y, color='red')

    # Fitted line evaluated at the same X positions.
    fitted = [estimate(x_val) for x_val in X]
    plt.plot(X, fitted, color='blue')

    plt.show()
# Fit the line once.  estimate(), equation() and r_squared() read b0 and b1 as
# module-level globals, so this assignment must run before any of them is called.
b0, b1 = equation_coeffs(X, y)
# Draw the chart, titled with the fitted equation text and its R^2 value.
plot(equation(), r_squared())