# ML算法：使用Python进行多元线性回归

2021年5月5日13:38:32 发表评论 1,165 次浏览

1. 简单线性回归
2. 多元线性回归

• 线性度：因变量和自变量之间的关系应该是线性的。
• 同方差性：误差项的方差应保持恒定。
• 多元正态性：多元回归假设残差是正态分布的。
• 缺乏多重共线性：假定数据中很少或没有多重共线性。

虚拟变量陷阱：如果某个分类变量有 m 个类别，则只应使用 m-1 个虚拟变量，否则会造成完全多重共线性。例如，对于两个互补的虚拟变量有：

```
D2 = 1 - D1   # 这里 D2、D1 是虚拟变量
```

• 全部纳入（All-in）
• 向后淘汰
• 前向选择
• 双向消除
• 分数比较

### 任何多元线性回归模型涉及的步骤

1. 导入库。
2. 导入数据集。
3. 编码分类数据。
4. 避免虚拟变量陷阱。
5. 将数据集分为训练集和测试集。

```python
import numpy as np
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt

def generate_dataset(n):
    """Build a toy linear-regression dataset with n samples.

    Each row of x is [1, x1, x2], where the leading 1 is the intercept
    term; y = a*x1 + b*x2 + 1 with random weights a, b drawn once per call.

    Parameters
    ----------
    n : number of samples to generate.

    Returns
    -------
    (x, y) : numpy arrays of shapes (n, 3) and (n,).
    """
    x = []
    y = []
    random_x1 = np.random.rand()
    random_x2 = np.random.rand()
    for i in range(n):
        x1 = i
        # x2 carries the per-sample randomness, scaled by n.
        x2 = i / 2 + np.random.rand() * n
        x.append([1, x1, x2])
        y.append(random_x1 * x1 + random_x2 * x2 + 1)
    return np.array(x), np.array(y)

# Generate the toy data and visualise it in 3-D.
x, y = generate_dataset(200)

mpl.rcParams['legend.fontsize'] = 12

fig = plt.figure()
# fig.gca(projection='3d') was deprecated in Matplotlib 3.4 and removed
# in 3.6; add_subplot(projection='3d') is the supported replacement.
ax = fig.add_subplot(projection='3d')

ax.scatter(x[:, 1], x[:, 2], y, label='y', s=5)
ax.legend()
ax.view_init(45, 0)

plt.show()
```

```python
def mse(coef, x, y):
    """Return half the mean squared error of the linear model x @ coef vs y."""
    return np.mean((np.dot(x, coef) - y) ** 2) / 2


def gradients(coef, x, y):
    """Gradient of mse with respect to coef.

    x.T is (features, n); broadcasting it against the (n,) residual and
    averaging over axis=1 gives one partial derivative per coefficient.
    NOTE(review): this `def` line was missing from the garbled source —
    only its `return` statement survived.
    """
    return np.mean(x.transpose() * (np.dot(x, coef) - y), axis=1)


def multilinear_regression(coef, x, y, lr, b1=0.9, b2=0.999, epsilon=1e-8):
    """Fit linear coefficients by Adam-style (Nesterov-corrected) gradient descent.

    Parameters
    ----------
    coef : initial coefficient vector, one entry per column of x.
    x : (n_samples, n_features) design matrix (first column all ones).
    y : (n_samples,) target values.
    lr : learning rate.
    b1, b2 : exponential decay rates for the first / second moment estimates.
    epsilon : convergence tolerance on the change in error between iterations.

    Returns
    -------
    The fitted coefficient vector.
    """
    prev_error = 0
    m_coef = np.zeros(coef.shape)         # first moment (running mean of gradients)
    v_coef = np.zeros(coef.shape)         # second moment (running mean of grad**2)
    moment_m_coef = np.zeros(coef.shape)  # bias-corrected first moment
    moment_v_coef = np.zeros(coef.shape)  # bias-corrected second moment
    t = 0

    while True:
        error = mse(coef, x, y)
        # Stop once the error plateaus.
        if abs(error - prev_error) <= epsilon:
            break
        prev_error = error
        # This line was missing in the garbled source; without it `grad`
        # is undefined below.
        grad = gradients(coef, x, y)
        t += 1
        m_coef = b1 * m_coef + (1 - b1) * grad
        v_coef = b2 * v_coef + (1 - b2) * grad ** 2
        moment_m_coef = m_coef / (1 - b1 ** t)
        moment_v_coef = v_coef / (1 - b2 ** t)

        # Nesterov-style Adam step. Fixed parenthesization: the smoothing
        # constant belongs inside the denominator, i.e.
        # lr / (sqrt(v_hat) + 1e-8), not (lr / sqrt(v_hat)) + 1e-8.
        delta = (lr / (moment_v_coef ** 0.5 + 1e-8)) * (
            b1 * moment_m_coef + (1 - b1) * grad / (1 - b1 ** t)
        )

        coef = np.subtract(coef, delta)
    return coef

# Fit the model and overlay the fitted plane (as points) on the data.
coef = np.array([0.0, 0.0, 0.0])  # float init: the Adam updates are fractional
c = multilinear_regression(coef, x, y, 1e-1)
fig = plt.figure()
# fig.gca(projection='3d') was removed in Matplotlib 3.6; use add_subplot.
ax = fig.add_subplot(projection='3d')

ax.scatter(x[:, 1], x[:, 2], y, label='y', s=5, color="dodgerblue")

# Predicted values c0 + c1*x1 + c2*x2 at the sample points.
ax.scatter(x[:, 1], x[:, 2], c[0] + c[1] * x[:, 1] + c[2] * x[:, 2],
           label='regression', s=5, color="orange")

ax.view_init(45, 0)
ax.legend()
plt.show()
```