Python-SVM内核与从头开始的算法
我使用支持向量机处理三种不同的核-线性核、多项式核和径向核,但我得到了以下错误 我尝试过不同的方法,是否有任何方法可以修复我的algo类或我丢失了任何函数 这是我的列车和测试数据。 以下是错误:Python-SVM内核与从头开始的算法,python,python-2.7,machine-learning,svm,Python,Python 2.7,Machine Learning,Svm,我使用支持向量机处理三种不同的核-线性核、多项式核和径向核,但我得到了以下错误 我尝试过不同的方法,是否有任何方法可以修复我的algo类或我丢失了任何函数 这是我的列车和测试数据。 以下是错误: --------------------------------------------------------------------------- AttributeError Traceback (most recent call last)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-115-32145be147af> in <module>()
21 plt.show()
22 print("Predicting on Test Data")
---> 23 y_pred = svmLinear.predict(X_test)
24 print("")
25 print("Confusion Matrix")
<ipython-input-114-0ecf6ae6acd2> in predict(self, X)
111
112 def predict(self, X):
--> 113 return np.sign(self.project(X))
114
115 def project(self, X):
<ipython-input-114-0ecf6ae6acd2> in project(self, X)
114
115 def project(self, X):
--> 116 if self.w is not None:
117 return np.dot(X, self.w) + self.b
118 else:
AttributeError: 'SVM' object has no attribute 'w'
---------------------------------------------------------------------------
AttributeError回溯(最近一次呼叫上次)
在()
21 plt.show()
22打印(“测试数据预测”)
--->23 y_pred=SVM线性预测(X_检验)
24字(“”)
25打印(“混淆矩阵”)
在预测中(self,X)
111
112 def预测(自我,X):
-->113返回np.符号(自项目(X))
114
115 def项目(自我,X):
在项目中(self,X)
114
115 def项目(自我,X):
-->116如果self.w不是无:
117返回np.dot(X,self.w)+self.b
118.其他:
AttributeError:“SVM”对象没有属性“w”
代码如下:
import numpy as np # use this scientific library for creating & procesing arrays/matrices
import matplotlib.pyplot as plt # Backend library for plotting
import matplotlib.colors
from matplotlib import style
from numpy import linalg
import cvxopt
import cvxopt.solvers
import pandas as pd
import sys
class SVM(object):
"""
Support Vector Machine Classifier
"""
def __init__(self, kernel=None, C=None, loss="hinge"):
self._margin = 0
print ("\n *******************Support Vector Machine Initialization*******************")
if C is not None:
self._C = float(C)
print("\nC ->", C)
else:
self._C = 10000
if kernel is None:
self._kernel = self.linear_kernel
elif(kernel is 'polynomial_kernel'):
self._kernel = self.polynomial_kernel
elif(kernel == 'gaussian_kernel'):
self._kernel = self.gaussian_kernel
#print("Kernel selected ->", self._kernel)
#Input the data to this method to train the SVM
def fit(self, X, y):
n_samples, n_features = X.shape
#print("\n\nNumber of examples in a sample = ",n_samples , ", Number of features = ", n_features)
self._w = np.zeros(n_features)
# Initialize the Gram matrix for taking the output from QP solution
K = np.zeros((n_samples, n_samples))
for i in range(n_samples):
for j in range(n_samples):
K[i,j] = self._kernel(X[i], X[j])
#print("K[", i,",", j, "] = ", K[i,j])
# Here we have to solve the convext optimization problem
# min 1/2 x^T P x + q^T x
# s.t.
# Gx <= h
# Ax = b
P = cvxopt.matrix(np.outer(y, y) * K)
q = cvxopt.matrix(-np.ones(n_samples))
#q is a vector of ones
A = cvxopt.matrix(y, (1, n_samples), 'd')
b = cvxopt.matrix(0.0)
#G & h are required for soft-margin classifier
if (self._kernel == self.linear_kernel):
G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
#G is an identity matrix with −1s as its diagonal
# so that our greater than is transformed into less than
h = cvxopt.matrix(np.zeros(n_samples))
#h is vector of zeros
else:
G_std = np.diag(np.ones(n_samples) * -1)
G_slack = np.identity(n_samples)
h_std = np.zeros(n_samples)
h_slack = np.ones(n_samples) * self._C
G = cvxopt.matrix(np.vstack((G_std, G_slack)))
h = cvxopt.matrix(np.hstack((h_std, h_slack)))
cvxopt.solvers.options['show_progress'] = False
solution = cvxopt.solvers.qp(P, q, G, h, A, b)
# Lagrange multipliers
alpha = np.ravel(solution['x'])
# Now figure out the Support Vectors i.e yi(xi.w + b) = 1
# Check whether langrange multiplier has non-zero value
sv = alpha > 1e-4
self._alpha = alpha[sv]
self._Support_Vectors = X[sv]
self._Support_Vectors_Labels = y[sv]
print ("\n Total number of examples = ", n_samples)
print ("\n Total number of Support Vectors found = ", len(self._Support_Vectors))
print("\n\n Support Vectors are: \n", self._Support_Vectors)
print("\n\n Support Vectors Labels are: \n", self._Support_Vectors_Labels)
#Now let us define the decision boundary
#w = Σαi*yi*xi
if (self._kernel == self.linear_kernel):
for i in range(len(self._alpha)):
#print(i, self._alpha[i], self._Support_Vectors_Labels[i], self._Support_Vectors[i])
self._w += self._alpha[i] * self._Support_Vectors_Labels[i] * self._Support_Vectors[i]
else:
self._w = None
print("\n Weights are : ",self._w)
#b = yi − wT xi
ind = np.arange(len(alpha))[sv]
self._b = y[ind] - np.dot(X[ind], self._w)
def predict(self, X):
return np.sign(self.project(X))
def project(self, X):
if self.w is not None:
return np.dot(X, self.w) + self.b
else:
y_predict = np.zeros(len(X))
for i in range(len(X)):
s = 0
for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
s += a * sv_y * self.kernel(X[i], sv)
y_predict[i] = s
return y_predict + self.b
def linear_kernel(self, x1, x2):
return np.dot(x1, x2)
def polynomial_kernel(self, x1, x2, p=3):
return (1 + np.dot(x1, x2)) ** p
def gaussian_kernel(x, y, sigma=5.0):
return np.exp(-linalg.norm(x-y)**2 / (2 * (sigma ** 2)))
def plot_linear_separator(self, X, y):
plt.figure(1)
#plt.subplot(221)
for i in range(len(X)):
if (y[i] == 1):
plt.plot(X[i][0], X[i][1], 'ob')
else:
plt.plot(X[i][0], X[i][1], 'xr')
slope = -self._w[0] / self._w[1]
intercept = -self._b / self._w[1]
x = np.arange(0, len(self._Support_Vectors))
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("SVM with linear separable case")
plt.axis("tight")
#plt.plot(x, (x * slope) + intercept, '--k')
hyp_x_min = -5
hyp_x_max = 20
# (w.x+b) = 1
# positive support vector hyperplane
psv1 = (-self._w[0]*hyp_x_min - self._b + 1) / self._w[1]
psv2 = (-self._w[0]*hyp_x_max - self._b + 1) / self._w[1]
plt.plot([hyp_x_min, hyp_x_max], [psv1, psv2], 'k-.', linewidth=0.2)
# (w.x+b) = -1
# negative support vector hyperplane
nsv1 = (-self._w[0]*hyp_x_min - self._b - 1) / self._w[1]
nsv2 = (-self._w[0]*hyp_x_max - self._b - 1) / self._w[1]
plt.plot([hyp_x_min,hyp_x_max], [nsv1,nsv2], '--k', linewidth=0.2)
# (w.x+b) = 0
# discriminant function
df1 = (-self._w[0]*hyp_x_min - self._b) / self._w[1]
df2 = (-self._w[0]*hyp_x_max - self._b) / self._w[1]
plt.plot([hyp_x_min, hyp_x_max],[df1, df2], 'y')
def plot_linear_margin(self):
plt.figure(2)
#plt.subplot(221)
#we need to make three lines in total
# w.x + b = 0,
a0 = -4;
a1 = (-self._w[0] * a0 - self._margin ) / self._w[1]
b0 = 4;
b1 = (-self._w[0] * b0 - self._margin ) / self._w[1]
pl.plot([a0,b0], [a1,b1], "k")
# w.x + b = 1
# w.x + b = -1
#labels
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("SVM non-linear case")
plt.axis("tight")
import numpy as np#使用此科学库创建和处理数组/矩阵
导入matplotlib.pyplot作为plt#后端库进行打印
导入matplotlib.colors
从matplotlib导入样式
来自numpy import linalg
进口cvxopt
导入cvxopt.solver
作为pd进口熊猫
导入系统
类别SVM(对象):
"""
支持向量机分类器
"""
def uuu init(self,kernel=None,C=None,loss=“铰链”):
自限值=0
打印(“\n*************************支持向量机初始化*************************”)
如果C不是无:
自身。_C=浮动(C)
打印(“\nC->”,C)
其他:
自我评价(C=10000)
如果内核为无:
self.\u内核=self.linear\u内核
elif(内核是“多项式_内核”):
self.\u核=self.多项式\u核
elif(内核=‘高斯内核’):
self.\u内核=self.gaussian\u内核
#打印(“内核选择->”,self.\u内核)
#将数据输入该方法训练支持向量机
def配合(自、X、y):
n_样本,n_特征=X形状
#打印(“\n\n样本中的示例数=”,n_样本,”,特征数=,n_特征)
self._w=np.零(n_特征)
#初始化用于从QP解决方案获取输出的Gram矩阵
K=np.零((n_个样本,n_个样本))
对于范围内的i(n_样本):
对于范围内的j(n_样本):
K[i,j]=self._核(X[i],X[j])
#打印(“K[”,i,,“,j,”]=”,K[i,j])
#这里我们要解决凸优化问题
#最小1/2X^TPX+q^TX
#科技部。
#Gx 1e-4
自身α=α[sv]
自支撑向量=X[sv]
自支持向量标签=y[sv]
打印(“\n示例总数=”,n_示例)
打印(“\n找到的支持向量总数=”,len(自支持向量))
打印(“\n\n支持向量是:\n”,self.\u支持向量)
打印(“\n\n支持向量标签为:\n”,self.\u支持向量\u标签)
#现在让我们定义决策边界
#w=αi*yi*xi
如果(self.\u内核==self.linear\u内核):
对于范围内的i(len(self._alpha)):
#打印(i,self.\u alpha[i],self.\u支持向量\u标签[i],self.\u支持向量[i])
self.\u w+=self.\u alpha[i]*self.\u支持向量\u标签[i]*self.\u支持向量[i]
其他:
self.\u w=无
打印(“\n权重为:”,self.\u w)
#b=yi− 西席
ind=np.arange(len(alpha))[sv]
self.\u b=y[ind]-np.dot(X[ind],self.\u w)
def预测(自我,X):
返回np.符号(自项目(X))
def项目(自我,X):
如果self.w不是无:
返回np.dot(X,self.w)+self.b
其他:
y_predict=np.zero(len(X))
对于范围内的i(len(X)):
s=0
对于a,sv_y,zip中的sv(self.a,self.sv_y,self.sv):
s+=a*sv_y*self.kernel(X[i],sv)
y_predict[i]=s
返回y_predict+self.b
def线性_内核(self、x1、x2):
返回np.点(x1,x2)
def多项式_核(self,x1,x2,p=3):
返回值(1+np.点(x1,x2))**p
def gaussian_内核(x,y,sigma=5.0):
返回np.exp(-linalg.norm(x-y)**2/(2*(sigma**2)))
def plot_线性分隔符(自、X、y):
plt.图(1)
#小地块(221)
对于范围内的i(len(X)):
如果(y[i]==1):
plt.绘图(X[i][0]、X[i][1]、‘ob’)
其他:
plt.plot(X[i][0],X[i][1],'xr')
斜率=-self.\w[0]/self.\w[1]
截距=-self.\u b/self.\u w[1]
x=np.arange(0,len(自支撑向量))
plt.xlabel(“x1”)
plt.ylabel(“x2”)
plt.title(“具有线性可分离情况的SVM”)
plt.轴(“紧”)
#plt.绘图(x,(x*斜率)+截距,'--k')
hyp_x_min=-5
hyp_x_max=20
#(w.x+b)=1
#正支持向量超平面
psv1=(-self.\u w[0]*hyp\u x\u min-self.\u b+1)/self.\u w[1]
psv2=(-self.\u w[0]*hyp\u x\u max-self.\u b+1)/self.\u w[1]
plt.绘图([hyp_x_min,hyp_x_max],[psv1,psv2],“k-”,线宽=0.2)
#(w.x+b)=-1
#负支持向量超平面
nsv1=(-self.\u w[0]*hyp\u x\u min-self.\u b-1)/self.\u w[1]
nsv2=(-self.\u w[0]*hyp\u x\u max-self.\u b-1)/self.\u w[1]
plt.绘图([hyp_x_min,hyp_x_max],[nsv1,nsv2],'--k',线宽=0.2)
#(w.x+b)=0
#判别函数
df1=(-self.\u w[0]*hyp_x_min-self.\u b)/self.\u w[1]
df2=(-self.\u w[0]*hyp_x_max-self.\u b)/self.\u w[1]
plt.绘图([hyp_x_min,hyp_x_max],[df1,df2],'y')
def plot_linear_余量(自):
plt.图(2)