Python - SVM kernels and algorithm from scratch

Tags: python, python-2.7, machine-learning, svm

I am working with a Support Vector Machine using three different kernels - linear, polynomial, and radial - but I am getting the following error.

I have tried different approaches. Is there any way to fix my algo class, or am I missing some function?

Here are my training and test data.

Here is the error:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-115-32145be147af> in <module>()
     21 plt.show()
     22 print("Predicting on Test Data")
---> 23 y_pred = svmLinear.predict(X_test)
     24 print("")
     25 print("Confusion Matrix")

<ipython-input-114-0ecf6ae6acd2> in predict(self, X)
    111 
    112     def predict(self, X):
--> 113         return np.sign(self.project(X))
    114 
    115     def project(self, X):

<ipython-input-114-0ecf6ae6acd2> in project(self, X)
    114 
    115     def project(self, X):
--> 116         if self.w is not None:
    117             return np.dot(X, self.w) + self.b
    118         else:

AttributeError: 'SVM' object has no attribute 'w'
---------------------------------------------------------------------------
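For reference, this AttributeError is plain Python behavior: reading an instance attribute that was never assigned raises it. A minimal, hypothetical sketch of the same pattern (the class name Model is made up; this is just the shape of the bug, not the code below):

class Model(object):
    def fit(self):
        self._w = 1.0      # assigns the *underscored* name

    def project(self):
        return self.w      # reads a name that was never assigned

m = Model()
m.fit()
m.project()                # AttributeError: 'Model' object has no attribute 'w'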
Here is the code:

import numpy as np               # use this scientific library for creating & processing arrays/matrices
import matplotlib.pyplot as plt  # Backend library for plotting
import matplotlib.colors
from matplotlib import style
from numpy import linalg
import cvxopt
import cvxopt.solvers
import pandas as pd
import sys


class SVM(object):
    """
    Support Vector Machine Classifier

    """

    def __init__(self, kernel=None, C=None, loss="hinge"):
        self._margin = 0
        print ("\n *******************Support Vector Machine Initialization*******************")

        if C is not None:
            self._C = float(C)
            print("\nC ->", C)
        else:
            self._C = 10000

        if kernel is None:
            self._kernel = self.linear_kernel
        elif(kernel == 'polynomial_kernel'):  # 'is' tests identity, not string equality
            self._kernel = self.polynomial_kernel
        elif(kernel == 'gaussian_kernel'):
            self._kernel = self.gaussian_kernel

        #print("Kernel selected ->", self._kernel)

    #Input the data to this method to train the SVM
    def fit(self, X, y):
        n_samples, n_features = X.shape
        #print("\n\nNumber of examples in a sample = ",n_samples , ", Number of features = ", n_features)
        self._w = np.zeros(n_features)

        # Initialize the Gram matrix for taking the output from QP solution
        K = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                K[i,j] = self._kernel(X[i], X[j])
                #print("K[", i,",", j, "] = ", K[i,j])

        # Here we have to solve the convex optimization problem
        # min 1/2 x^T P x + q^T x
        # s.t.
        #  Gx <= h
        #  Ax = b
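        # For the SVM dual, x is the vector of multipliers alpha and:
        #   P[i,j] = y_i * y_j * K(x_i, x_j),   q = vector of -1s,
        #   A = y^T with b = 0 (enforces sum_i alpha_i * y_i = 0),
        #   G, h encode 0 <= alpha_i (and alpha_i <= C in the soft-margin case)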

        P = cvxopt.matrix(np.outer(y, y) * K)
        q = cvxopt.matrix(-np.ones(n_samples))
        #q is a vector of ones
        A = cvxopt.matrix(y, (1, n_samples), 'd')
        b = cvxopt.matrix(0.0)

        #G & h are required for soft-margin classifier

        if (self._kernel == self.linear_kernel):
            G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
            #G is an identity matrix with −1s as its diagonal
            # so that our greater than is transformed into less than
            h = cvxopt.matrix(np.zeros(n_samples))
            #h is vector of zeros
        else:
            G_std = np.diag(np.ones(n_samples) * -1)
            G_slack = np.identity(n_samples)

            h_std = np.zeros(n_samples)
            h_slack = np.ones(n_samples) * self._C

            G = cvxopt.matrix(np.vstack((G_std, G_slack)))
            h = cvxopt.matrix(np.hstack((h_std, h_slack)))

        cvxopt.solvers.options['show_progress'] = False
        solution = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        alpha = np.ravel(solution['x'])

        # Now figure out the Support Vectors, i.e. those with yi(xi.w + b) = 1:
        # check whether the Lagrange multiplier has a non-zero value
        sv = alpha > 1e-4
        self._alpha = alpha[sv]
        self._Support_Vectors = X[sv]
        self._Support_Vectors_Labels = y[sv]

        print ("\n Total number of examples = ", n_samples)
        print ("\n Total number of Support Vectors found = ", len(self._Support_Vectors))
        print("\n\n Support Vectors are: \n", self._Support_Vectors)
        print("\n\n Support Vectors Labels are: \n", self._Support_Vectors_Labels)

        #Now let us define the decision boundary
        #w = Σαi*yi*xi
        if (self._kernel == self.linear_kernel):
            for i in range(len(self._alpha)):
                #print(i, self._alpha[i], self._Support_Vectors_Labels[i], self._Support_Vectors[i])
                self._w += self._alpha[i] * self._Support_Vectors_Labels[i] * self._Support_Vectors[i]
        else:
            self._w = None
        print("\n Weights are : ",self._w)

        #b = yi − wT xi
        ind = np.arange(len(alpha))[sv]
        self._b = y[ind] - np.dot(X[ind], self._w)
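        # NOTE: this yields a vector (one entry per support vector) rather than a
        # scalar bias, and it breaks for non-linear kernels, where self._w is None;
        # a scalar, averaged bias is sketched after the class.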

    def predict(self, X):
        return np.sign(self.project(X))

    def project(self, X):
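        # NOTE: fit() stores self._w, self._b, self._alpha, self._Support_Vectors,
        # self._Support_Vectors_Labels and self._kernel; the names read below
        # (self.w, self.b, self.a, self.sv_y, self.sv, self.kernel) are never
        # assigned anywhere, which is exactly what the AttributeError reports.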
        if self.w is not None:
            return np.dot(X, self.w) + self.b
        else:
            y_predict = np.zeros(len(X))
            for i in range(len(X)):
                s = 0
                for a, sv_y, sv in zip(self.a, self.sv_y, self.sv):
                    s += a * sv_y * self.kernel(X[i], sv)
                y_predict[i] = s
            return y_predict + self.b

    def linear_kernel(self, x1, x2):
        return np.dot(x1, x2)

    def polynomial_kernel(self, x1, x2, p=3):
        return (1 + np.dot(x1, x2)) ** p

    def gaussian_kernel(self, x, y, sigma=5.0):  # 'self' was missing; the bound-method call shifted every argument
        return np.exp(-linalg.norm(x-y)**2 / (2 * (sigma ** 2)))

    def plot_linear_separator(self, X, y):
        plt.figure(1)
        #plt.subplot(221)
        for i in range(len(X)):
            if (y[i] == 1):
                plt.plot(X[i][0], X[i][1], 'ob')
            else:
                plt.plot(X[i][0], X[i][1], 'xr')

        slope = -self._w[0] / self._w[1]
        intercept = -self._b / self._w[1]
        x = np.arange(0, len(self._Support_Vectors))
        plt.xlabel("x1")
        plt.ylabel("x2")
        plt.title("SVM with linear separable case")
        plt.axis("tight")

        #plt.plot(x, (x * slope) + intercept, '--k')

        hyp_x_min = -5
        hyp_x_max = 20

        # (w.x+b) = 1
        # positive support vector hyperplane
        psv1 = (-self._w[0]*hyp_x_min - self._b + 1) / self._w[1]
        psv2 = (-self._w[0]*hyp_x_max - self._b + 1) / self._w[1]
        plt.plot([hyp_x_min, hyp_x_max], [psv1, psv2], 'k-.', linewidth=0.2)

        # (w.x+b) = -1
        # negative support vector hyperplane
        nsv1 = (-self._w[0]*hyp_x_min - self._b - 1) / self._w[1]
        nsv2 = (-self._w[0]*hyp_x_max - self._b - 1) / self._w[1]
        plt.plot([hyp_x_min,hyp_x_max], [nsv1,nsv2], '--k', linewidth=0.2)

        # (w.x+b) = 0
        # discriminant function
        df1 = (-self._w[0]*hyp_x_min - self._b) / self._w[1]
        df2 = (-self._w[0]*hyp_x_max - self._b) / self._w[1]
        plt.plot([hyp_x_min, hyp_x_max],[df1, df2], 'y')

    def plot_linear_margin(self):
        plt.figure(2)
        #plt.subplot(221)

        #we need to make three lines in total

        # w.x + b = 0,  
        a0 = -4
        a1 = (-self._w[0] * a0 - self._margin) / self._w[1]
        b0 = 4
        b1 = (-self._w[0] * b0 - self._margin) / self._w[1]
        plt.plot([a0, b0], [a1, b1], "k")  # was pl.plot, but only plt is imported

        # w.x + b = 1

        # w.x + b = -1
        #labels
        plt.xlabel("x1")
        plt.ylabel("x2")
        plt.title("SVM non-linear case")
        plt.axis("tight")