Python 为什么LinearSVC的精度与SGDClassifier不同？_Python_Python 3.x_Scikit Learn_Svm

Python 为什么LinearSVC的精度与SGDClassifier不同？

python python-3.x scikit-learn

Python 为什么LinearSVC的精度与SGDClassifier不同？,python,python-3.x,scikit-learn,svm,Python,Python 3.x,Scikit Learn,Svm,我正在微调线性支持向量机的参数。有多种方法可以做到这一点，但我想在时间方面比较LinearSVC和SGDClassifier。我希望准确度得分是相同的，但即使使用GridSearchCV进行微调，LinearSVC的得分也会更低。我试过多次修改参数，但使用LinearSVC我能得到的最大值是41.176，而SGDClassifier的最大值是41.503。为什么? 代码： class SVMSentiment(Base): """Predict sentiment sc

我正在微调线性支持向量机的参数。有多种方法可以做到这一点，但我想在时间方面比较LinearSVC和SGDClassifier。我希望准确度得分是相同的，但即使使用GridSearchCV进行微调，LinearSVC的得分也会更低。我试过多次修改参数，但使用LinearSVC我能得到的最大值是41.176，而SGDClassifier的最大值是41.503。为什么？

代码：

class SVMSentiment(Base):
    """Predict sentiment scores using a linear Support Vector Machine (SVM).

    Uses a scikit-learn ``Pipeline`` (token counts -> TF-IDF -> ``LinearSVC``)
    whose hyper-parameters are tuned by a cross-validated grid search every
    time :meth:`predict` is called.
    """

    def __init__(self, model_file: str = None) -> None:
        """Build the unfitted text-classification pipeline.

        Args:
            model_file: Accepted for interface compatibility; this
                constructor does not read it.
        """
        super().__init__()
        # pip install scikit-learn
        from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
        from sklearn.pipeline import Pipeline
        from sklearn.svm import LinearSVC

        # The classifier arguments below are only starting values: predict()
        # grid-searches over clf__C / clf__max_iter / clf__tol and overrides
        # them.
        #
        # An SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3,
        # random_state=42, max_iter=100, learning_rate='optimal', tol=None)
        # can be swapped in as the 'clf' step for comparison. It is regularised
        # through `alpha` rather than `C`, so the grid in predict() must then
        # search 'clf__alpha' instead of 'clf__C'. It is also trained by a
        # different (stochastic) solver, so its scores need not match
        # LinearSVC exactly.
        self.pipeline = Pipeline(
            [
                ('vect', CountVectorizer()),
                ('tfidf', TfidfTransformer()),
                ('clf', LinearSVC(loss='hinge', penalty='l2', max_iter=10)),
            ]
        )

    def predict(self, train_file: str, test_file: str, lower_case: bool) -> pd.DataFrame:
        """Tune and train the pipeline, then label the test set.

        Runs a 5-fold ``GridSearchCV`` over the pipeline on the training data,
        prints the best cross-validation score and the winning parameter
        values, and predicts the test set with the best estimator found.

        Args:
            train_file: Passed to ``self.read_data`` to load the training
                data; the result is indexed by ``'text'`` and ``'truth'``.
            test_file: Passed to ``self.read_data`` to load the test data;
                the result is indexed by ``'text'``.
            lower_case: Forwarded unchanged to ``self.read_data``.

        Returns:
            The test data returned by ``self.read_data`` with an added
            ``'pred'`` column holding the predicted labels.
        """
        from sklearn.model_selection import GridSearchCV

        train_df = self.read_data(train_file, lower_case)

        param_range = [0.001, 0.01, 0.1, 1, 10, 100]
        # NOTE(review): recent scikit-learn releases may reject tol=0 during
        # parameter validation, which would score those candidates as NaN --
        # confirm against the installed version.
        parameters = {
            'tfidf__use_idf': (True, False),
            'clf__max_iter': [10, 100, 1000],
            'clf__tol': [0, 0.0001, 0.001, 0.01],
            'clf__loss': ['hinge'],
            'clf__penalty': ['l2'],
            'clf__C': param_range,
        }

        # refit=True retrains the best parameter combination on the whole
        # training set and exposes it as `best_estimator_`.
        gs_clf = GridSearchCV(self.pipeline, parameters, cv=5, n_jobs=-1, refit=True)
        gs_clf = gs_clf.fit(train_df['text'], train_df['truth'])
        print(gs_clf.best_score_)
        for param_name in sorted(parameters):
            print("%s: %r" % (param_name, gs_clf.best_params_[param_name]))

        # Use the tuned model. Refitting the original pipeline here would
        # discard the grid-search result and fall back to the constructor's
        # untuned settings (max_iter=10).
        self.pipeline = gs_clf.best_estimator_
        learner = self.pipeline

        # Predict labels for the test data with the tuned learner.
        test_df = self.read_data(test_file, lower_case)
        test_df['pred'] = learner.predict(test_df['text'])
        return test_df

为什么您希望两种不同类型的分类器具有相同的性能？它们的工作方式不同，因此可以在同一数据集上产生不同的结果。例如，LinearSVC基于liblinear求解，而SGDClassifier使用随机梯度下降，并且两者的正则化参数（C与alpha）的设定方式也不同。