Python 使用CNN对表格数据进行分类

Python 使用CNN对表格数据进行分类,python,image,machine-learning,classification,conv-neural-network,Python,Image,Machine Learning,Classification,Conv Neural Network,我正试图用CNN对表格数据进行分类。为此,我将数据与基本图像进行卷积。我在这里读到了这种方法: 我的数据集有1200行(1200幅图像)和25个特征。这是我用来卷积数据的代码: import pandas as pd from sklearn.preprocessing import StandardScaler, PowerTransformer, MinMaxScaler import numpy as np from keras.preprocessing.image import Ima

我正试图用CNN对表格数据进行分类。为此,我将数据与基本图像进行卷积。我在这里读到了这种方法:

我的数据集有1200行(1200幅图像)和25个特征。这是我用来卷积数据的代码:

import pandas as pd
from sklearn.preprocessing import StandardScaler, PowerTransformer, MinMaxScaler
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, save_img
import csv

# Read the tabular dataset; the first CSV row supplies the column names.
Dataset = pd.read_csv("....csv", header=0)
feature_columns = ['...']
y = Dataset["Classifier"]  # class label of every row
x = Dataset[feature_columns]  # feature columns of every row

# Rescale every feature column into [0.1, 0.9]. fit_transform returns a
# NumPy array, so from here on x is indexed by row position.
sc = MinMaxScaler(feature_range=(0.1, 0.9))
x = sc.fit_transform(x)

# Second scaler, used further down to stretch pixel values into 0-255.
sc_2 = MinMaxScaler(feature_range=(0, 255))

# Numeric label -> class-name string written to the CSV index.
class_names = {0: 'reuse', 1: 'recycling', 2: 'disposal'}

# The base image is identical for every row, so decode it once instead of
# re-reading the file on each iteration.
base_img = load_img(".../Unbenannt.png", color_mode="grayscale")
base_array = img_to_array(base_img)

# Stretches pixel values into the 0-255 range; it is refitted for every
# pixel row, so a single instance can be shared by all images.
sc_2 = MinMaxScaler(feature_range=(0, 255))

records = []  # one CSV row (a mapping) per generated image
for index, (features, label) in enumerate(zip(x, y)):
    # Labels other than 0/1/2 keep an empty class name.
    class_name = class_names.get(int(label), "")

    # Lay the 25 features out as a 5x5 patch, tile it 50x50 times to get a
    # 250x250 pattern, then append a trailing channel axis.
    pattern = np.tile(features.reshape(5, 5), (50, 50))
    pattern = pattern[..., np.newaxis]

    # Modulate the base image pixel-wise with the feature pattern.
    # NOTE(review): assumes the base image is 250x250 so the shapes match
    # -- confirm against the actual file.
    image = np.multiply(base_array, pattern)

    # Rescale every pixel row independently into 0-255.
    for row in range(image.shape[0]):
        image[row] = sc_2.fit_transform(image[row])

    file_name = '%s_%s' % (label, index) + '.png'
    save_img('.../Image/' + file_name, image)
    records.append({'No': index, 'Name': file_name, 'Class': class_name})

csv_columns = ['No', 'Name', 'Class']
csv_file = 'Images.csv'
try:
    # newline='' lets the csv module control line endings; without it,
    # blank rows appear between records on some platforms.
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(records)
except IOError as err:
    # Best effort: report the cause instead of hiding it, but do not abort.
    print("I/O error: %s" % err)
这是基本图像

这些是三种不同标签的结果

结果并不令人满意。我可以做些什么来改进图像?虽然可以看到不同的图案,但1200幅图像中的每一幅都是如此,因此各类别之间显然没有真正可区分的特征。如果我用这些图像进行分类,CNN的准确率只能达到50%,并且一直停留在这个水平。以下是CNN的代码:

import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
import pandas as pd
from sklearn.preprocessing import StandardScaler, PowerTransformer
from keras.utils import to_categorical
from keras import Sequential
from keras.layers import Dense, Dropout, Conv1D, LSTM, MaxPooling1D, Flatten, BatchNormalization, Activation, Conv2D, MaxPooling2D
import numpy as np
from sklearn.model_selection import cross_val_score, RepeatedKFold, KFold, cross_validate, StratifiedKFold, RepeatedStratifiedKFold
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from sklearn.model_selection import cross_val_score, RepeatedKFold, KFold, cross_validate, StratifiedKFold, RepeatedStratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras.optimizers import RMSprop, SGD

# Number of training epochs used for every cross-validation fold.
num_epochs = 30

# Index of the generated images; the code below reads its "Name" (file name)
# and "Class" (label string) columns.
Dataset = pd.read_csv('.../Images.csv', header=0)

def create_new_model(input_shape=(256, 256, 1), num_classes=3):
    """Build and compile a small VGG-style CNN classifier.

    The network stacks three blocks of (Conv2D, Conv2D, MaxPooling2D,
    Dropout) with 32, 64 and 128 filters, followed by a 128-unit dense
    layer and a softmax output. It is compiled with SGD (learning rate
    0.001, momentum 0.9) and categorical cross-entropy.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    conv_kwargs = {
        'activation': 'relu',
        'kernel_initializer': 'he_uniform',
        'padding': 'same',
    }

    model = Sequential()
    for block_index, filters in enumerate((32, 64, 128)):
        # Only the very first layer declares the input shape.
        first_kwargs = {'input_shape': input_shape} if block_index == 0 else {}
        model.add(Conv2D(filters, (3, 3), **conv_kwargs, **first_kwargs))
        model.add(Conv2D(filters, (3, 3), **conv_kwargs))
        model.add(MaxPooling2D((2, 2)))
        model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.2))
    model.add(Dense(num_classes, activation='softmax'))

    # `learning_rate` replaces the deprecated `lr` argument, which newer
    # Keras releases no longer accept.
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model


# Class labels, used only to stratify the cross-validation folds.
y = Dataset['Class']

print(y.head())
y = y.to_numpy()

rkfold = RepeatedStratifiedKFold(n_splits=4, n_repeats=1)

# Training generator: random augmentation plus rescaling of pixels to [0, 1].
# NOTE(review): shifts, zoom and flips move pixels around; if pixel position
# encodes feature identity in these images, this augmentation may hurt
# rather than help -- confirm.
idg = ImageDataGenerator(width_shift_range=0.1,
                         height_shift_range=0.1,
                         zoom_range=0.3,
                         fill_mode='nearest',
                         horizontal_flip=True,
                         rescale=1./255)

# Evaluation generator: rescaling only, so held-out images are scored
# unmodified rather than randomly distorted.
test_idg = ImageDataGenerator(rescale=1./255)

image_dir = '/content/Image'

Accuracy = []
for train_index, test_index in rkfold.split(np.zeros(len(y)), y):
    training_data = Dataset.iloc[train_index]
    test_data = Dataset.iloc[test_index]

    train_data_generator = idg.flow_from_dataframe(
        training_data, directory=image_dir, x_col="Name", y_col='Class',
        class_mode="categorical", shuffle=True, batch_size=10,
        color_mode="grayscale")
    # No shuffling is needed for evaluation.
    test_data_generator = test_idg.flow_from_dataframe(
        test_data, directory=image_dir, x_col="Name", y_col='Class',
        class_mode="categorical", shuffle=False, batch_size=10,
        color_mode="grayscale")

    # A fresh model per fold so no weights leak between folds.
    model = create_new_model()

    # The epoch count is the module-level `num_epochs`; the previous name
    # `nb_epoch` was never defined and raised NameError.
    history = model.fit(train_data_generator, epochs=num_epochs)

    # score is [loss, accuracy] given the metrics the model is compiled with.
    score = model.evaluate(test_data_generator)
    Accuracy.append(score[1])

# Accuracies are fractions in [0, 1]; convert to percent to match the format.
print("Accuracy: %.2f%% (+/- %.2f%%)" % (100 * np.mean(Accuracy), 100 * np.std(Accuracy)))