can';t将numpy数组转换为pytorch张量

can';t将numpy数组转换为pytorch张量,pytorch,artificial-intelligence,Pytorch,Artificial Intelligence,我正在尝试建立一个人工智能来识别手写数字。 我试图通过将图像向各个方向移动一到两个像素来扩展数据集,为了使这个过程更快,我使用了多处理。 这是我的代码: import pandas import torch import torch.nn as nn from torch.autograd import Variable import pandas as pd import numpy as np from torch.utils.data import TensorDataset import

我正在尝试建立一个人工智能来识别手写数字。 我试图通过将图像向各个方向移动一到两个像素来扩展数据集,为了使这个过程更快,我使用了多处理。 这是我的代码:

import pandas
import torch
import torch.nn as nn
from torch.autograd import Variable
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from scipy.ndimage.interpolation import shift as image_shift
import math
import time
import multiprocessing as mp

def multiproc_img_sft(img, y, sfts, procNum):
    labels = []
    for i in range(784):
        labels.append(f"p{i}")
    labels.append("labels")

    #q.put(image_shift(img.reshape(28, 28), sft).reshape(1, 1, 28, 28))
    img = img.reshape(-1, 784)
    imgs = np.empty((0, img.shape[1]))
    y_expended = np.empty((0, 1))
    for i in range(img.shape[0]):
        if i % 1000 == 0:
            imgs = np.append(imgs, y_expended, axis=1)
            DF = pd.DataFrame(imgs, columns=labels)

            if i == 0:
                DF.to_csv(f"extended_{procNum}.csv", index=False)
            else:
                DF.to_csv(f"extended_{procNum}.csv", mode="a", index=False)

            imgs = np.empty((0, img.shape[1]))
            y_expended = np.empty((0, 1))
        if i % 50 == 0:
            print(f"{procNum}:   {i+1} \\ {img.shape[0]}")
        for sft in sfts:
            imgs = np.append(imgs, image_shift(img[i].reshape(28, 28), sft).reshape(-1, 784), axis=0)
            y_expended = np.append(y_expended, [y[i]], axis=0)
            #print(y[i], y_expended.shape)

    #print(procNum, y_expended, y_expended.shape, imgs.shape)
    #y_expended = np.array(y_expended)

    imgs = np.append(imgs, y_expended, axis=1)
    DF = pd.DataFrame(imgs, columns=labels)

    DF.to_csv(f"extended_{procNum}.csv", mode="a", index=False)

    print(procNum, "dataframe labels =", labels)
    print(procNum, "y_expended =", y_expended.shape, type(y_expended))

    print(f"proc {procNum} ended")

if __name__ == "__main__":
    extend_X = True
    num_of_processes = 1

    shifts = [[1, 0], [0, 1], [-1, 0], [0, -1]]#,
###              [1, 1], [1, -1], [-1, 1], [-1, -1],
###
###              [2, 0], [0, 2], [-2, 0], [0, -2],
###              [2, 2], [2, -2], [-2, 2], [-2, -2],
###              [2, 1], [1, 2],  [2, -1], [1, -2],  [-1, 2], [-2, 1],  [-1, -2], [-2, -1]]
    #shifts = [[1, 0]]

    batch_size = 100
    batch_size = 1000
    n_iters = 10000
    learning_rate = 0.003325
    learning_rate = 0.003325
    # Accuracy: 99.43386243386243, Correct: 18793, Total: 18900
    # batch_size = 100
    # n_iters = 10000
    # learning_rate = 0.003325

    train_data = pd.read_csv("train.csv")
    X, y = train_data.drop(labels=['label'], axis=1).values, train_data['label'].values

    # X = scaler.fit_transform(X)

    X_train, X_test = X[:37800, :].reshape(-1, 1, 28, 28), X[37800:, :].reshape(-1, 1, 28, 28)
    y_train, y_test = y[:37800].reshape(-1, 1), y[37800:].reshape(-1, 1)
    print(y_train.shape)
    1==1

    print("extend_X =", extend_X)

    if extend_X:
        processes = []
        for j in range(num_of_processes):
            #multiproc_img_sft(X_train, y_train, shifts, j)

            print(j)
            lower_bound = math.floor(len(X_train) / num_of_processes) * j
            upper_bound = math.floor(len(X_train) / num_of_processes) * (j+1)
            if upper_bound == len(X_train):
                upper_bound -= 1

            print(X_train.shape, lower_bound, upper_bound)
            #p = mp.Process(target=multiproc_img_sft, args=(X_train[lower_bound:upper_bound], y_train[lower_bound:upper_bound], shifts, j))
            p = mp.Process(target=multiproc_img_sft,
                           args=(X_train[0:100], y_train[0:100], shifts, j))
            processes.append(p)
            p.start()

        for j in range(num_of_processes):
            print("process", j, "has joined")
            processes[j].join()

        labels = []
        for i in range(784):
            labels.append(f"p{i}")
        DF = pandas.DataFrame(columns=labels)
        for i in range(num_of_processes):
            print(pd.read_csv(f"extended_{i}.csv").shape)
            DF = pd.concat([DF, pd.read_csv(f"extended_{i}.csv")], ignore_index=True)
            print(f"combined {i+1} dataframes \\ {num_of_processes} dataframes")
        DF.to_csv("X_Train_Extended.csv", index=False)
        print("SAVED ALL DATAFRAMES")


    extended_X = pd.read_csv("X_Train_Extended.csv")
    print(extended_X)
    print(extended_X.shape)
    extended_X = extended_X.values
    y_train = np.append(y_train, extended_X[:, -1].reshape(-1, 1), axis=0).reshape(-1)
    X_train = np.append(X_train, extended_X[:, :-1].reshape(-1, 1, 28, 28), axis=0)

    print(y_train.shape, X_train.shape)


    print("\n\n")


    X_train_size = len(X_train) - 18900
    X_val, y_val = X_train[X_train_size:], y_train[X_train_size:]
    X_train, y_train = X_train[:X_train_size], y_train[:X_train_size]

    print(X_train.shape, y_train.shape)
    print(X_val.shape, y_val.shape)
    print(X_test.shape, y_test.shape)


    X_train_tensor = torch.Tensor(X_train)
    X_val_tensor = torch.Tensor(X_val)
    X_test_tensor = torch.Tensor(X_test)
我得到的错误如下:

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
在线:

X_train_tensor = torch.Tensor(X_train)

任何帮助都是好的

这意味着
X\u列车中的任何东西都不是
torch.Tensor中的任何东西。所以它可能不是任何数字类型。你能找出什么是
X\u train.dtype
吗?