can';t将numpy数组转换为pytorch张量
我正在尝试建立一个人工智能来识别手写数字。 我试图通过将图像向各个方向移动一到两个像素来扩展数据集,为了使这个过程更快,我使用了多处理。 这是我的代码:can';t将numpy数组转换为pytorch张量,pytorch,artificial-intelligence,Pytorch,Artificial Intelligence,我正在尝试建立一个人工智能来识别手写数字。 我试图通过将图像向各个方向移动一到两个像素来扩展数据集,为了使这个过程更快,我使用了多处理。 这是我的代码: import pandas import torch import torch.nn as nn from torch.autograd import Variable import pandas as pd import numpy as np from torch.utils.data import TensorDataset import
import pandas
import torch
import torch.nn as nn
from torch.autograd import Variable
import pandas as pd
import numpy as np
from torch.utils.data import TensorDataset
import matplotlib.pyplot as plt
from scipy.ndimage.interpolation import shift as image_shift
import math
import time
import multiprocessing as mp
def multiproc_img_sft(img, y, sfts, procNum):
labels = []
for i in range(784):
labels.append(f"p{i}")
labels.append("labels")
#q.put(image_shift(img.reshape(28, 28), sft).reshape(1, 1, 28, 28))
img = img.reshape(-1, 784)
imgs = np.empty((0, img.shape[1]))
y_expended = np.empty((0, 1))
for i in range(img.shape[0]):
if i % 1000 == 0:
imgs = np.append(imgs, y_expended, axis=1)
DF = pd.DataFrame(imgs, columns=labels)
if i == 0:
DF.to_csv(f"extended_{procNum}.csv", index=False)
else:
DF.to_csv(f"extended_{procNum}.csv", mode="a", index=False)
imgs = np.empty((0, img.shape[1]))
y_expended = np.empty((0, 1))
if i % 50 == 0:
print(f"{procNum}: {i+1} \\ {img.shape[0]}")
for sft in sfts:
imgs = np.append(imgs, image_shift(img[i].reshape(28, 28), sft).reshape(-1, 784), axis=0)
y_expended = np.append(y_expended, [y[i]], axis=0)
#print(y[i], y_expended.shape)
#print(procNum, y_expended, y_expended.shape, imgs.shape)
#y_expended = np.array(y_expended)
imgs = np.append(imgs, y_expended, axis=1)
DF = pd.DataFrame(imgs, columns=labels)
DF.to_csv(f"extended_{procNum}.csv", mode="a", index=False)
print(procNum, "dataframe labels =", labels)
print(procNum, "y_expended =", y_expended.shape, type(y_expended))
print(f"proc {procNum} ended")
if __name__ == "__main__":
extend_X = True
num_of_processes = 1
shifts = [[1, 0], [0, 1], [-1, 0], [0, -1]]#,
### [1, 1], [1, -1], [-1, 1], [-1, -1],
###
### [2, 0], [0, 2], [-2, 0], [0, -2],
### [2, 2], [2, -2], [-2, 2], [-2, -2],
### [2, 1], [1, 2], [2, -1], [1, -2], [-1, 2], [-2, 1], [-1, -2], [-2, -1]]
#shifts = [[1, 0]]
batch_size = 100
batch_size = 1000
n_iters = 10000
learning_rate = 0.003325
learning_rate = 0.003325
# Accuracy: 99.43386243386243, Correct: 18793, Total: 18900
# batch_size = 100
# n_iters = 10000
# learning_rate = 0.003325
train_data = pd.read_csv("train.csv")
X, y = train_data.drop(labels=['label'], axis=1).values, train_data['label'].values
# X = scaler.fit_transform(X)
X_train, X_test = X[:37800, :].reshape(-1, 1, 28, 28), X[37800:, :].reshape(-1, 1, 28, 28)
y_train, y_test = y[:37800].reshape(-1, 1), y[37800:].reshape(-1, 1)
print(y_train.shape)
1==1
print("extend_X =", extend_X)
if extend_X:
processes = []
for j in range(num_of_processes):
#multiproc_img_sft(X_train, y_train, shifts, j)
print(j)
lower_bound = math.floor(len(X_train) / num_of_processes) * j
upper_bound = math.floor(len(X_train) / num_of_processes) * (j+1)
if upper_bound == len(X_train):
upper_bound -= 1
print(X_train.shape, lower_bound, upper_bound)
#p = mp.Process(target=multiproc_img_sft, args=(X_train[lower_bound:upper_bound], y_train[lower_bound:upper_bound], shifts, j))
p = mp.Process(target=multiproc_img_sft,
args=(X_train[0:100], y_train[0:100], shifts, j))
processes.append(p)
p.start()
for j in range(num_of_processes):
print("process", j, "has joined")
processes[j].join()
labels = []
for i in range(784):
labels.append(f"p{i}")
DF = pandas.DataFrame(columns=labels)
for i in range(num_of_processes):
print(pd.read_csv(f"extended_{i}.csv").shape)
DF = pd.concat([DF, pd.read_csv(f"extended_{i}.csv")], ignore_index=True)
print(f"combined {i+1} dataframes \\ {num_of_processes} dataframes")
DF.to_csv("X_Train_Extended.csv", index=False)
print("SAVED ALL DATAFRAMES")
extended_X = pd.read_csv("X_Train_Extended.csv")
print(extended_X)
print(extended_X.shape)
extended_X = extended_X.values
y_train = np.append(y_train, extended_X[:, -1].reshape(-1, 1), axis=0).reshape(-1)
X_train = np.append(X_train, extended_X[:, :-1].reshape(-1, 1, 28, 28), axis=0)
print(y_train.shape, X_train.shape)
print("\n\n")
X_train_size = len(X_train) - 18900
X_val, y_val = X_train[X_train_size:], y_train[X_train_size:]
X_train, y_train = X_train[:X_train_size], y_train[:X_train_size]
print(X_train.shape, y_train.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)
X_train_tensor = torch.Tensor(X_train)
X_val_tensor = torch.Tensor(X_val)
X_test_tensor = torch.Tensor(X_test)
我得到的错误如下:
TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.
在线:
X_train_tensor = torch.Tensor(X_train)
任何帮助都是好的这意味着
X\u列车中的任何东西都不是torch.Tensor中的任何东西。所以它可能不是任何数字类型。你能找出什么是X\u train.dtype
吗?