Is it possible that a deep network's outputs in Python are randomly shuffled within a batch?
I am working on a VAE for 1-D signals: sine waves with added noise. To check the shape of the reconstructions, I plotted some original signals against their VAE reconstructions. However, signals with the same index do not look very similar, while signals with unrelated indices show similar shapes. I suspect the outputs of the deep network are randomly rearranged within a batch. I used a basic structure from an online example.

Dataset loading:
train_data = signalDataset(train_dataset)
train_set = DataLoader(dataset=train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers)

for batch_idx, data in enumerate(train_set):
    data = data.to(device)
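A minimal sketch (not from the original post) of a second, non-shuffled loader for evaluation, assuming the same train_data and opt as above, so that batch position k always lines up with dataset row k when plotting:

eval_set = DataLoader(dataset=train_data, batch_size=opt.batch_size,
                      shuffle=False, num_workers=opt.workers)
# With shuffle=False, sample k of batch b is dataset row b * opt.batch_size + k.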
Dataset:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset

class signalDataset(Dataset):
    def __init__(self, dataset_name, normalize=True):
        self.dataset_name = dataset_name
        t_data = pd.read_csv(dataset_name, header=None)
        # One signal per CSV row -> tensor of shape (num_signals, seq_len, 1)
        data = torch.from_numpy(np.expand_dims(np.array([t_data.iloc[i] for i in range(0, len(t_data))]), -1)).float()
        self.data = self.normalize(data) if normalize else data
        self.seq_len = data.size(1)
        # Estimates distribution parameters of deltas (Gaussian) from normalized data
        original_deltas = data[:, -1] - data[:, 0]
        self.original_deltas = original_deltas
        self.or_delta_max, self.or_delta_min = original_deltas.max(), original_deltas.min()
        deltas = self.data[:, -1] - self.data[:, 0]
        self.deltas = deltas
        self.delta_mean, self.delta_std = deltas.mean(), deltas.std()
        self.delta_max, self.delta_min = deltas.max(), deltas.min()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

    def normalize(self, x_list):
        x_norm = torch.empty(0, dtype=torch.float)
        for x in x_list:
            # Note: self.max / self.min are overwritten on every iteration,
            # so denormalize() only ever sees the last signal's range.
            self.max = x.max()
            self.min = x.min()
            # x = 2 * (x - self.min) / (self.max - self.min) - 1  # -1~1
            x = (x - self.min) / (self.max - self.min)  # scale each signal to 0~1
            x = torch.unsqueeze(x, 0)
            x_norm = torch.cat([x_norm, x], dim=0)
        return x_norm

    def denormalize(self, x):
        if not hasattr(self, 'max') or not hasattr(self, 'min'):
            raise Exception("You are calling denormalize, but the input was not normalized")
        # Note: this inverts the commented-out -1~1 scaling, not the active 0~1 one.
        return 0.5 * (x * self.max - x * self.min + self.max + self.min)

    def sample_deltas(self, number):
        # Gaussian samples with the estimated delta statistics
        # (scale by std first, then shift by the mean)
        return torch.randn(number, 1) * self.delta_std + self.delta_mean

    def normalize_deltas(self, x):
        return ((self.delta_max - self.delta_min) * (x - self.or_delta_min) /
                (self.or_delta_max - self.or_delta_min) + self.delta_min)
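A quick sanity check (a sketch; "train.csv" is a hypothetical file name) showing that __getitem__ and a non-shuffled DataLoader preserve row order:

ds = signalDataset("train.csv")            # hypothetical CSV, one signal per row
loader = DataLoader(ds, batch_size=4, shuffle=False)
first_batch = next(iter(loader))
print(torch.equal(first_batch[0], ds[0]))  # True: batch position 0 is dataset row 0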
LSTM VAE:
class LSTM_VAE(nn.Module):
    def __init__(self,
                 input_size=60000,
                 hidden=[1024, 512, 256, 128, 64],
                 latent_size=1024,
                 num_layers=2,
                 bidirectional=True):
        super().__init__()
        self.input_size = input_size
        self.hidden = hidden
        self.latent_size = latent_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.relu = nn.ReLU()
        self.encode = nn.LSTM(input_size=self.input_size,
                              hidden_size=self.hidden[0],
                              num_layers=2,
                              batch_first=True,
                              bidirectional=True)
        self.decode = nn.LSTM(input_size=self.latent_size,
                              hidden_size=self.hidden[2],
                              num_layers=2,
                              batch_first=True,
                              bidirectional=True)
        # Bidirectional LSTMs double the feature dimension, hence hidden[0]*2
        self.fc1 = nn.Linear(self.hidden[0] * 2, self.hidden[1])
        self.fc2 = nn.Linear(self.hidden[1], self.hidden[2])
        self.fc31 = nn.Linear(self.hidden[2], self.latent_size)  # mu head
        self.fc32 = nn.Linear(self.hidden[2], self.latent_size)  # log-variance head
        self.bn1 = nn.BatchNorm1d(1)
        self.bn2 = nn.BatchNorm1d(1)
        self.bn3 = nn.BatchNorm1d(1)
        self.fc4 = nn.Linear(self.hidden[2] * 2, self.hidden[1])
        self.fc5 = nn.Linear(self.hidden[1], self.hidden[0])
        self.fc6 = nn.Linear(self.hidden[0], self.input_size)

    def encoder(self, x):
        x = torch.unsqueeze(x, 1)  # (batch, input_size) -> (batch, 1, input_size)
        x, _ = self.encode(x)
        x = self.fc1(x)
        x = self.fc2(x)
        mu = self.fc31(x)
        log_var = self.fc32(x)
        return mu, log_var

    def decoder(self, z):
        z, _ = self.decode(z)
        z = self.fc4(z)
        z = self.fc5(z)
        z = self.fc6(z)
        z = torch.sigmoid(z)
        return torch.squeeze(z)

    def sampling(self, mu, log_var):
        # Reparameterization trick: z = mu + eps * std, eps ~ N(0, I)
        std = torch.exp(0.5 * log_var)
        eps = torch.randn_like(std)
        return mu + eps * std

    def forward(self, x):
        mu, log_var = self.encoder(x.view(-1, self.input_size))
        z = self.sampling(mu, log_var)
        z = self.decoder(z)
        return z, mu, log_var
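A shape check for the model above (a sketch with a shorter assumed signal length of 1000 to keep it light), confirming that forward() returns one reconstruction per input row, in the same row order:

model = LSTM_VAE(input_size=1000)
x = torch.randn(8, 1000)       # 8 random signals
recon, mu, log_var = model(x)
print(recon.shape)             # torch.Size([8, 1000]); recon[i] reconstructs x[i]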
I searched for information on batched data and data ordering but found nothing useful. I would like to know whether batch outputs can end up shuffled, or whether there is simply a problem in my code.
Any comments or suggestions would be very helpful.

Are you using the torch DataLoader? How exactly are you passing the data to the network? Could you provide an example?

Dear @gooddicts, I have added the data-loading part!

It looks like you selected shuffle=True in your DataLoader, so it is shuffling your data: train_set = DataLoader(dataset=train_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.workers)

@gooddicts I understand the DataLoader's shuffle parameter :D My point is this: if there are four signals with indices 1 to 4, the output signals also carry indices 1 to 4. With a VAE, the input signal at index 1 should correspond to the output signal at index 1, but my results show that output index 3 matches input index 1. That index is just an example, but this kind of result keeps appearing.
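One way to rule out accidental reordering (a sketch, not from the original post): have the dataset return its index together with each sample, so every reconstruction can be matched back to its source row even with shuffle=True. The IndexedDataset wrapper below is hypothetical:

from torch.utils.data import DataLoader, Dataset

class IndexedDataset(Dataset):
    # Hypothetical wrapper: yields (index, signal) pairs
    def __init__(self, base):
        self.base = base
    def __len__(self):
        return len(self.base)
    def __getitem__(self, idx):
        return idx, self.base[idx]

loader = DataLoader(IndexedDataset(train_data), batch_size=opt.batch_size, shuffle=True)
for idx, data in loader:
    recon, mu, log_var = model(data.to(device))
    # recon[k] reconstructs train_data[idx[k]]: plot these pairs rather than
    # comparing position k of two independently ordered tensors.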