Python:并发 psycopg2 Postgres SELECT 查询返回空结果
标签:python, postgresql, concurrency, pytorch, psycopg2
我正在尝试通过自定义 PyTorch 数据集的 __getitem__ 方法,从 Postgres 数据库中检索标签和特征数据。当我使用随机索引进行采样时,查询不返回任何结果。
我已经检查过该查询直接在 psql 命令行中是否可用:可用。
我已经检查过数据库连接池是否存在问题:似乎没有。
我也已经退回到顺序采样,它仍然完全正常,所以问题似乎出在随机索引值与查询的配合上。
执行查询的 __getitem__ 方法如下。其中同时给出了顺序查询和尝试中的乱序(shuffle)查询,两者都通过变量名清楚地标明。
def __getitem__(self, idx):
    """Fetch a batch of labelled tensors starting at ``idx`` and stack it on the GPU.

    Runs the sequential batch query on a connection borrowed from
    ``self.conn_pool``, converts every returned row with
    ``create_batch_stack_element`` and stacks the labels and the sequences
    into two tensors that are moved to ``'cuda'``.

    Args:
        idx: Lower bound compared against ``it.index`` in the query.

    Returns:
        A ``(label_stack, sequence_stack)`` tuple of CUDA tensors.

    Raises:
        Error: The database error class this method already caught; it is now
            re-raised after being printed. Previously it was swallowed and the
            method failed immediately afterwards with a ``TypeError`` while
            iterating ``None``.
    """
    query = """SELECT ls.taxonomic_id, it.tensor
               FROM genomics.tensors2 AS it
               INNER JOIN genomics.labeled_sequences AS ls
               ON ls.accession_number = it.accession_number
               WHERE (%s) <= it.index
               AND CARDINALITY(tensor) = 89
               LIMIT (%s) OFFSET (%s)"""
    # Attempted shuffled-access query. It is kept for reference only and is
    # NOT executed by this method.
    # NOTE(review): BEGIN/END without statement terminators is probably not
    # valid as a single statement - confirm before using it.
    shuffle_query = """BEGIN
                       SELECT ls.taxonomic_id, it.tensor
                       FROM genomics.tensors2 AS it
                       INNER JOIN genomics.labeled_sequences AS ls
                       ON ls.accession_number = it.accession_number
                       WHERE it.index BETWEEN (%s) AND (%s)
                       END"""
    batch_size = 500
    upper_bound = idx + batch_size
    # NOTE(review): OFFSET is bound to batch_size, so the first `batch_size`
    # matching rows are skipped. When fewer than 2 * batch_size rows satisfy
    # `idx <= it.index` the result can be short or empty - confirm intent.
    query_data = (idx, batch_size, batch_size)
    shuffle_query_data = (idx, upper_bound)

    conn = self.conn_pool.getconn()
    try:
        conn.set_session(readonly=True, autocommit=True)
        # The context manager closes the cursor even if execute/fetch raises.
        with conn.cursor() as cursor:
            cursor.execute(query, query_data)
            results = cursor.fetchall()
    except Error as conn_pool_error:
        print('Multithreaded __getitem__ query error')
        print(conn_pool_error)
        raise
    finally:
        # Always hand the connection back, otherwise every failed query
        # permanently removes one connection from the pool.
        self.conn_pool.putconn(conn)
    print(idx)
    print(results)

    label_list = []
    sequence_list = []
    for row in results:
        if row is None:
            continue
        label, sequence = self.create_batch_stack_element(row)
        label_list.append(label)
        sequence_list.append(sequence)
    # torch.stack raises if the query returned no rows (empty lists).
    label_stack = torch.stack(label_list).to('cuda')
    sequence_stack = torch.stack(sequence_list).to('cuda')
    return (label_stack, sequence_stack)
def create_batch_stack_element(self, result):
    """Turn one query row into a ``(label, sequence)`` pair of int64 tensors.

    Args:
        result: A row whose first item is the label and whose second item is
            the sequence, or ``None``.

    Returns:
        ``(label, sequence)`` as tensors built from int64 NumPy arrays, or
        ``None`` when ``result`` is ``None``.
    """
    if result is None:
        return None
    label_array = np.array(result[0], dtype=np.int64)
    sequence_array = np.array(result[1], dtype=np.int64)
    return (torch.from_numpy(label_array), torch.from_numpy(sequence_array))
def __getitem__(self, idx):
query = """SELECT ls.taxonomic_id, it.tensor
FROM genomics.tensors2 AS it
INNER JOIN genomics.labeled_sequences AS ls
ON ls.accession_number = it.accession_number
WHERE (%s) ……(代码片段在此处被截断)

下面是针对带有可索引键的 Postgres 表、为 PyTorch 正确构造的 __getitem__:
def __getitem__(self, idx: int) -> tuple:
    """Fetch the single ``(taxonomic_id, tensor)`` row whose index equals ``idx``.

    The query runs on a connection borrowed from ``self.conn_pool`` in a
    read-only, autocommit session.

    Args:
        idx: Value compared for equality against ``it.index``.

    Returns:
        The row returned by ``cursor.fetchone()``. ``None`` is returned when
        the query fails; the error is printed rather than raised, so
        ``collate`` can skip the sample.
    """
    query = """SELECT ls.taxonomic_id, it.tensor
               FROM genomics.tensors2 AS it
               INNER JOIN genomics.labeled_sequences AS ls
               ON ls.accession_number = it.accession_number
               WHERE (%s) = it.index"""
    query_data = (idx,)
    result = None
    conn = self.conn_pool.getconn()
    try:
        conn.set_session(readonly=True, autocommit=True)
        # The context manager closes the cursor even if execute/fetch raises.
        with conn.cursor() as cursor:
            cursor.execute(query, query_data)
            result = cursor.fetchone()
    except Error as conn_pool_error:
        print('Multithreaded __getitem__ query error')
        print(conn_pool_error)
    finally:
        # Return the connection on both the success and the error path;
        # releasing it only inside the try block leaked it on every failure.
        self.conn_pool.putconn(conn)
    return result
def collate(self, results: list) -> tuple:
    """Combine per-sample rows into stacked label and sequence tensors.

    Rows that are ``None`` are skipped, as are rows for which
    ``create_batch_stack_element`` returns ``None``. Every non-``None`` row is
    printed before conversion.

    Args:
        results: The rows gathered for one batch.

    Returns:
        ``(label_stack, sequence_stack)`` produced with ``torch.stack``.
    """
    labels = []
    sequences = []
    for row in results:
        if row is None:
            continue
        print(row)
        element = self.create_batch_stack_element(row)
        if element is None:
            continue
        labels.append(element[0])
        sequences.append(element[1])
    return (torch.stack(labels), torch.stack(sequences))
def create_batch_stack_element(self, result: tuple) -> tuple:
    """Convert one database row into a pair of int64 tensors.

    Args:
        result: A row holding the label at position 0 and the sequence at
            position 1, or ``None``.

    Returns:
        ``(label, sequence)`` tensors created from int64 NumPy arrays, or
        ``None`` if ``result`` is ``None``.
    """
    if result is None:
        return None
    return (
        torch.from_numpy(np.array(result[0], dtype=np.int64)),
        torch.from_numpy(np.array(result[1], dtype=np.int64)),
    )
然后,我用以下代码调用了我的训练函数:
# Start one training process per slot; each receives the same dataloader.
for rank in range(num_processes):
    worker = mp.Process(target=train, args=(dataloader,))
    worker.start()
    processes.append(worker)

# Wait for every started training process to finish.
for worker in processes:
    worker.join()