Python的自然连接实现

Python的自然连接实现,python,sql,natural-join,Python,Sql,Natural Join,我正在python中实现自然连接。前两行显示表属性,后两行显示每个表的元组或行 预期产出: [['A', 1, 'A', 'a', 'A'], ['A', 1, 'A', 'a', 'Y'], ['A', 1, 'Y', 'a', 'A'], ['A', 1, 'Y', 'a', 'Y'], ['S', 2, 'B', 'b', 'S']] 我得到的是: [['A', 1, 'A', 'a', 'A', 'Y'], ['A', 1, 'A', 'a', 'A', 'Y']]

我正在python中实现自然连接。前两行显示表属性,后两行显示每个表的元组或行

预期产出:

[['A', 1, 'A', 'a', 'A'], 
 ['A', 1, 'A', 'a', 'Y'], 
 ['A', 1, 'Y', 'a', 'A'], 
 ['A', 1, 'Y', 'a', 'Y'], 
 ['S', 2, 'B', 'b', 'S']]
我得到的是:

[['A', 1, 'A', 'a', 'A', 'Y'], 
 ['A', 1, 'A', 'a', 'A', 'Y']]
我已经看过代码,一切似乎都是正确的,我将感谢任何帮助

t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

t1tuples = [['A', 1, 'A', 'a'], 
            ['B', 2, 'Y', 'a'], 
            ['Y', 4, 'B', 'b'], 
            ['A', 1, 'Y', 'a'], 
            ['S', 2, 'B', 'b']]

t2tuples = [[1, 'a', 'A'], 
            [3, 'a', 'B'], 
            [1, 'a', 'Y'], 
            [2, 'b', 'S'], 
            [3, 'b', 'E']]

def findindices(t1atts, t2atts):
  t1index=[]
  t2index=[]
  for index, att in enumerate(t1atts):
    for index2, att2 in enumerate(t2atts):
      if att == att2:
        t1index.append(index)
        t2index.append(index2)
  return t1index, t2index

def main():
  tpl=0; tpl2=0; i=0; j=0; count=0; result=[]
  t1index, t2index = findindices(t1atts, t2atts)
  for tpl in t1tuples:
    while tpl2 in range(len(t2tuples)):
      i=0; j=0
      while (i in range(len(t1index))) and (j in range(len(t2index))):
          if tpl[t1index[i]] != t2tuples[tpl2][t2index[j]]:
            i=len(t1index)
            j=len(t1index)
          else:
            count+=1
          i+=1
          j+=1
      if count == len(t1index):
        extravals = [val for index, val in enumerate(t2tuples[tpl2]) if index not in t2index]
        temp = tpl
        tpl += extravals
        result.append(tpl)
        tpl = temp
      count=0
      tpl2+=1
  print result

好的,这是解决方案,请验证并让我知道它是否适合您:

我改变了一点命名来理解自己:

#!/usr/bin/python

table1 = ('A', 'B', 'C', 'D')
table2 = ('B', 'D', 'E')

row1 = [['A', 1, 'A', 'a'],
        ['B', 2, 'Y', 'a'],
        ['Y', 4, 'B', 'b'],
        ['A', 1, 'Y', 'a'],
        ['S', 2, 'B', 'b']]

row2 = [[1, 'a', 'A'],
        [3, 'a', 'B'],
        [1, 'a', 'Y'],
        [2, 'b', 'S'],
        [3, 'b', 'E']]

def findindices(table1, table2):
    inter = set(table1).intersection(set(table2))
    tup_index1 = [table1.index(x) for x in inter]
    tup_index2 = [table2.index(x) for x in inter]]
    return tup_index1, tup_index2

def main():

    final_lol = list()

    tup_index1, tup_index2 = findindices(table1, table2)

    merge_tup = zip(tup_index1, tup_index2)

    for tup1 in row1:
        for tup2 in row2:
            for m in merge_tup:
                if tup1[m[0]] != tup2[m[1]]:
                    break
            else:
               ls = []
               ls.extend(tup1)
               ls.append(tup2[-1])
               final_lol.append(ls)
    return final_lol

if __name__ == '__main__':
    import pprint
    pprint.pprint(main())
输出:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]

这是我想到的。在结束之前,我会做更多的重构等工作

import pprint

t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

t1tuples = [
    ['A', 1, 'A', 'a'],
    ['B', 2, 'Y', 'a'],
    ['Y', 4, 'B', 'b'],
    ['A', 1, 'Y', 'a'],
    ['S', 2, 'B', 'b']]

t2tuples = [
    [1, 'a', 'A'],
    [3, 'a', 'B'],
    [1, 'a', 'Y'],
    [2, 'b', 'S'],
    [3, 'b', 'E']]


t1columns = set(t1atts)
t2columns = set(t2atts)
t1map = {k: i for i, k in enumerate(t1atts)}
t2map = {k: i for i, k in enumerate(t2atts)}

join_on = t1columns & t2columns
diff = t2columns - join_on

def match(row1, row2):
   return all(row1[t1map[rn]] == row2[t2map[rn]] for rn in join_on)

results = []
for t1row in t1tuples:
    for t2row in t2tuples:
        if match(t1row, t2row):
            row = t1row[:]
            for rn in diff:
                row.append(t2row[t2map[rn]])
            results.append(row)

pprint.pprint(results)
我得到了预期的结果:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]

你想加入吗?1元组和2元组?是的,我是。然后,输出也将是一个元组列表。但是,我看不到从['a',1',a',a',a',a',Y']'中获取'['a',1',a',a']+[1',a',a']的任何规则。在自然连接中,T1tuple中的每个元组都会与T2tuple中的每个元组进行比较…如果找到匹配,该元组附加到结果,即我得到的输出或不正确的输出。请在“预期输出”下查看程序应输出的内容。您也可以通过调用main()自己运行程序并查看输出。