Python 迭代csv并将重复值移动到标题行

Python 迭代csv并将重复值移动到标题行,python,algorithm,Python,Algorithm,我想写一个小的python脚本,在CSV文件上迭代生成另一个文件 我的目标是基于CSV文件生成Django模型 但是我的代码没有产生正确的结果 示例CSV输入 预期结果 型号:患者 字段pat_ide#您可以在不将结果存储在数据结构中的情况下执行此操作: with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile: has_header = csv.Sniffer().has_header(c

我想写一个小的python脚本,在CSV文件上迭代生成另一个文件

我的目标是基于CSV文件生成Django模型

但是我的代码没有产生正确的结果

示例CSV输入 预期结果
models : patient

field pat_ide

您可以在不将结果存储到数据结构中的情况下直接完成此操作:

# Stream the CSV and print each model with its fields while reading, without
# collecting the rows in an intermediate data structure.
with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile:
    # Sniff the first 1024 characters for a header row, then rewind so the
    # reader starts from the beginning of the file.
    sample = csvfile.read(1024)
    csvfile.seek(0)
    has_header = csv.Sniffer().has_header(sample)
    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    if has_header:
        next(spamreader)

    # Rows are consumed in pairs: the first row of a pair prints the model
    # heading (column 0), the second row prints the trailer lines.  Column 1
    # is printed as the field name for every row.
    first = True
    for position, row in enumerate(spamreader):
        first = position % 2 == 0
        if first:
            print(f"models : {row[0]}")
        print(f"field {row[1]}")
        first = not first
        if first:
            print("line 1", "line 2", "", sep="\n")

下面是我的完整代码。它是一个代码段,基于 CSV 文件(数据字典)为 Django 应用程序自动生成 models.py 文件。希望这能帮助到别人。

models_fields = []
models = set()
# model name -> names of the fields flagged as primary key ('P' in column 18)
model_key = defaultdict(list)
# model name -> generated Django field declarations, in CSV order
m = defaultdict(list)


def _text_max_length(type_label, default=50):
    """Return the first run of ASCII digits in *type_label* as an int.

    Falls back to *default* when the label holds no digits; 50 mirrors the
    max_length used for the generic CharField fallback.
    """
    digits = ''
    for char in type_label:
        if char in '0123456789':
            digits += char
        elif digits:
            break
    return int(digits) if digits else default


def _field_definition(row):
    """Return the Django field declaration (one source line) for a CSV row.

    Columns read: 0 = field name (lower-cased), 3 = type label, 4 = text
    inserted verbatim as the first field argument, 6 = date format,
    7 = auto-fill marker, 9 = widget label, 18 = key flag ('P' gives a
    primary key, 'I' gives unique=True on text fields).
    """
    name = row[0].lower()
    kind = row[3]
    # NOTE(review): column 4 is pasted unquoted into the generated call, so it
    # presumably already holds a valid Python expression -- confirm against the CSV.
    label = str(row[4])
    common = "null=True, blank=True"

    if kind == 'Entier':
        if row[18] == 'P':
            return f"{name} = models.AutoField(primary_key=True)"
        return f"{name} = models.IntegerField({label}, {common})"
    if kind == 'Booleen':
        return f"{name} = models.BooleanField({label}, {common})"
    if kind == 'Octet' and row[9] == 'Case à cocher':
        return f"{name} = models.BooleanField({label}, {common})"
    if kind == 'Octet' and row[9] == 'menu déroulant':
        return f"{name} = models.IntegerField({label}, {common})"
    if kind == 'Réel':
        return f"{name} = models.FloatField({label}, {common})"
    if kind.startswith('Texte'):
        # The length is parsed from the type label itself and emitted as a
        # bare integer, which is what CharField.max_length requires.
        length = _text_max_length(kind)
        unique = "unique=True, " if row[18] == 'I' else ""
        return f"{name} = models.CharField({label}, max_length={length}, {unique}{common})"
    if kind == 'Date':
        field_class = {
            'jj/mm/aaaa': 'DateField',
            'jj/mm/aaaa hh:mm:ss': 'DateTimeField',
        }.get(row[6])
        if field_class and row[7] == '':
            return f"{name} = models.{field_class}({label}, {common})"
        if field_class and row[7] == 'auto (serveur)':
            return f"{name} = models.{field_class}({label}, {common}, auto_now_add=True)"
    # Anything unrecognised is stored as a short text field.
    return f"{name} = models.CharField({label}, max_length=50, {common})"


with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile:
    # Sniff the first 1024 characters for a header row, then rewind.
    has_header = csv.Sniffer().has_header(csvfile.read(1024))
    csvfile.seek(0)

    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    if has_header:
        next(spamreader)

    for row in spamreader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to map.
            continue
        models.add(row[1])
        if row[3] == 'Entier' and row[18] == 'P':
            model_key[row[1]].append(row[0].lower())
        m[row[1]].append(_field_definition(row))
        
# print('m',m)
# print('m["patient"]',m["patient"])
# print('model_key',model_key)
# print('model_key["patient"][0]',model_key["patient"][0])

# Write the generated Django models module as plain text.
# Lines are written directly instead of through csv.writer: an empty quotechar
# is rejected by the csv module on Python 3.11+, and QUOTE_NONE with a space as
# both delimiter and escapechar prefixes every space in a line with another
# space, which mangles the generated source.
with open('models.txt', 'w', newline='', encoding="utf8") as f1:
    lines = [
        'from django.db import models',
        'from safedelete.models import SafeDeleteModel, SOFT_DELETE, SOFT_DELETE_CASCADE',
        'from simple_history.models import HistoricalRecords',
        'from django.utils import timezone',
        'from partial_date import PartialDateField',
        '',
    ]

    print('import django 1')
    print('import django 2')
    print()

    # Sorted so the generated file is identical from one run to the next
    # (iteration order of a set of strings is not stable across runs).
    for model in sorted(models):
        print('models :', model)
        class_name = model.capitalize()
        # Fall back to Django's "pk" alias when no primary-key row was found
        # for this model, instead of failing on an empty list.
        key_fields = model_key.get(model) or ['pk']
        key = key_fields[0]

        lines.append('class ' + class_name + '(models.Model):')
        lines.append('\t' + '""" A class to create a ' + model + ' instance. """')
        lines.append('')

        for field in m[model]:
            print('field', field)
            lines.append('\t' + field)
        print('line 1')
        print('line 2')
        print()

        lines.extend([
            '\t' + 'log = HistoricalRecords()',
            '',
            '\t' + 'class Meta:',
            '\t\t' + 'db_table = "crf_' + model[:3] + '"',
            # Quoted: the value must be a string literal in the generated code.
            '\t\t' + 'verbose_name_plural = "' + class_name + 's"',
            '\t\t' + 'ordering = ["' + key + '"]',
            '',
            '\t' + 'def __str__(self):',
            '\t\t' + 'return f"{self.' + key + '}"',
            '',
            '',
        ])

    f1.write('\n'.join(lines) + '\n')

我在以下行中遇到一个错误:
ValueError:要解压缩的值太多(预期为2个)
这意味着您的 CSV 有两个以上的列。是这种情况吗?能否回复我确认一下?如果您的 CSV 有更多的列,最好在问题中说明这一点,因为示例只显示了两列;我会相应地调整我的答案。 ;形式;var3;var4;var5;var6;var7;var8;var9;var10;var11;var12;var13;var14;var15;var16;var17;var18;VAR19 您能否更新问题中的示例?如果相关,请确保预期输出与之一致,并且确实是您所期望的。 我已经更新了我的答案。
# First attempt from the question: collect the model names and, per model,
# a list of field names read from the CSV.
models = set()
m = {}
with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile:
    # Sniff the first 1024 characters for a header row, then rewind.
    sample = csvfile.read(1024)
    csvfile.seek(0)
    has_header = csv.Sniffer().has_header(sample)

    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    if has_header:
        next(spamreader)
    i = 0
    for row in spamreader:
        field_name, model_name = row[0], row[1]
        models.add(model_name)
        # NOTE: plain assignment replaces the list on every row, so only the
        # last field read for each model is kept.  The intended result was a
        # list such as ['pat_ide', 'pat_pat'] per model.
        m[model_name] = [field_name]

# Two placeholder header lines followed by a blank line.
print('import django 1', 'import django 2', '', sep='\n')
# For every collected model: its name, its stored fields, then two
# placeholder lines and a blank separator.
for model in models:
    print(f'models : {model}')
    for field in m[model]:
        print(f'field {field}')
    print('line 1', 'line 2', '', sep='\n')
models : patient
field pat_pat           # <- only the second line is stored as list is reinitialized during loop
line 1
line 2

models : inclusion
field inc_pat           # <- only the second line is stored as list is reinitialized during loop
line 1
line 2

models : demographic
field dem_pat           # <- only the second line is stored as list is reinitialized during loop
line 1
line 2

# Stream the CSV and print each model with its fields while reading, without
# collecting the rows in an intermediate data structure.
with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile:
    # Sniff the first 1024 characters for a header row, then rewind so the
    # reader starts from the beginning of the file.
    sample = csvfile.read(1024)
    csvfile.seek(0)
    has_header = csv.Sniffer().has_header(sample)
    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    if has_header:
        next(spamreader)

    # Rows are consumed in pairs: the first row of a pair prints the model
    # heading (column 0), the second row prints the trailer lines.  Column 1
    # is printed as the field name for every row.
    first = True
    for position, row in enumerate(spamreader):
        first = position % 2 == 0
        if first:
            print(f"models : {row[0]}")
        print(f"field {row[1]}")
        first = not first
        if first:
            print("line 1", "line 2", "", sep="\n")
models_fields = []
models = set()
# model name -> names of the fields flagged as primary key ('P' in column 18)
model_key = defaultdict(list)
# model name -> generated Django field declarations, in CSV order
m = defaultdict(list)


def _text_max_length(type_label, default=50):
    """Return the first run of ASCII digits in *type_label* as an int.

    Falls back to *default* when the label holds no digits; 50 mirrors the
    max_length used for the generic CharField fallback.
    """
    digits = ''
    for char in type_label:
        if char in '0123456789':
            digits += char
        elif digits:
            break
    return int(digits) if digits else default


def _field_definition(row):
    """Return the Django field declaration (one source line) for a CSV row.

    Columns read: 0 = field name (lower-cased), 3 = type label, 4 = text
    inserted verbatim as the first field argument, 6 = date format,
    7 = auto-fill marker, 9 = widget label, 18 = key flag ('P' gives a
    primary key, 'I' gives unique=True on text fields).
    """
    name = row[0].lower()
    kind = row[3]
    # NOTE(review): column 4 is pasted unquoted into the generated call, so it
    # presumably already holds a valid Python expression -- confirm against the CSV.
    label = str(row[4])
    common = "null=True, blank=True"

    if kind == 'Entier':
        if row[18] == 'P':
            return f"{name} = models.AutoField(primary_key=True)"
        return f"{name} = models.IntegerField({label}, {common})"
    if kind == 'Booleen':
        return f"{name} = models.BooleanField({label}, {common})"
    if kind == 'Octet' and row[9] == 'Case à cocher':
        return f"{name} = models.BooleanField({label}, {common})"
    if kind == 'Octet' and row[9] == 'menu déroulant':
        return f"{name} = models.IntegerField({label}, {common})"
    if kind == 'Réel':
        return f"{name} = models.FloatField({label}, {common})"
    if kind.startswith('Texte'):
        # The length is parsed from the type label itself and emitted as a
        # bare integer, which is what CharField.max_length requires.
        length = _text_max_length(kind)
        unique = "unique=True, " if row[18] == 'I' else ""
        return f"{name} = models.CharField({label}, max_length={length}, {unique}{common})"
    if kind == 'Date':
        field_class = {
            'jj/mm/aaaa': 'DateField',
            'jj/mm/aaaa hh:mm:ss': 'DateTimeField',
        }.get(row[6])
        if field_class and row[7] == '':
            return f"{name} = models.{field_class}({label}, {common})"
        if field_class and row[7] == 'auto (serveur)':
            return f"{name} = models.{field_class}({label}, {common}, auto_now_add=True)"
    # Anything unrecognised is stored as a short text field.
    return f"{name} = models.CharField({label}, max_length=50, {common})"


with open('redcap_ddd.csv', newline='', encoding="utf8") as csvfile:
    # Sniff the first 1024 characters for a header row, then rewind.
    has_header = csv.Sniffer().has_header(csvfile.read(1024))
    csvfile.seek(0)

    spamreader = csv.reader(csvfile, delimiter=';', quotechar='|')
    if has_header:
        next(spamreader)

    for row in spamreader:
        if not row:
            # csv.reader yields [] for blank lines; there is nothing to map.
            continue
        models.add(row[1])
        if row[3] == 'Entier' and row[18] == 'P':
            model_key[row[1]].append(row[0].lower())
        m[row[1]].append(_field_definition(row))
        
# print('m',m)
# print('m["patient"]',m["patient"])
# print('model_key',model_key)
# print('model_key["patient"][0]',model_key["patient"][0])

# Write the generated Django models module as plain text.
# Lines are written directly instead of through csv.writer: an empty quotechar
# is rejected by the csv module on Python 3.11+, and QUOTE_NONE with a space as
# both delimiter and escapechar prefixes every space in a line with another
# space, which mangles the generated source.
with open('models.txt', 'w', newline='', encoding="utf8") as f1:
    lines = [
        'from django.db import models',
        'from safedelete.models import SafeDeleteModel, SOFT_DELETE, SOFT_DELETE_CASCADE',
        'from simple_history.models import HistoricalRecords',
        'from django.utils import timezone',
        'from partial_date import PartialDateField',
        '',
    ]

    print('import django 1')
    print('import django 2')
    print()

    # Sorted so the generated file is identical from one run to the next
    # (iteration order of a set of strings is not stable across runs).
    for model in sorted(models):
        print('models :', model)
        class_name = model.capitalize()
        # Fall back to Django's "pk" alias when no primary-key row was found
        # for this model, instead of failing on an empty list.
        key_fields = model_key.get(model) or ['pk']
        key = key_fields[0]

        lines.append('class ' + class_name + '(models.Model):')
        lines.append('\t' + '""" A class to create a ' + model + ' instance. """')
        lines.append('')

        for field in m[model]:
            print('field', field)
            lines.append('\t' + field)
        print('line 1')
        print('line 2')
        print()

        lines.extend([
            '\t' + 'log = HistoricalRecords()',
            '',
            '\t' + 'class Meta:',
            '\t\t' + 'db_table = "crf_' + model[:3] + '"',
            # Quoted: the value must be a string literal in the generated code.
            '\t\t' + 'verbose_name_plural = "' + class_name + 's"',
            '\t\t' + 'ordering = ["' + key + '"]',
            '',
            '\t' + 'def __str__(self):',
            '\t\t' + 'return f"{self.' + key + '}"',
            '',
            '',
        ])

    f1.write('\n'.join(lines) + '\n')