使用pymarc marc8_到unicode的Python字符串比较不再有效_Python_String_Python 2.7_Unicode_Marc

使用pymarc marc8_到unicode的Python字符串比较不再有效

python string python-2.7 unicode

使用pymarc marc8_到unicode的Python字符串比较不再有效,python,string,python-2.7,unicode,marc,Python,String,Python 2.7,Unicode,Marc,我的代码使用MARCReader导入一个MARC文件，并将字符串与可接受答案列表进行比较。如果MARC中的字符串在我的列表中没有匹配项，它将被添加到错误列表中。这在Windows7上的Python 2.7.4安装中已经运行了多年，没有任何问题。我最近买了一台Windows10机器并安装了Python2.7.10，现在带有非标准字符的字符串无法匹配。问题不仅仅是Python2.7.10；我已经在这台新机器上安装了从2.7.4到2.7.10的所有版本，并且遇到了同样的问题。在Windows7机器上新

我的代码使用MARCReader导入一个MARC文件，并将字符串与可接受答案列表进行比较。如果MARC中的字符串在我的列表中没有匹配项，它将被添加到错误列表中。这在Windows7上的Python 2.7.4安装中已经运行了多年，没有任何问题。我最近买了一台Windows10机器并安装了Python2.7.10，现在带有非标准字符的字符串无法匹配。问题不仅仅是Python2.7.10；我已经在这台新机器上安装了从2.7.4到2.7.10的所有版本，并且遇到了同样的问题。在Windows7机器上新安装的Python 2.7.10也会遇到问题

我删掉了不相关的函数，并且戏剧性地删掉了主列表。在本例中，Académie des Sciences是一个现有的存储库，但Acadm̌ie des Sciences现在出现在我们的新存储库列表中

# -*- coding: utf-8 -*-
from aipmarc import get_catdb, get_bibno, parse_date
from phfawstemplate import browsepage #, nutchpage, eadpage, titlespage
from pymarc import MARCReader, marc8_to_unicode
from time import strftime
from umlautsort import alafiling
import urllib2
import sys
import os
import string

def make_newrepos_list(list, fn):   # Create list of unexpected repositories found in the MArcout database dump
    output = "These new repositories are not yet included in the master list in phfaws.py. Please add the repository code (in place of ""NEWCODE*""), and the URL (in place of ""TEST""), and then add these lines to phfaws.py. Please keep the list alphabetical. \nYou can find repository codes at http://www.loc.gov/marc/organizations/ \n \n"
    for row in list:
        output = '%s    reposmasterlist.append([u"%s", "%s", "%s"])\n' % (output, row[0], row[1], row[2])

    fh = open(fn,'w')
    fh.write(output.encode("utf-8"))
    fh.close()

def main(marcfile):
    reader = MARCReader(file(marcfile))

    '''
    Creating list of preset repository codes.
    '''
    reposmasterlist =[[u"American Institute of Physics", "MdCpAIP", "http://www.aip.org/history/nbl/index.html"]]
    reposmasterlist.append([u"Académie des Sciences", "FrACADEMIE", "http://www.academie-sciences.fr/fr/Transmettre-les-connaissances/inventaires-des-fonds-d-archives-personnelles.html"])
    reposmasterlist.append([u"American Association for the Advancement of Science", "daaas", "http://archives.aaas.org/"])

    newreposcounter = 0
    newrepos = ""
    newreposlist = []

    findingaidcounter = 0
    reposcounter = 0

    for record in reader:
        if record['903']:                                   # Get only records where 903a="PHFAWS"
            phfawsfull = record.get_fields('903')
            for field in phfawsfull:
                phfawsnote = field['a']
                if 'PHFAWS' in phfawsnote:
                    if record['852'] is not None:           # Get only records where 852/repository is not blank
                        repository = record.get_fields('852')
                        for field in repository:
                            reposname = field['a']
                        reposname = marc8_to_unicode(reposname) # Convert repository name from MARC file to Unicode
                        reposname = reposname.rstrip('.,')
                        reposcode = None
                        reposurl = None
                        for row in reposmasterlist:             # Match field 852 repository against the master list. 
                            if row[0] == reposname:             # If it's in the master list, use the master list to populate our repository-related fields
                                reposcode = row[1]
                                reposurl = row[2]
                        if record['856'] is not None:       # Get only records where 856 is not blank and includes "online finding aid"
                            links = record.get_fields('856')
                            for field in links:
                                linksthree = field['3']
                                if linksthree is not None and "online finding aid" in linksthree:
                                    if reposcode == None:       # If this record's repository wasn't in the master list, add to list of new repositories
                                        newreposcounter += 1
                                        newrepos = '%s %s \n' % (newrepos, reposname)
                                        reposcode = "NEWCODE" + str(newreposcounter)
                                        reposurl = "TEST"
                                        reposmasterlist.append([reposname, reposcode, reposurl])
                                        newreposlist.append([reposname, reposcode, reposurl])
                                    human_url = field['u']
                                else:
                                    pass
                        else:
                            pass
                    else:
                        pass
                else:
                    pass
        else:
            pass

    # Output list of new repositories
    newreposlist.sort(key = lambda rep: rep[0])
    if newreposcounter != 0:
        status = '%d new repositories found. you must add information on these repositories, then run phfaws.py again. Please see the newly updated rewrepos.txt for details.' % (newreposcounter)
        sys.stderr.write(status)
        make_newrepos_list(newreposlist, 'newrepos.txt')

if __name__ == '__main__':
    try:
        mf = sys.argv[1]
        sys.exit(main(mf))
    except IndexError:
        sys.exit('Usage: %s <marcfile>' % sys.argv[0])

编辑：我发现简单地注释reposname=marc8_to_unicoreposName行就可以得到我想要的结果。我仍然不明白这是为什么，因为这是一个必要的步骤之前

这对我来说意味着数据库中字符串的编码从MARC8更改为Unicode。你最近升级过你的编目系统吗