Python Pandas:忽略lambda中的异常

Python 熊猫:忽略lambda中的异常,python,pandas,exception,lambda,Python,Pandas,Exception,Lambda,我有一个由pdf URL组成的数据框架 Source 0 http://www.ampire.com.tw/en/download.asp?fileN... 1 http://www.ampire.com.tw/en/download.asp?fileN... 2 http://www.buckeyeshapeform.com/media/1240/iso... 3 http://www.ioni

我有一个由pdf URL组成的数据框架

                                              Source
0  http://www.ampire.com.tw/en/download.asp?fileN...
1  http://www.ampire.com.tw/en/download.asp?fileN...
2  http://www.buckeyeshapeform.com/media/1240/iso...
3  http://www.ionix-systems.com/files/EN91002009I...
4  http://php2.twinner.com.tw/files/chiplus/ISO90...
我创建了一个函数,将这些URL转换为图像,然后对它们进行OCR并返回特定行,我希望该行填充一个新列 我就是这么做的:

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Download the PDF at URL ``x``, OCR its first page and return one line.

    The first page is rendered to ``sora.png``; the V channel of its HSV
    representation is written to ``temp.png`` and passed to Tesseract.
    The OCR text is lower-cased and split on newlines; the first line that
    contains ``:20`` or ``: 20`` together with ``iso``, ``iatf`` or ``1so``
    is returned in title case.  When no line matches the function falls off
    the end and returns ``None``.

    Nothing is caught here: any error raised by the download, the PDF
    conversion or the OCR step propagates to the caller.
    """
    response = requests.get(x, stream=True)
    pages = pdf2image.convert_from_bytes(response.raw.read())

    # Round-trip the first page through disk so OpenCV can load it.
    page_path = 'sora.png'
    pages[0].save(page_path, 'PNG')
    bgr = cv2.imread(page_path, cv2.IMREAD_COLOR)

    # HSV (hue, saturation, value): only the value channel is kept for OCR.
    _, _, value_channel = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV))
    cv2.imwrite('temp.png', value_channel)
    text = pytesseract.image_to_string(cv2.imread('temp.png'))

    # NOTE(review): '1so' presumably tolerates OCR misreading "iso" - confirm.
    standards = ('iso', 'iatf', '1so')
    for row in text.lower().split('\n'):
        has_year_marker = ':20' in row or ': 20' in row
        if has_year_marker and any(tag in row for tag in standards):
            return row.title()
            
            
我想用lambda来实现:如果URL无法访问、不包含pdf或出现任何类似的错误,就在“B”列中填入“Down”,如下所示

# NOTE: `Exception` here is the exception class object itself, which is always
# truthy, so this conditional expression always evaluates to "Down" and
# address(x) is never called. A conditional expression cannot catch
# exceptions; that requires a try/except statement.
df['B'] = df['Source'].apply(lambda x: "Down" if Exception else address(x))

正确的方法是什么?

我会检查 requests.get() 返回的响应,如果响应不符合预期,则返回 "Down"。代码大致如下:

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Return the certificate line OCR'd from the PDF at URL ``x``.

    Returns:
        ``"Down"`` when the URL cannot be fetched: the request raised a
        ``requests`` exception (connection failure, timeout, invalid URL,
        ...) or the server answered with a status code other than 200.
        Otherwise the first OCR line (title-cased) that contains ``:20`` or
        ``: 20`` together with ``iso``, ``iatf`` or ``1so``, or ``None``
        when no line matches.

    Errors raised by the PDF conversion or the OCR step are not handled here
    and propagate to the caller.
    """
    try:
        # Without stream=True the body is downloaded inside this call, so
        # every network failure surfaces here.  The timeout keeps a single
        # unresponsive host from stalling a whole DataFrame.apply run.
        pdf = requests.get(x, timeout=30)
    except requests.exceptions.RequestException:
        # A host that is really down never produces a status code at all;
        # requests raises instead, so the status check alone is not enough.
        return "Down"
    if pdf.status_code != 200:  # You could be even more specific here
        return "Down"

    # .content is the body with any transfer content-encoding decoded;
    # .raw would hand back the bytes exactly as sent on the wire.
    images = pdf2image.convert_from_bytes(pdf.content)
    sora = 'sora.png'
    images[0].save(sora, 'PNG')
    img = cv2.imread(sora, cv2.IMREAD_COLOR)

    # HSV (hue, saturation, value): only the value channel is fed to the OCR.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    cv2.imwrite('temp.png', v)
    p = pytesseract.image_to_string(cv2.imread('temp.png'))

    for line in p.lower().split('\n'):
        has_year_marker = ':20' in line or ': 20' in line
        if has_year_marker and any(tag in line for tag in ('iso', 'iatf', '1so')):
            return line.title()
    return None

我将检查 requests.get() 返回的响应,如果响应不符合预期,则返回 "Down"。代码大致如下:

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Return the certificate line OCR'd from the PDF at URL ``x``.

    Returns:
        ``"Down"`` when the URL cannot be fetched: the request raised a
        ``requests`` exception (connection failure, timeout, invalid URL,
        ...) or the server answered with a status code other than 200.
        Otherwise the first OCR line (title-cased) that contains ``:20`` or
        ``: 20`` together with ``iso``, ``iatf`` or ``1so``, or ``None``
        when no line matches.

    Errors raised by the PDF conversion or the OCR step are not handled here
    and propagate to the caller.
    """
    try:
        # Without stream=True the body is downloaded inside this call, so
        # every network failure surfaces here.  The timeout keeps a single
        # unresponsive host from stalling a whole DataFrame.apply run.
        pdf = requests.get(x, timeout=30)
    except requests.exceptions.RequestException:
        # A host that is really down never produces a status code at all;
        # requests raises instead, so the status check alone is not enough.
        return "Down"
    if pdf.status_code != 200:  # You could be even more specific here
        return "Down"

    # .content is the body with any transfer content-encoding decoded;
    # .raw would hand back the bytes exactly as sent on the wire.
    images = pdf2image.convert_from_bytes(pdf.content)
    sora = 'sora.png'
    images[0].save(sora, 'PNG')
    img = cv2.imread(sora, cv2.IMREAD_COLOR)

    # HSV (hue, saturation, value): only the value channel is fed to the OCR.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    cv2.imwrite('temp.png', v)
    p = pytesseract.image_to_string(cv2.imread('temp.png'))

    for line in p.lower().split('\n'):
        has_year_marker = ':20' in line or ': 20' in line
        if has_year_marker and any(tag in line for tag in ('iso', 'iatf', '1so')):
            return line.title()
    return None

我之前做错了,我用一种很笨的方式添加了try/except:

def address2(x):
    """Treat each element of ``x`` as a PDF URL and return the first country line.

    For every ``link`` the first PDF page is rendered to ``sora.png``, the V
    channel of its HSV representation is written to ``temp.png`` and OCR'd.
    The OCR text is split on blank lines; the first chunk that contains a
    ``pycountry`` country name or alpha-3 code, and neither ``Quality`` nor
    ``Certificat``, is returned in title case.

    Any exception raised while handling a link makes the function return
    ``"Down"`` immediately.  ``None`` is returned implicitly when ``x`` is
    exhausted without a match.

    NOTE(review): ``x`` is iterated, so passing a single URL string walks it
    character by character.
    """
    for link in x:
        try:
            response = requests.get(link, stream=True)
            pages = pdf2image.convert_from_bytes(response.raw.read())

            # Round-trip the first page through disk so OpenCV can load it.
            page_path = 'sora.png'
            pages[0].save(page_path, 'PNG')
            bgr = cv2.imread(page_path, cv2.IMREAD_COLOR)

            # HSV (hue, saturation, value): keep only the value channel.
            _, _, value_channel = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV))
            cv2.imwrite('temp.png', value_channel)
            text = pytesseract.image_to_string(cv2.imread('temp.png'))

            for chunk in text.split('\n\n'):
                for country in pycountry.countries:
                    names_country = country.name in chunk or country.alpha_3 in chunk
                    if names_country and 'Quality' not in chunk and 'Certificat' not in chunk:
                        return chunk.title()
        except:
            return "Down"
但我把它改成:

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Return the certificate line OCR'd from the PDF at URL ``x``.

    The first PDF page is rendered to ``sora.png``, the V channel of its HSV
    representation is written to ``temp.png`` and OCR'd.  The first OCR line
    (lower-cased for matching, title-cased on return) containing ``:20`` or
    ``: 20`` together with ``iso``, ``iatf`` or ``1so`` is returned.

    Returns:
        The matching line, ``None`` when no line matches, or ``"Down"`` when
        any step (download, PDF conversion, image handling, OCR) fails.
    """
    import logging  # local import so the snippet stays self-contained

    try:
        # The timeout keeps a single unresponsive host from hanging the
        # whole DataFrame.apply run; a timeout is reported as "Down" too.
        pdf = requests.get(x, stream=True, timeout=30)
        images = pdf2image.convert_from_bytes(pdf.raw.read())
        sora = 'sora.png'
        images[0].save(sora, 'PNG')
        img = cv2.imread(sora, cv2.IMREAD_COLOR)

        # HSV (hue, saturation, value): only the value channel is OCR'd.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        _, _, v = cv2.split(hsv)
        cv2.imwrite('temp.png', v)
        p = pytesseract.image_to_string(cv2.imread('temp.png'))

        for line in p.lower().split('\n'):
            has_year_marker = ':20' in line or ': 20' in line
            if has_year_marker and any(tag in line for tag in ('iso', 'iatf', '1so')):
                return line.title()
    except Exception as exc:
        # Deliberately best-effort: one bad URL must not abort the whole
        # column.  Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit through, and the failure is logged rather than hidden.
        logging.getLogger(__name__).warning("address(%r) failed: %r", x, exc)
        return "Down"
    return None

现在没事了

我做错了,我试着用一种愚蠢的方式添加try和except

def address2(x):
    """Treat each element of ``x`` as a PDF URL and return the first country line.

    For every ``link`` the first PDF page is rendered to ``sora.png``, the V
    channel of its HSV representation is written to ``temp.png`` and OCR'd.
    The OCR text is split on blank lines; the first chunk that contains a
    ``pycountry`` country name or alpha-3 code, and neither ``Quality`` nor
    ``Certificat``, is returned in title case.

    Any exception raised while handling a link makes the function return
    ``"Down"`` immediately.  ``None`` is returned implicitly when ``x`` is
    exhausted without a match.

    NOTE(review): ``x`` is iterated, so passing a single URL string walks it
    character by character.
    """
    for link in x:
        try:
            response = requests.get(link, stream=True)
            pages = pdf2image.convert_from_bytes(response.raw.read())

            # Round-trip the first page through disk so OpenCV can load it.
            page_path = 'sora.png'
            pages[0].save(page_path, 'PNG')
            bgr = cv2.imread(page_path, cv2.IMREAD_COLOR)

            # HSV (hue, saturation, value): keep only the value channel.
            _, _, value_channel = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV))
            cv2.imwrite('temp.png', value_channel)
            text = pytesseract.image_to_string(cv2.imread('temp.png'))

            for chunk in text.split('\n\n'):
                for country in pycountry.countries:
                    names_country = country.name in chunk or country.alpha_3 in chunk
                    if names_country and 'Quality' not in chunk and 'Certificat' not in chunk:
                        return chunk.title()
        except:
            return "Down"
但我把它改成:

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Return the certificate line OCR'd from the PDF at URL ``x``.

    The first PDF page is rendered to ``sora.png``, the V channel of its HSV
    representation is written to ``temp.png`` and OCR'd.  The first OCR line
    (lower-cased for matching, title-cased on return) containing ``:20`` or
    ``: 20`` together with ``iso``, ``iatf`` or ``1so`` is returned.

    Returns:
        The matching line, ``None`` when no line matches, or ``"Down"`` when
        any step (download, PDF conversion, image handling, OCR) fails.
    """
    import logging  # local import so the snippet stays self-contained

    try:
        # The timeout keeps a single unresponsive host from hanging the
        # whole DataFrame.apply run; a timeout is reported as "Down" too.
        pdf = requests.get(x, stream=True, timeout=30)
        images = pdf2image.convert_from_bytes(pdf.raw.read())
        sora = 'sora.png'
        images[0].save(sora, 'PNG')
        img = cv2.imread(sora, cv2.IMREAD_COLOR)

        # HSV (hue, saturation, value): only the value channel is OCR'd.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        _, _, v = cv2.split(hsv)
        cv2.imwrite('temp.png', v)
        p = pytesseract.image_to_string(cv2.imread('temp.png'))

        for line in p.lower().split('\n'):
            has_year_marker = ':20' in line or ': 20' in line
            if has_year_marker and any(tag in line for tag in ('iso', 'iatf', '1so')):
                return line.title()
    except Exception as exc:
        # Deliberately best-effort: one bad URL must not abort the whole
        # column.  Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit through, and the failure is logged rather than hidden.
        logging.getLogger(__name__).warning("address(%r) failed: %r", x, exc)
        return "Down"
    return None

现在一切都好了

为什么不从函数返回“Down”?查看
try except
块。在
images=pdf2image.convert_from_bytes(pdf.raw.read())
行中出现异常。它没有继续到最后,我尝试以try的形式制作它,但仍然没有得到任何结果。我做得不对。请展示您尝试的内容感谢您的指点,当我在粘贴之前阅读函数时,我意识到我是多么的愚蠢,为什么不直接从你的函数返回
“Down”
?查看
try except
块。在
images=pdf2image.convert_from_bytes(pdf.raw.read())
行中出现异常。它没有继续到最后,我尝试以try的形式制作它,但仍然没有得到任何结果。我做得不对。请展示您尝试的内容感谢您的指点,当我在粘贴函数之前阅读函数时,我意识到我是多么愚蠢,你应该尽量避免使用这样一个通用的
except:
块。相反,只捕获预期的异常,例如
except TheRelevantExceptionType:
。这是因为,如果您的
try
块中那十多行代码出现了任何其他故障,您可能不会注意到,因为您那个非常宽泛的
except
会把它全部吞掉。这可能会导致非常意外的行为。您应该尽量避免使用这样的常规
except:
块。相反,只捕获预期的异常,例如
except TheRelevantExceptionType:
。这是因为,如果您的
try
块中那十多行代码出现了任何其他故障,您可能不会注意到,因为您那个非常宽泛的
except
会把它全部吞掉。这可能会导致非常意外的行为。