Python pandas：忽略 lambda 中的异常_Python_Pandas_Exception_Lambda

Python pandas：忽略 lambda 中的异常

python pandas exception lambda

Python 熊猫：忽略lambda中的异常,python,pandas,exception,lambda,Python,Pandas,Exception,Lambda,我有一个由pdf URL组成的数据框架 Source 0 http://www.ampire.com.tw/en/download.asp?fileN... 1 http://www.ampire.com.tw/en/download.asp?fileN... 2 http://www.buckeyeshapeform.com/media/1240/iso... 3 http://www.ioni

我有一个由pdf URL组成的数据框架

                                              Source
0  http://www.ampire.com.tw/en/download.asp?fileN...
1  http://www.ampire.com.tw/en/download.asp?fileN...
2  http://www.buckeyeshapeform.com/media/1240/iso...
3  http://www.ionix-systems.com/files/EN91002009I...
4  http://php2.twinner.com.tw/files/chiplus/ISO90...

我创建了一个函数，将这些 URL 转换为图像，然后对它们进行 OCR 并返回特定的一行，我希望用该行填充一个新列。我是这样做的：

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x):
    """Download the PDF at URL ``x``, OCR its first page and return one matching line.

    The first page is saved as ``sora.png``; the V channel of its HSV form is
    written to ``temp.png`` and handed to Tesseract. The recognised text is
    lower-cased and scanned line by line.

    Args:
        x: URL of the PDF document.

    Returns:
        The first line (title-cased) containing ``":20"`` or ``": 20"``
        together with ``"iso"``, ``"iatf"`` or ``"1so"``; ``None`` when no
        line matches.

    Note:
        No error handling is done here: any exception raised by the
        download, the PDF conversion or the OCR step propagates to the caller.
    """
    response = requests.get(x, stream=True)
    pages = pdf2image.convert_from_bytes(response.raw.read())
    page_png = 'sora.png'
    pages[0].save(page_png, 'PNG')
    bgr = cv2.imread(page_png, cv2.IMREAD_COLOR)

    # Only the V (value) plane of the HSV image is used for OCR.
    hue, sat, value = cv2.split(cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV))
    cv2.imwrite('temp.png', value)
    text = pytesseract.image_to_string(cv2.imread('temp.png'))

    # "1so" is presumably how OCR sometimes misreads "iso" - TODO confirm.
    standards = ('iso', 'iatf', '1so')
    for row in text.lower().split('\n'):
        has_year_marker = ':20' in row or ': 20' in row
        if has_year_marker and any(tag in row for tag in standards):
            return row.title()
    return None

我想用lambda运行一些东西，如果URL关闭或者不包含pdf或任何类似的错误，它只需在“B”列中键入“Down”，如下所示

# NOTE: `Exception` here is the exception class object itself, which is always
# truthy, so this conditional expression always evaluates to "Down" and
# address(x) is never called. A conditional expression does not catch errors
# raised by address(); that requires a try/except statement.
df['B'] = df['Source'].apply(lambda x: "Down" if Exception else address(x))

正确的方法是什么？

我会检查 requests.get() 返回的响应，如果响应不符合预期，则返回 "Down"。它可能看起来像：

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x, timeout=30):
    """Fetch the PDF at URL ``x``, OCR its first page and return the certificate line.

    The first page is rendered to ``sora.png``; the V channel of its HSV
    representation is written to ``temp.png`` and that image is passed to
    Tesseract. The OCR text is lower-cased and scanned line by line.

    Args:
        x: URL of the PDF to download.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        ``"Down"`` if the request fails, times out, or answers with a status
        other than 200; otherwise the first OCR line (title-cased) that
        contains ``":20"`` or ``": 20"`` together with ``"iso"``, ``"iatf"``
        or ``"1so"``; ``None`` if no line matches.

    Raises:
        Whatever ``pdf2image``, ``cv2`` or ``pytesseract`` raise when the body
        of a 200 response cannot be converted or read; only request-level
        failures are mapped to ``"Down"`` here.
    """
    try:
        # The context manager releases the streamed connection even on the
        # early return below.
        with requests.get(x, stream=True, timeout=timeout) as pdf:
            if pdf.status_code != 200:  # You could be even more specific here
                return "Down"
            # .content applies any Content-Encoding (gzip/deflate) decoding;
            # .raw.read() would hand the still-encoded bytes to pdf2image.
            payload = pdf.content
    except requests.RequestException:
        # Unreachable host, DNS failure, timeout, broken transfer, bad URL...
        return "Down"

    images = pdf2image.convert_from_bytes(payload)
    sora = 'sora.png'
    images[0].save(sora, 'PNG')
    img = cv2.imread(sora, cv2.IMREAD_COLOR)

    # HSV (hue, saturation, value): only the value plane is used for OCR.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    cv2.imwrite('temp.png', v)
    p = pytesseract.image_to_string(cv2.imread('temp.png'))

    # "1so" is presumably how OCR sometimes misreads "iso" - TODO confirm.
    standards = ('iso', 'iatf', '1so')
    for line in p.lower().split('\n'):
        if (':20' in line or ': 20' in line) and any(s in line for s in standards):
            return line.title()
    return None

我将检查 requests.get() 返回的响应，如果响应不符合预期，则返回 "Down"。它可能看起来像：

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x, timeout=30):
    """Fetch the PDF at URL ``x``, OCR its first page and return the certificate line.

    The first page is rendered to ``sora.png``; the V channel of its HSV
    representation is written to ``temp.png`` and that image is passed to
    Tesseract. The OCR text is lower-cased and scanned line by line.

    Args:
        x: URL of the PDF to download.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        ``"Down"`` if the request fails, times out, or answers with a status
        other than 200; otherwise the first OCR line (title-cased) that
        contains ``":20"`` or ``": 20"`` together with ``"iso"``, ``"iatf"``
        or ``"1so"``; ``None`` if no line matches.

    Raises:
        Whatever ``pdf2image``, ``cv2`` or ``pytesseract`` raise when the body
        of a 200 response cannot be converted or read; only request-level
        failures are mapped to ``"Down"`` here.
    """
    try:
        # The context manager releases the streamed connection even on the
        # early return below.
        with requests.get(x, stream=True, timeout=timeout) as pdf:
            if pdf.status_code != 200:  # You could be even more specific here
                return "Down"
            # .content applies any Content-Encoding (gzip/deflate) decoding;
            # .raw.read() would hand the still-encoded bytes to pdf2image.
            payload = pdf.content
    except requests.RequestException:
        # Unreachable host, DNS failure, timeout, broken transfer, bad URL...
        return "Down"

    images = pdf2image.convert_from_bytes(payload)
    sora = 'sora.png'
    images[0].save(sora, 'PNG')
    img = cv2.imread(sora, cv2.IMREAD_COLOR)

    # HSV (hue, saturation, value): only the value plane is used for OCR.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    cv2.imwrite('temp.png', v)
    p = pytesseract.image_to_string(cv2.imread('temp.png'))

    # "1so" is presumably how OCR sometimes misreads "iso" - TODO confirm.
    standards = ('iso', 'iatf', '1so')
    for line in p.lower().split('\n'):
        if (':20' in line or ': 20' in line) and any(s in line for s in standards):
            return line.title()
    return None

我做错了，我试着用一种愚蠢的方式添加 try 和 except

def address2(x):
    """OCR the first PDF page of each link in ``x`` and return a text block naming a country.

    For every element of ``x`` the PDF is downloaded, its first page is saved
    as ``sora.png``, the V channel of the HSV image is written to ``temp.png``
    and OCR'd. The text is split on blank lines and the first block that
    contains a country name or alpha-3 code, but neither "Quality" nor
    "Certificat", is returned title-cased.

    Returns ``"Down"`` as soon as processing any element raises, and ``None``
    if every element is processed without a match.
    """
    # NOTE(review): if this is called via Series.apply on a column of URL
    # strings, x is presumably one string and this loop walks its characters
    # - confirm against the caller.
    for link in x:
        try:
            pdf = requests.get(link,stream=True)
            images = pdf2image.convert_from_bytes(pdf.raw.read())
            sora = 'sora.png'
            images[0].save(sora, 'PNG')
            img = cv2.imread(sora, cv2.IMREAD_COLOR)
            #img = cv2.blur(img, (5, 5))
            #HSV (hue, saturation, value)
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            h, s, v = cv2.split(hsv)
            # Only the value plane is written out and OCR'd.
            cv2.imwrite('temp.png',v)
            p = pytesseract.image_to_string(cv2.imread('temp.png'))
            # Blocks are separated by blank lines; case is preserved here.
            for line in p.split('\n\n'):
                # NOTE(review): pycountry does not appear in the import lines
                # shown next to these snippets; if it is not imported, the
                # NameError is swallowed below and "Down" is returned.
                for country in pycountry.countries:
                    if country.name in line and('Quality' not in line) and ('Certificat' not in line):
                        return(line.title())
                    elif country.alpha_3 in line and('Quality' not in line) and ('Certificat' not in line):
                        return(line.title())
        # A bare except catches every exception type; returning here also
        # ends the loop, so later links are never tried.
        except:
            return("Down")

但我把它改成：

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x, timeout=30):
    """Download the PDF at URL ``x``, OCR its first page and return the certificate line.

    The first page is rendered to ``sora.png``; the V channel of its HSV
    representation is written to ``temp.png`` and passed to Tesseract. The
    OCR text is lower-cased and scanned line by line.

    Args:
        x: URL of the PDF to download.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The first OCR line (title-cased) that contains ``":20"`` or
        ``": 20"`` together with ``"iso"``, ``"iatf"`` or ``"1so"``;
        ``None`` if no line matches; ``"Down"`` if any step raises an
        ``Exception`` (download, PDF conversion, image handling or OCR).
    """
    try:
        pdf = requests.get(x, timeout=timeout)
        # .content applies any Content-Encoding (gzip/deflate) decoding;
        # .raw.read() would hand the still-encoded bytes to pdf2image.
        images = pdf2image.convert_from_bytes(pdf.content)
        sora = 'sora.png'
        images[0].save(sora, 'PNG')
        img = cv2.imread(sora, cv2.IMREAD_COLOR)

        # HSV (hue, saturation, value): only the value plane is used for OCR.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        _, _, v = cv2.split(hsv)
        cv2.imwrite('temp.png', v)
        p = pytesseract.image_to_string(cv2.imread('temp.png'))

        # "1so" is presumably how OCR sometimes misreads "iso" - TODO confirm.
        standards = ('iso', 'iatf', '1so')
        for line in p.lower().split('\n'):
            if (':20' in line or ': 20' in line) and any(s in line for s in standards):
                return line.title()
    except Exception:
        # Deliberately broad best-effort: any failure marks the URL as "Down".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit through, so a long apply() can still be interrupted.
        return "Down"
    return None

现在没事了

我做错了，我试着用一种愚蠢的方式添加try和except

def address2(x):
    """OCR the first PDF page of each link in ``x`` and return a text block naming a country.

    For every element of ``x`` the PDF is downloaded, its first page is saved
    as ``sora.png``, the V channel of the HSV image is written to ``temp.png``
    and OCR'd. The text is split on blank lines and the first block that
    contains a country name or alpha-3 code, but neither "Quality" nor
    "Certificat", is returned title-cased.

    Returns ``"Down"`` as soon as processing any element raises, and ``None``
    if every element is processed without a match.
    """
    # NOTE(review): if this is called via Series.apply on a column of URL
    # strings, x is presumably one string and this loop walks its characters
    # - confirm against the caller.
    for link in x:
        try:
            pdf = requests.get(link,stream=True)
            images = pdf2image.convert_from_bytes(pdf.raw.read())
            sora = 'sora.png'
            images[0].save(sora, 'PNG')
            img = cv2.imread(sora, cv2.IMREAD_COLOR)
            #img = cv2.blur(img, (5, 5))
            #HSV (hue, saturation, value)
            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
            h, s, v = cv2.split(hsv)
            # Only the value plane is written out and OCR'd.
            cv2.imwrite('temp.png',v)
            p = pytesseract.image_to_string(cv2.imread('temp.png'))
            # Blocks are separated by blank lines; case is preserved here.
            for line in p.split('\n\n'):
                # NOTE(review): pycountry does not appear in the import lines
                # shown next to these snippets; if it is not imported, the
                # NameError is swallowed below and "Down" is returned.
                for country in pycountry.countries:
                    if country.name in line and('Quality' not in line) and ('Certificat' not in line):
                        return(line.title())
                    elif country.alpha_3 in line and('Quality' not in line) and ('Certificat' not in line):
                        return(line.title())
        # A bare except catches every exception type; returning here also
        # ends the loop, so later links are never tried.
        except:
            return("Down")

但我把它改成：

import cv2
import requests
import pdf2image
import pytesseract
import dateutil.parser as dparser
import pandas as pd
import numpy as np
def address(x, timeout=30):
    """Download the PDF at URL ``x``, OCR its first page and return the certificate line.

    The first page is rendered to ``sora.png``; the V channel of its HSV
    representation is written to ``temp.png`` and passed to Tesseract. The
    OCR text is lower-cased and scanned line by line.

    Args:
        x: URL of the PDF to download.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive host cannot block the caller indefinitely.

    Returns:
        The first OCR line (title-cased) that contains ``":20"`` or
        ``": 20"`` together with ``"iso"``, ``"iatf"`` or ``"1so"``;
        ``None`` if no line matches; ``"Down"`` if any step raises an
        ``Exception`` (download, PDF conversion, image handling or OCR).
    """
    try:
        pdf = requests.get(x, timeout=timeout)
        # .content applies any Content-Encoding (gzip/deflate) decoding;
        # .raw.read() would hand the still-encoded bytes to pdf2image.
        images = pdf2image.convert_from_bytes(pdf.content)
        sora = 'sora.png'
        images[0].save(sora, 'PNG')
        img = cv2.imread(sora, cv2.IMREAD_COLOR)

        # HSV (hue, saturation, value): only the value plane is used for OCR.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        _, _, v = cv2.split(hsv)
        cv2.imwrite('temp.png', v)
        p = pytesseract.image_to_string(cv2.imread('temp.png'))

        # "1so" is presumably how OCR sometimes misreads "iso" - TODO confirm.
        standards = ('iso', 'iatf', '1so')
        for line in p.lower().split('\n'):
            if (':20' in line or ': 20' in line) and any(s in line for s in standards):
                return line.title()
    except Exception:
        # Deliberately broad best-effort: any failure marks the URL as "Down".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit through, so a long apply() can still be interrupted.
        return "Down"
    return None

现在一切都好了

为什么不从函数返回“Down”？查看

try except

块。在

images = pdf2image.convert_from_bytes(pdf.raw.read())

行中出现异常。它没有继续执行到最后，我尝试把它写成 try 的形式，但仍然没有得到任何结果。我做得不对。请展示您尝试的内容。感谢您的指点，当我在粘贴之前重读函数时，我才意识到自己有多蠢。为什么不直接从你的函数返回

“Down”

？查看

try except

块。在

images = pdf2image.convert_from_bytes(pdf.raw.read())

行中出现异常。它没有继续执行到最后，我尝试把它写成 try 的形式，但仍然没有得到任何结果。我做得不对。请展示您尝试的内容。感谢您的指点，当我在粘贴函数之前重读函数时，我才意识到自己有多蠢。你应该尽量避免使用这样一个通用的

except:

块。相反，只捕获预期的异常，例如

except TheRelevantExceptionType:

。这是因为，如果您的

try

-块中有10多行出现任何其他故障，您可能不会注意到，因为您的非常普通的

except 会把它吞掉。这可能会导致非常意外的行为。您应该尽量避免使用这样通用的 except: 块。
相反，只捕获预期的异常，例如 except TheRelevantExceptionType:
。这是因为，如果您的 try
块中那十多行代码出现任何其他故障，您可能不会注意到，因为这个非常通用的 except 会把它吞掉。这可能会导致非常意外的行为。