Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/python/335.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
使用 PISA 和 Python 将 HTML 转换为 PDF 时出现 CssParser 错误_Python_Pdf Generation_Pisa_Css Parsing - Fatal编程技术网

使用 PISA 和 Python 将 HTML 转换为 PDF 时出现 CssParser 错误

CssParser错误,将html转换为pdf时,PISA&;python,python,pdf-generation,pisa,css-parsing,Python,Pdf Generation,Pisa,Css Parsing,我正在尝试使用pisa和python将html文档转换为pdf文件。它适用于小html代码。但是当你通过它传递google.com的html数据,或者实际上是任何一个大的html文件时,它就会消除这个错误 以下是将html转换为pdf的代码: import ho.pisa as pisa import sys import os ls =[] for arg in sys.argv: ls.append(arg) pisa.showLogging() print ls html_fil

我正在尝试使用 pisa 和 Python 将 HTML 文档转换为 PDF 文件。对于较小的 HTML 代码,它可以正常工作。但是当传入 google.com 的 HTML 数据,或者实际上任何一个较大的 HTML 文件时,它就会抛出下面这个错误。

以下是将html转换为pdf的代码:

import ho.pisa as pisa
import sys
import os
# Convert the HTML file named on the command line into a PDF written to the
# current working directory, then open the result in the default viewer.
ls = list(sys.argv)
pisa.showLogging()
print(ls)

if len(ls) < 2:
    # Fail with a usage hint instead of an IndexError on ls[1].
    sys.exit("usage: %s <input.html>" % os.path.basename(ls[0]))

filename = os.path.basename(ls[1])
print(filename)

# os.path.join puts a separator between the directory and the file name, and
# the extension is swapped so the PDF does not end up named "*.html".
pdf_path = os.path.join(os.getcwd(), os.path.splitext(filename)[0] + ".pdf")

# The with-blocks close both files even when CreatePDF raises, which it does
# for the CSS parse error shown in the traceback below.
with open(ls[1]) as html_file:
    HTML = html_file.read()
with open(pdf_path, "wb") as pdfFile:
    pdf = pisa.CreatePDF(HTML, pdfFile)

if not pdf.err:
    print("ds")
    # Open the generated PDF (not the HTML input), and only after the output
    # file has been closed so the viewer sees the complete document.
    pisa.startViewer(pdf_path)
这就是抛出的错误:

ERROR [ho.pisa] C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py line 223: Document error

Traceback (most recent call last):
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 128, in pisaDocument
    c = pisaStory(src, path, link_callback, debug, default_css, xhtml, encoding,
 c=c, xml_output=xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 73, in pisaStory
    pisaParser(src, c, default_css, xhtml, encoding, xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_parser.py", line 626, in pisaParser
    c.parseCSS()
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_context.py", line 545, in parseCSS
    self.css = self.cssParser.parse(self.cssText)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 358, in parse
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 453, in _parseStylesheet
    src, atResults = self._parseAtKeyword(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 577, in _parseAtKeyword
    src, result = self._parseAtIdent(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 722, in _parseAtIdent
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 458, in _parseStylesheet
    src, ruleset = self._parseRuleset(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 737, in _parseRuleset
    src, properties = self._parseDeclarationGroup(src.lstrip())
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 922, in _parseDeclarationGroup
    raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
CSSParseError: Declaration group closing '}' not found:: (u'{', u'0%{opacity:0}50%{opa')
Traceback (most recent call last):
  File "trypdf.py", line 16, in <module>
    pdf = pisa.CreatePDF(HTML,pdfFile)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 128, in pisaDocument
    c = pisaStory(src, path, link_callback, debug, default_css, xhtml, encoding,
 c=c, xml_output=xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 73, in pisaStory
    pisaParser(src, c, default_css, xhtml, encoding, xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_parser.py", line 626, in pisaParser
    c.parseCSS()
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_context.py", line 545, in parseCSS
    self.css = self.cssParser.parse(self.cssText)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 358, in parse
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 453, in _parseStylesheet
    src, atResults = self._parseAtKeyword(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 577, in _parseAtKeyword
    src, result = self._parseAtIdent(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 722, in _parseAtIdent
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 458, in _parseStylesheet
    src, ruleset = self._parseRuleset(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 737, in _parseRuleset
    src, properties = self._parseDeclarationGroup(src.lstrip())
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 922, in _parseDeclarationGroup
    raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
sx.w3c.cssParser.CSSParseError: Declaration group closing '}' not found:: (u'{', u'0%{opacity:0}50%{opa')
ERROR [ho.pisa] C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py line 223: Document error
Traceback (most recent call last):
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 128, in pisaDocument
    c = pisaStory(src, path, link_callback, debug, default_css, xhtml, encoding,
 c=c, xml_output=xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 73, in pisaStory
    pisaParser(src, c, default_css, xhtml, encoding, xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_parser.py", line 626, in pisaParser
    c.parseCSS()
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_context.py", line 545, in parseCSS
    self.css = self.cssParser.parse(self.cssText)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 358, in parse
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 453, in _parseStylesheet
    src, atResults = self._parseAtKeyword(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 577, in _parseAtKeyword
    src, result = self._parseAtIdent(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 722, in _parseAtIdent
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 458, in _parseStylesheet
    src, ruleset = self._parseRuleset(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 737, in _parseRuleset
    src, properties = self._parseDeclarationGroup(src.lstrip())
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 922, in _parseDeclarationGroup
    raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
CSSParseError: Declaration group closing '}' not found:: (u'{', u'0%{opacity:0}50%{opa')
Traceback (most recent call last):
  File "trypdf.py", line 16, in <module>
    pdf = pisa.CreatePDF(HTML,pdfFile)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 128, in pisaDocument
    c = pisaStory(src, path, link_callback, debug, default_css, xhtml, encoding,
 c=c, xml_output=xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_document.py", line 73, in pisaStory
    pisaParser(src, c, default_css, xhtml, encoding, xml_output)
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_parser.py", line 626, in pisaParser
    c.parseCSS()
  File "C:\Python27\lib\site-packages\sx\pisa3\pisa_context.py", line 545, in parseCSS
    self.css = self.cssParser.parse(self.cssText)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 358, in parse
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 453, in _parseStylesheet
    src, atResults = self._parseAtKeyword(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 577, in _parseAtKeyword
    src, result = self._parseAtIdent(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 722, in _parseAtIdent
    src, stylesheet = self._parseStylesheet(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 458, in _parseStylesheet
    src, ruleset = self._parseRuleset(src)
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 737, in _parseRuleset
    src, properties = self._parseDeclarationGroup(src.lstrip())
  File "C:\Python27\lib\site-packages\sx\w3c\cssParser.py", line 922, in _parseDeclarationGroup
    raise self.ParseError('Declaration group closing \'}\' not found', src, ctxsrc)
sx.w3c.cssParser.CSSParseError: Declaration group closing '}' not found:: (u'{', u'0%{opacity:0}50%{opa')

xhtml2pdf 并不适用于所有网站。您可以改用 pdfkit(见下方代码):

编辑:我找到了另一个使用 PyQt 的解决方案(感谢 Mark K):


评论:它能同时转换 HTML 和 CSS 吗?
回复:@TheCreator 232 是的,可以。
# Alternative to pisa: render a live URL straight to a PDF file with pdfkit.
# NOTE(review): pdfkit drives the external wkhtmltopdf tool — confirm it is
# installed and on PATH before relying on this.
import pdfkit
pdfkit.from_url('http://google.com', 'out.pdf')  # fetch the page, write out.pdf
import sys 
from PyQt4.QtCore import *
from PyQt4.QtGui import * 
from PyQt4.QtWebKit import * 

# Load a web page in a QtWebKit view (never shown on screen) and print it to
# a PDF file once loading has finished.
app = QApplication(sys.argv)
web = QWebView()
web.load(QUrl("http://www.yahoo.com"))

# Configure the printer to emit an A4 PDF file rather than drive a device.
printer = QPrinter()
printer.setPageSize(QPrinter.A4)
printer.setOutputFormat(QPrinter.PdfFormat)
printer.setOutputFileName("fileOK.pdf")


def convertIt(ok=True):
    """Print the loaded page to the PDF printer and stop the event loop.

    Connected to the view's ``loadFinished(bool)`` signal.

    Args:
        ok: Load result delivered by the signal. Defaults to True so the
            function can still be called without arguments.
    """
    if not ok:
        # Do not report success (or write a blank PDF) for a page that
        # never loaded; surface the failure through the exit status.
        print("Page failed to load; no PDF generated")
        QApplication.exit(1)
        return
    web.print_(printer)
    print("Pdf generated")
    QApplication.exit()


QObject.connect(web, SIGNAL("loadFinished(bool)"), convertIt)
# app.exec_() returns the code passed to QApplication.exit().
sys.exit(app.exec_())