
Python getElementById from a string

Tags: python, file-upload, html-parsing

I have the following program that tries to upload one (or more) files to an image upload site, but I'm having trouble working out how to parse the returned HTML to get the direct link (contained in a <download> element).

My code is as follows:

#!/usr/bin/python
# -*- coding: utf-8 -*-
import pycurl
import urllib
import urlparse
import xml.dom.minidom
import StringIO
import sys
import gtk
import os
import imghdr
import locale
import gettext
try:
    import pynotify
except:
    print "Install pynotify. It's whoasome!"

APP="Uploadir Uploader"
DIR="locale"

locale.setlocale(locale.LC_ALL, '')
gettext.bindtextdomain(APP, DIR)
gettext.textdomain(APP)
_ = gettext.gettext

##STRINGS
uploading = _("Uploading image to Uploadir.")
oneimage = _("1 image has been successfully uploaded.")
multimages = _("images have been successfully uploaded.")
uploadfailed = _("Unable to upload to Uploadir.")

class Uploadir:
    def __init__(self, args):

        self.images = []
        self.urls = []
        self.broadcasts = []
        self.username=""
        self.password=""
        if len(args) == 1:
            return
        else:
            for file in args:
                if file == args[0] or file == "":
                    continue
                if file.startswith("-u"):
                    self.username = file.split("-u")[1]
                    #print self.username
                    continue
                if file.startswith("-p"):
                    self.password = file.split("-p")[1]
                    #print self.password
                    continue
                self.type = imghdr.what(file)
                self.images.append(file)

        for file in self.images:
            self.upload(file)

        self.setClipBoard()

        self.broadcast(self.broadcasts)

    def broadcast(self, l):
        try:
            str = '\n'.join(l)
            n = pynotify.Notification(str)
            n.set_urgency(pynotify.URGENCY_LOW)
            n.show()
        except:
            for line in l:
                print line


    def upload(self, file):
        #Try to login
        cookie_file_name = "/tmp/uploadircookie"

        if ( self.username!="" and self.password!=""):
            print "Uploadir authentication in progress"
            l=pycurl.Curl()
            loginData = [ ("username",self.username),("password", self.password), ("login", "Login") ]
            l.setopt(l.URL, "http://uploadir.com/user/login")
            l.setopt(l.HTTPPOST, loginData)
            l.setopt(l.USERAGENT,"User-Agent: Uploadir (Python Image Uploader)")
            l.setopt(l.FOLLOWLOCATION,1)
            l.setopt(l.COOKIEFILE,cookie_file_name)
            l.setopt(l.COOKIEJAR,cookie_file_name)
            l.setopt(l.HEADER,1)
            loginDataReturnedBuffer = StringIO.StringIO()
            l.setopt( l.WRITEFUNCTION, loginDataReturnedBuffer.write )

            if l.perform():
                self.broadcasts.append("Login failed. Please check connection.")
                l.close()
                return

            loginDataReturned = loginDataReturnedBuffer.getvalue()
            l.close()
            #print loginDataReturned

            if loginDataReturned.find("<li>Your supplied username or password is invalid.</li>")!=-1:
                self.broadcasts.append("Uploadir authentication failed. Username/password invalid.")
                return
            else:
                self.broadcasts.append("Uploadir authentication successful.")

            #cookie = loginDataReturned.split("Set-Cookie: ")[1]
            #cookie = cookie.split(";",0)

            #print cookie



        c = pycurl.Curl()

        values = [
                ("file", (c.FORM_FILE, file)),
                ("terms", "1"),
                ("submit", "submit")
             ]

        buf = StringIO.StringIO()

        c.setopt(c.URL, "http://uploadir.com/file/upload")
        c.setopt(c.HTTPPOST, values)
        c.setopt(c.COOKIEFILE, cookie_file_name)
        c.setopt(c.COOKIEJAR, cookie_file_name)
        c.setopt(c.WRITEFUNCTION, buf.write)


        if c.perform():
            self.broadcasts.append(uploadfailed+" "+file+".")
            c.close()
            return

        self.result = buf.getvalue()
        #print self.result
        c.close()

        doc = urlparse.urlparse(self.result)

        print doc

        self.urls.append(doc.getElementsByTagName("download")[0].childNodes[0].nodeValue)

    def setClipBoard(self):
        c = gtk.Clipboard()
        c.set_text('\n'.join(self.urls))
        c.store()
        if len(self.urls) == 1:
            self.broadcasts.append(oneimage)
        elif len(self.urls) != 0:
            self.broadcasts.append(str(len(self.urls))+" "+multimages)

if __name__ == '__main__':
    uploadir = Uploadir(sys.argv)
This has nothing to do with parsing HTML. All urlparse does is break a URL up into its pieces: the scheme, the network location, the path, and so on. For example:

>>> urlparse.urlparse("http://www.stackoverflow.com/questions/4699888")
ParseResult(scheme='http', netloc='www.stackoverflow.com', path='/questions/4699888', params='', query='', fragment='')
For parsing HTML, try BeautifulSoup.
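As a rough illustration, here is a minimal sketch of how the link could be pulled out of the response with BeautifulSoup 3 (the Python 2 release). The self.result value and the <download> tag name come from the question's code; the helper name and the assumption that the response actually contains such an element are mine:

from BeautifulSoup import BeautifulSoup  # BeautifulSoup 3, for Python 2

def extract_download_url(html):
    # Parse the markup returned by the upload request.
    soup = BeautifulSoup(html)
    # Grab the first <download> element, if any, and return its text content.
    tag = soup.find('download')
    if tag is None or tag.string is None:
        return None
    return tag.string

# Inside upload(), instead of urlparse.urlparse(self.result):
# url = extract_download_url(self.result)
# if url is not None:
#     self.urls.append(url)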

Please isolate the relevant lines of code, and perhaps add the relevant snippet of the HTML you are trying to parse, otherwise this question will be hard to answer...
Edited my post with that information. How would I go about this with BeautifulSoup?
@matthewgall: I have never used BeautifulSoup myself, but you can get started by reading its documentation.
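For completeness, a similar sketch is possible with xml.dom.minidom, which the script already imports. This assumes the server's reply is well-formed markup containing a <download> element, as the original getElementsByTagName("download") call implies; minidom will raise an ExpatError otherwise, which is why an HTML parser such as BeautifulSoup is usually the safer choice:

import xml.dom.minidom

def extract_download_url_minidom(markup):
    # parseString() only accepts well-formed XML/XHTML; real-world HTML
    # often is not, so expect ExpatError on messy pages.
    doc = xml.dom.minidom.parseString(markup)
    nodes = doc.getElementsByTagName("download")
    if not nodes or not nodes[0].childNodes:
        return None
    return nodes[0].childNodes[0].nodeValue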