Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/php/253.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
Php 需要帮助分析HTTP表单吗_Php_Html_Post_Curl - Fatal编程技术网

Php 需要帮助分析HTTP表单吗

Php 需要帮助分析HTTP表单吗,php,html,post,curl,Php,Html,Post,Curl,为了训练自己使用php和HTML表单,我决定制作一个小型web应用程序,它可以从另一个网站收集数据,但可以在移动设备上显示数据 在本次练习中,我选择了我所在地区的公交公司所在地:。我分析了网站,找到了名为“form1”的表单,它通过POST方法向网站发送数据 我开始编写php代码,在互联网上发现可以使用cURL发送帖子字段。所以我做了。不幸的是,它不起作用。我得到了网站的错误页面。所以我猜肯定有一些字段丢失了,但我已经检查了所有内容,找不到其他字段。就这样,我再次来到这里,寻求帮助 web应用程


为了训练自己使用php和HTML表单,我决定制作一个小型web应用程序,它可以从另一个网站收集数据,但可以在移动设备上显示数据

在本次练习中,我选择了我所在地区的公交公司所在地:。我分析了网站,找到了名为“form1”的表单,它通过POST方法向网站发送数据

我开始编写php代码,在互联网上发现可以使用cURL发送帖子字段。所以我做了。不幸的是,它不起作用。我得到了网站的错误页面。所以我猜肯定有一些字段丢失了,但我已经检查了所有内容,找不到其他字段。就这样,我再次来到这里,寻求帮助

web应用程序位于上,也可以位于上

如果有人能帮我解决这个问题,我将不胜感激,
ief2


PS:代码的某些部分是荷兰语编写的,因此这里有一些翻译:

  • Gemeente=城镇/城市
  • Plaats=位置
  • Nummer=数字
  • Datum=日期
  • Dag=天
  • Maand=月份
  • Jaar=年
  • Uur=小时
  • Aankomst=到达
  • Vertrek=离开
  • Berekenen=计算

PPS:下载链接显然不起作用,但是我下载它时没有问题,所以这里有一些代码片段:

index.php

德丽金移动
维特里克:
格米恩特:
斯特拉特:
努默:

安科姆斯特:
格米恩特:
斯特拉特:
努默:

数据:
安科姆斯特
Vertrek

Dag:字段丢失,服务器对post数据的响应非常奇怪。我只能自动化一页。要点击其他链接,cookies显然是不够的

我已经编写了一些代码,这些代码可能对需要了解表单布局的其他人有用:

HTMLFormExtractor.py

#!/usr/bin/python
import sys
import getopt
import urllib
import re

# ############################
# This code may be used by anyone. It may be used in both free
# and commercial software. It may be copied, modified and even
# be sold. The creator of this code takes no responsibility for
# any damage this script could do.
# ############################

# ############################
# ############################
# Usage: ./exec [-x] [URL]
# 
# This application logs all forms of an HTML document and its
# objects which have the HTML 'name'-attribute set. The program
# currently only works when the attributes of the objects are
# styled like the XML format (eg: name="myname").
# 
# Options:
#   -x: Create an XML document of the following form:
#           ==== BEGIN XML ====
#           formlist
#               form (variable)
#                   attribute (variable)
#                       name
#                       value
#
#                   object (variable)
#                       type (eg: input)
#                       name (eg: username)
#           ==== END XML ====
#
#   URL: a URL pointing to an available HTML file. If it's not
#       specified, the program will read the HTML document
#       from the standard input.
#
# ############################

# ===== DATA =====
# Tag names that getFormObjects() looks for inside a form.
FORM_OBJECTS_TAG_NAME = (
    "input",
    "textarea",
    "label",
    "fieldset",
    "legend",
    "select",
    "optgroup",
    "option",
    "button",
)



# ===== CLASSES =====
class HTMLAttribute:
    """A single name/value attribute of an HTML tag.

    name           -- the attribute name (text before the '=')
    value          -- the attribute value with its surrounding quotes removed
    originalString -- the source text the attribute was parsed from, or None
                      when the attribute was built directly from name/value
    """

    def __init__(self, name, value, orString = None):
        self.name = name
        self.value = value
        # Keep the caller-supplied source text (it used to be discarded).
        self.originalString = orString

    @classmethod
    def withAttributeString(cls, string):
        """Build an attribute from a string of the form attrName="value".

        The name is the first run of word characters followed by '='; the
        value is the first quoted run (single or double quotes) in the
        string. Raises IndexError when either part cannot be found.
        """
        # First "name=" match, minus the trailing '='.
        attrName = re.findall(r"\w+=", string)[0][:-1]

        # First quoted run, minus the opening and closing quote characters.
        value = re.findall(r"""["'].*?["']""", string)[0][1:-1]

        return cls(attrName, value, string)

class HTMLObject:
    """An HTML tag: its tag name plus the attributes attached to it."""

    def __init__(self, aName):
        self.attributes = []  # HTMLAttribute instances, in the order added
        self.name = aName

    def addAttribute(self, anAttribute):
        """Append anAttribute to this tag's attribute list."""
        self.attributes.append(anAttribute)

    def getAttributeWithName(self, aName):
        """Return the first attribute named aName (case-insensitive), or None."""
        wanted = aName.lower()
        matches = (attr for attr in self.attributes if attr.name.lower() == wanted)
        return next(matches, None)

    @classmethod
    def withTagString(cls, string):
        """Build an object from text containing <aTagName attrName="value" ... >.

        Only the first <...> run in the text is looked at. Raises IndexError
        when no tag, or no tag name, can be found.
        """
        # Isolate the first tag; re.S lets the tag span several lines.
        firstTag = re.compile(r"<.*?>", re.S).findall(string)[0]

        # Word characters right after '<', matched together with the
        # whitespace or '>' that ends them; that delimiter is then dropped.
        tagName = re.findall(r"(?<=<)\w+[\s>]", firstTag)[0][:-1]

        tagObject = cls(tagName)
        for rawAttribute in re.findall(r"""\w+=["'].*?["']""", firstTag):
            tagObject.addAttribute(HTMLAttribute.withAttributeString(rawAttribute))

        return tagObject

class HTMLForm:
    """Pairs a form name with the objects that belong to that form."""

    def __init__(self, name, htmlObjects):
        # htmlObjects is expected to be a list of HTMLObject.
        self.HTMLObjects = htmlObjects
        self.name = name

# ===== FUNCTIONS =====
def getFormsFromHTML(htmlData):
    """Return the source text of every <form ...>...</form> span in htmlData.

    Matching ignores case and may span several lines; each match ends at the
    first closing </form> that follows its opening tag.
    """
    formPattern = re.compile("<form.*?>.*?</form>", re.S | re.IGNORECASE)
    return formPattern.findall(htmlData)

def getFormObjects(aForm):
    """Return a list of HTMLObject, one per form-object tag found in aForm.

    A tag counts when its text starts with '<' followed by one of the names
    in FORM_OBJECTS_TAG_NAME (case-insensitive). An empty name list yields
    an empty result.
    """
    if len(FORM_OBJECTS_TAG_NAME) == 0:
        return []

    # One alternation over all tag names, then everything up to the next '>'.
    tagPattern = re.compile(
        "<(?:" + "|".join(FORM_OBJECTS_TAG_NAME) + ").*?>",
        re.I | re.S)

    return [HTMLObject.withTagString(tagText)
            for tagText in tagPattern.findall(aForm)]

def printForms(foundForms, foundObjects):
    """Print a human-readable summary of every form to standard output.

    foundForms   -- list of HTMLObject, one per form tag
    foundObjects -- list of lists of HTMLObject; the list at index i holds
                    the objects contained in foundForms[i]

    For each form this prints its attributes, the number of objects found in
    that form, and every object that has a 'name' attribute (objects without
    one are counted but not listed).
    """
    for counter, aForm in enumerate(foundForms):
        print("===== FORM " + str(counter + 1) + " =====")

        print("\tATTRIBUTES:")
        for anAttribute in aForm.attributes:
            print("\t\t" + anAttribute.name + ": '" + anAttribute.value + "'")

        # Count the objects of THIS form; len(foundObjects) would be the
        # number of forms instead.
        formObjects = foundObjects[counter]
        print("\n\t" + str(len(formObjects)) + " OBJECTS:")
        for anObject in formObjects:
            nameAttribute = anObject.getAttributeWithName("name")
            if nameAttribute is not None:
                print("\t\t" + anObject.name + " (name=\"" + nameAttribute.value + "\")")

        print("\n")


def createXMLString(foundForms, foundObjects):
    """Return an XML document describing the forms and their named objects.

    foundForms   -- list of HTMLObject, one per form tag
    foundObjects -- list of lists of HTMLObject; the list at index i holds
                    the objects contained in foundForms[i]

    Layout of the result:
        formlist
            form (mult)
                attribute (mult)
                    name
                    value

                object (mult)
                    type (eg: input)
                    name (eg: username)

    Only objects that have a 'name' attribute are written. All text is
    XML-escaped so that values containing '&', '<' or '>' still give a
    well-formed document.
    """
    # Function-scope import so the block does not depend on the file header.
    from xml.sax.saxutils import escape

    pieces = ["<formlist>\n"]
    for counter, aForm in enumerate(foundForms):
        pieces.append("\t<form>\n")

        # add all attributes of the form tag itself
        for anAttr in aForm.attributes:
            pieces.append("\t\t<attribute>\n")
            pieces.append("\t\t\t<name>" + escape(anAttr.name) + "</name>\n")
            pieces.append("\t\t\t<value>" + escape(anAttr.value) + "</value>\n")
            pieces.append("\t\t</attribute>\n")

        # add all input objects if they have a name
        for anObject in foundObjects[counter]:
            nameAttr = anObject.getAttributeWithName("name")
            if nameAttr is not None:
                pieces.append("\t\t<object>\n")
                pieces.append("\t\t\t<type>" + escape(anObject.name) + "</type>\n")
                pieces.append("\t\t\t<name>" + escape(nameAttr.value) + "</name>\n")
                pieces.append("\t\t</object>\n")

        # close the form element (this used to emit a second opening tag)
        pieces.append("\t</form>\n\n")

    # Drop the final newline (the blank line after the last form, or the one
    # after <formlist> when there are no forms) before closing the root.
    xmlString = "".join(pieces)
    return xmlString[:-1] + "</formlist>\n"


# ===== MAIN =====
# Parse the command line options
userArgv = sys.argv[1:]
flags, arguments = getopt.getopt(userArgv, "x")
wantsXMLFormat = ('-x', '') in flags
hasURL = len(arguments) > 0

# Get the HTML data: from the URL when one was given, else from stdin
myHTML = None
if hasURL:
    myURL = arguments[0]
    try:
        # Python 2 urllib.urlopen reports a failed connection by raising
        # IOError; it never returns None.
        urlHandle = urllib.urlopen(myURL)
    except IOError:
        print("Failed to open the URL")
        sys.exit(1)
    try:
        myHTML = urlHandle.read()
    finally:
        # Release the connection even when reading fails.
        urlHandle.close()

else:
    myHTML = sys.stdin.read()

# Get all forms
htmlForms = getFormsFromHTML(myHTML)

# Build two parallel lists: the form tags and, per form, its objects
foundForms = []
foundObjects = [] # list of list
for aFormTag in htmlForms:
    # append the form tag itself
    formHTMLObject = HTMLObject.withTagString(aFormTag)
    foundForms.append(formHTMLObject)

    # append the objects found inside the form (scanned once per form)
    allObjects = getFormObjects(aFormTag)
    foundObjects.append(allObjects)


# Print or create xml
if not wantsXMLFormat:
    printForms(foundForms, foundObjects)
else:
    myXMLString = createXMLString(foundForms, foundObjects)
    print(myXMLString)
#/usr/bin/python
导入系统
导入getopt
导入URL库
进口稀土
# ############################
#任何人都可以使用此代码。它可以在两种情况下免费使用
#和商业软件。它可以被复制、修改甚至修改
#被卖掉。此代码的创建者对以下内容不承担任何责任:
#此脚本可能造成的任何损坏。
# ############################
# ############################
# ############################
#用法:./exec[-x][URL]
# 
#此应用程序记录HTML文档的所有形式,并且
#具有HTML“name”属性集的对象。节目
#当前仅当对象的属性为
#样式类似于XML格式(例如:name=“myname”)。
# 
#选项:
#-x:创建以下形式的XML文档:
#==开始XML====
#表单列表
#形式(变量)
#属性(变量)
#名字
#价值观
#
#对象(变量)
#类型(例如:输入)
#名称(例如:用户名)
#==结束XML====
#
#URL:指向可用HTML文件的URL。如果不是
#指定程序将读取HTML文档
#从标准输入。
#
# ############################
#====数据=====
全局窗体\u对象\u标记\u名称
表单\对象\标签\名称=(“输入”,
“文本区域”,
“标签”,
“字段集”,
“传奇”,
“选择”,
“optgroup”,
“选择权”,
“按钮”)
#=======课程=====
类HTMLAttribute:
定义初始化(self、name、value或string=None):
self.name=名称
自我价值=价值
self.originalString=无
@类方法
def withAttributeString(cls,字符串):
“”采用attrNam=“value”形式的字符串
attrNameRegex=“\w+=”
attrName=re.findall(attrNameRegex,字符串)[0]
attrName=attrName[0:len(attrName)-1]
valueRegex=“[\”].*?[\”']
value=re.findall(valueRegex,string)[0]
值=值[1:len(值)-1]
返回cls(属性名、值、字符串)
类HTMLObject:
定义初始化(self,aName):
self.name=aName
self.attributes=[]包含HTMLAttribute
def addAttribute(自身、属性):
self.attributes.append(anaAttribute)
def getAttributeWithName(self,aName):
“”“返回none或HTLMAttribute”“”
aName=aName.lower()
对于self.attributes中的anAttribute:
如果anAttribute.name.lower()==aName:返回anAttribute
一无所获
@类方法
def withTagString(cls,字符串):
“”“采用以下格式的字符串”“”
tagOnyRegex=“”
regObj=re.compile(tagOnyRegex,re.S)
string=re.findall(regObj,string)[0]

tagNameRegex=“(?
document.forms[1].elements.length
说有14个,但你只列出了11个。还有一些网站拒绝在没有cookie的情况下运行(一个跟踪和两个javascript测试)。查看Firebug网络流量。我确实忘记了13个中的一个(有一个收音机),但你看到的是
Route.php
,这是结果解析器,它有一个11列的表。但无论如何,我检查了cookies,我认为你是对的。当我禁用cookies时,网站上说“Uw session is verlopen”(=“你的会话已过期”)。现在是否可以手工制作这样的cookie?或者是否有其他方法来解决此问题?没有查看您的代码。下载链接无效。最好在此处粘贴摘录。--cURL允许以某种方式设置cookie,请查看各种
CURLOPT_cookie*
标志。我认为这是此类任务最常见的问题。
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
    <head>
        <title>De Lijn Mobile - Berekeningen</title>
    </head>

    <body>
        <?php
            // Reads the posted route form, builds the departure/arrival
            // places and the date, and hands them to searchDeLijn().
            require_once("./Plaats.php");
            require_once("./Date.php");
            require_once("./DeLijn.php");

            echo "Gathering data...<br>";
            // Departure ("vertrek") address: town, street, house number.
            $gemeente = $_POST["vertrekGemeente"];
            $straat = $_POST["vertrekStraat"];
            $nummer = $_POST["vertrekNummer"];
            $vertrekPlaats = new Plaats($gemeente, $straat, $nummer);

            // Arrival ("aankomst") address, same three fields.
            $gemeente = $_POST["aankomstGemeente"];
            $straat = $_POST["aankomstStraat"];
            $nummer = $_POST["aankomstNummer"];
            $aankomstPlaats = new Plaats($gemeente, $straat, $nummer);

            // Date type plus day, month, year, hour and minutes.
            $datumType = $_POST["datumType"];
            $dag = $_POST["datumDag"];
            $maand = $_POST["datumMaand"];
            $jaar = $_POST["datumJaar"];
            $uur = $_POST["datumUur"];
            $min = $_POST["datumMinuten"];
            $datum = Date::withDate($jaar, $maand, $dag, $uur, $min);
            // NOTE(review): the month is assigned again after withDate();
            // presumably a workaround for Date.php - confirm.
            $datum->month = $maand;

            echo "Searching...<br>";
            searchDeLijn($vertrekPlaats,
                $aankomstPlaats,
                $datumType,
                $datum);

        ?>
    </body>
</html>
<?php

require_once("Route.php");
require_once("Date.php");
require_once("Plaats.php");

// ==== Posts a route query to the De Lijn route planner.
// ==== Returns the result of extractRoutesFromXMLData() for the response,
// ==== or null when the HTTP request itself fails.
// $dep / $ar : objects exposing ->gemeente, ->straat and ->nummer
// $dateType  : DATE_DEPARTURE selects "vertrekken"; anything else "aankomen"
// $date      : object exposing ->day, ->month, ->year, ->hour and ->minutes
define('DATE_ARRIVAL', "aankomst");
define('DATE_DEPARTURE', "vertrek");
function searchDeLijn($dep, $ar, $dateType, $date) {
    $vertrekkenOfAankomen = "aankomen";
    if(DATE_DEPARTURE === $dateType) {
        $vertrekkenOfAankomen = "vertrekken";
    }
    // Round the minutes down to a multiple of five.
    $myMins = (int)$date->minutes;
    $myMins -= ($myMins % 5);
    $postFields = array(
        "form1:vertrekGemeenteInput" => $dep->gemeente,
        "form1:vertrekStraatInput" => $dep->straat,
        "form1:vertrekNrInput" => $dep->nummer,

        "form1:aankomstGemeenteInput" => $ar->gemeente,
        "form1:aankomstStraatInput" => $ar->straat,
        "form1:aankomstNrInput" => $ar->nummer,

        "form1:vertrekkenOfAankomenRadio" => $vertrekkenOfAankomen,
        // The (string)(int) casts drop leading zeros ("05" becomes "5").
        "form1:dagCombo" => (string)(int)$date->day,
        "form1:maandCombo" => (string)(int)$date->month,
        "form1:jaarCombo" => $date->year,
        "form1:uurCombo" => (string)(int)$date->hour,
        "form1:minutenCombo" => (string)$myMins);

    // Dump the fields that are about to be sent.
    print_r($postFields);

    // do the curl
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL,
        'http://reisinfo.delijn.be/reisinfo/RouteplannerHomeBeperktServlet?taal=nl');
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $postFields);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $contents = curl_exec($ch);
    curl_close($ch);
    // Strict check: only a real curl failure yields false. An empty or "0"
    // response body is still a response and must not be taken for one.
    if($contents === false) {
        return null;
    }

    // Echo the raw response.
    echo $contents;

    $myRouteObjects = extractRoutesFromXMLData($contents);
    return $myRouteObjects;
}

// ==== Returns an array holding one DOM node per result-table row, or null
// ==== when the results table or its rows cannot be located.
// NOTE(review): the original header promised Route objects, but the nodes
// are collected as-is; conversion presumably happens elsewhere - confirm.
function extractRoutesFromXMLData($dataString) {
    // Stop when the table body was NOT found (the checks used to be inverted,
    // so the function returned null on success).
    $tableBody = getResultsTableBody($dataString);
    if($tableBody === null) { return null; }

    $tableRows = getTableRowsOfTableBody($tableBody);
    if($tableRows === null) { return null; }

    // put them in an array
    $myArray = array();
    $count = $tableRows->length;
    for($i = 0; $i < $count; $i++) {
        // Take the i-th row (this used to read item(0) on every pass).
        $aNode = $tableRows->item($i);
        $myArray[] = $aNode;
    }

    return $myArray;
}

// ==== Returns a DOMDocument holding the <TBODY> of the table with
// ==== id="routeplanner_overzicht", or null when the table or its body is
// ==== missing or the body is not well-formed XML.
function getResultsTableBody($dataString) {
    // Get table element (case-insensitive, may span several lines).
    // The subject is the $dataString parameter; the previous code passed an
    // undefined variable here and therefore never matched.
    $status = preg_match('/<TABLE id="routeplanner_overzicht".*?>.*?<\/TABLE>/is',
            $dataString, $matches);
    if($status == 0) {
        return null;
    }

    $tableElement = $matches[0];

    // Extract body
    $status = preg_match('/<TBODY>.*?<\/TBODY>/is',
            $tableElement, $matches);
    if($status == 0) {
        return null;
    }

    $doc = new DOMDocument();
    // loadXML() returns false when the fragment cannot be parsed.
    if($doc->loadXML($matches[0]) === false) {
        return null;
    }

    return $doc;
}


// ==== Returns a DOMNodeList with the rows of the first table body in
// ==== $xmlDoc, or null when the XPath query fails.
function getTableRowsOfTableBody($xmlDoc) {
    // Query the document that was passed in (the previous code used an
    // undefined variable).
    $xpath = new DOMXPath($xmlDoc);
    // XPath positions start at 1, so [0] can never match. Both spellings are
    // queried because XML names are case-sensitive while the body is cut out
    // of the page with a case-insensitive pattern.
    $xpathres = $xpath->evaluate("//tbody[1]/tr | //TBODY[1]/TR");
    if($xpathres === false) {
        return null;
    }

    return $xpathres;
}
?>
#!/usr/bin/python
import sys
import getopt
import urllib
import re

# ############################
# This code may be used by anyone. It may be used in both free
# and commercial software. It may be copied, modified and even
# be sold. The creator of this code takes no responsibility for
# any damage this script could do.
# ############################

# ############################
# ############################
# Usage: ./exec [-x] [URL]
# 
# This application logs all forms of an HTML document and its
# objects which have the HTML 'name'-attribute set. The program
# currently only works when the attributes of the objects are
# styled like the XML format (eg: name="myname").
# 
# Options:
#   -x: Create an XML document of the following form:
#           ==== BEGIN XML ====
#           formlist
#               form (variable)
#                   attribute (variable)
#                       name
#                       value
#
#                   object (variable)
#                       type (eg: input)
#                       name (eg: username)
#           ==== END XML ====
#
#   URL: a URL pointing to an available HTML file. If it's not
#       specified, the program will read the HTML document
#       from the standard input.
#
# ############################

# ===== DATA =====
# Tag names that getFormObjects() looks for inside a form.
FORM_OBJECTS_TAG_NAME = (
    "input",
    "textarea",
    "label",
    "fieldset",
    "legend",
    "select",
    "optgroup",
    "option",
    "button",
)



# ===== CLASSES =====
class HTMLAttribute:
    """A single name/value attribute of an HTML tag.

    name           -- the attribute name (text before the '=')
    value          -- the attribute value with its surrounding quotes removed
    originalString -- the source text the attribute was parsed from, or None
                      when the attribute was built directly from name/value
    """

    def __init__(self, name, value, orString = None):
        self.name = name
        self.value = value
        # Keep the caller-supplied source text (it used to be discarded).
        self.originalString = orString

    @classmethod
    def withAttributeString(cls, string):
        """Build an attribute from a string of the form attrName="value".

        The name is the first run of word characters followed by '='; the
        value is the first quoted run (single or double quotes) in the
        string. Raises IndexError when either part cannot be found.
        """
        # First "name=" match, minus the trailing '='.
        attrName = re.findall(r"\w+=", string)[0][:-1]

        # First quoted run, minus the opening and closing quote characters.
        value = re.findall(r"""["'].*?["']""", string)[0][1:-1]

        return cls(attrName, value, string)

class HTMLObject:
    """An HTML tag: its tag name plus the attributes attached to it."""

    def __init__(self, aName):
        self.attributes = []  # HTMLAttribute instances, in the order added
        self.name = aName

    def addAttribute(self, anAttribute):
        """Append anAttribute to this tag's attribute list."""
        self.attributes.append(anAttribute)

    def getAttributeWithName(self, aName):
        """Return the first attribute named aName (case-insensitive), or None."""
        wanted = aName.lower()
        matches = (attr for attr in self.attributes if attr.name.lower() == wanted)
        return next(matches, None)

    @classmethod
    def withTagString(cls, string):
        """Build an object from text containing <aTagName attrName="value" ... >.

        Only the first <...> run in the text is looked at. Raises IndexError
        when no tag, or no tag name, can be found.
        """
        # Isolate the first tag; re.S lets the tag span several lines.
        firstTag = re.compile(r"<.*?>", re.S).findall(string)[0]

        # Word characters right after '<', matched together with the
        # whitespace or '>' that ends them; that delimiter is then dropped.
        tagName = re.findall(r"(?<=<)\w+[\s>]", firstTag)[0][:-1]

        tagObject = cls(tagName)
        for rawAttribute in re.findall(r"""\w+=["'].*?["']""", firstTag):
            tagObject.addAttribute(HTMLAttribute.withAttributeString(rawAttribute))

        return tagObject

class HTMLForm:
    """Pairs a form name with the objects that belong to that form."""

    def __init__(self, name, htmlObjects):
        # htmlObjects is expected to be a list of HTMLObject.
        self.HTMLObjects = htmlObjects
        self.name = name

# ===== FUNCTIONS =====
def getFormsFromHTML(htmlData):
    """Return the source text of every <form ...>...</form> span in htmlData.

    Matching ignores case and may span several lines; each match ends at the
    first closing </form> that follows its opening tag.
    """
    formPattern = re.compile("<form.*?>.*?</form>", re.S | re.IGNORECASE)
    return formPattern.findall(htmlData)

def getFormObjects(aForm):
    """Return a list of HTMLObject, one per form-object tag found in aForm.

    A tag counts when its text starts with '<' followed by one of the names
    in FORM_OBJECTS_TAG_NAME (case-insensitive). An empty name list yields
    an empty result.
    """
    if len(FORM_OBJECTS_TAG_NAME) == 0:
        return []

    # One alternation over all tag names, then everything up to the next '>'.
    tagPattern = re.compile(
        "<(?:" + "|".join(FORM_OBJECTS_TAG_NAME) + ").*?>",
        re.I | re.S)

    return [HTMLObject.withTagString(tagText)
            for tagText in tagPattern.findall(aForm)]

def printForms(foundForms, foundObjects):
    """Print a human-readable summary of every form to standard output.

    foundForms   -- list of HTMLObject, one per form tag
    foundObjects -- list of lists of HTMLObject; the list at index i holds
                    the objects contained in foundForms[i]

    For each form this prints its attributes, the number of objects found in
    that form, and every object that has a 'name' attribute (objects without
    one are counted but not listed).
    """
    for counter, aForm in enumerate(foundForms):
        print("===== FORM " + str(counter + 1) + " =====")

        print("\tATTRIBUTES:")
        for anAttribute in aForm.attributes:
            print("\t\t" + anAttribute.name + ": '" + anAttribute.value + "'")

        # Count the objects of THIS form; len(foundObjects) would be the
        # number of forms instead.
        formObjects = foundObjects[counter]
        print("\n\t" + str(len(formObjects)) + " OBJECTS:")
        for anObject in formObjects:
            nameAttribute = anObject.getAttributeWithName("name")
            if nameAttribute is not None:
                print("\t\t" + anObject.name + " (name=\"" + nameAttribute.value + "\")")

        print("\n")


def createXMLString(foundForms, foundObjects):
    """Return an XML document describing the forms and their named objects.

    foundForms   -- list of HTMLObject, one per form tag
    foundObjects -- list of lists of HTMLObject; the list at index i holds
                    the objects contained in foundForms[i]

    Layout of the result:
        formlist
            form (mult)
                attribute (mult)
                    name
                    value

                object (mult)
                    type (eg: input)
                    name (eg: username)

    Only objects that have a 'name' attribute are written. All text is
    XML-escaped so that values containing '&', '<' or '>' still give a
    well-formed document.
    """
    # Function-scope import so the block does not depend on the file header.
    from xml.sax.saxutils import escape

    pieces = ["<formlist>\n"]
    for counter, aForm in enumerate(foundForms):
        pieces.append("\t<form>\n")

        # add all attributes of the form tag itself
        for anAttr in aForm.attributes:
            pieces.append("\t\t<attribute>\n")
            pieces.append("\t\t\t<name>" + escape(anAttr.name) + "</name>\n")
            pieces.append("\t\t\t<value>" + escape(anAttr.value) + "</value>\n")
            pieces.append("\t\t</attribute>\n")

        # add all input objects if they have a name
        for anObject in foundObjects[counter]:
            nameAttr = anObject.getAttributeWithName("name")
            if nameAttr is not None:
                pieces.append("\t\t<object>\n")
                pieces.append("\t\t\t<type>" + escape(anObject.name) + "</type>\n")
                pieces.append("\t\t\t<name>" + escape(nameAttr.value) + "</name>\n")
                pieces.append("\t\t</object>\n")

        # close the form element (this used to emit a second opening tag)
        pieces.append("\t</form>\n\n")

    # Drop the final newline (the blank line after the last form, or the one
    # after <formlist> when there are no forms) before closing the root.
    xmlString = "".join(pieces)
    return xmlString[:-1] + "</formlist>\n"


# ===== MAIN =====
# Parse the command line options
userArgv = sys.argv[1:]
flags, arguments = getopt.getopt(userArgv, "x")
wantsXMLFormat = ('-x', '') in flags
hasURL = len(arguments) > 0

# Get the HTML data: from the URL when one was given, else from stdin
myHTML = None
if hasURL:
    myURL = arguments[0]
    try:
        # Python 2 urllib.urlopen reports a failed connection by raising
        # IOError; it never returns None.
        urlHandle = urllib.urlopen(myURL)
    except IOError:
        print("Failed to open the URL")
        sys.exit(1)
    try:
        myHTML = urlHandle.read()
    finally:
        # Release the connection even when reading fails.
        urlHandle.close()

else:
    myHTML = sys.stdin.read()

# Get all forms
htmlForms = getFormsFromHTML(myHTML)

# Build two parallel lists: the form tags and, per form, its objects
foundForms = []
foundObjects = [] # list of list
for aFormTag in htmlForms:
    # append the form tag itself
    formHTMLObject = HTMLObject.withTagString(aFormTag)
    foundForms.append(formHTMLObject)

    # append the objects found inside the form (scanned once per form)
    allObjects = getFormObjects(aFormTag)
    foundObjects.append(allObjects)


# Print or create xml
if not wantsXMLFormat:
    printForms(foundForms, foundObjects)
else:
    myXMLString = createXMLString(foundForms, foundObjects)
    print(myXMLString)