Php 将简单的HTML DOM空间放入类中
我使用简单的HTMLDOM从网站获取元素,但当class属性有空格时,我什么也得不到 源HTML来自Php 将简单的HTML DOM空间放入类中,php,html,simple-html-dom,Php,Html,Simple Html Dom,我使用简单的HTMLDOM从网站获取元素,但当class属性有空格时,我什么也得不到 源HTML来自 1. EDIT5 team.php <?php // START team.php class Team { public $name, $matches, $wins, $draws, $losses, $goals; public static function parseRow($row): ?self { $result
1.
EDIT5
team.php
<?php
// START team.php
class Team
{
public $name, $matches, $wins, $draws, $losses, $goals;
public static function parseRow($row): ?self
{
$result = new self();
$result->name = $result->parseMatch($row, 'span.team_name_span a');
if (null === $result->name) {
return null; // couldn't even match the name, probably not a team row, skip it
}
$result->matches = $result->parseMatch($row, 'td.col_matches_played');
$result->wins = $result->parseMatch($row, 'td.col_wins');
$result->draws = $result->parseMatch($row, 'td.col_draws');
$result->losses = $result->parseMatch($row, 'td.col_losses');
$result->goals = $result->parseMatch($row, 'td.col_goals');
return $result;
}
private function parseMatch($row, $selector)
{
if (!empty($match = $row->find($selector, 0))) {
return $match->innertext;
}
return null;
}
}
// END team.php
?>
解决方案:
类名没有空格,不要尝试匹配它们
SimpleHtmlDom不支持这样的属性选择器。另外,您正在匹配一个类,就好像它的类名中有空格一样。因此,与此相反:
$wins = $html->find("td[class=wins col_wins]");
$draws = $html->find("td[class=draws col_draws]");
$losses = $html->find("td[class=losses col_losses]");
执行以下操作以匹配同时匹配两个类名的td元素:
$wins = $html->find("td.wins.col_wins");
$draws = $html->find("td.draws.col_draws");
$losses = $html->find("td.losses.col_losses");
此外,HTML标记不要求您匹配这两个类来获取数据,您可以简单地执行以下操作:
$wins = $html->find("td.col_wins");
$draws = $html->find("td.col_draws");
$losses = $html->find("td.col_losses");
获取重复的选择器(在行中循环)。
您试图从表的行中提取的是数据数组。更具体地说,是这样的:
$teams = [
['Arsenal', matches, wins, ...],
['Liverpool', matches, wins, ...],
...
];
这意味着您需要对表的每一行运行相同的数据提取。SimpleHtmlDom通过jQuery(如find
方法)简化了这一过程,这些方法可以从任何匹配的元素调用
完全解
这个解决方案实际上定义了一个Team
对象来加载每一行的数据。未来的调整应该简单得多
这里需要注意的重要一点是,首先我们以$row
的形式循环遍历每个表行,并从$row->find([selector])
收集团队和数字
//START team.php
班队
{
公开$name、$matches、$wins、$draw、$loss、$goals;
公共函数构造($row)
{
$this->name=$this->parseMatch($row,'span.team_name_span a');
如果(null==$this->name){
return;//甚至无法匹配名称,可能不是团队行,跳过它
}
$this->matches=$this->parseMatch($row,'td.col_matches_played');
$this->wins=$this->parseMatch($row'td.col_wins');
$this->draws=$this->parseMatch($row,'td.col_draws');
$this->loss=$this->parseMatch($row,'td.col_loss');
$this->goals=$this->parseMatch($row,'td.col_goals');
}
私有函数parseMatch($row,$selector)
{
如果(!empty($match=$row->find($selector,0))){
返回$match->innertext;
}
返回null;
}
公共函数isValid()
{
返回null!=$this->name;
}
公共函数getMatchData()//示例
{
返回“
”$this->wins.:”.$this->matches.”;
}
}
//END team.php
//启动DOM解析块
$teams=[];
foreach($html->find('table.stats-table tr')作为$row){
$team=new team($row);//如果可能,将行加载到团队对象中
//如果该条目与行不匹配,则跳过该条目
如果($team->isValid()){
//我们在这里实际做的只是OOP的等价物:
//$teams[]=['name'=>$row->find('span.teams\u name\u span a',0)->innertext,…];
$teams[]=$teams;
}
}
foreach($teams作为$team){
echo“$team->name.”;
echo$团队->损失;
echo$team->getMatchData();
}
//结束DOM解析块
然后在属性选择器中正确引用类名字符串…?尝试使用%20而不是空格,或者将其放在quoteWhat$html->find()
此函数的作用中?我修复了PHP代码中的一个错误,请参阅please@CBroe对不起,我不明白你的要求谢谢你的回答,我有一个问题:我必须将它们与简单的HTMLDOM或高级HTMLDOM一起使用?这并不重要。支持它。有许多不同的项目使用“AdvancedHtmlDom”这个名称,但我怀疑他们也会支持它。很抱歉,它不起作用。我仍然只得到第一个值。我需要更改我的问题代码中的某些内容?如果您试图访问此类数据的多行,请提供整个表,其中包含3或4行您试图匹配的内容。请正确格式化HTML,而不是将其全部放在一行中。代码是完美的!你说得对,我会尽量说得更具体些。谢谢你的帮助和建议!
$wins = $html->find("td.wins.col_wins");
$draws = $html->find("td.draws.col_draws");
$losses = $html->find("td.losses.col_losses");
$wins = $html->find("td.col_wins");
$draws = $html->find("td.col_draws");
$losses = $html->find("td.col_losses");
$teams = [
['Arsenal', matches, wins, ...],
['Liverpool', matches, wins, ...],
...
];
// START team.php
class Team
{
public $name, $matches, $wins, $draws, $losses, $goals;
public function __construct($row)
{
$this->name = $this->parseMatch($row, 'span.team_name_span a');
if (null === $this->name) {
return; // couldn't even match the name, probably not a team row, skip it
}
$this->matches = $this->parseMatch($row, 'td.col_matches_played');
$this->wins = $this->parseMatch($row, 'td.col_wins');
$this->draws = $this->parseMatch($row, 'td.col_draws');
$this->losses = $this->parseMatch($row, 'td.col_losses');
$this->goals = $this->parseMatch($row, 'td.col_goals');
}
private function parseMatch($row, $selector)
{
if (!empty($match = $row->find($selector, 0))) {
return $match->innertext;
}
return null;
}
public function isValid()
{
return null !== $this->name;
}
public function getMatchData() //example
{
return "<br><b>". $this->wins .' : '. $this->matches . "</b>";
}
}
// END team.php
// START DOM parsing block
$teams = [];
foreach($html->find('table.stats-table tr') as $row) {
$team = new Team($row); // load the row into a Team object if possible
// skipp this entry if it couldn't match the row
if ($team->isValid()) {
// what were actually doing here is just the OOP equivalent of:
// $teams[] = ['name' => $row->find('span.team_name_span a',0)->innertext, ...];
$teams[] = $team;
}
}
foreach($teams as $team) {
echo "<h1>".$team->name."</h1>";
echo $team->losses;
echo $team->getMatchData();
}
// END DOM Parsing Block