Php 从字符串中获取URL
有一段时间,我一直在搜索一个代码,用PHP从字符串中提取URL。我基本上是想从一条消息中得到一个缩短的URL,然后再做一个HEAD请求来找到实际的链接 有人有从字符串返回URL的代码吗 提前谢谢 为幽灵狗编辑: 下面是我正在分析的示例:Php 从字符串中获取URL,php,string,url,Php,String,Url,有一段时间,我一直在搜索一个代码,用PHP从字符串中提取URL。我基本上是想从一条消息中得到一个缩短的URL,然后再做一个HEAD请求来找到实际的链接 有人有从字符串返回URL的代码吗 提前谢谢 为幽灵狗编辑: 下面是我正在分析的示例: $test = "I am testing this application for http://test.com YAY!"; 这是我得到的解决问题的答案: $regex = '$\b(https?|ftp|file)://[-A-Z0-9+&@#
$test = "I am testing this application for http://test.com YAY!";
这是我得到的解决问题的答案:
$regex = '$\b(https?|ftp|file)://[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]$i';
preg_match_all($regex, $string, $result, PREG_PATTERN_ORDER);
$A = $result[0];
foreach($A as $B)
{
$URL = GetRealURL($B);
echo "$URL<BR>";
}
function GetRealURL( $url )
{
$options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_HEADER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_ENCODING => "",
CURLOPT_USERAGENT => "spider",
CURLOPT_AUTOREFERER => true,
CURLOPT_CONNECTTIMEOUT => 120,
CURLOPT_TIMEOUT => 120,
CURLOPT_MAXREDIRS => 10,
);
$ch = curl_init( $url );
curl_setopt_array( $ch, $options );
$content = curl_exec( $ch );
$err = curl_errno( $ch );
$errmsg = curl_error( $ch );
$header = curl_getinfo( $ch );
curl_close( $ch );
return $header['url'];
}
$regex='$\b(https?| ftp |文件):/[-A-Z0-9+&@#/%?=~|!:,.;]*[-A-Z0-9+&@#/%=~|]$i';
preg_match_all($regex,$string,$result,preg_PATTERN_ORDER);
$A=$result[0];
foreach($A为$B)
{
$URL=GetRealURL($B);
回显“$URL
”;
}
函数GetRealURL($url)
{
$options=array(
CURLOPT_RETURNTRANSFER=>true,
CURLOPT_HEADER=>true,
CURLOPT_FOLLOWLOCATION=>true,
CURLOPT_编码=>“”,
CURLOPT_USERAGENT=>“蜘蛛”,
CURLOPT_AUTOREFERER=>true,
CURLOPT_CONNECTTIMEOUT=>120,
CULLOPT_超时=>120,
CURLOPT_MAXREDIRS=>10,
);
$ch=curl\u init($url);
curl_setopt_数组($ch$options);
$content=curl\u exec($ch);
$err=curl\u errno($ch);
$errmsg=curl\u error($ch);
$header=curl\u getinfo($ch);
卷曲关闭($ch);
返回$header['url'];
}
有关详细信息,请参见答案。类似于:
$matches = array();
preg_match_all('/http:\/\/[a-zA-Z0-9.-]+\/[a-zA-Z0-9.-]+/', $text, $matches);
print_r($matches);
您需要调整regexp以获得您想要的结果
为了获得URL,考虑一些简单的事情:
curl-Ihttp://url.com/path |grep位置:|awk'{print$2}'
此代码可能会有所帮助(请参阅MadTechie的最新帖子):
给我举个例子看看你在说什么好吗?是的,这正是我需要的
<?php
$string = "some random text http://tinyurl.com/9uxdwc some http://google.com random text http://tinyurl.com/787988";
$regex = '$\b(https?|ftp|file)://[-A-Z0-9+&@#/%?=~_|!:,.;]*[-A-Z0-9+&@#/%=~_|]$i';
preg_match_all($regex, $string, $result, PREG_PATTERN_ORDER);
$A = $result[0];
foreach($A as $B)
{
$URL = GetRealURL($B);
echo "$URL<BR>";
}
function GetRealURL( $url )
{
$options = array(
CURLOPT_RETURNTRANSFER => true,
CURLOPT_HEADER => true,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_ENCODING => "",
CURLOPT_USERAGENT => "spider",
CURLOPT_AUTOREFERER => true,
CURLOPT_CONNECTTIMEOUT => 120,
CURLOPT_TIMEOUT => 120,
CURLOPT_MAXREDIRS => 10,
);
$ch = curl_init( $url );
curl_setopt_array( $ch, $options );
$content = curl_exec( $ch );
$err = curl_errno( $ch );
$errmsg = curl_error( $ch );
$header = curl_getinfo( $ch );
curl_close( $ch );
return $header['url'];
}
?>