Warning: file_get_contents(/data/phpspider/zhask/data//catemap/1/php/297.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
PHP从字符串中删除URL_Php - Fatal编程技术网

PHP从字符串中删除URL

PHP从字符串中删除URL,php,Php,如果我有一个包含url的字符串(例如,我们称之为$url),例如 如何从字符串中删除URL? 困难在于,URL也可能在没有http:///的情况下显示,例如 $url = "Here is another funny site www.tinyurl.com/55555"; 没有HTML存在。如果存在http或www,我如何开始搜索,然后删除文本/数字/符号直到第一个空格?您需要编写一个正则表达式来提取URL。我重新阅读了这个问题,这里有一个功能可以按预期工作: function cleane

如果我有一个包含url的字符串(例如,我们称之为$url),例如

如何从字符串中删除URL? 困难在于,URL也可能在没有http:///的情况下显示,例如

$url = "Here is another funny site www.tinyurl.com/55555";

没有HTML存在。如果存在http或www,我如何开始搜索,然后删除文本/数字/符号直到第一个空格?

您需要编写一个正则表达式来提取URL。

我重新阅读了这个问题,这里有一个功能可以按预期工作:

function cleaner($url) {
  $U = explode(' ',$url);

  $W =array();
  foreach ($U as $k => $u) {
    if (stristr($u,'http') || (count(explode('.',$u)) > 1)) {
      unset($U[$k]);
      return cleaner( implode(' ',$U));
    }
  }
  return implode(' ',$U);
}

$url = "Here is another funny site www.tinyurl.com/55555 and http://www.tinyurl.com/55555 and img.hostingsite.com/badpic.jpg";
echo "Cleaned: " . cleaner($url);
编辑#2/#3(我一定很无聊)。以下是验证URL中是否存在TLD的版本:

function containsTLD($string) {
  preg_match(
    "/(AC($|\/)|\.AD($|\/)|\.AE($|\/)|\.AERO($|\/)|\.AF($|\/)|\.AG($|\/)|\.AI($|\/)|\.AL($|\/)|\.AM($|\/)|\.AN($|\/)|\.AO($|\/)|\.AQ($|\/)|\.AR($|\/)|\.ARPA($|\/)|\.AS($|\/)|\.ASIA($|\/)|\.AT($|\/)|\.AU($|\/)|\.AW($|\/)|\.AX($|\/)|\.AZ($|\/)|\.BA($|\/)|\.BB($|\/)|\.BD($|\/)|\.BE($|\/)|\.BF($|\/)|\.BG($|\/)|\.BH($|\/)|\.BI($|\/)|\.BIZ($|\/)|\.BJ($|\/)|\.BM($|\/)|\.BN($|\/)|\.BO($|\/)|\.BR($|\/)|\.BS($|\/)|\.BT($|\/)|\.BV($|\/)|\.BW($|\/)|\.BY($|\/)|\.BZ($|\/)|\.CA($|\/)|\.CAT($|\/)|\.CC($|\/)|\.CD($|\/)|\.CF($|\/)|\.CG($|\/)|\.CH($|\/)|\.CI($|\/)|\.CK($|\/)|\.CL($|\/)|\.CM($|\/)|\.CN($|\/)|\.CO($|\/)|\.COM($|\/)|\.COOP($|\/)|\.CR($|\/)|\.CU($|\/)|\.CV($|\/)|\.CX($|\/)|\.CY($|\/)|\.CZ($|\/)|\.DE($|\/)|\.DJ($|\/)|\.DK($|\/)|\.DM($|\/)|\.DO($|\/)|\.DZ($|\/)|\.EC($|\/)|\.EDU($|\/)|\.EE($|\/)|\.EG($|\/)|\.ER($|\/)|\.ES($|\/)|\.ET($|\/)|\.EU($|\/)|\.FI($|\/)|\.FJ($|\/)|\.FK($|\/)|\.FM($|\/)|\.FO($|\/)|\.FR($|\/)|\.GA($|\/)|\.GB($|\/)|\.GD($|\/)|\.GE($|\/)|\.GF($|\/)|\.GG($|\/)|\.GH($|\/)|\.GI($|\/)|\.GL($|\/)|\.GM($|\/)|\.GN($|\/)|\.GOV($|\/)|\.GP($|\/)|\.GQ($|\/)|\.GR($|\/)|\.GS($|\/)|\.GT($|\/)|\.GU($|\/)|\.GW($|\/)|\.GY($|\/)|\.HK($|\/)|\.HM($|\/)|\.HN($|\/)|\.HR($|\/)|\.HT($|\/)|\.HU($|\/)|\.ID($|\/)|\.IE($|\/)|\.IL($|\/)|\.IM($|\/)|\.IN($|\/)|\.INFO($|\/)|\.INT($|\/)|\.IO($|\/)|\.IQ($|\/)|\.IR($|\/)|\.IS($|\/)|\.IT($|\/)|\.JE($|\/)|\.JM($|\/)|\.JO($|\/)|\.JOBS($|\/)|\.JP($|\/)|\.KE($|\/)|\.KG($|\/)|\.KH($|\/)|\.KI($|\/)|\.KM($|\/)|\.KN($|\/)|\.KP($|\/)|\.KR($|\/)|\.KW($|\/)|\.KY($|\/)|\.KZ($|\/)|\.LA($|\/)|\.LB($|\/)|\.LC($|\/)|\.LI($|\/)|\.LK($|\/)|\.LR($|\/)|\.LS($|\/)|\.LT($|\/)|\.LU($|\/)|\.LV($|\/)|\.LY($|\/)|\.MA($|\/)|\.MC($|\/)|\.MD($|\/)|\.ME($|\/)|\.MG($|\/)|\.MH($|\/)|\.MIL($|\/)|\.MK($|\/)|\.ML($|\/)|\.MM($|\/)|\.MN($|\/)|\.MO($|\/)|\.MOBI($|\/)|\.MP($|\/)|\.MQ($|\/)|\.MR($|\/)|\.MS($|\/)|\.MT($|\/)|\.MU($|\/)|\.MUSEUM($|\/)|\.MV($|\/)|\.MW($|\/)|\.MX($|\/)|\.MY($|\/)|\.MZ($|\/)|\.NA($|\/)|\.NAME($|\/)|\.NC($|\/)|\.NE($|\/)|\.NET($|\/)|\.NF($|\/)|\.NG($|\/)|\.NI($|\/)|\.NL($|\/)|\.NO($|\/)|\.NP($|\/)|\.NR($|\/)|\.NU($|\/)|\.NZ($|\/)|\.OM($|\/)|\.ORG($|\/)|\.PA($|\/)|\.PE($|\/)|\.PF($|\/)|\.PG($|\/)|\.PH($|\/)|\.PK($|\/)|\.PL($|\/)|\.PM($|\/)|\.PN($|\/)|\.PR($|\/)|\.PRO($|\/)|\.PS($|\/)|\.PT($|\/)|\.PW($|\/)|\.PY($|\/)|\.QA($|\/)|\.RE($|\/)|\.RO($|\/)|\.RS($|\/)|\.RU($|\/)|\.RW($|\/)|\.SA($|\/)|\.SB($|\/)|\.SC($|\/)|\.SD($|\/)|\.SE($|\/)|\.SG($|\/)|\.SH($|\/)|\.SI($|\/)|\.SJ($|\/)|\.SK($|\/)|\.SL($|\/)|\.SM($|\/)|\.SN($|\/)|\.SO($|\/)|\.SR($|\/)|\.ST($|\/)|\.SU($|\/)|\.SV($|\/)|\.SY($|\/)|\.SZ($|\/)|\.TC($|\/)|\.TD($|\/)|\.TEL($|\/)|\.TF($|\/)|\.TG($|\/)|\.TH($|\/)|\.TJ($|\/)|\.TK($|\/)|\.TL($|\/)|\.TM($|\/)|\.TN($|\/)|\.TO($|\/)|\.TP($|\/)|\.TR($|\/)|\.TRAVEL($|\/)|\.TT($|\/)|\.TV($|\/)|\.TW($|\/)|\.TZ($|\/)|\.UA($|\/)|\.UG($|\/)|\.UK($|\/)|\.US($|\/)|\.UY($|\/)|\.UZ($|\/)|\.VA($|\/)|\.VC($|\/)|\.VE($|\/)|\.VG($|\/)|\.VI($|\/)|\.VN($|\/)|\.VU($|\/)|\.WF($|\/)|\.WS($|\/)|\.XN--0ZWM56D($|\/)|\.XN--11B5BS3A9AJ6G($|\/)|\.XN--80AKHBYKNJ4F($|\/)|\.XN--9T4B11YI5A($|\/)|\.XN--DEBA0AD($|\/)|\.XN--G6W251D($|\/)|\.XN--HGBK6AJ7F53BBA($|\/)|\.XN--HLCJ6AYA9ESC7A($|\/)|\.XN--JXALPDLP($|\/)|\.XN--KGBECHTV($|\/)|\.XN--ZCKZAH($|\/)|\.YE($|\/)|\.YT($|\/)|\.YU($|\/)|\.ZA($|\/)|\.ZM($|\/)|\.ZW)/i",
    $string,
    $M);
  $has_tld = (count($M) > 0) ? true : false;
  return $has_tld;
}

function cleaner($url) {
  $U = explode(' ',$url);

  $W =array();
  foreach ($U as $k => $u) {
    if (stristr($u,".")) { //only preg_match if there is a dot    
      if (containsTLD($u) === true) {
      unset($U[$k]);
      return cleaner( implode(' ',$U));
    }      
    }
  }
  return implode(' ',$U);
}


$url = "Here is another funny site badurl.badone somesite.ca/worse.jpg but this badsite.com www.tinyurl.com/55555 and http://www.tinyurl.com/55555 and img.hostingsite.com/badpic.jpg";
echo "Cleaned: " . cleaner($url);
返回:

Cleaned: Here is another funny site badurl.badone but this and and

解析URL的文本很困难,寻找已经存在的、经过大量测试的代码,这比编写自己的代码和缺少边缘案例要好。例如,我将看一看Django的过程,它将URL包装在锚中。您可以将其移植到PHP,而不是将URL包装在锚中,只需从文本中删除它们。

谢谢mike

$string = preg_replace('/\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|$!:,.;]*[A-Z0-9+&@#\/%=~_|$]/i', '', $string);  
更新一点,它返回通知错误


“/\b(https?| ftp |文件):\/\/[-A-Z0-9+&@#\/%?=~~|$!:,.;]*[A-Z0-9+&@#\/%=~|$]/i”


$string=preg|u replace('/\b(https?| ftp |文件):\/\/[-A-Z0-9+&@#\/%?=~~|$!:,.;]*[A-Z0-9+&@#\/%=~|$]/i','$string);

$url=“这是一个有趣的网站http://www.tunyurl.com/34934";
$replace='http www.com.org.net';
$with='';
$clean\u url=clean($url,$replace,$with);
echo$clean_url;
函数clean($url,$replace,$with){
$replace=explode(“,$replace”);
$new_字符串=“”;
$check=分解(“,$url);
foreach($key=>$value){
foreach($替换为$key2=>$value2){
如果(-1
我们讨论的是从字符串中提取URL还是删除实际链接本身$url=“这里是另一个有趣的网站www.tinyurl.com/55555”;(提取)$url=“这里是另一个有趣的网站www.tinyurl.com/55555”;$someVar='www.tinyurl.com/55555';(删除)$url=“这是另一个有趣的网站”;感谢您花时间对此进行扩展。“此与”是否应为蓝色?我不想编辑实际输出;-)这不会捕获与引号等其他字符相邻的URL。因此,
将无法正确过滤以删除URL。您可以使用strip_标记,但如果这不是您想要的,则需要对此进行调整。由于某些原因,如果url前有换行符,则会中断此操作。例如:“Some text and Some more”可以正常工作,但如果单词“more”(而不是空格)后面有一个\n,则结果是“Some text and Some”。有什么建议吗?谢谢我替换了$U=explode(“”,$url);使用$U=preg_分割('/\s+/',$url);这似乎很管用。你能用你的代码解释一下吗?这可能对OP或未来的用户有更多帮助。
$string = preg_replace('/\b(https?|ftp|file):\/\/[-A-Z0-9+&@#\/%?=~_|$!:,.;]*[A-Z0-9+&@#\/%=~_|$]/i', '', $string);  
$url = "Here is a funny site http://www.tunyurl.com/34934";
$replace = 'http www .com .org .net';
$with = '';

$clean_url = clean($url,$replace,$with);
echo $clean_url;

function clean($url,$replace,$with) {   

  $replace = explode(" ",$replace);
  $new_string = '';
  $check = explode(" ",$url);

  foreach($check AS $key => $value) {
     foreach($replace AS $key2 => $value2 ) {
        if (-1 < strpos( strtolower($value), strtolower($value2) )  ) {
            $value = $with;
            break;
        }
     }
    $new_string .= " ".$value;
  }
 return $new_string;
}