php在已解析文档文件的空格处拆分字符串
我已将文档文件解析为字符串php在已解析文档文件的空格处拆分字符串,php,string,split,space,Php,String,Split,Space,我已将文档文件解析为字符串 function parseWord($userDoc) { $fileHandle = fopen($userDoc, "r"); $word_text = @fread($fileHandle, filesize($userDoc)); $line = ""; $tam = filesize($userDoc); $nulos = 0; $caracteres = 0; for($i=1536; $i<$tam; $i++) { $line .=
/**
 * Heuristically extract the text of a document file and render it as an
 * HTML fragment.
 *
 * The file is read as raw bytes. Everything before byte offset 1536 is
 * skipped (presumably the header of a legacy binary Word .doc file -- confirm
 * against the files this is used on). Bytes are then collected until either
 * the end of the file or a run of more than 1996 consecutive NUL bytes is
 * seen. The collected data is split into lines on carriage returns (0x0D),
 * control bytes are stripped or translated, and Word field codes
 * (HYPERLINK, INCLUDEPICTURE, MERGEFORMATINET) are rewritten into HTML tags.
 *
 * NOTE(review): the document text is emitted without HTML escaping, and the
 * generated <a>/<img> attributes are unquoted. Escape or sanitise the result
 * before showing it for untrusted documents.
 *
 * @param string $userDoc Path of the document file to read.
 * @return string HTML fragment, each non-empty source line terminated by
 *                "<br />\n"; an empty string if the file cannot be read
 *                or holds no data after the skipped prefix.
 */
function parseWord($userDoc)
{
// Heuristic constants carried over from the original implementation.
$headerSize = 1536; // bytes skipped at the start of the file
$maxNullRun = 1996; // stop once more than this many NUL bytes appear in a row

// Read the whole file in binary-safe fashion; no handle is left open.
$word_text = @file_get_contents($userDoc);
if( $word_text === false )
{
return "";
}
$tam = strlen($word_text);

// Collect bytes after the header until a long run of NUL bytes is reached.
$line = "";
$nulos = 0;
for($i=$headerSize; $i<$tam; $i++)
{
$byte = $word_text[$i];
$line .= $byte;
// Strict comparison against the NUL byte: a loose `== 0` matches letters
// on PHP < 8 and only the digit "0" on PHP >= 8, never a real NUL byte.
if( $byte === "\0" )
{
$nulos++;
if( $nulos > $maxNullRun )
{
break;
}
}
else
{
$nulos = 0;
}
}

$outtext = "";
foreach(explode(chr(0x0D), $line) as $thisline)
{
$tam = strlen($thisline);
if( !$tam )
{
continue;
}
$new_line = "";
for($i=0; $i<$tam; $i++)
{
$onechar = $thisline[$i];
$code = ord($onechar);
// Bytes above 240 are dropped entirely.
if( $code > 240 )
{
continue;
}
// Printable bytes (space and above) are copied through unchanged.
if( $code >= 0x20 )
{
$new_line .= $onechar;
}
// 0x14 closes the anchor opened by the HYPERLINK replacement below.
if( $code === 0x14 )
{
$new_line .= "</a>";
}
// 0x07 becomes a tab; when another 0x07 follows, a newline is added too.
if( $code === 0x07 )
{
$new_line .= "\t";
if( isset($thisline[$i+1]) && $thisline[$i+1] === chr(0x07) )
{
$new_line .= "\n";
}
}
}
// Rewrite hyperlink field codes as an opening anchor tag.
$new_line = str_replace("HYPERLINK", "<a href=", $new_line);
$new_line = str_replace('\o', ">", $new_line);
$new_line .= "\n";
// Rewrite picture field codes as an image tag.
$new_line = str_replace("INCLUDEPICTURE", "<br><img src=", $new_line);
$new_line = str_replace('\*', "><br>", $new_line);
$new_line = str_replace("MERGEFORMATINET", "", $new_line);
$outtext .= nl2br($new_line);
}
return $outtext;
}
// Parse the document at the path in $userDoc and keep the returned HTML string in $text.
// NOTE(review): $userDoc is not assigned in the visible code -- presumably set earlier by the caller; confirm.
$text = parseWord($userDoc);
函数parseWord($userDoc)
{
$fileHandle=fopen($userDoc,“r”);
$word_text=@fread($fileHandle,filesize($userDoc));
$line=“”;
$tam=文件大小($userDoc);
$nulos=0;
$caracteres=0;
(i=1536美元;i=1996美元)
{
打破
}
}
//echo$caracteres;
$lines=分解(chr(0x0D),$line);
//$outtext=“”;
$outtext=“”;
foreach($行作为$thisline)
{
$tam=strlen($thisline);
如果(!$tam)
{
继续;
}
$new_line=“”;
对于($i=0;$i chr(240))
{
继续;
}
如果($onechar>=chr(0x20))
{
$caracteres++;
$new_line.=$onechar;
}
如果($onechar==chr(0x14))
{
$new_行=“”;
}
如果($onechar==chr(0x07))
{
$new_line.=“\t”;
如果(isset($thisline[$i+1]))
{
如果($thisline[$i+1]==chr(0x07))
{
$new_line.=“\n”;
}
}
}
}
//特罗卡·波尔·希珀林
$new\u line=str\u replace(“超链接”,“新行”);
$new_line.=“\n”;
//图像链接
$new\u line=str\u replace(“INCLUDEPICTURE”、“
”、$new\u line);
$new\u line=str\u replace(“MERGEFORMATINET”、“”、$new\u line);
$outtext.=nl2br($new_行);
}
返回$outtext;
}
$text=parseWord($userDoc);
但现在我需要在每个空格处拆分这个字符串,因为我想把每个单词单独存入数据库,供搜索引擎使用。有人能帮我吗?
有人能帮我吗?如果你用空格分隔,每个单词都会包含标点符号。相反,请使用 str_word_count(),并将 1 作为第二个参数。这将返回所有单词的列表,不带任何标点符号。