使用jsoup将html文本替换为span
我想改变HTML元素的文本内容,使其具有一定的背景色。 HTML的格式如下使用jsoup将html文本替换为span,html,jsoup,Html,Jsoup,我想改变HTML元素的文本内容,使其具有一定的背景色。 HTML的格式如下 <html> <head></head> <body>Gc <br> Stable <br> Oral intake better <br> Urine stool normal <br> </body> </html> 我有字符串形式的html "<ht
<html>
<head></head>
<body>Gc <br>
Stable <br>
Oral intake better <br>
Urine stool normal <br>
</body>
</html>
我有字符串形式的html
"<html><head></head><body>Gc <br>Stable <br>Oral intake better <br>Urine stool normal <br>Pain Relief <br>Vital stable <br>No problem <br>Adv tab pan 40mg 1od <br>Tab pcm500mg 6hourly <br>Cab gab 300mg 1bd <br>Cab becasol 1od <br>Cab Tramadol 50mg 6hourly </body></html>"
“Gc
稳定
口服量更好
尿便正常
疼痛缓解
生命稳定
无问题
Adv-tab-pan 40mg 1od
tab-pcm500mg 6h
Cab-gab-300mg 1bd
Cab-becasol 1od
Cab-Tramadol 50mg 6h”
我想在HTML字符串中匹配元素的文本内容,并把命中的关键字替换掉:将每个匹配到的关键字改写为一个带有指定背景色的span,span的文本就是该关键字本身。
生成的HTML如下所示
<html>
<head></head>
<body>
<div>
<div>
<span style="background: #FF9999;">Gc</span>
<br><span style="background: #FF9999;">Stable</span>
<br><span style="background: #FF9999;">Oral</span> intake better
<br><span style="background: #FF9999;">Urine</span> stool normal
<br>Pain Relief
<br>Vital stable
<br>No problem
<br>Adv tab pan 40mg 1od
<br>Tab pcm500mg 6hourly
<br>Cab gab 300mg 1bd
<br>Cab becasol 1od
<br>Cab Tramadol 50mg 6hourly
</div>
</div>
</body>
</html>
Gc
Stable
Oral intake better
Urine stool normal
Pain Relief
Vital stable
No problem
Adv tab pan 40mg 1od
Tab pcm500mg 6hourly
Cab gab 300mg 1bd
Cab becasol 1od
Cab Tramadol 50mg 6hourly
如何用java实现它。我正在使用jsoup库
这段代码对我有用。这是最佳方法吗?或者有没有更好的方法来替换html字符串中的内容?
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.TextNode;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
/**
 * Highlights whole-word keyword occurrences in an HTML string by wrapping each
 * occurrence in a {@code <span>} with a coloured background.
 *
 * <p>The highlighting is done on the parsed jsoup DOM: text nodes are split at the
 * match boundaries and the matching piece is wrapped in a span element. No markup is
 * ever written into a text node, so the serialized document never needs a
 * post-processing "unescape" pass and text that was legitimately escaped in the
 * input stays escaped.
 */
public class regexReplaceHtml {

    /** Markup wrapped around every highlighted keyword; the matched text becomes the span's content. */
    private static final String HIGHLIGHT_WRAPPER = "<span style=\"background: #FF9999;\"></span>";

    /**
     * Demo entry point: highlights a fixed keyword list in a sample document and prints the result.
     *
     * @param args unused
     * @throws IOException never thrown by the current body; kept so the signature is unchanged
     */
    public static void main(String args[]) throws IOException {
        String html2 = "<html><head></head><body>Gc <br>Stable <br>Oral intake better <br>Urine stool normal <br>Pain Relief <br>Vital stable <br>No problem <br>Adv tab pan 40mg 1od <br>Tab pcm500mg 6hourly <br>Cab gab 300mg 1bd <br>Cab becasol 1od <br>Cab Tramadol 50mg 6hourly </body></html>";
        String html = "<div>" + html2 + "</div>";
        Document doc = Jsoup.parse(html);
        List<String> keywords = new ArrayList<String>();
        keywords.add("Gc");
        keywords.add("Stable");
        keywords.add("Oral");
        keywords.add("Urine");
        String convertedString = replaceHtmlString(doc.html(), keywords);
        System.out.println(convertedString);
    }

    /**
     * Parses {@code html} (wrapped in a {@code <div>}) and wraps every whole-word
     * occurrence of any keyword in a highlighting span.
     *
     * @param html     the HTML source to process
     * @param keywords literal keywords to highlight; matching is case-sensitive and
     *                 regex metacharacters in a keyword are treated literally;
     *                 {@code null} or empty entries are ignored
     * @return the serialized document with the highlighting spans inserted
     */
    public static String replaceHtmlString(String html, List<String> keywords) {
        String htmlString = "<div>" + html + "</div>";
        Document doc = Jsoup.parse(htmlString);
        Pattern keywordPattern = buildKeywordPattern(keywords);
        if (keywordPattern == null) {
            // Nothing to highlight: return the parsed document unchanged.
            return doc.html();
        }
        // The element list is collected before any node is touched, so the spans
        // inserted below are never visited (and never highlighted a second time).
        Elements elements = doc.body().children().select("*");
        for (Element element : elements) {
            for (TextNode tn : element.textNodes()) {
                highlightMatches(tn, keywordPattern);
            }
        }
        return doc.html();
    }

    /**
     * Builds one pattern matching any of the keywords as a whole word.
     *
     * @return the compiled pattern, or {@code null} when there is no usable keyword
     */
    private static Pattern buildKeywordPattern(List<String> keywords) {
        if (keywords == null) {
            return null;
        }
        StringBuilder alternation = new StringBuilder();
        for (String keyword : keywords) {
            if (keyword == null || keyword.isEmpty()) {
                continue;
            }
            if (alternation.length() > 0) {
                alternation.append('|');
            }
            // Quote so that characters such as '.' or '+' in a keyword match literally.
            alternation.append(Pattern.quote(keyword));
        }
        if (alternation.length() == 0) {
            return null;
        }
        return Pattern.compile("\\b(?:" + alternation + ")\\b");
    }

    /**
     * Splits {@code node} around every match of {@code keywordPattern} and wraps each
     * matching piece in the highlighting span.
     */
    private static void highlightMatches(TextNode node, Pattern keywordPattern) {
        String text = node.getWholeText();
        List<int[]> hits = new ArrayList<int[]>();
        Matcher matcher = keywordPattern.matcher(text);
        while (matcher.find()) {
            hits.add(new int[] {matcher.start(), matcher.end()});
        }
        // Work from the last match to the first: every split only cuts text off the
        // end of 'node', so the offsets of the earlier matches stay valid.
        int remaining = text.length();
        for (int i = hits.size() - 1; i >= 0; i--) {
            int start = hits.get(i)[0];
            int end = hits.get(i)[1];
            if (end < remaining) {
                // Detach the text that follows the match into its own sibling node.
                node.splitText(end);
            }
            // A match at offset 0 occupies the whole remaining node; otherwise cut it off.
            TextNode hit = start > 0 ? node.splitText(start) : node;
            hit.wrap(HIGHLIGHT_WRAPPER);
            remaining = start;
        }
    }
}
import java.io.IOException;
导入java.util.ArrayList;
导入java.util.List;
导入java.util.regex.Matcher;
导入java.util.regex.Pattern;
导入org.jsoup.jsoup;
导入org.jsoup.nodes.Document;
导入org.jsoup.nodes.Element;
导入org.jsoup.nodes.TextNode;
导入org.jsoup.parser.Tag;
导入org.jsoup.select.Elements;
公共类regexReplaceHtml{
公共静态void main(字符串args[])引发IOException{
String html2=“Gc
稳定
口服量更好
尿便正常
疼痛缓解
生命稳定
无问题
Adv-tab-pan 40mg 1od
tab-pcm500mg 6hourly
Cab-gab-300mg 1bd
Cab-becasol 1od
Cab-Tramadol 50mg 6hourly”;
字符串html=”“+html2+”;
Document doc=Jsoup.parse(html);
列表关键字=新建ArrayList();
关键词:添加(“Gc”);
关键词:添加(“稳定”);
关键词:添加(“口服”);
关键词:添加(“尿液”);
String convertedString=replaceThMLString(doc.html(),关键字);
System.out.println(convertedString);
}
公共静态字符串替换HTMLSTRING(字符串html,列表关键字){
字符串htmlString=“”+html+”;
Document doc=Jsoup.parse(htmlString);
Elements=doc.body().children()。选择(“*”);
for(元素:元素){
List tnList=element.textNodes();
对于(TextNode tn:tnList){
字符串nodeTrimmedText=tn.text().trim();
对于(int i=0;i
下面的代码应该是您想要的。它获取一个关键字列表,并用您提到的span标记替换它们
// Keywords whose occurrences in the body's direct text are wrapped in a highlighting span.
List<String> keywords = new ArrayList<String>();
keywords.add("Gc");
keywords.add("Stable");
keywords.add("Oral");
keywords.add("Urine");
Element body = doc.getElementsByTag("body").first();
// Keywords drive the outer loop so that each pass re-reads the body's text nodes and
// therefore sees the pieces produced by the splits of the previous pass.
for (String keyword : keywords) {
    if (keyword.isEmpty()) {
        continue; // an empty keyword would match at every position
    }
    for (TextNode node : body.textNodes()) {
        TextNode rest = node;
        while (rest != null) {
            int at = rest.getWholeText().indexOf(keyword);
            if (at < 0) {
                break;
            }
            // Isolate the keyword in its own text node so the span lands exactly where
            // the keyword was, instead of always in front of the whole text.
            TextNode hit = at > 0 ? rest.splitText(at) : rest;
            // Whatever follows the keyword is searched again for further occurrences.
            rest = hit.getWholeText().length() > keyword.length()
                    ? hit.splitText(keyword.length())
                    : null;
            hit.wrap("<span style=\"background-color:#FF9999;\"></span>");
        }
    }
}
List关键字=new ArrayList();
关键词:添加(“Gc”);
关键词:添加(“稳定”);
关键词:添加(“口服”);
关键词:添加(“尿液”);
Element body=doc.getElementsByTag(“body”).first();
列表节点=body.textNodes();
for(TextNode节点:节点){
字符串nodeText=node.text();
for(字符串关键字:关键字){
if(nodeText.contains(关键字)){
字符串newText=nodeText.replace(关键字“”);
node.text(newText);
node.before(“+关键字+”);
}
}
}
有一个regexp解决方案:
// Wrap every keyword occurrence in a highlighting span in a single pass.
// Calling html.replace(...) once per match would rewrite ALL occurrences on every hit,
// so a keyword appearing N times would end up nested inside N spans.
Matcher matcher = Pattern.compile("(Gc|Stable|Oral|Urine)").matcher(html);
// $1 is the matched keyword (capture group 1).
html = matcher.replaceAll("<span style=\"background-color:#FF9999;\">$1</span>");
Matcher-Matcher=Pattern.compile(“(Gc |稳定|口服|尿液)”).Matcher(html);
while(matcher.find()){
对于(int i=1;i<=matcher.groupCount();i++)……
如果您已经以字符串形式持有html,那么最简单的解决方案是使用String.replace()方法。
// Wrap every keyword occurrence in a highlighting span in a single pass.
// Calling html.replace(...) once per match would rewrite ALL occurrences on every hit,
// so a keyword appearing N times would end up nested inside N spans.
Matcher matcher = Pattern.compile("(Gc|Stable|Oral|Urine)").matcher(html);
// $1 is the matched keyword (capture group 1).
html = matcher.replaceAll("<span style=\"background-color:#FF9999;\">$1</span>");