Java DBpedia查找uri与#x27';类别“;
我在Github上找到一个使用 DBpedia Lookup发送单词并从DBpedia获取候选URI。问题是:所有URI都带有单词Java DBpedia查找uri与#x27';类别“;,java,uri,categories,dbpedia,Java,Uri,Categories,Dbpedia,我在Github上找到一个使用 DBpedia Lookup发送单词并从DBpedia获取候选URI。问题是:所有URI都带有单词Category。例如,对于单词Berlin,它返回: 而不是 如果我将第一个URI(带有“类别”的URI)放在浏览器上,它不会显示与主题“柏林历史”对应的页面,它会返回一个包含链接列表的页面,我可以在其中找到指向“柏林历史”的链接。但是,如果我放入第二个URI(没有“Category”的URI),它将返回与主题“柏林的历史”对应的页面。如何避免从查找中返
Category
。例如,对于单词Berlin
,它返回:
package com.knowledgebooks.info\u;
导入org.apache.commons.httpclient.httpclient;
导入org.apache.commons.httpclient.HttpException;
导入org.apache.commons.httpclient.HttpMethod;
导入org.apache.commons.httpclient.methods.GetMethod;
导入org.xml.sax.Attributes;
导入org.xml.sax.SAXException;
导入org.xml.sax.helpers.DefaultHandler;
导入javax.xml.parsers.SAXParser;
导入javax.xml.parsers.SAXParserFactory;
导入java.io.IOException;
导入java.io.InputStream;
导入java.net.urlcoder;
导入java.util.*;
/**
*马克·沃森2008-2010版权所有。版权所有。
*许可证:LGPL版本3(http://www.gnu.org/licenses/lgpl-3.0.txt)
*/
//使用Georgi Kobilarov的DBpedia查找web服务
//参考:http://lookup.dbpedia.org/api/search.asmx?op=KeywordSearch
//例如:http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=Flagstaff&QueryClass=XML&MaxHits=10
/**
*搜索返回包含任何搜索词的结果。我要去过滤
*结果将忽略不包含所有搜索词的结果。
*/
公共类DBpediaLookupClient扩展了DefaultHandler{
公共DBpediaLookupClient(字符串查询)引发异常{
this.query=query;
HttpClient=新的HttpClient();
字符串query2=query.replaceAll(“,”+”)//urlcoder.encode(查询,“utf-8”);
HttpMethod方法=
新的GetMethod(“http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=" +
问题2);
试一试{
客户端执行方法(方法);
系统输出打印LN(方法);
InputStream ins=method.getResponseBodyAsStream();
SAXParserFactory=SAXParserFactory.newInstance();
SAXParser sax=factory.newSAXParser();
parse(ins,this);
}catch(httphe异常){
System.err.println(“连接到lookup.dbpedia.org的Http错误”);
}捕获(ioe异常ioe){
System.err.println(“无法连接到lookup.dbpedia.org”);
}
方法。releaseConnection();
}
private List variableBindings=new ArrayList();
私有映射tempBinding=null;
私有字符串lastElementName=null;
public void startElement(字符串uri、字符串localName、字符串qName、属性)引发SAXException{
//System.out.println(“startElement”+qName);
if(qName.equalsIgnoreCase(“结果”)){
tempBinding=newhashmap();
}
lastElementName=qName;
}
public void endElement(字符串uri、字符串localName、字符串qName)引发SAXException{
//System.out.println(“endElement”+qName);
if(qName.equalsIgnoreCase(“结果”)){
if(!variableBindings.contains(tempBinding)和&containsSearchTerms(tempBinding))
variableBindings.add(tempBinding);
}
}
公共无效字符(char[]ch,int start,int length)引发异常{
字符串s=新字符串(ch,start,length).trim();
//System.out.println(“字符(lastElementName='”+lastElementName+”):“+s”);
如果(s.长度()>0){
if(“Description”.equals(lastElementName)){
if(tempBinding.get(“Description”)==null){
tempBinding.put(“说明”,s);
}
tempBinding.put(“说明”、“+tempBinding.get(“说明”)+s);
}
if(“URI”.equals(lastElementName))tempBinding.put(“URI”,s);
如果(“Label”.equals(lastElementName))tempBinding.put(“Label”,s);
}
}
公共列表变量绑定(){
返回变量绑定;
}
私有布尔包含ArchTerms(映射绑定){
StringBuilder sb=新的StringBuilder();
for(字符串值:bindings.values())sb.append(值);//不需要空白
字符串text=sb.toString().toLowerCase();
StringTokenizer st=新的StringTokenizer(this.query);
而(st.hasMoreTokens()){
if(text.indexOf(st.nextToken().toLowerCase())=-1){
返回false;
}
}
返回true;
}
私有字符串查询=”;
}
当您搜索例如“柏林历史”时,您请求的URL如下
自然艺术博物馆
http://dbpedia.org/resource/Museum_f吕鲁纳图尔昆德
155
柏林历史
http://dbpedia.org/resource/History_of_Berlin
柏林是德国的首都。按照欧洲标准,柏林是一座建于12世纪的年轻城市。
柏林历史
http://dbpedia.org/resource/Category:History_of_Berlin
按地点划分的德国历史
http://dbpedia.org/resource/Category:History_of_Germany_by_location
14
您是对的,有URI
元素具有类别URI,例如
http://dbpedia.org/resource/Category:History_of_Berlin
但是您应该注意的是,从文档的根目录来看
ArrayOfResult/Result/Categories/C
package com.knowledgebooks.info_spiders;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.methods.GetMethod;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLEncoder;
import java.util.*;
/**
* Copyright Mark Watson 2008-2010. All Rights Reserved.
* License: LGPL version 3 (http://www.gnu.org/licenses/lgpl-3.0.txt)
*/
// Use Georgi Kobilarov's DBpedia lookup web service
// ref: http://lookup.dbpedia.org/api/search.asmx?op=KeywordSearch
// example: http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=Flagstaff&QueryClass=XML&MaxHits=10
/**
* Searches return results that contain any of the search terms. I am going to filter
* the results to ignore results that do not contain all search terms.
*/
public class DBpediaLookupClient extends DefaultHandler {
public DBpediaLookupClient(String query) throws Exception {
this.query = query;
HttpClient client = new HttpClient();
String query2 = query.replaceAll(" ", "+"); // URLEncoder.encode(query, "utf-8");
HttpMethod method =
new GetMethod("http://lookup.dbpedia.org/api/search.asmx/KeywordSearch?QueryString=" +
query2);
try {
client.executeMethod(method);
System.out.println(method);
InputStream ins = method.getResponseBodyAsStream();
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser sax = factory.newSAXParser();
sax.parse(ins, this);
} catch (HttpException he) {
System.err.println("Http error connecting to lookup.dbpedia.org");
} catch (IOException ioe) {
System.err.println("Unable to connect to lookup.dbpedia.org");
}
method.releaseConnection();
}
private List<Map<String, String>> variableBindings = new ArrayList<Map<String, String>>();
private Map<String, String> tempBinding = null;
private String lastElementName = null;
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
//System.out.println("startElement " + qName);
if (qName.equalsIgnoreCase("result")) {
tempBinding = new HashMap<String, String>();
}
lastElementName = qName;
}
public void endElement(String uri, String localName, String qName) throws SAXException {
//System.out.println("endElement " + qName);
if (qName.equalsIgnoreCase("result")) {
if (!variableBindings.contains(tempBinding) && containsSearchTerms(tempBinding))
variableBindings.add(tempBinding);
}
}
public void characters(char[] ch, int start, int length) throws SAXException {
String s = new String(ch, start, length).trim();
//System.out.println("characters (lastElementName='" + lastElementName + "'): " + s);
if (s.length() > 0) {
if ("Description".equals(lastElementName)) {
if (tempBinding.get("Description") == null) {
tempBinding.put("Description", s);
}
tempBinding.put("Description", "" + tempBinding.get("Description") + " " + s);
}
if ("URI".equals(lastElementName)) tempBinding.put("URI", s);
if ("Label".equals(lastElementName)) tempBinding.put("Label", s);
}
}
public List<Map<String, String>> variableBindings() {
return variableBindings;
}
private boolean containsSearchTerms(Map<String, String> bindings) {
StringBuilder sb = new StringBuilder();
for (String value : bindings.values()) sb.append(value); // do not need white space
String text = sb.toString().toLowerCase();
StringTokenizer st = new StringTokenizer(this.query);
while (st.hasMoreTokens()) {
if (text.indexOf(st.nextToken().toLowerCase()) == -1) {
return false;
}
}
return true;
}
private String query = "";
}
//if ("URI".equals(lastElementName)) tempBinding.put("URI", s);
if ("URI".equals(lastElementName) && s.indexOf("Category")==-1
&& tempBinding.get("URI") == null) {
tempBinding.put("URI", s);
}