如果使用大于0的偏移量,为什么此SPARQL查询不返回任何数据
我正在尝试从DBPedia检索音乐信息。如果我在以下服务器上运行此查询:我得到一个结果,但是如果我将OFFSET改为1,结果集是空的?当然,有不止一个结果可用。有什么想法吗?

回答:group_concat是一个聚合器。整个结果将被分组并折叠成一个组,其中只包含一个结果行。因此,偏移量0返回这唯一的一行,偏移量1不返回任何行。你是想用
GROUP BY ?title
?
例如:
SELECT (count(*) AS ?C)
WHERE
{ ?s ?p ?o }
这个查询只返回一行——即计数本身,所以任何大于0的OFFSET都会得到空结果。因此我找到的解决方案是使用Python将查询拆分为多个查询:首先创建JSON以指定/定义所需内容:
[{
"root": [
"?title <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:American_hard_rock_musical_groups> .",
"?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> ?name "
],
"sub_page": [
"?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> <[[X]]> .",
"OPTIONAL { ?title <http://dbpedia.org/ontology/bandMember> ?member . }",
"OPTIONAL { ?title <http://dbpedia.org/ontology/formerBandMember> ?oldMember . }",
"OPTIONAL { ?title <http://dbpedia.org/property/label> ?label . }",
"OPTIONAL { ?title <http://dbpedia.org/property/genre> ?genre . }",
"OPTIONAL { ?title <http://dbpedia.org/property/origin> ?origin . }",
"OPTIONAL { ?title <http://dbpedia.org/ontology/activeYearsStartYear> ?date . }",
"OPTIONAL { ?song <http://dbpedia.org/ontology/artist> ?title . }",
"OPTIONAL { ?songOther <http://dbpedia.org/property/artist> ?title . }",
"OPTIONAL { ?songOtherOther <http://dbpedia.org/ontology/musicalArtist> ?title . }",
"OPTIONAL { ?songOtherOtherOther <http://dbpedia.org/property/producer> ?title}"
],
"service":"<http://dbpedia.org/sparql/>",
"select":[
"title",
"date",
"label_s",
"genre_s",
"member_s",
"oldMember_s",
"origin_s",
"song_s",
"songOther_s",
"songOtherOther_s",
"songOtherOtherOther_s",
"name_X"
],
"language": "en",
"limit": 10000,
"offset": 100,
"category": "music",
"description": "American Hard Rock",
"sub_category": "American_hard_rock_musical_groups"
}]
然后使用此Python脚本使用JSON:
import os, sys
from api.DBPedia import DBPedia
import datetime
import time
import copy
import json
class ProcessStuff(DBPedia):
    """Build and run DBPedia SPARQL queries described by a JSON config file.

    The inherited DBPedia class performs the actual HTTP requests
    (resolveDBPediaQuery) and result persistence (processPage); this class
    only assembles the query strings and drives the root/sub-page workflow.
    """

    def __init__(self, fn=""):
        """Store the path of the JSON configuration file.

        :param fn: path to the JSON file consumed by getConfigFile().
        """
        self.filePath = fn

    def getConfigFile(self):
        """Read and parse the JSON file describing the DBPedia queries.

        Returns the parsed data on success, or None when the file cannot
        be read or parsed (an error message is printed in that case).
        """
        try:
            # 'with' guarantees the handle is closed even if json.load raises.
            with open(self.filePath, "r") as jsonFile:
                return json.load(jsonFile)
        except (IOError, OSError, ValueError) as e:
            print("[getConfigFile] Error in reading file: %s" % e)
            return None

    def queryMultiplier(self, data, identifier='[[X]]'):
        """Run the root query, then one sub-page query per result row.

        Each row of the root query supplies a value (taken from the select
        entry suffixed with '_X') that is substituted for `identifier` in
        the 'sub_page' patterns; every generated query is then fetched and
        its results saved via processPage().

        :param data: one parsed config entry (dict) from the JSON file.
        :param identifier: placeholder token replaced in sub_page patterns.
        """
        queries = []
        q = self.createSparqlQuery(data)
        json_page = self.resolveDBPediaQuery(q=q)
        if len(data['sub_page']) > 0:
            try:
                # Only one '_X' placeholder variable is supported; hoist the
                # lookup out of the loop -- it is the same for every row.
                sub_page_identifier = [var for var in data['select']
                                       if var.endswith('_X')][0].replace('_X', '')
                for item in json_page['results']['bindings']:
                    sub_data = copy.deepcopy(data)
                    name = item[sub_page_identifier]['value']
                    # Substitute the placeholder in every sub_page pattern.
                    sub_data['sub_page'] = [
                        pattern.replace(identifier, name)
                        for pattern in sub_data['sub_page']
                    ]
                    queries.append(self.createSparqlQuery(sub_data, key='sub_page'))
            except (KeyError, IndexError, TypeError) as e:
                # Best-effort: a malformed root result skips the sub-queries
                # but we still report it instead of dying silently.
                print("[ProcessStuff][queryMultiplier] Error in creating queries for subpages: %s" % e)
        file_name = data['category'] + "___" + data['sub_category']
        for query in queries:
            print("Fetching query: \n%s" % query)
            json_page = self.resolveDBPediaQuery(q=query)
            print("Processing page and saving to: " + file_name)
            self.processPage(json_page, json_file='../../json_samples/',
                            category=file_name, overwrite=False)

    def createConcat(self, data, separator=";;;"):
        """Build a group_concat projection: ?<var> aggregated into ?<var>_s."""
        return ("(group_concat(distinct ?" + data + ";separator='" +
                separator + "') as ?" + data + "_s)")

    def createSparqlQuery(self, data, separator=";;;", key="root", offset=100):
        """Generate a SPARQL query string from a config dict.

        Select entries ending in '_s' become group_concat aggregates;
        all other entries are plain variables ('_X' markers stripped) and
        are also used for ORDER BY.  The optional 'service' and 'limit'
        config keys add a SERVICE wrapper and a LIMIT clause.

        NOTE(review): `offset` is accepted but never emitted into the
        query -- with group_concat aggregation an OFFSET > 0 would return
        no rows anyway, since the whole result collapses into one group.

        :param data: config dict holding 'select' plus the pattern list.
        :param separator: separator forwarded to group_concat projections.
        :param key: which pattern list to embed ('root' or 'sub_page').
        :returns: the complete query string (also printed for logging).
        """
        query = []
        orderby = []
        select = "SELECT DISTINCT"
        for prop in data['select']:
            if prop.endswith("_s"):
                # Forward the separator instead of silently ignoring it
                # (the original always used createConcat's default).
                select += " " + self.createConcat(prop.split("_s")[0], separator)
            else:
                v = "?" + prop.replace('_X', '')
                select += " " + v
                orderby.append(v)
        closing = 1  # number of '}' still to emit
        query.append(select)
        query.append(" WHERE { ")
        # Explicit key checks instead of a bare except: only a genuinely
        # absent 'service'/'limit' entry skips the clause.
        if 'service' in data:
            query.append("SERVICE " + data['service'] + " {")
            closing += 1
        query.append('\n'.join(data[key]))
        query.extend(['}'] * closing)
        query.append(" ORDER BY " + ' '.join(orderby))
        if 'limit' in data:
            query.append(" LIMIT %s" % data['limit'])
        complete_query = '\n'.join(query)
        print(complete_query)
        return complete_query
if __name__ == "__main__":
    # Usage: python ProcessStuff.py <config.json>
    try:
        JSON_FILE_NAME = sys.argv[1]
    except IndexError:
        # Narrow except: only a missing argv[1] means "no file given".
        print("JSON file name is needed to run!")
        sys.exit(2)
    # Subtract real datetimes instead of round-tripping through
    # strftime/strptime('%H:%M:%S'), which dropped the date and produced
    # a wrong (negative) duration for runs crossing midnight.
    start_time = datetime.datetime.now()
    hm = ProcessStuff(JSON_FILE_NAME)
    data = hm.getConfigFile()
    hm.queryMultiplier(data[0])
    total_time = datetime.datetime.now() - start_time
    print("Took %s to process %s " % (total_time, JSON_FILE_NAME))
然后按如下方式运行代码:python ProcessStuff.py input.json
花了0:31:10处理music.json,处理了441个条目
当然,代码可以更快。。。
ProcessStuff继承的DBPedia类只是发出HTTP请求,稍微清理一下结果并将结果保存为JSON。问题在于属性/艺术家和本体/音乐艺术家谓词,没有它们就可以正常工作。我不知道,但对我来说,它看起来像一个大师级的bug。谢谢@laugedelic,是的,我理解,但没有这些,我就无法获得对我的项目很重要的歌曲信息。我可以通过编程来完成,比如先获取每个乐队,然后为每个乐队获取歌曲,只是想知道是否有更紧凑的方式。你需要分别使用song、songOther和songOtherOther吗?@laugedelic,不,它们可以在一个绑定中一起。你能澄清一下吗。。。?为什么其他查询返回更多结果?另外,如果我删除limit子句,我会得到Virtuoso 22026错误SR319:当试图将3145个字符的字符串存储到临时数组中时,超出了最大行长度,我认为Virtuoso允许省略显式分组。我也尝试过显式地添加它,但它没有改变任何东西。很高兴你解决了你的问题。但我认为这并没有回答您最初的问题,为什么查询不能按预期工作,并且不是SPARQL解决方案。@laughedelic fair:
import os, sys
from api.DBPedia import DBPedia
import datetime
import time
import copy
import json
class ProcessStuff(DBPedia):
    """Build and run DBPedia SPARQL queries described by a JSON config file.

    The inherited DBPedia class performs the actual HTTP requests
    (resolveDBPediaQuery) and result persistence (processPage); this class
    only assembles the query strings and drives the root/sub-page workflow.
    """

    def __init__(self, fn=""):
        """Store the path of the JSON configuration file.

        :param fn: path to the JSON file consumed by getConfigFile().
        """
        self.filePath = fn

    def getConfigFile(self):
        """Read and parse the JSON file describing the DBPedia queries.

        Returns the parsed data on success, or None when the file cannot
        be read or parsed (an error message is printed in that case).
        """
        try:
            # 'with' guarantees the handle is closed even if json.load raises.
            with open(self.filePath, "r") as jsonFile:
                return json.load(jsonFile)
        except (IOError, OSError, ValueError) as e:
            print("[getConfigFile] Error in reading file: %s" % e)
            return None

    def queryMultiplier(self, data, identifier='[[X]]'):
        """Run the root query, then one sub-page query per result row.

        Each row of the root query supplies a value (taken from the select
        entry suffixed with '_X') that is substituted for `identifier` in
        the 'sub_page' patterns; every generated query is then fetched and
        its results saved via processPage().

        :param data: one parsed config entry (dict) from the JSON file.
        :param identifier: placeholder token replaced in sub_page patterns.
        """
        queries = []
        q = self.createSparqlQuery(data)
        json_page = self.resolveDBPediaQuery(q=q)
        if len(data['sub_page']) > 0:
            try:
                # Only one '_X' placeholder variable is supported; hoist the
                # lookup out of the loop -- it is the same for every row.
                sub_page_identifier = [var for var in data['select']
                                       if var.endswith('_X')][0].replace('_X', '')
                for item in json_page['results']['bindings']:
                    sub_data = copy.deepcopy(data)
                    name = item[sub_page_identifier]['value']
                    # Substitute the placeholder in every sub_page pattern.
                    sub_data['sub_page'] = [
                        pattern.replace(identifier, name)
                        for pattern in sub_data['sub_page']
                    ]
                    queries.append(self.createSparqlQuery(sub_data, key='sub_page'))
            except (KeyError, IndexError, TypeError) as e:
                # Best-effort: a malformed root result skips the sub-queries
                # but we still report it instead of dying silently.
                print("[ProcessStuff][queryMultiplier] Error in creating queries for subpages: %s" % e)
        file_name = data['category'] + "___" + data['sub_category']
        for query in queries:
            print("Fetching query: \n%s" % query)
            json_page = self.resolveDBPediaQuery(q=query)
            print("Processing page and saving to: " + file_name)
            self.processPage(json_page, json_file='../../json_samples/',
                            category=file_name, overwrite=False)

    def createConcat(self, data, separator=";;;"):
        """Build a group_concat projection: ?<var> aggregated into ?<var>_s."""
        return ("(group_concat(distinct ?" + data + ";separator='" +
                separator + "') as ?" + data + "_s)")

    def createSparqlQuery(self, data, separator=";;;", key="root", offset=100):
        """Generate a SPARQL query string from a config dict.

        Select entries ending in '_s' become group_concat aggregates;
        all other entries are plain variables ('_X' markers stripped) and
        are also used for ORDER BY.  The optional 'service' and 'limit'
        config keys add a SERVICE wrapper and a LIMIT clause.

        NOTE(review): `offset` is accepted but never emitted into the
        query -- with group_concat aggregation an OFFSET > 0 would return
        no rows anyway, since the whole result collapses into one group.

        :param data: config dict holding 'select' plus the pattern list.
        :param separator: separator forwarded to group_concat projections.
        :param key: which pattern list to embed ('root' or 'sub_page').
        :returns: the complete query string (also printed for logging).
        """
        query = []
        orderby = []
        select = "SELECT DISTINCT"
        for prop in data['select']:
            if prop.endswith("_s"):
                # Forward the separator instead of silently ignoring it
                # (the original always used createConcat's default).
                select += " " + self.createConcat(prop.split("_s")[0], separator)
            else:
                v = "?" + prop.replace('_X', '')
                select += " " + v
                orderby.append(v)
        closing = 1  # number of '}' still to emit
        query.append(select)
        query.append(" WHERE { ")
        # Explicit key checks instead of a bare except: only a genuinely
        # absent 'service'/'limit' entry skips the clause.
        if 'service' in data:
            query.append("SERVICE " + data['service'] + " {")
            closing += 1
        query.append('\n'.join(data[key]))
        query.extend(['}'] * closing)
        query.append(" ORDER BY " + ' '.join(orderby))
        if 'limit' in data:
            query.append(" LIMIT %s" % data['limit'])
        complete_query = '\n'.join(query)
        print(complete_query)
        return complete_query
if __name__ == "__main__":
    # Usage: python ProcessStuff.py <config.json>
    try:
        JSON_FILE_NAME = sys.argv[1]
    except IndexError:
        # Narrow except: only a missing argv[1] means "no file given".
        print("JSON file name is needed to run!")
        sys.exit(2)
    # Subtract real datetimes instead of round-tripping through
    # strftime/strptime('%H:%M:%S'), which dropped the date and produced
    # a wrong (negative) duration for runs crossing midnight.
    start_time = datetime.datetime.now()
    hm = ProcessStuff(JSON_FILE_NAME)
    data = hm.getConfigFile()
    hm.queryMultiplier(data[0])
    total_time = datetime.datetime.now() - start_time
    print("Took %s to process %s " % (total_time, JSON_FILE_NAME))