Why does this SPARQL query return no data when I use an OFFSET greater than 0?

I'm trying to retrieve music information from DBPedia. If I run this query against the following server:

I get one result, but if I change the OFFSET to 1 the result set is empty. Surely more than one result is available?

Any ideas?

group_concat is an aggregator.

The whole result set is grouped into a single group, which yields one result row. So OFFSET 0 returns one row and OFFSET 1 returns nothing.

Were you trying to use

GROUP BY ?title

?

For example:

SELECT (count(*) AS ?C)
WHERE 
{ ?s ?p ?o }

yields a single row, which is the count.
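
With a GROUP BY clause each ?title becomes its own group, so OFFSET then pages through bands rather than through one all-encompassing group. A minimal sketch of the fix (not the original query, which is not shown in the post; the predicates are borrowed from the JSON config further down):

SELECT ?title (group_concat(distinct ?member; separator=';;;') AS ?member_s)
WHERE {
  ?title <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:American_hard_rock_musical_groups> .
  OPTIONAL { ?title <http://dbpedia.org/ontology/bandMember> ?member . }
}
GROUP BY ?title
ORDER BY ?title
LIMIT 1 OFFSET 1

Here OFFSET 1 skips the first band and returns the second, instead of returning nothing.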

So the solution I found was to split the query into several queries using Python.

Create a JSON file that specifies/defines what is wanted:

[{
"root": [
    "?title <http://purl.org/dc/terms/subject> <http://dbpedia.org/resource/Category:American_hard_rock_musical_groups> .",
    "?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> ?name "
],
"sub_page": [
   "?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> <[[X]]> .",
   "OPTIONAL { ?title <http://dbpedia.org/ontology/bandMember> ?member . }",
   "OPTIONAL { ?title <http://dbpedia.org/ontology/formerBandMember> ?oldMember . }",
   "OPTIONAL { ?title <http://dbpedia.org/property/label> ?label . }",
   "OPTIONAL { ?title <http://dbpedia.org/property/genre> ?genre . }",
   "OPTIONAL { ?title <http://dbpedia.org/property/origin> ?origin . }", 
   "OPTIONAL { ?title <http://dbpedia.org/ontology/activeYearsStartYear> ?date . }", 
   "OPTIONAL { ?song <http://dbpedia.org/ontology/artist> ?title . }",
   "OPTIONAL { ?songOther <http://dbpedia.org/property/artist> ?title . }", 
   "OPTIONAL { ?songOtherOther <http://dbpedia.org/ontology/musicalArtist> ?title . }",
   "OPTIONAL { ?songOtherOtherOther <http://dbpedia.org/property/producer> ?title}"

],
"service":"<http://dbpedia.org/sparql/>",
"select":[
            "title",
            "date",
            "label_s",
            "genre_s",
            "member_s",
            "oldMember_s",
            "origin_s",
            "song_s",
            "songOther_s",
            "songOtherOther_s",
            "songOtherOtherOther_s",
            "name_X"
    ], 
"language": "en", 
"limit": 10000,
"offset": 100,
"category": "music",
"description": "American Hard Rock",
"sub_category": "American_hard_rock_musical_groups"
}]
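
The <[[X]]> token in the sub_page patterns is a placeholder, and the _X suffix in the select list ("name_X") marks which variable's values fill it in: the root query first collects every band's ?name, then one sub_page query is generated per band. A hypothetical illustration of the substitution (the Aerosmith URL is an example value, not taken from the post):

pattern = "?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> <[[X]]> ."
name = "http://en.wikipedia.org/wiki/Aerosmith"  # example ?name binding (assumed)
print pattern.replace("[[X]]", name)
# ?title <http://xmlns.com/foaf/0.1/isPrimaryTopicOf> <http://en.wikipedia.org/wiki/Aerosmith> .
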
Then consume the JSON with this Python script:

import os, sys
from api.DBPedia import DBPedia
import datetime 
import time
import copy
import json
class ProcessStuff(DBPedia):
    def __init__(self, fn = "" ):
        """Initialize ProcessStuff class"""
        self.filePath = fn

    def getConfigFile(self):
        """Reads json file with dbpedia query information"""
        try:
            jsonFile = open(self.filePath, "r")
            data = json.load(jsonFile)
            jsonFile.close()
            return data
        except Exception as e:
            print "[getConfigFile] Error in reading file: %s" % e

    def queryMultiplier(self, data, identifier = '[[X]]'):
        """Generate new DBPedia queries based on previous query result."""
        queries = []
        q = self.createSparqlQuery(data)
        json_page = self.resolveDBPediaQuery(q = q)
        if len(data['sub_page']) > 0:
            try:
                items = json_page['results']['bindings']
                for item in items:
                    sub_data = copy.deepcopy(data)
                    # Only allows for one identifier
                    sub_page_identifier = [var for var in data['select'] if var.endswith('_X')][0].replace('_X','')
                    name = item[sub_page_identifier]['value']
                    count = 0
                    while count < len(sub_data['sub_page']):
                        if identifier in sub_data['sub_page'][count]:
                            sub_data['sub_page'][count] = sub_data['sub_page'][count].replace(identifier,name)
                        count += 1
                    q = self.createSparqlQuery(sub_data, key = 'sub_page')
                    queries.append(q)
            except Exception as e:
                print "[ProcessStuff][queryMultiplier] Error in creating queries for subpages: %s" % e
                pass
            for query in queries:
                file_name = data['category']+"___"+data['sub_category']
                print "Fetching query: \n%s" % query
                json_page = self.resolveDBPediaQuery(q = query)
                print "Processin page and saving to: "+ file_name
                self.processPage(json_page, json_file = '../../json_samples/', category = file_name, overwrite = False)

    def createConcat(self, data, separator = ";;;"):
        """ Creates concat string. """
        return "(group_concat(distinct ?"+data+";separator='"+separator+"') as ?"+data+"_s)" 

    def createSparqlQuery(self, data, separator = ";;;", key = "root", offset = 100):
        """Generates SPARQL query from input file."""
        query = []
        orderby = []
        select = "SELECT DISTINCT"
        #from_each_subpage
        for prop in data['select']:
            if prop.endswith("_s"):
                select +=" "+ self.createConcat(prop.split("_s")[0])
            else:
                v = "?"+ prop.replace('_X','')
                select += " "+ v
                orderby.append(v)
        where = " WHERE { "
        closing = 1
        query.append(select)
        query.append(where)
        try:
            service = "SERVICE "+data['service'] + " {"
            query.append(service)
            closing += 1
        except:
            pass
        query.append('\n'.join(data[key]))
        while closing > 0:
            query.append('}')
            closing -= 1
        o = " ORDER BY " + ' '.join(orderby)
        query.append(o)
        try:
            limit = data['limit']
            l = " LIMIT %s" % limit
            query.append(l)
        except:
            pass

        complete_query = '\n'.join(query)
        print complete_query
        return complete_query

if __name__ == "__main__":
    try:
        JSON_FILE_NAME = sys.argv[1]
    except:
        print "JSON file name is needed to run!"
        sys.exit(2)
    start_time = datetime.datetime.now().time().strftime('%H:%M:%S')
    hm = ProcessStuff(JSON_FILE_NAME)
    data = hm.getConfigFile()
    hm.queryMultiplier(data[0])
    end_time = datetime.datetime.now().time().strftime('%H:%M:%S')
    total_time=(datetime.datetime.strptime(end_time,'%H:%M:%S') - datetime.datetime.strptime(start_time,'%H:%M:%S'))
    print "Took %s to process %s " % (total_time, JSON_FILE_NAME)
Then run the code as follows: python ProcessStuff.py input.json

It took 0:31:10 to process music.json, handling 441 entries.

Of course, the code could be faster...
The DBPedia class that ProcessStuff inherits from just makes HTTP requests, cleans the results up a little and saves them as JSON.
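
Since that class is not shown in the post, here is a minimal, hypothetical sketch of what it could look like (Python 2, assuming the endpoint's standard SPARQL JSON results format; the method names resolveDBPediaQuery and processPage are taken from the script above, but their bodies here are guesses):

import os
import json
import urllib, urllib2

class DBPedia(object):
    def resolveDBPediaQuery(self, q, endpoint = "http://dbpedia.org/sparql"):
        """Send the query to the endpoint and return the parsed JSON results."""
        params = urllib.urlencode({"query": q, "format": "application/sparql-results+json"})
        return json.load(urllib2.urlopen(endpoint + "?" + params))

    def processPage(self, json_page, json_file = ".", category = "out", overwrite = False):
        """Append (or overwrite) the raw result bindings as <json_file>/<category>.json."""
        path = os.path.join(json_file, category + ".json")
        with open(path, "w" if overwrite else "a") as f:
            json.dump(json_page, f)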

The problem is with the property/artist and ontology/musicalArtist predicates; without them it works fine. I don't know why, but to me it looks like a Virtuoso bug.

Thanks @laughedelic, yes, I understand, but without those I can't get the song information that matters for my project. I could do it programmatically, e.g. first fetch every band and then fetch the songs for each band; I was just wondering whether there is a more compact way.

Do you need song, songOther and songOtherOther separately?

@laughedelic, no, they can go together in one binding.

Can you clarify...? Why does the other query return more results? Also, if I remove the LIMIT clause I get a Virtuoso 22026 Error SR319: max row length exceeded when trying to store a string of 3145 chars into a temp col. I think Virtuoso allows the explicit grouping to be omitted; I tried adding it explicitly as well, but it didn't change anything.

Glad you solved your problem, but I don't think this answers your original question of why the query doesn't work as expected, and it isn't a SPARQL solution.

@laughedelic fair enough.