Python 使用gae搜索和游标分页时,除最后一页外,所有页面上的实体总数错误
我在使用 GAE 的 Search API 时得到了一些奇怪的结果:它返回的文档总数大约是实际数量的 10 倍。我在 mapreduce 中用 if 判断来检查实体是否可见(一个布尔变量),以及该实体是否在过去 60 天内被修改过——只有过去 60 天内修改过的实体才应该进入索引。那么我到底做错了什么?奇怪的是,它返回的总数与空查询的总数差不多(空查询应当匹配索引中的全部内容),大约是应返回数量的十倍。但只有这个数字是错的:逐页翻阅实际结果集时,结果集的长度是正确的,而且在最后一页上,匹配的实体总数也是正确的。也就是说,除最后一页之外的所有分页页面上,结果数都偏大,只有最后一页显示正确的实体总数。这是为什么?你能帮助我吗?
标签:python, google-app-engine, python-2.7, mapreduce, full-text-search
我用于构建索引的 mapreduce 代码是:
def _to_number(raw, cast, default, label, entity):
    """Convert ``raw`` with ``cast``, returning ``default`` when it is empty or invalid.

    Args:
        raw: The stored attribute value; falsy values yield ``default``.
        cast: Callable used for the conversion (e.g. ``int`` or ``long``).
        default: Value returned when ``raw`` is falsy or ``cast`` raises.
        label: Attribute name used in the log message.
        entity: The entity being indexed; only its key id is logged.
    """
    if not raw:
        return default
    try:
        return cast(raw)
    except Exception:
        # Best effort: one malformed attribute must not keep the whole
        # entity out of the index, so fall back to the default value.
        logging.info(label + ' conversion failed for entity %s',
                     str(entity.key().id()))
        return default


def index(entity):
    """Map function that puts a recently modified, published entity in the 'ads' index.

    Entities that are not published, or whose ``modified`` timestamp is 60
    days old or older, are skipped. For the others a ``search.Document`` is
    built from the entity's attributes and put into the index; the result of
    ``put`` is yielded, which makes this function a generator.

    Args:
        entity: The datastore entity to index. It is read for ``published``,
            ``modified``, ``city``, ``region``, ``price``, ``mileage``,
            ``regdate``, ``company_ad``, ``category``, ``title``, ``text``,
            ``name``, ``type`` and ``currency``.

    Yields:
        The return value of ``search.Index(name='ads').put(doc)``.

    Any exception is caught and logged (with traceback) instead of being
    propagated, so a single bad entity does not abort the caller.
    """
    try:
        edge = datetime.datetime.now() - timedelta(days=60)
        if not (entity.published and entity.modified > edge):
            return

        # City and region are stored by name on the entity; their numeric
        # ids are looked up so that they can be indexed as number fields.
        city_entity = montaomodel.City.all().filter(
            'name =', entity.city).get()
        region_entity = montaomodel.Region.all().filter(
            'name =', entity.region).get()

        price = _to_number(entity.price, long, 0, 'price', entity)
        mileage = _to_number(entity.mileage, int, -1, 'mileage', entity)
        regdate = _to_number(entity.regdate, int, -1, 'regdate', entity)

        company_ad = 1 if entity.company_ad else 0
        city_id = city_entity.key().id() if city_entity else 0
        region_id = region_entity.key().id() if region_entity else 0
        category = entity.category or 0

        doc = search.Document(doc_id=str(entity.key()), fields=[
            search.TextField(name='title', value=entity.title),
            search.TextField(name='text', value=entity.text),
            search.TextField(name='city', value=entity.city),
            search.TextField(name='region', value=entity.region),
            search.NumberField(name='cityID', value=int(city_id)),
            search.NumberField(name='regionID', value=int(region_id)),
            search.NumberField(name='category', value=int(category)),
            search.NumberField(name='constant', value=1),
            search.NumberField(name='adID', value=int(entity.key().id())),
            search.TextField(name='name', value=entity.name),
            search.DateField(name='date', value=entity.modified.date()),
            search.NumberField(name='price', value=long(price)),
            search.NumberField(name='mileage', value=int(mileage)),
            search.NumberField(name='regdate', value=int(regdate)),
            search.TextField(name='type', value=entity.type),
            search.TextField(name='currency', value=entity.currency),
            search.NumberField(name='company_ad', value=company_ad),
            search.NumberField(name='hour', value=entity.modified.hour),
            search.NumberField(name='minute', value=entity.modified.minute),
        ], language='en')
        yield search.Index(name='ads').put(doc)
    except Exception as e:
        # logging.exception records the traceback at ERROR level, so failed
        # indexing attempts are visible instead of hidden among INFO logs.
        logging.exception('There occurred exception:%s', e)
搜索代码如下:
def find_documents(query_string, limit, cursor, number_found_accuracy=None):
    """Run a full-text search on the index, newest documents first.

    Args:
        query_string: The query in Search API query syntax.
        limit: Maximum number of results to return for this page.
        cursor: A ``search.Cursor`` marking where this page starts.
        number_found_accuracy: Optional minimum accuracy for the reported
            ``number_found``. When left as ``None`` the Search API default
            applies and the total may only be an estimate; pass e.g. 2000
            to get an exact count for result sets up to that size.

    Returns:
        The ``search.SearchResults`` for the query, or ``None`` when the
        search raised ``search.Error`` (the failure is logged).
    """
    try:
        newest_first = search.SortExpression(
            expression='date',
            direction=search.SortExpression.DESCENDING,
            default_value=datetime.now().date())
        # Sort up to 1000 matching results by date in descending order.
        sort = search.SortOptions(expressions=[newest_first], limit=1000)
        options = search.QueryOptions(
            limit=limit,  # the number of results to return
            cursor=cursor,
            sort_options=sort,
            # Without this the total shown on every page but the last can
            # be an estimate rather than the real number of matches.
            number_found_accuracy=number_found_accuracy,
        )
        query = search.Query(query_string=query_string, options=options)
        return search.Index(name=_INDEX_NAME).search(query)
    except search.Error:
        logging.exception('Search failed')
        return None
当有1000多个结果时,全文搜索报告找到的文档数是近似值。GAE使用的近似值与您的数据不符。你可以用 QueryOptions 的 number_found_accuracy 选项更改报告的搜索结果数的准确性,例如:
# Query options: request an accurate result count for up to 2000 matches
options = search.QueryOptions(number_found_accuracy=2000)
这样,只要匹配的文档数不超过 2000 个,报告的文档数就是准确的。