Warning: file_get_contents(/data/phpspider/zhask/data//catemap/2/csharp/275.json): failed to open stream: No such file or directory in /data/phpspider/zhask/libs/function.php on line 167

Warning: Invalid argument supplied for foreach() in /data/phpspider/zhask/libs/tag.function.php on line 1116

Notice: Undefined index: in /data/phpspider/zhask/libs/function.php on line 180

Warning: array_chunk() expects parameter 1 to be array, null given in /data/phpspider/zhask/libs/function.php on line 181
C# Lucene.net:查询并使用筛选器限制结果_C#_.net_Full Text Search_Lucene.net - Fatal编程技术网

C# Lucene.net:查询并使用筛选器限制结果

C# Lucene.net:查询并使用筛选器限制结果,c#,.net,full-text-search,lucene.net,C#,.net,Full Text Search,Lucene.net,像往常一样,我求助于Stackoverflow用户群的巨大脑力来帮助解决我正在与之斗争的Lucene.NET问题。首先,当谈到Lucene和Lucene.NET时,我是一个彻头彻尾的noob,通过在线使用分散的教程和代码片段,我为我的场景拼凑了以下解决方案 情景 我有以下结构的索引: --------------------------------------------------------- | id | date | security | text

像往常一样,我求助于Stackoverflow用户群的巨大脑力来帮助解决我正在与之斗争的Lucene.NET问题。首先,当谈到Lucene和Lucene.NET时,我是一个彻头彻尾的noob,通过在线使用分散的教程和代码片段,我为我的场景拼凑了以下解决方案

情景

我有以下结构的索引:

---------------------------------------------------------
| id  |    date    | security |           text          |
---------------------------------------------------------
|  1  | 2011-01-01 | -1-12-4- | some analyzed text here |
---------------------------------------------------------
|  2  | 2011-01-01 |  -11-3-  | some analyzed text here |
---------------------------------------------------------
|  3  | 2011-01-01 |    -1-   | some analyzed text here |
---------------------------------------------------------
我需要能够查询 text 字段,但只返回 security 字段中包含指定角色 ID 的文档(即当前用户的角色有权查看的文档)。

为了实现这一点(在多次访问谷歌之后),我想到了使用“安全字段”和Lucene过滤器来限制结果集,如下所示:

/// <summary>
/// Filter that admits only documents whose stored "security" field contains
/// the role marker "-1-".
/// </summary>
/// <remarks>
/// Each live document is loaded in turn to inspect its stored field, so the
/// work done grows with the number of documents in the reader.
/// </remarks>
class SecurityFilter : Lucene.Net.Search.Filter
{
    /// <summary>
    /// Builds the set of permitted document numbers for <paramref name="indexReader"/>.
    /// </summary>
    /// <param name="indexReader">Reader whose documents are tested.</param>
    /// <returns>
    /// One bit per document number (0 .. MaxDoc-1); a bit is set when that
    /// document's "security" value contains "-1-".
    /// </returns>
    public override System.Collections.BitArray Bits(Lucene.Net.Index.IndexReader indexReader)
    {
        System.Collections.BitArray bitarray = new System.Collections.BitArray(indexReader.MaxDoc());

        for (int i = 0; i < bitarray.Length; i++)
        {
            // Document numbers of deleted documents still fall below MaxDoc,
            // but such documents cannot be loaded; leave their bit cleared.
            if (indexReader.IsDeleted(i))
            {
                continue;
            }

            // Get yields null when the document carries no stored "security" field.
            string security = indexReader.Document(i).Get("security");
            if (security != null && security.Contains("-1-"))
            {
                bitarray.Set(i, true);
            }
        }

        return bitarray;
    }
}
这将按预期工作,只返回id为1和3的文档。问题是,在大型索引上,此过程变得非常缓慢


最后,我的问题。。。有没有人对如何加快速度有什么建议,或者有比我在这里介绍的更有效的替代解决方案?

如果您将安全字段作为经过分析(分词)的字段建立索引(例如,把安全字符串拆分为 1、12、4 等独立的词项),

您可以创建这样的过滤器

// The field name must match the indexed field exactly: "security", with no trailing space.
Filter filter = new QueryFilter(new TermQuery(new Term("security", "1")));


形成一个查询,例如:
some text +security:1

我用一个简单的例子修改了我的答案,解释了我在上一个答案中的意思

我做得很快,不尊重最佳实践,但它应该给你一个想法

请注意,安全字段需要标记化(分词),以便其中的每个ID都是单独的标记,例如像下面的示例那样,用空格分隔各个ID,并使用分析器(示例中为 StandardAnalyzer)以 Field.Index.ANALYZED 方式建立索引:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using System.IO;

namespace ConsoleApplication1
{
    class Program
    {
        /// <summary>
        /// Static lookup table of filters keyed by role name, so that the
        /// filter built for a role can be reused by later searches.
        /// </summary>
        public class RoleFilterCache
        {
            /// <summary>Backing store mapping a role name to its filter.</summary>
            public static Dictionary<string, Filter> Cache = new Dictionary<string, Filter>();

            /// <summary>Looks up the filter stored for <paramref name="role"/>.</summary>
            /// <param name="role">Role name used as the key; may be null.</param>
            /// <returns>The stored filter, or null when there is none (or the role is null).</returns>
            public static Filter Get(string role)
            {
                // A dictionary lookup throws on a null key; report a miss instead,
                // mirroring the null tolerance of Put.
                if (role == null)
                {
                    return null;
                }

                Filter cached;
                return Cache.TryGetValue(role, out cached) ? cached : null;
            }

            /// <summary>Stores (or replaces) the filter for <paramref name="role"/>.</summary>
            /// <param name="role">Role name used as the key; a null role is ignored.</param>
            /// <param name="filter">Filter to associate with the role.</param>
            public static void Put(string role, Filter filter)
            {
                if (role != null)
                {
                    Cache[role] = filter;
                }
            }
        }

        /// <summary>A user together with the names of the roles assigned to them.</summary>
        public class User
        {
            /// <summary>Display name of the user.</summary>
            public string Username;

            /// <summary>
            /// Role names held by the user. Starts as an empty list so that a
            /// freshly constructed user can be enumerated without a null check.
            /// </summary>
            public List<string> Roles = new List<string>();
        }

        /// <summary>
        /// Builds a filter for a user by combining the per-role filters of all
        /// of the user's roles as SHOULD clauses of a <c>BooleanFilter</c>.
        /// </summary>
        /// <param name="u">User whose roles determine the filter.</param>
        /// <returns>
        /// A <c>BooleanFilter</c> with one SHOULD clause per non-null role; it has
        /// no clauses when the user has no roles.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="u"/> is null.</exception>
        public static Filter GetFilterForUser(User u)
        {
            if (u == null)
            {
                throw new ArgumentNullException("u");
            }

            BooleanFilter userFilter = new BooleanFilter();

            // A user whose role list was never assigned is treated like one with no roles.
            if (u.Roles == null)
            {
                return userFilter;
            }

            foreach (string rolename in u.Roles)
            {
                // A null entry names no role, so it contributes no clause.
                if (rolename == null)
                {
                    continue;
                }

                userFilter.Add(
                    new BooleanFilterClause(GetFilterForRole(rolename), BooleanClause.Occur.SHOULD)
                );
            }
            return userFilter;
        }

        /// <summary>
        /// Returns the filter for a single role, creating and caching it in
        /// <c>RoleFilterCache</c> on first use.
        /// </summary>
        /// <param name="role">Role name; matched as a term of the "security" field.</param>
        /// <returns>The cached filter when one exists, otherwise the newly built one.</returns>
        public static Filter GetFilterForRole(string role)
        {
            Filter existing = RoleFilterCache.Get(role);
            if (existing != null)
            {
                return existing;
            }

            // The filter is derived from a term query against the index rather
            // than by reading each document's stored content.
            Term roleTerm = new Term("security", role);
            Filter termFilter = new QueryWrapperFilter(new TermQuery(roleTerm));

            // Wrapped so the computed result can be cached by the wrapper
            // (per segment reader, according to the original author's note).
            Filter cachingFilter = new CachingWrapperFilter(termFilter);

            RoleFilterCache.Put(role, cachingFilter);
            return cachingFilter;
        }


        /// <summary>
        /// Demo entry point: builds a four-document index, then runs the same
        /// phrase query for three users and prints what each one may see.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string indexPath = "C:\\example\\";

            IndexWriter iw = new IndexWriter(new FileInfo(indexPath), new StandardAnalyzer(), true);
            try
            {
                // One Document and its two Field instances are reused; only the
                // values change between AddDocument calls.
                Document d = new Document();

                Field aField = new Field("content", "", Field.Store.YES, Field.Index.ANALYZED);
                Field securityField = new Field("security", "", Field.Store.NO, Field.Index.ANALYZED);

                d.Add(aField);
                d.Add(securityField);

                aField.SetValue("Only one can see.");
                securityField.SetValue("1");
                iw.AddDocument(d);
                aField.SetValue("One and two can see.");
                securityField.SetValue("1 2");
                iw.AddDocument(d);
                aField.SetValue("One and two can see.");
                securityField.SetValue("1 2");
                iw.AddDocument(d);
                aField.SetValue("Only two can see.");
                securityField.SetValue("2");
                iw.AddDocument(d);
            }
            finally
            {
                // Close the writer even when indexing fails part-way.
                iw.Close();
            }

            User userone = new User()
            {
                Username = "User one",
                Roles = new List<string>()
            };
            userone.Roles.Add("1");
            User usertwo = new User()
            {
                Username = "User two",
                Roles = new List<string>()
            };
            usertwo.Roles.Add("2");
            User userthree = new User()
            {
                Username = "User three",
                Roles = new List<string>()
            };
            userthree.Roles.Add("1");
            userthree.Roles.Add("2");

            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("content", "can"));
            phraseQuery.Add(new Term("content", "see"));

            IndexSearcher searcher = new IndexSearcher(indexPath, true);
            try
            {
                PrintResultsForUser(searcher, phraseQuery, userone, "User One Results:");
                PrintResultsForUser(searcher, phraseQuery, usertwo, "User two Results:");
                PrintResultsForUser(searcher, phraseQuery, userthree, "User three Results (should see everything):");
            }
            finally
            {
                // The searcher was previously never closed.
                searcher.Close();
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Runs <paramref name="query"/> restricted by the user's security filter
        /// and writes the heading, the "content" of up to 25 hits, and a blank
        /// separator to the console.
        /// </summary>
        private static void PrintResultsForUser(IndexSearcher searcher, Query query, User user, string heading)
        {
            Filter securityFilter = GetFilterForUser(user);
            TopDocs results = searcher.Search(query, securityFilter, 25);
            Console.WriteLine(heading);
            foreach (var aResult in results.ScoreDocs)
            {
                Console.WriteLine(
                    searcher.Doc(aResult.doc).
                    Get("content")
                );
            }
            Console.WriteLine("\n\n");
        }
    }
}
使用系统;
使用System.Collections.Generic;
使用System.Linq;
使用系统文本;
使用Lucene.Net.Search;
使用Lucene.Net.Documents;
使用Lucene.Net.Index;
使用Lucene.Net.Analysis.Standard;
使用System.IO;
命名空间控制台应用程序1
{
班级计划
{
公共类RoleFilterCache
{
静态公共字典缓存=新字典();
静态公共筛选器获取(字符串角色)
{
Filter cached=null;
if(!Cache.TryGetValue(角色,缓存外))
{
返回null;
}
返回缓存;
}
静态公共void Put(字符串角色、筛选器)
{
if(角色!=null)
{
缓存[角色]=过滤器;
}
}
}
公共类用户
{
公共字符串用户名;
公开名单角色;
}
公共静态筛选器GetFilterForUser(用户u)
{
BooleanFilter userFilter=新的BooleanFilter();
foreach(美国角色中的字符串rolename)
{   
//调用GetFilterForRole并添加到BooleanFilter
userFilter.Add(
新的BooleanFilterClause(GetFilterForRole(rolename),BooleanClause.occure.SHOULD)
);
}
返回用户过滤器;
}
公共静态筛选器GetFilterForRole(字符串角色)
{
过滤器roleFilter=RoleFilterCache.Get(角色);
if(roleFilter==null)
{
滚滤器=
//缓存包装过滤器使其缓存每个segmentreader的位集
新型CachingWrapperFilter(
//从索引而不是迭代生成筛选器
//存储的文档内容要快得多
新型查询过滤器(
新术语查询(
新术语(“安全”,角色)
)
)
);
//缓存
RoleFilterCache.Put(角色,roleFilter);
}
返回滚滤器;
}
静态void Main(字符串[]参数)
{
IndexWriter iw=新的IndexWriter(新文件信息(“C:\\example\\”),新的StandardAnalyzer(),true);
文件d=新文件();
Field aField=新字段(“内容”,“Field.Store.YES,Field.Index.analysisted”);
字段securityField=新字段(“security”,“”,Field.Store.NO,Field.Index.analysis);
d、 添加(外地);
d、 添加(securityField);
SetValue(“只有一个人能看到”);
securityField.SetValue(“1”);
iw.补充文件(d);
SetValue(“一和二可以看到”);
securityField.SetValue(“12”);
iw.补充文件(d);
SetValue(“一和二可以看到”);
securityField.SetValue(“12”);
iw.补充文件(d);
SetValue(“只有两个可以看到”);
securityField.SetValue(“2”);
iw.补充文件(d);
iw.Close();
用户userone=新用户()
{
Username=“User one”,
角色=新列表()
};
userone.Roles.Add(“1”);
用户usertwo=新用户()
{
Username=“User 2”,
角色=新列表()
};
usertwo.Roles.Add(“2”);
用户userthree=新用户()
{
Username=“User三”,
角色=新列表()
};
userthree.Roles.Add(“1”);
userthree.Roles.Add(“2”);
PhraseQuery PhraseQuery=新PhraseQuery();
添加(新术语(“内容”,“可以”));
添加(新术语(“内容”、“参见”);
IndexSearcher=newindexSearcher(“C:\\example\\”,true);
Filter securityFilter=GetFilterForUser(userone);
TopDocs results=searcher.Search(短语查询,安全过滤器,25);
Console.WriteLine(“用户一结果:”);
foreach(var)是结果中的结果
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Search;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Analysis.Standard;
using System.IO;

namespace ConsoleApplication1
{
    class Program
    {
        /// <summary>
        /// Static lookup table of filters keyed by role name, so that the
        /// filter built for a role can be reused by later searches.
        /// </summary>
        public class RoleFilterCache
        {
            /// <summary>Backing store mapping a role name to its filter.</summary>
            public static Dictionary<string, Filter> Cache = new Dictionary<string, Filter>();

            /// <summary>Looks up the filter stored for <paramref name="role"/>.</summary>
            /// <param name="role">Role name used as the key; may be null.</param>
            /// <returns>The stored filter, or null when there is none (or the role is null).</returns>
            public static Filter Get(string role)
            {
                // A dictionary lookup throws on a null key; report a miss instead,
                // mirroring the null tolerance of Put.
                if (role == null)
                {
                    return null;
                }

                Filter cached;
                return Cache.TryGetValue(role, out cached) ? cached : null;
            }

            /// <summary>Stores (or replaces) the filter for <paramref name="role"/>.</summary>
            /// <param name="role">Role name used as the key; a null role is ignored.</param>
            /// <param name="filter">Filter to associate with the role.</param>
            public static void Put(string role, Filter filter)
            {
                if (role != null)
                {
                    Cache[role] = filter;
                }
            }
        }

        /// <summary>A user together with the names of the roles assigned to them.</summary>
        public class User
        {
            /// <summary>Display name of the user.</summary>
            public string Username;

            /// <summary>
            /// Role names held by the user. Starts as an empty list so that a
            /// freshly constructed user can be enumerated without a null check.
            /// </summary>
            public List<string> Roles = new List<string>();
        }

        /// <summary>
        /// Builds a filter for a user by combining the per-role filters of all
        /// of the user's roles as SHOULD clauses of a <c>BooleanFilter</c>.
        /// </summary>
        /// <param name="u">User whose roles determine the filter.</param>
        /// <returns>
        /// A <c>BooleanFilter</c> with one SHOULD clause per non-null role; it has
        /// no clauses when the user has no roles.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="u"/> is null.</exception>
        public static Filter GetFilterForUser(User u)
        {
            if (u == null)
            {
                throw new ArgumentNullException("u");
            }

            BooleanFilter userFilter = new BooleanFilter();

            // A user whose role list was never assigned is treated like one with no roles.
            if (u.Roles == null)
            {
                return userFilter;
            }

            foreach (string rolename in u.Roles)
            {
                // A null entry names no role, so it contributes no clause.
                if (rolename == null)
                {
                    continue;
                }

                userFilter.Add(
                    new BooleanFilterClause(GetFilterForRole(rolename), BooleanClause.Occur.SHOULD)
                );
            }
            return userFilter;
        }

        /// <summary>
        /// Returns the filter for a single role, creating and caching it in
        /// <c>RoleFilterCache</c> on first use.
        /// </summary>
        /// <param name="role">Role name; matched as a term of the "security" field.</param>
        /// <returns>The cached filter when one exists, otherwise the newly built one.</returns>
        public static Filter GetFilterForRole(string role)
        {
            Filter existing = RoleFilterCache.Get(role);
            if (existing != null)
            {
                return existing;
            }

            // The filter is derived from a term query against the index rather
            // than by reading each document's stored content.
            Term roleTerm = new Term("security", role);
            Filter termFilter = new QueryWrapperFilter(new TermQuery(roleTerm));

            // Wrapped so the computed result can be cached by the wrapper
            // (per segment reader, according to the original author's note).
            Filter cachingFilter = new CachingWrapperFilter(termFilter);

            RoleFilterCache.Put(role, cachingFilter);
            return cachingFilter;
        }


        /// <summary>
        /// Demo entry point: builds a four-document index, then runs the same
        /// phrase query for three users and prints what each one may see.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string indexPath = "C:\\example\\";

            IndexWriter iw = new IndexWriter(new FileInfo(indexPath), new StandardAnalyzer(), true);
            try
            {
                // One Document and its two Field instances are reused; only the
                // values change between AddDocument calls.
                Document d = new Document();

                Field aField = new Field("content", "", Field.Store.YES, Field.Index.ANALYZED);
                Field securityField = new Field("security", "", Field.Store.NO, Field.Index.ANALYZED);

                d.Add(aField);
                d.Add(securityField);

                aField.SetValue("Only one can see.");
                securityField.SetValue("1");
                iw.AddDocument(d);
                aField.SetValue("One and two can see.");
                securityField.SetValue("1 2");
                iw.AddDocument(d);
                aField.SetValue("One and two can see.");
                securityField.SetValue("1 2");
                iw.AddDocument(d);
                aField.SetValue("Only two can see.");
                securityField.SetValue("2");
                iw.AddDocument(d);
            }
            finally
            {
                // Close the writer even when indexing fails part-way.
                iw.Close();
            }

            User userone = new User()
            {
                Username = "User one",
                Roles = new List<string>()
            };
            userone.Roles.Add("1");
            User usertwo = new User()
            {
                Username = "User two",
                Roles = new List<string>()
            };
            usertwo.Roles.Add("2");
            User userthree = new User()
            {
                Username = "User three",
                Roles = new List<string>()
            };
            userthree.Roles.Add("1");
            userthree.Roles.Add("2");

            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.Add(new Term("content", "can"));
            phraseQuery.Add(new Term("content", "see"));

            IndexSearcher searcher = new IndexSearcher(indexPath, true);
            try
            {
                PrintResultsForUser(searcher, phraseQuery, userone, "User One Results:");
                PrintResultsForUser(searcher, phraseQuery, usertwo, "User two Results:");
                PrintResultsForUser(searcher, phraseQuery, userthree, "User three Results (should see everything):");
            }
            finally
            {
                // The searcher was previously never closed.
                searcher.Close();
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Runs <paramref name="query"/> restricted by the user's security filter
        /// and writes the heading, the "content" of up to 25 hits, and a blank
        /// separator to the console.
        /// </summary>
        private static void PrintResultsForUser(IndexSearcher searcher, Query query, User user, string heading)
        {
            Filter securityFilter = GetFilterForUser(user);
            TopDocs results = searcher.Search(query, securityFilter, 25);
            Console.WriteLine(heading);
            foreach (var aResult in results.ScoreDocs)
            {
                Console.WriteLine(
                    searcher.Doc(aResult.doc).
                    Get("content")
                );
            }
            Console.WriteLine("\n\n");
        }
    }
}