c# – Lucene 返回得分非正数(0 或负数)的文档

我们最近升级了所使用的 CMS,因此不得不将 Lucene.Net 从
V2.3.1.301 升级到 V2.9.4.1

我们在原始解决方案中使用了CustomScoreQuery,该解决方案进行了内置查询无法实现的各种过滤。 (GEO,多日期范围等)

自从由旧版本迁移到新版本的 Lucene 之后,它开始返回这样的文档:检查结果时,这些文档的得分为 0,甚至为负数

enter image description here
以下是演示该问题的重新编码的示例

    /// <summary>
    /// Opens the site-search index, runs the event-date query and prints how many
    /// documents matched with a strictly positive score, then waits for a key press.
    /// </summary>
    /// <remarks>
    /// Since Lucene 2.9 the top-docs collectors no longer discard hits whose score is
    /// zero or negative, so the search is routed through
    /// <c>PositiveScoresOnlyCollector</c> to keep such documents out of the results.
    /// </remarks>
    public LuceneTest()
    {
        // Static, process-wide limit; kept from the original sample.
        BooleanQuery.SetMaxClauseCount(10000);

        Lucene.Net.Store.Directory luceneIndexDirectory = FSDirectory.Open(new System.IO.DirectoryInfo(@"C:\inetpub\wwwroot\Project\build\Data\indexes\all_site_search_en"));
        try
        {
            IndexSearcher searcher = new IndexSearcher(luceneIndexDirectory, true);
            try
            {
                Query dateQuery = ComposeEventDateQuery(new DateTime(2015, 11, 23), new DateTime(2015, 11, 25), searcher);

                // Collect the top 1000 hits, but only those scoring above zero.
                TopScoreDocCollector topCollector = TopScoreDocCollector.create(1000, true);
                searcher.Search(dateQuery, new PositiveScoresOnlyCollector(topCollector));
                TopDocs hitsFound = topCollector.TopDocs();

                System.Console.WriteLine(String.Format("Found {0} matches with the date filters", hitsFound.TotalHits));
                System.Console.ReadKey();
            }
            finally
            {
                // Release the index reader opened by the searcher.
                searcher.Close();
            }
        }
        finally
        {
            luceneIndexDirectory.Close();
        }
    }



    /// <summary>
    /// Builds a boolean query whose single required clause is an
    /// <see cref="EventDateQuery1"/> wrapping the term query <c>_language:en</c>.
    /// </summary>
    /// <param name="fromDate">Start date handed to <see cref="EventDateQuery1"/>.</param>
    /// <param name="ToDate">End date handed to <see cref="EventDateQuery1"/>.</param>
    /// <param name="MySearcher">Searcher handed to <see cref="EventDateQuery1"/>.</param>
    /// <returns>The boolean query containing the event-date clause.</returns>
    public static Query ComposeEventDateQuery(DateTime fromDate, DateTime ToDate, IndexSearcher MySearcher)
    {
        Query languageTerm = new TermQuery(new Lucene.Net.Index.Term("_language", "en"));

        // "false" = shouldMatchNonEvents
        Query dateClause = new EventDateQuery1(languageTerm, MySearcher, fromDate, ToDate, false);

        BooleanQuery wrapper = new BooleanQuery();
        wrapper.Add(dateClause, BooleanClause.Occur.MUST);
        return wrapper;
    }


    /// <summary>
    /// A <see cref="CustomScoreQuery"/> that carries a from/to date window (date part
    /// only) and a "match non-events" flag, and hands them to an
    /// <see cref="EventDateQueryCustomScoreProvider"/> for scoring.
    /// </summary>
    public class EventDateQuery1 : CustomScoreQuery
    {
        private const string DateFormat = "yyyyMMdd";

        private readonly DateTime _fromDT;
        private readonly DateTime _toDT;
        private readonly bool _shouldMatchNonEvents;

        /// <summary>
        /// Creates a query whose window starts at <paramref name="fromDT"/> and ends
        /// <paramref name="dateRange"/> days later.
        /// </summary>
        /// <param name="subQuery">The query whose hits are re-scored.</param>
        /// <param name="searcher">Not used by this class; kept so existing callers compile.</param>
        /// <param name="fromDT">Start of the window; the time of day is discarded.</param>
        /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
        /// <param name="dateRange">Length of the window in days (default 14).</param>
        public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, bool shouldMatchNonEvents, int dateRange = 14)
            : this(subQuery, searcher, fromDT, fromDT.AddDays(dateRange), shouldMatchNonEvents)
        {
        }

        /// <summary>
        /// Creates a query with an explicit start and end date.
        /// </summary>
        /// <param name="subQuery">The query whose hits are re-scored.</param>
        /// <param name="searcher">Not used by this class; kept so existing callers compile.</param>
        /// <param name="fromDT">Start of the window; the time of day is discarded.</param>
        /// <param name="toDT">End of the window; the time of day is discarded.</param>
        /// <param name="shouldMatchNonEvents">Flag forwarded to the score provider.</param>
        public EventDateQuery1(Query subQuery, Searcher searcher, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
            : base(subQuery)
        {
            // The searcher is intentionally not stored: nothing here reads it, and
            // holding it would keep the searcher alive for as long as the query lives.
            _fromDT = fromDT.Date;
            _toDT = toDT.Date;
            _shouldMatchNonEvents = shouldMatchNonEvents;
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>.</summary>
        public override string ToString()
        {
            return GenerateUniqueKey();
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>; <paramref name="field"/> is ignored.</summary>
        public override string ToString(string field)
        {
            return GenerateUniqueKey();
        }

        /// <summary>Returns the same key as <see cref="GenerateUniqueKey"/>.</summary>
        public override string Name()
        {
            return GenerateUniqueKey();
        }

        /// <summary>
        /// Builds the key <c>EventDateQuery_{from}_{to}_{flag}</c> with both dates
        /// formatted as <c>yyyyMMdd</c>.
        /// </summary>
        /// <remarks>
        /// Dates are formatted with the invariant culture so the key does not change
        /// with the thread's culture (for example under a non-Gregorian default calendar).
        /// </remarks>
        public string GenerateUniqueKey()
        {
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            return String.Format(invariant, "EventDateQuery_{0}_{1}_{2}", _fromDT.ToString(DateFormat, invariant), _toDT.ToString(DateFormat, invariant), _shouldMatchNonEvents.ToString());
        }

        /// <summary>
        /// Two instances are equal only when the base query state and the date window
        /// and flag all match; the base implementation cannot know about these fields.
        /// </summary>
        public override bool Equals(object o)
        {
            EventDateQuery1 other = o as EventDateQuery1;
            return other != null
                && base.Equals(other)
                && _fromDT == other._fromDT
                && _toDT == other._toDT
                && _shouldMatchNonEvents == other._shouldMatchNonEvents;
        }

        /// <summary>Hash code consistent with <see cref="Equals(object)"/>.</summary>
        public override int GetHashCode()
        {
            unchecked
            {
                int hash = base.GetHashCode();
                hash = (hash * 31) + _fromDT.GetHashCode();
                hash = (hash * 31) + _toDT.GetHashCode();
                hash = (hash * 31) + _shouldMatchNonEvents.GetHashCode();
                return hash;
            }
        }

        /// <summary>
        /// Supplies the score provider for <paramref name="reader"/>, configured with
        /// this query's date window and flag.
        /// </summary>
        protected override CustomScoreProvider GetCustomScoreProvider(IndexReader reader)
        {
            return new EventDateQueryCustomScoreProvider(reader, _fromDT, _toDT, _shouldMatchNonEvents);
        }
    }

    /// <summary>
    /// Score provider created by <c>EventDateQuery1</c>. It stores the date window and
    /// flag it is given, but the scoring itself is currently a placeholder that looks
    /// only at the document number.
    /// </summary>
    public class EventDateQueryCustomScoreProvider : CustomScoreProvider
    {
        private DateTime _fromDT;
        private DateTime _toDT;
        private readonly string _dateFormat = "yyyyMMdd";
        private bool _shouldMatchNonEvents = true;
        private float NoMatchFloat = 0f;
        private float MatchFloat = 1f;

        /// <summary>
        /// Stores the date part of both bounds and the flag for the given reader.
        /// </summary>
        public EventDateQueryCustomScoreProvider(IndexReader reader, DateTime fromDT, DateTime toDT, bool shouldMatchNonEvents)
            : base(reader)
        {
            this._shouldMatchNonEvents = shouldMatchNonEvents;
            this._fromDT = fromDT.Date;
            this._toDT = toDT.Date;
        }

        /// <summary>Ignores both incoming scores and returns <see cref="myScore"/> for the document.</summary>
        public override float CustomScore(int doc, float subQueryScore, float valSrcScore)
        {
            float score = myScore(doc);
            return score;
        }

        /// <summary>Ignores all incoming scores and returns <see cref="myScore"/> for the document.</summary>
        public override float CustomScore(int doc, float subQueryScore, float[] valSrcScores)
        {
            float score = myScore(doc);
            return score;
        }

        /// <summary>
        /// Placeholder scoring used to demonstrate the problem: document numbers below
        /// 10 score 1, every other document scores 0.
        /// </summary>
        public float myScore(int doc)
        {
            return doc >= 10 ? 0F : 1F;
        }
    }

如果有人能就如何让 Lucene 不返回这些文档提供建议,我们将不胜感激。
提前致谢。

您可以编写一个只收集得分大于 0 的文档的自定义收集器(Collector),然后将该收集器的一个实例传递给 Search() 方法。这里(here)有这样一个收集器的实现。

但是,如果您不需要所有的结果,documentation就会提出这个解决方案。可能是这样,因为您只选择前1000个文档。

http://stackoverflow.com/questions/33360962/lucene-returning-documents-with-non-positive-score

本站文章除注明转载外,均为本站原创或编译
转载请在明显位置注明出处:c# – Lucene 返回得分非正数(0 或负数)的文档