From c5367f9ba51906301e7ec21d17a295389acada75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Ku=C5=BEela?= Date: Tue, 29 Dec 2020 10:53:30 +0100 Subject: [PATCH 1/5] Add .net core support, update to 4.x version of lucene --- .../FacetSearcherConfiguration.cs | 20 ++ .../MemoryOptimizer/DefaultMemoryOptimizer.cs | 37 +++ .../MemoryOptimizer/IMemoryOptimizer.cs | 9 + .../MemoryOptimizer/NoMemoryOptimizer.cs | 13 + MultiFacetLuceneCore/FacetFieldInfo.cs | 16 ++ MultiFacetLuceneCore/FacetMatch.cs | 9 + MultiFacetLuceneCore/FacetSearchResult.cs | 11 + MultiFacetLuceneCore/FacetSearcher.cs | 225 ++++++++++++++++++ .../MultiFacetLuceneCore.csproj | 10 + MultiFacetLuceneCore/ResultCollection.cs | 82 +++++++ MultiFacetLuceneNet.sln | 13 +- 11 files changed, 443 insertions(+), 2 deletions(-) create mode 100644 MultiFacetLuceneCore/Configuration/FacetSearcherConfiguration.cs create mode 100644 MultiFacetLuceneCore/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs create mode 100644 MultiFacetLuceneCore/Configuration/MemoryOptimizer/IMemoryOptimizer.cs create mode 100644 MultiFacetLuceneCore/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs create mode 100644 MultiFacetLuceneCore/FacetFieldInfo.cs create mode 100644 MultiFacetLuceneCore/FacetMatch.cs create mode 100644 MultiFacetLuceneCore/FacetSearchResult.cs create mode 100644 MultiFacetLuceneCore/FacetSearcher.cs create mode 100644 MultiFacetLuceneCore/MultiFacetLuceneCore.csproj create mode 100644 MultiFacetLuceneCore/ResultCollection.cs diff --git a/MultiFacetLuceneCore/Configuration/FacetSearcherConfiguration.cs b/MultiFacetLuceneCore/Configuration/FacetSearcherConfiguration.cs new file mode 100644 index 0000000..e105616 --- /dev/null +++ b/MultiFacetLuceneCore/Configuration/FacetSearcherConfiguration.cs @@ -0,0 +1,20 @@ +using MultiFacetLucene.Configuration.MemoryOptimizer; + +namespace MultiFacetLucene.Configuration +{ + public class FacetSearcherConfiguration + { + public FacetSearcherConfiguration() + { + MinimumCountInTotalDatasetForFacet = 1; + MemoryOptimizer = null; + } + public static FacetSearcherConfiguration Default() + { + return new FacetSearcherConfiguration { MinimumCountInTotalDatasetForFacet = 1, MemoryOptimizer = null}; + } + public int MinimumCountInTotalDatasetForFacet { get; set; } + + public IMemoryOptimizer MemoryOptimizer { get; set; } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs new file mode 100644 index 0000000..00b2f4f --- /dev/null +++ b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/DefaultMemoryOptimizer.cs @@ -0,0 +1,37 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace MultiFacetLucene.Configuration.MemoryOptimizer +{ + public class DefaultMemoryOptimizer : IMemoryOptimizer + { + private readonly int _keepPercent; + private readonly int _optimizeIfTotalCountIsGreaterThan; + + public DefaultMemoryOptimizer(int keepPercent, int optimizeIfTotalCountIsGreaterThan) + { + _keepPercent = keepPercent; + _optimizeIfTotalCountIsGreaterThan = optimizeIfTotalCountIsGreaterThan; + } + + //Flag certain bitsets as lazyload (recalculate) + //If total number of facet values is larger than... + //have X percent removed + public IEnumerable SetAsLazyLoad(List facetValuesList) + { + var totalCount = facetValuesList.Sum(a => a.FacetValueBitSetList.Count); + if (totalCount < _optimizeIfTotalCountIsGreaterThan) yield break; + foreach (var facetValues in facetValuesList) + { + var index = 0; + var percent = Convert.ToInt32(totalCount * _keepPercent / 100.0); + foreach (var value in facetValues.FacetValueBitSetList) + { + if (index++ > percent) + yield return value; + } + } + } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/Configuration/MemoryOptimizer/IMemoryOptimizer.cs b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/IMemoryOptimizer.cs new file mode 100644 index 0000000..3eaaa8a --- /dev/null +++ b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/IMemoryOptimizer.cs @@ -0,0 +1,9 @@ +using System.Collections.Generic; + +namespace MultiFacetLucene.Configuration.MemoryOptimizer +{ + public interface IMemoryOptimizer + { + IEnumerable SetAsLazyLoad(List facetValuesList); + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs new file mode 100644 index 0000000..2a16dc4 --- /dev/null +++ b/MultiFacetLuceneCore/Configuration/MemoryOptimizer/NoMemoryOptimizer.cs @@ -0,0 +1,13 @@ +using System.Collections.Generic; + +namespace MultiFacetLucene.Configuration.MemoryOptimizer +{ + public class NoMemoryOptimizer : IMemoryOptimizer + { + //Never flag any facetvalues as lazyload (recalculate) + public IEnumerable SetAsLazyLoad(List facetValuesList) + { + yield break; + } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/FacetFieldInfo.cs b/MultiFacetLuceneCore/FacetFieldInfo.cs new file mode 100644 index 0000000..4692891 --- /dev/null +++ b/MultiFacetLuceneCore/FacetFieldInfo.cs @@ -0,0 +1,16 @@ +using System.Collections.Generic; + +namespace MultiFacetLucene +{ + public class FacetFieldInfo + { + public FacetFieldInfo() + { + Selections = new List(); + MaxToFetchExcludingSelections = 20; + } + public string FieldName { get; set; } + public List Selections { get; set; } + public int MaxToFetchExcludingSelections { get; set; } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/FacetMatch.cs b/MultiFacetLuceneCore/FacetMatch.cs new file mode 100644 index 0000000..c81ff2e --- /dev/null +++ b/MultiFacetLuceneCore/FacetMatch.cs @@ -0,0 +1,9 @@ +namespace MultiFacetLucene +{ + public class FacetMatch + { + public string FacetFieldName { get; set; } + public string Value { get; set; } + public long Count { get; set; } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/FacetSearchResult.cs b/MultiFacetLuceneCore/FacetSearchResult.cs new file mode 100644 index 0000000..ad94b7c --- /dev/null +++ b/MultiFacetLuceneCore/FacetSearchResult.cs @@ -0,0 +1,11 @@ +using System.Collections.Generic; +using Lucene.Net.Search; + +namespace MultiFacetLucene +{ + public class FacetSearchResult + { + public List Facets { get; set; } + public TopDocs Hits { get; set; } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/FacetSearcher.cs b/MultiFacetLuceneCore/FacetSearcher.cs new file mode 100644 index 0000000..a6bb935 --- /dev/null +++ b/MultiFacetLuceneCore/FacetSearcher.cs @@ -0,0 +1,225 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Lucene.Net.Index; +using Lucene.Net.Search; +using Lucene.Net.Store; +using Lucene.Net.Util; +using MultiFacetLucene.Configuration; + +namespace MultiFacetLucene +{ + public class FacetSearcher : IndexSearcher + { + private readonly ConcurrentDictionary _facetBitSetDictionary = new ConcurrentDictionary(); + + public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration facetSearcherConfiguration = null) + : base(context) + { + Initialize(facetSearcherConfiguration); + } + + public FacetSearcher(IndexReader r, TaskScheduler executor, FacetSearcherConfiguration facetSearcherConfiguration = null) + : base(r, executor) + { + Initialize(facetSearcherConfiguration); + } + + public FacetSearcher(IndexReader r, FacetSearcherConfiguration facetSearcherConfiguration = null) + : base(r) + { + Initialize(facetSearcherConfiguration); + } + + public FacetSearcher(IndexReaderContext context, TaskScheduler executor, FacetSearcherConfiguration facetSearcherConfiguration = null) + : base(context, executor) + { + Initialize(facetSearcherConfiguration); + } + + public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; } + + private void Initialize(FacetSearcherConfiguration facetSearcherConfiguration) + { + FacetSearcherConfiguration = facetSearcherConfiguration ?? FacetSearcherConfiguration.Default(); + } + + + public FacetSearchResult SearchWithFacets(Query baseQueryWithoutFacetDrilldown, int topResults, IList facetFieldInfos) + { + var hits = Search(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, null), topResults); + + var facets = GetAllFacetsValues(baseQueryWithoutFacetDrilldown, facetFieldInfos) + .Where(x => x.Count > 0) + .ToList(); + return new FacetSearchResult() + { + Facets = facets, + Hits = hits + }; + } + + + private FacetValues GetOrCreateFacetBitSet(string facetAttributeFieldName) + { + return _facetBitSetDictionary.GetOrAdd(facetAttributeFieldName, ReadBitSetsForValues); + } + + + private FacetValues ReadBitSetsForValues(string facetAttributeFieldName) + { + var facetValues = new FacetValues {Term = facetAttributeFieldName}; + + facetValues.FacetValueBitSetList.AddRange(GetFacetValueTerms(facetAttributeFieldName).OrderByDescending(x => x.Count)); + + if (FacetSearcherConfiguration.MemoryOptimizer == null) return facetValues; + foreach (var facetValue in FacetSearcherConfiguration.MemoryOptimizer.SetAsLazyLoad(_facetBitSetDictionary.Values.ToList())) + facetValue.Bitset = null; + + return facetValues; + } + + private IEnumerable GetFacetValueTerms(string facetAttributeFieldName) + { + var termReader = MultiFields.GetTerms(IndexReader, facetAttributeFieldName).GetEnumerator(); + + do + { + if (termReader.Term != null) + { + var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes).TrimEnd('\0'); + var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, termReader.Term); + var cnt = bitset.Cardinality(); + if (cnt >= FacetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) + yield return new FacetValues.FacetValueBitSet { Value = termString, Bitset = bitset, Count = cnt }; + else + { + bitset = null; + } + } + } while (termReader.MoveNext()); + } + + protected OpenBitSetDISI CalculateOpenBitSetDisi(string facetAttributeFieldName, BytesRef value) + { + var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); + var facetQueryFilter = new QueryWrapperFilter(facetQuery); + var liveDocs = MultiFields.GetLiveDocs(IndexReader); + var termDocsEnum = MultiFields.GetTermDocsEnum(IndexReader, liveDocs, facetAttributeFieldName, value); + return new OpenBitSetDISI(termDocsEnum, IndexReader.MaxDoc); + } + + private IEnumerable GetAllFacetsValues(Query baseQueryWithoutFacetDrilldown, + IList facetFieldInfos) + { + return + facetFieldInfos.SelectMany( + facetFieldInfo => + FindMatchesInQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, facetFieldInfo)); + } + private DocIdSet GetDocIdSet(CachingWrapperFilter cachingWrapperFilter) + { + FixedBitSet idSet = new FixedBitSet(IndexReader.MaxDoc); + foreach (AtomicReaderContext ctx in IndexReader.Context.Leaves) + { + AtomicReader atomicReader = ctx.AtomicReader; + var iterator = cachingWrapperFilter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator(); + if (iterator == null) + { + // return EMPTY_DOCIDSET; + } + else + { + idSet.Or(iterator); + } + } + + return idSet as DocIdSet; + } + + private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor) + { + var calculations = 0; + + var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName))); + + var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor); + foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor.FieldName).FacetValueBitSetList) + { + var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value); + + if (!isSelected && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected) //Impossible to get a better result + { + if (calculatedFacetCounts.HaveEnoughResults) + break; + } + + OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(GetDocIdSet(queryFilter).GetIterator(), 1);// changed + + var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(facetFieldInfoToCalculateFor.FieldName, new BytesRef(facetValueBitSet.Value)); + baseQueryWithoutFacetDrilldownCopy.And(bitset); + var count = baseQueryWithoutFacetDrilldownCopy.Cardinality(); + if (count == 0) + continue; + var match = new FacetMatch + { + Count = count, + Value = facetValueBitSet.Value, + FacetFieldName = facetFieldInfoToCalculateFor.FieldName + }; + + calculations++; + if (isSelected) + calculatedFacetCounts.AddToSelected(match); + else + calculatedFacetCounts.AddToNonSelected(match); + } + + return calculatedFacetCounts.GetList(); + } + + + protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList facetFieldInfos, string facetAttributeFieldName) + { + var facetsToAdd = facetFieldInfos.Where(x => x.FieldName != facetAttributeFieldName && x.Selections.Any()).ToList(); + if (!facetsToAdd.Any()) return baseQueryWithoutFacetDrilldown; + var booleanQuery = new BooleanQuery {{baseQueryWithoutFacetDrilldown, Occur.MUST}}; + foreach (var facetFieldInfo in facetsToAdd) + { + if (facetFieldInfo.Selections.Count == 1) + booleanQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, facetFieldInfo.Selections[0])), Occur.MUST); + else + { + var valuesQuery = new BooleanQuery(); + foreach (var value in facetFieldInfo.Selections) + { + valuesQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, value)), Occur.SHOULD); + } + booleanQuery.Add(valuesQuery, Occur.MUST); + } + } + return booleanQuery; + } + + public class FacetValues + { + public FacetValues() + { + FacetValueBitSetList = new List(); + } + + public string Term { get; set; } + + public List FacetValueBitSetList { get; set; } + + public class FacetValueBitSet + { + public string Value { get; set; } + public OpenBitSetDISI Bitset { get; set; } + public long Count { get; set; } + } + } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneCore/MultiFacetLuceneCore.csproj b/MultiFacetLuceneCore/MultiFacetLuceneCore.csproj new file mode 100644 index 0000000..5ab633e --- /dev/null +++ b/MultiFacetLuceneCore/MultiFacetLuceneCore.csproj @@ -0,0 +1,10 @@ + + + + netcoreapp3.1 + + + + + + diff --git a/MultiFacetLuceneCore/ResultCollection.cs b/MultiFacetLuceneCore/ResultCollection.cs new file mode 100644 index 0000000..c9aaeee --- /dev/null +++ b/MultiFacetLuceneCore/ResultCollection.cs @@ -0,0 +1,82 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace MultiFacetLucene +{ + internal class ResultCollection + { + private readonly FacetFieldInfo _facetFieldInfoToCalculateFor; + private int _uncalculatedSelectedCount; + public long MinCountForNonSelected { get; protected set; } + + public ResultCollection(FacetFieldInfo facetFieldInfoToCalculateFor) + { + MinCountForNonSelected = 0; + _facetFieldInfoToCalculateFor = facetFieldInfoToCalculateFor; + _uncalculatedSelectedCount = facetFieldInfoToCalculateFor.Selections.Count; + NonSelectedMatches = new List(); + SelectedMatches = new List(); + } + + public bool HaveEnoughResults + { + get { return _uncalculatedSelectedCount == 0 && NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections; } + } + + public bool IsSelected(string facetValue) + { + return _uncalculatedSelectedCount > 0 && _facetFieldInfoToCalculateFor.Selections.Contains(facetValue); + } + + + + public void AddToNonSelected(FacetMatch match) + { + if (NonSelectedMatches.Count >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections) + { + if (match.Count < MinCountForNonSelected) + return; + if (match.Count > MinCountForNonSelected) + { + //Remove tail if possible + while (true) + { + var allWithMinCount = NonSelectedMatches.Where(x => x.Count == MinCountForNonSelected).ToList(); + if (allWithMinCount.Count == 0) + break; + var countWhenAddingThisAndRemovingMin = NonSelectedMatches.Count - allWithMinCount.Count + 1; + if (countWhenAddingThisAndRemovingMin >= _facetFieldInfoToCalculateFor.MaxToFetchExcludingSelections) + { + allWithMinCount.ForEach(x => NonSelectedMatches.Remove(x)); + MinCountForNonSelected = NonSelectedMatches.Min(x => x.Count); + } + else + { + break; + } + } + + } + } + + MinCountForNonSelected = MinCountForNonSelected == 0 ? match.Count : Math.Min(MinCountForNonSelected, match.Count); + + NonSelectedMatches.Add(match); + } + + public void AddToSelected(FacetMatch match) + { + SelectedMatches.Add(match); + _uncalculatedSelectedCount--; + } + + protected List NonSelectedMatches { get; set; } + private List SelectedMatches { get; set; } + + public IEnumerable GetList() + { + return SelectedMatches.Union(NonSelectedMatches).OrderByDescending(x => x.Count); + } + } +} \ No newline at end of file diff --git a/MultiFacetLuceneNet.sln b/MultiFacetLuceneNet.sln index fe5a0ad..83f33c7 100644 --- a/MultiFacetLuceneNet.sln +++ b/MultiFacetLuceneNet.sln @@ -1,7 +1,7 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio 2013 -VisualStudioVersion = 12.0.30110.0 +# Visual Studio Version 16 +VisualStudioVersion = 16.0.30804.86 MinimumVisualStudioVersion = 10.0.40219.1 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLucene", "MultiFacetLucene\MultiFacetLucene.csproj", "{086093FD-D444-48BE-B897-7FAC14133C23}" EndProject @@ -9,6 +9,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLuceneNet.Tests", EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PerformanceTest", "PerformanceTest\PerformanceTest.csproj", "{0065E899-596D-4B16-9CF5-E917DD957DAB}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "MultiFacetLuceneCore", "MultiFacetLuceneCore\MultiFacetLuceneCore.csproj", "{9B663767-33DD-4289-A31A-43D900888723}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -27,8 +29,15 @@ Global {0065E899-596D-4B16-9CF5-E917DD957DAB}.Debug|Any CPU.Build.0 = Debug|Any CPU {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.ActiveCfg = Release|Any CPU {0065E899-596D-4B16-9CF5-E917DD957DAB}.Release|Any CPU.Build.0 = Release|Any CPU + {9B663767-33DD-4289-A31A-43D900888723}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9B663767-33DD-4289-A31A-43D900888723}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9B663767-33DD-4289-A31A-43D900888723}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9B663767-33DD-4289-A31A-43D900888723}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {909A5A92-A780-43DF-9CA5-3B75CE15DD4B} + EndGlobalSection EndGlobal From 7511512e3dd396afb527195d6c5715b79adae9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Ku=C5=BEela?= Date: Tue, 29 Dec 2020 11:12:47 +0100 Subject: [PATCH 2/5] Optimize search --- MultiFacetLuceneCore/FacetSearcher.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MultiFacetLuceneCore/FacetSearcher.cs b/MultiFacetLuceneCore/FacetSearcher.cs index a6bb935..8189ff4 100644 --- a/MultiFacetLuceneCore/FacetSearcher.cs +++ b/MultiFacetLuceneCore/FacetSearcher.cs @@ -87,7 +87,7 @@ private FacetValues ReadBitSetsForValues(string facetAttributeFieldName) do { - if (termReader.Term != null) + if (termReader.Term != null && termReader.Term.Bytes.Length > 0) { var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes).TrimEnd('\0'); var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, termReader.Term); @@ -144,7 +144,7 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr var calculations = 0; var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName))); - + var docIdSet = GetDocIdSet(queryFilter); var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor); foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor.FieldName).FacetValueBitSetList) { @@ -156,7 +156,7 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr break; } - OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(GetDocIdSet(queryFilter).GetIterator(), 1);// changed + OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(docIdSet.GetIterator(), 1);// changed var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(facetFieldInfoToCalculateFor.FieldName, new BytesRef(facetValueBitSet.Value)); baseQueryWithoutFacetDrilldownCopy.And(bitset); From 179dc263ef8f7e8c1b5b616b4c9f419bc6a458d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Ku=C5=BEela?= Date: Tue, 5 Jan 2021 13:54:20 +0100 Subject: [PATCH 3/5] fix byte to string conversion --- MultiFacetLuceneCore/FacetSearcher.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/MultiFacetLuceneCore/FacetSearcher.cs b/MultiFacetLuceneCore/FacetSearcher.cs index 8189ff4..2eb196e 100644 --- a/MultiFacetLuceneCore/FacetSearcher.cs +++ b/MultiFacetLuceneCore/FacetSearcher.cs @@ -87,18 +87,18 @@ private FacetValues ReadBitSetsForValues(string facetAttributeFieldName) do { - if (termReader.Term != null && termReader.Term.Bytes.Length > 0) - { - var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes).TrimEnd('\0'); - var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, termReader.Term); - var cnt = bitset.Cardinality(); - if (cnt >= FacetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) - yield return new FacetValues.FacetValueBitSet { Value = termString, Bitset = bitset, Count = cnt }; - else + if (termReader.Term != null && termReader.Term.Bytes.Length > 0) { - bitset = null; + var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes, 0, termReader.Term.Length).TrimEnd('\0'); + var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, termReader.Term); + var cnt = bitset.Cardinality(); + if (cnt >= FacetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) + yield return new FacetValues.FacetValueBitSet { Value = termString, Bitset = bitset, Count = cnt }; + else + { + bitset = null; + } } - } } while (termReader.MoveNext()); } From e933d5c38290abdf7a4caf845a77b9cca38b6e91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Ku=C5=BEela?= Date: Fri, 29 Jan 2021 11:04:20 +0100 Subject: [PATCH 4/5] refactor, simplify methods --- MultiFacetLuceneCore/FacetSearcher.cs | 40 +++++++++++++++++++-------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/MultiFacetLuceneCore/FacetSearcher.cs b/MultiFacetLuceneCore/FacetSearcher.cs index 2eb196e..c02eccd 100644 --- a/MultiFacetLuceneCore/FacetSearcher.cs +++ b/MultiFacetLuceneCore/FacetSearcher.cs @@ -89,7 +89,7 @@ private FacetValues ReadBitSetsForValues(string facetAttributeFieldName) { if (termReader.Term != null && termReader.Term.Bytes.Length > 0) { - var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes, 0, termReader.Term.Length).TrimEnd('\0'); + var termString = System.Text.Encoding.UTF8.GetString(termReader.Term.Bytes, 0, termReader.Term.Length).TrimEnd('\0'); var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, termReader.Term); var cnt = bitset.Cardinality(); if (cnt >= FacetSearcherConfiguration.MinimumCountInTotalDatasetForFacet) @@ -104,10 +104,17 @@ private FacetValues ReadBitSetsForValues(string facetAttributeFieldName) protected OpenBitSetDISI CalculateOpenBitSetDisi(string facetAttributeFieldName, BytesRef value) { - var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); - var facetQueryFilter = new QueryWrapperFilter(facetQuery); - var liveDocs = MultiFields.GetLiveDocs(IndexReader); - var termDocsEnum = MultiFields.GetTermDocsEnum(IndexReader, liveDocs, facetAttributeFieldName, value); + //var facetQuery = new TermQuery(new Term(facetAttributeFieldName, value)); + //var facetQueryFilter = new QueryWrapperFilter(facetQuery); + // var liveDocs = MultiFields.GetLiveDocs(IndexReader); + var termDocsEnum = MultiFields.GetTermDocsEnum(IndexReader, null, facetAttributeFieldName, value); + return new OpenBitSetDISI(termDocsEnum, IndexReader.MaxDoc); + } + + protected OpenBitSetDISI CalculateOpenBitSetDisiForFilteredData(CachingWrapperFilter filter, string facetAttributeFieldName, BytesRef value) + { + // var liveDocs = MultiFields.GetLiveDocs(IndexReader); + var termDocsEnum = MultiFields.GetTermDocsEnum(IndexReader, null, facetAttributeFieldName, value); return new OpenBitSetDISI(termDocsEnum, IndexReader.MaxDoc); } @@ -142,9 +149,8 @@ private DocIdSet GetDocIdSet(CachingWrapperFilter cachingWrapperFilter) private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor) { var calculations = 0; - var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName))); - var docIdSet = GetDocIdSet(queryFilter); + // var docIdSet = GetDocIdSet(queryFilter); var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor); foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor.FieldName).FacetValueBitSetList) { @@ -156,11 +162,8 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr break; } - OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(docIdSet.GetIterator(), 1);// changed - var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(facetFieldInfoToCalculateFor.FieldName, new BytesRef(facetValueBitSet.Value)); - baseQueryWithoutFacetDrilldownCopy.And(bitset); - var count = baseQueryWithoutFacetDrilldownCopy.Cardinality(); + var count = GetFacetCountFromMultipleIndices(queryFilter, bitset); if (count == 0) continue; var match = new FacetMatch @@ -180,6 +183,21 @@ private IEnumerable FindMatchesInQuery(Query baseQueryWithoutFacetDr return calculatedFacetCounts.GetList(); } + private long GetFacetCountFromMultipleIndices(CachingWrapperFilter filter, OpenBitSetDISI facetValueBitSet) + { + long count = 0; + foreach (AtomicReaderContext ctx in IndexReader.Leaves) + { + AtomicReader atomicReader = ctx.AtomicReader; + // TODO: Poznamka pro priste, az budu resit ze se spatne hledaji pocty facetu, zda se ze to souvisi s NULL hodnotama, mozna vyfiltrovat not NULL? + + OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(filter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator(), atomicReader.MaxDoc); + baseQueryWithoutFacetDrilldownCopy.And(facetValueBitSet); + count += baseQueryWithoutFacetDrilldownCopy.Cardinality(); + } + + return count; + } protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList facetFieldInfos, string facetAttributeFieldName) { From 10242e9dca5672b34f230b8502be1fbe44af9f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Ku=C5=BEela?= Date: Tue, 9 Mar 2021 17:37:22 +0100 Subject: [PATCH 5/5] Fix null reference error when index is split into multiple segments --- MultiFacetLuceneCore/FacetSearcher.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/MultiFacetLuceneCore/FacetSearcher.cs b/MultiFacetLuceneCore/FacetSearcher.cs index c02eccd..81ae124 100644 --- a/MultiFacetLuceneCore/FacetSearcher.cs +++ b/MultiFacetLuceneCore/FacetSearcher.cs @@ -190,10 +190,13 @@ private long GetFacetCountFromMultipleIndices(CachingWrapperFilter filter, OpenB { AtomicReader atomicReader = ctx.AtomicReader; // TODO: Poznamka pro priste, az budu resit ze se spatne hledaji pocty facetu, zda se ze to souvisi s NULL hodnotama, mozna vyfiltrovat not NULL? - - OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(filter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator(), atomicReader.MaxDoc); - baseQueryWithoutFacetDrilldownCopy.And(facetValueBitSet); - count += baseQueryWithoutFacetDrilldownCopy.Cardinality(); + var iterator = filter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator(); + if (iterator != null) + { + OpenBitSetDISI baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(iterator, atomicReader.MaxDoc); + baseQueryWithoutFacetDrilldownCopy.And(facetValueBitSet); + count += baseQueryWithoutFacetDrilldownCopy.Cardinality(); + } } return count;