Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add .net core support, update to 4.x version of lucene #4

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions MultiFacetLuceneCore/Configuration/FacetSearcherConfiguration.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
using MultiFacetLucene.Configuration.MemoryOptimizer;

namespace MultiFacetLucene.Configuration
{
/// <summary>
/// Settings consumed by <c>FacetSearcher</c> when it builds and caches facet value bit sets.
/// </summary>
public class FacetSearcherConfiguration
{
    /// <summary>
    /// Creates a configuration equal to a freshly constructed instance:
    /// a minimum count of 1 and no memory optimizer.
    /// </summary>
    /// <returns>A new configuration instance carrying the default values.</returns>
    public static FacetSearcherConfiguration Default() => new FacetSearcherConfiguration();

    /// <summary>
    /// Minimum number of documents a facet value must have in the whole index
    /// for it to be kept as a facet value. Defaults to 1.
    /// </summary>
    public int MinimumCountInTotalDatasetForFacet { get; set; } = 1;

    /// <summary>
    /// Optional strategy that decides which cached bit sets are dropped and recalculated on demand.
    /// Defaults to <c>null</c>, in which case no bit sets are dropped.
    /// </summary>
    public IMemoryOptimizer MemoryOptimizer { get; set; } = null;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
using System;
using System.Collections.Generic;
using System.Linq;

namespace MultiFacetLucene.Configuration.MemoryOptimizer
{
/// <summary>
/// Memory optimizer that, once the overall number of facet values reaches a limit,
/// flags the tail of every facet field's value list for lazy loading (recalculation).
/// </summary>
public class DefaultMemoryOptimizer : IMemoryOptimizer
{
    private readonly int _keepPercent;
    private readonly int _optimizeIfTotalCountIsGreaterThan;

    /// <summary>Creates the optimizer.</summary>
    /// <param name="keepPercent">Percentage applied to the overall facet value count to obtain the keep threshold.</param>
    /// <param name="optimizeIfTotalCountIsGreaterThan">
    /// Nothing is flagged while the overall facet value count is below this number.
    /// </param>
    public DefaultMemoryOptimizer(int keepPercent, int optimizeIfTotalCountIsGreaterThan)
    {
        _optimizeIfTotalCountIsGreaterThan = optimizeIfTotalCountIsGreaterThan;
        _keepPercent = keepPercent;
    }

    /// <summary>
    /// Lazily yields the facet value entries that should be treated as lazy-load.
    /// </summary>
    /// <param name="facetValuesList">All facet fields, each with its list of value entries.</param>
    /// <returns>
    /// For each facet field, every entry whose zero-based position in that field's list is
    /// greater than the keep threshold; an empty sequence when the overall count is below the limit.
    /// </returns>
    public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList)
    {
        var overallCount = facetValuesList.Sum(fieldValues => fieldValues.FacetValueBitSetList.Count);
        if (overallCount >= _optimizeIfTotalCountIsGreaterThan)
        {
            // The threshold is derived from the overall count (all fields together),
            // while the position counter below restarts for every field.
            var keepThreshold = Convert.ToInt32(overallCount * _keepPercent / 100.0);

            foreach (var fieldValues in facetValuesList)
            {
                var position = 0;
                foreach (var entry in fieldValues.FacetValueBitSetList)
                {
                    var isPastThreshold = position > keepThreshold;
                    position++;
                    if (isPastThreshold)
                    {
                        yield return entry;
                    }
                }
            }
        }
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using System.Collections.Generic;

namespace MultiFacetLucene.Configuration.MemoryOptimizer
{
/// <summary>
/// Strategy that selects which cached facet value entries should be lazy-loaded (recalculated)
/// rather than kept in memory.
/// </summary>
public interface IMemoryOptimizer
{
/// <summary>
/// Selects the facet value entries to flag as lazy-load.
/// </summary>
/// <param name="facetValuesList">The facet fields to inspect, each with its list of value entries.</param>
/// <returns>The entries to flag; FacetSearcher sets the <c>Bitset</c> of every returned entry to <c>null</c>.</returns>
IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using System.Collections.Generic;

namespace MultiFacetLucene.Configuration.MemoryOptimizer
{
/// <summary>
/// Memory optimizer that never flags any facet value as lazy-load (recalculate).
/// </summary>
public class NoMemoryOptimizer : IMemoryOptimizer
{
    /// <summary>Always returns an empty sequence; the argument is not inspected.</summary>
    /// <param name="facetValuesList">Ignored.</param>
    /// <returns>An empty sequence.</returns>
    public IEnumerable<FacetSearcher.FacetValues.FacetValueBitSet> SetAsLazyLoad(List<FacetSearcher.FacetValues> facetValuesList)
        => System.Linq.Enumerable.Empty<FacetSearcher.FacetValues.FacetValueBitSet>();
}
}
16 changes: 16 additions & 0 deletions MultiFacetLuceneCore/FacetFieldInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
using System.Collections.Generic;

namespace MultiFacetLucene
{
/// <summary>
/// Describes one facet field of a faceted search: the field name, the values currently
/// selected for it, and how many non-selected values to fetch.
/// </summary>
public class FacetFieldInfo
{
    /// <summary>Name of the index field the facet is computed on.</summary>
    public string FieldName { get; set; }

    /// <summary>Values currently selected for this field. Starts as an empty list.</summary>
    public List<string> Selections { get; set; } = new List<string>();

    /// <summary>
    /// Maximum number of facet values to fetch in addition to the selected ones. Defaults to 20.
    /// </summary>
    public int MaxToFetchExcludingSelections { get; set; } = 20;
}
}
9 changes: 9 additions & 0 deletions MultiFacetLuceneCore/FacetMatch.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace MultiFacetLucene
{
/// <summary>
/// A single facet value together with the number of documents counted for it.
/// </summary>
public class FacetMatch
{
/// <summary>Name of the facet field this value belongs to.</summary>
public string FacetFieldName { get; set; }
/// <summary>The facet value (term text).</summary>
public string Value { get; set; }
/// <summary>Number of documents counted for this value.</summary>
public long Count { get; set; }
}
}
11 changes: 11 additions & 0 deletions MultiFacetLuceneCore/FacetSearchResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using System.Collections.Generic;
using Lucene.Net.Search;

namespace MultiFacetLucene
{
/// <summary>
/// Result of a faceted search: the document hits plus the facet matches.
/// </summary>
public class FacetSearchResult
{
/// <summary>Facet matches computed for the requested facet fields.</summary>
public List<FacetMatch> Facets { get; set; }
/// <summary>Top documents returned by the search.</summary>
public TopDocs Hits { get; set; }
}
}
246 changes: 246 additions & 0 deletions MultiFacetLuceneCore/FacetSearcher.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using MultiFacetLucene.Configuration;

namespace MultiFacetLucene
{
/// <summary>
/// An <see cref="IndexSearcher"/> that can additionally compute facet counts for a set of
/// facet fields. For every facet field a bit set per term is built from the index and cached;
/// facet counts are obtained by intersecting those bit sets with the documents matching the query.
/// </summary>
public class FacetSearcher : IndexSearcher
{
    // Cache of per-field facet data, keyed by facet field name.
    private readonly ConcurrentDictionary<string, FacetValues> _facetBitSetDictionary = new ConcurrentDictionary<string, FacetValues>();

    /// <summary>Creates a searcher over the given reader context.</summary>
    /// <param name="context">Reader context passed to the base searcher.</param>
    /// <param name="facetSearcherConfiguration">Facet settings; <c>null</c> selects <see cref="Configuration.FacetSearcherConfiguration.Default"/>.</param>
    public FacetSearcher(IndexReaderContext context, FacetSearcherConfiguration facetSearcherConfiguration = null)
        : base(context)
    {
        Initialize(facetSearcherConfiguration);
    }

    /// <summary>Creates a searcher over the given reader using the given task scheduler.</summary>
    /// <param name="r">Index reader passed to the base searcher.</param>
    /// <param name="executor">Task scheduler passed to the base searcher.</param>
    /// <param name="facetSearcherConfiguration">Facet settings; <c>null</c> selects <see cref="Configuration.FacetSearcherConfiguration.Default"/>.</param>
    public FacetSearcher(IndexReader r, TaskScheduler executor, FacetSearcherConfiguration facetSearcherConfiguration = null)
        : base(r, executor)
    {
        Initialize(facetSearcherConfiguration);
    }

    /// <summary>Creates a searcher over the given reader.</summary>
    /// <param name="r">Index reader passed to the base searcher.</param>
    /// <param name="facetSearcherConfiguration">Facet settings; <c>null</c> selects <see cref="Configuration.FacetSearcherConfiguration.Default"/>.</param>
    public FacetSearcher(IndexReader r, FacetSearcherConfiguration facetSearcherConfiguration = null)
        : base(r)
    {
        Initialize(facetSearcherConfiguration);
    }

    /// <summary>Creates a searcher over the given reader context using the given task scheduler.</summary>
    /// <param name="context">Reader context passed to the base searcher.</param>
    /// <param name="executor">Task scheduler passed to the base searcher.</param>
    /// <param name="facetSearcherConfiguration">Facet settings; <c>null</c> selects <see cref="Configuration.FacetSearcherConfiguration.Default"/>.</param>
    public FacetSearcher(IndexReaderContext context, TaskScheduler executor, FacetSearcherConfiguration facetSearcherConfiguration = null)
        : base(context, executor)
    {
        Initialize(facetSearcherConfiguration);
    }

    /// <summary>The facet settings in effect for this searcher. Never <c>null</c> after construction.</summary>
    public FacetSearcherConfiguration FacetSearcherConfiguration { get; protected set; }

    // Stores the supplied configuration, falling back to the defaults when none was given.
    private void Initialize(FacetSearcherConfiguration facetSearcherConfiguration)
    {
        FacetSearcherConfiguration = facetSearcherConfiguration ?? FacetSearcherConfiguration.Default();
    }

    /// <summary>
    /// Runs the query with the facet selections applied as drill-down and computes facet counts
    /// for every requested facet field.
    /// </summary>
    /// <param name="baseQueryWithoutFacetDrilldown">The user query without any facet restrictions.</param>
    /// <param name="topResults">Maximum number of hits to return.</param>
    /// <param name="facetFieldInfos">The facet fields to compute, including their current selections.</param>
    /// <returns>The hits plus all facet matches whose count is greater than zero.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="baseQueryWithoutFacetDrilldown"/> or <paramref name="facetFieldInfos"/> is <c>null</c>.
    /// </exception>
    public FacetSearchResult SearchWithFacets(Query baseQueryWithoutFacetDrilldown, int topResults, IList<FacetFieldInfo> facetFieldInfos)
    {
        if (baseQueryWithoutFacetDrilldown == null) throw new ArgumentNullException(nameof(baseQueryWithoutFacetDrilldown));
        if (facetFieldInfos == null) throw new ArgumentNullException(nameof(facetFieldInfos));

        // Hits use the selections of ALL facet fields (no field is excluded).
        var hits = Search(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, null), topResults);

        var facets = GetAllFacetsValues(baseQueryWithoutFacetDrilldown, facetFieldInfos)
            .Where(x => x.Count > 0)
            .ToList();

        return new FacetSearchResult
        {
            Facets = facets,
            Hits = hits
        };
    }

    // Returns the cached facet data for the field, building it on first use.
    private FacetValues GetOrCreateFacetBitSet(string facetAttributeFieldName)
    {
        return _facetBitSetDictionary.GetOrAdd(facetAttributeFieldName, ReadBitSetsForValues);
    }

    // Builds the facet data for one field: one entry per term, ordered by descending document count.
    // Afterwards the configured memory optimizer (if any) may flag cached entries as lazy-load,
    // which is done by clearing their bit set.
    private FacetValues ReadBitSetsForValues(string facetAttributeFieldName)
    {
        var facetValues = new FacetValues { Term = facetAttributeFieldName };

        facetValues.FacetValueBitSetList.AddRange(GetFacetValueTerms(facetAttributeFieldName).OrderByDescending(x => x.Count));

        var optimizer = FacetSearcherConfiguration.MemoryOptimizer;
        if (optimizer == null) return facetValues;

        // NOTE(review): the optimizer only sees the fields already present in the cache; the
        // facetValues instance built above is not part of that list yet. Confirm this is intended.
        foreach (var facetValue in optimizer.SetAsLazyLoad(_facetBitSetDictionary.Values.ToList()))
            facetValue.Bitset = null;

        return facetValues;
    }

    // Enumerates the terms of the field and yields one entry (value, bit set, count) for every
    // term whose document count reaches MinimumCountInTotalDatasetForFacet.
    private IEnumerable<FacetValues.FacetValueBitSet> GetFacetValueTerms(string facetAttributeFieldName)
    {
        var terms = MultiFields.GetTerms(IndexReader, facetAttributeFieldName);
        if (terms == null) yield break; // the field does not exist in the index

        var termReader = terms.GetEnumerator();

        // Position the enumerator before reading Term; the current term is only read after MoveNext().
        while (termReader.MoveNext())
        {
            var term = termReader.Term;
            if (term == null || term.Bytes.Length == 0) continue;

            // Decode honoring the BytesRef offset: the term bytes need not start at index 0 of the buffer.
            var termString = System.Text.Encoding.UTF8.GetString(term.Bytes, term.Offset, term.Length).TrimEnd('\0');
            var bitset = CalculateOpenBitSetDisi(facetAttributeFieldName, term);
            var cnt = bitset.Cardinality();
            if (cnt >= FacetSearcherConfiguration.MinimumCountInTotalDatasetForFacet)
                yield return new FacetValues.FacetValueBitSet { Value = termString, Bitset = bitset, Count = cnt };
        }
    }

    /// <summary>
    /// Builds a bit set, sized to the reader's <c>MaxDoc</c>, of all documents containing the given term.
    /// </summary>
    /// <param name="facetAttributeFieldName">The facet field.</param>
    /// <param name="value">The term bytes.</param>
    /// <returns>
    /// The bit set of matching documents; an empty bit set when the field or term is not in the index.
    /// </returns>
    protected OpenBitSetDISI CalculateOpenBitSetDisi(string facetAttributeFieldName, BytesRef value)
    {
        // liveDocs is passed as null, so no live-docs filtering is applied here.
        var termDocsEnum = MultiFields.GetTermDocsEnum(IndexReader, null, facetAttributeFieldName, value);

        // A missing field/term yields no enumerator; represent that as an empty set instead of failing.
        return termDocsEnum == null
            ? new OpenBitSetDISI(IndexReader.MaxDoc)
            : new OpenBitSetDISI(termDocsEnum, IndexReader.MaxDoc);
    }

    /// <summary>
    /// Builds the bit set of all documents containing the given term.
    /// The <paramref name="filter"/> argument is currently not applied; the result is the same as
    /// <see cref="CalculateOpenBitSetDisi"/>.
    /// </summary>
    /// <param name="filter">Not used.</param>
    /// <param name="facetAttributeFieldName">The facet field.</param>
    /// <param name="value">The term bytes.</param>
    /// <returns>The bit set of matching documents.</returns>
    protected OpenBitSetDISI CalculateOpenBitSetDisiForFilteredData(CachingWrapperFilter filter, string facetAttributeFieldName, BytesRef value)
    {
        return CalculateOpenBitSetDisi(facetAttributeFieldName, value);
    }

    // Computes the facet matches of every requested facet field.
    private IEnumerable<FacetMatch> GetAllFacetsValues(Query baseQueryWithoutFacetDrilldown,
        IList<FacetFieldInfo> facetFieldInfos)
    {
        return facetFieldInfos.SelectMany(
            facetFieldInfo => FindMatchesInQuery(baseQueryWithoutFacetDrilldown, facetFieldInfos, facetFieldInfo));
    }

    // Collects the documents accepted by the filter across all leaves into one bit set sized to
    // the reader's MaxDoc. Leaf-local doc ids are shifted by the leaf's DocBase.
    private DocIdSet GetDocIdSet(CachingWrapperFilter cachingWrapperFilter)
    {
        var idSet = new FixedBitSet(IndexReader.MaxDoc);
        foreach (AtomicReaderContext ctx in IndexReader.Context.Leaves)
        {
            AtomicReader atomicReader = ctx.AtomicReader;
            var iterator = cachingWrapperFilter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator();
            if (iterator == null) continue; // nothing matched in this leaf

            int doc;
            while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                idSet.Set(ctx.DocBase + doc);
        }

        return idSet;
    }

    // Computes the facet matches for one facet field. The query used for counting applies the
    // selections of all OTHER facet fields, but not those of the field being calculated.
    private IEnumerable<FacetMatch> FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor)
    {
        var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName)));
        var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor);

        foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor.FieldName).FacetValueBitSetList)
        {
            var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value);

            // Entries are ordered by descending total count, so once a non-selected value cannot
            // beat the current minimum and enough results are collected, the loop can stop.
            if (!isSelected
                && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected
                && calculatedFacetCounts.HaveEnoughResults)
            {
                break;
            }

            // A null bit set means the entry was flagged as lazy-load; recalculate it on demand.
            var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(facetFieldInfoToCalculateFor.FieldName, new BytesRef(facetValueBitSet.Value));
            var count = GetFacetCountFromMultipleIndices(queryFilter, bitset);
            if (count == 0)
                continue;

            var match = new FacetMatch
            {
                Count = count,
                Value = facetValueBitSet.Value,
                FacetFieldName = facetFieldInfoToCalculateFor.FieldName
            };

            if (isSelected)
                calculatedFacetCounts.AddToSelected(match);
            else
                calculatedFacetCounts.AddToNonSelected(match);
        }

        return calculatedFacetCounts.GetList();
    }

    // Counts the documents that are accepted by the filter AND set in the facet value bit set.
    // The facet bit set is built from the top-level reader and sized to its MaxDoc, whereas the
    // filter is evaluated leaf by leaf and yields leaf-local doc ids; each id is therefore shifted
    // by the leaf's DocBase before it is tested against the facet bit set.
    private long GetFacetCountFromMultipleIndices(CachingWrapperFilter filter, OpenBitSetDISI facetValueBitSet)
    {
        long count = 0;
        foreach (AtomicReaderContext ctx in IndexReader.Leaves)
        {
            AtomicReader atomicReader = ctx.AtomicReader;
            // TODO: Note for next time, when looking into facet counts being computed incorrectly:
            // it seems to be related to NULL values; maybe filter for not NULL?
            var iterator = filter.GetDocIdSet(atomicReader.AtomicContext, atomicReader.LiveDocs)?.GetIterator();
            if (iterator == null) continue; // nothing matched in this leaf

            int doc;
            while ((doc = iterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                if (facetValueBitSet.Get(ctx.DocBase + doc))
                    count++;
            }
        }

        return count;
    }

    /// <summary>
    /// Combines the base query with the selections of the facet fields as mandatory clauses.
    /// Multiple selections within one field are OR-ed; different fields are AND-ed.
    /// </summary>
    /// <param name="baseQueryWithoutFacetDrilldown">The user query without any facet restrictions.</param>
    /// <param name="facetFieldInfos">All facet fields with their selections.</param>
    /// <param name="facetAttributeFieldName">
    /// Name of a facet field whose selections must NOT be applied, or <c>null</c> to apply all.
    /// </param>
    /// <returns>
    /// The base query itself when no selections apply; otherwise a <see cref="BooleanQuery"/>.
    /// </returns>
    protected Query CreateFacetedQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> facetFieldInfos, string facetAttributeFieldName)
    {
        // Fields without selections (null or empty list) contribute nothing to the drill-down.
        var facetsToAdd = facetFieldInfos
            .Where(x => x.FieldName != facetAttributeFieldName && x.Selections != null && x.Selections.Count > 0)
            .ToList();
        if (facetsToAdd.Count == 0) return baseQueryWithoutFacetDrilldown;

        var booleanQuery = new BooleanQuery { { baseQueryWithoutFacetDrilldown, Occur.MUST } };
        foreach (var facetFieldInfo in facetsToAdd)
        {
            if (facetFieldInfo.Selections.Count == 1)
            {
                booleanQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, facetFieldInfo.Selections[0])), Occur.MUST);
            }
            else
            {
                var valuesQuery = new BooleanQuery();
                foreach (var value in facetFieldInfo.Selections)
                {
                    valuesQuery.Add(new TermQuery(new Term(facetFieldInfo.FieldName, value)), Occur.SHOULD);
                }
                booleanQuery.Add(valuesQuery, Occur.MUST);
            }
        }
        return booleanQuery;
    }

    /// <summary>
    /// Cached facet data of one facet field: the field name and one entry per facet value.
    /// </summary>
    public class FacetValues
    {
        /// <summary>Creates an instance with an empty value list.</summary>
        public FacetValues()
        {
            FacetValueBitSetList = new List<FacetValueBitSet>();
        }

        /// <summary>Name of the facet field.</summary>
        public string Term { get; set; }

        /// <summary>Entries for the facet values of the field.</summary>
        public List<FacetValueBitSet> FacetValueBitSetList { get; set; }

        /// <summary>
        /// One facet value with its document bit set and total document count.
        /// </summary>
        public class FacetValueBitSet
        {
            /// <summary>The facet value (decoded term text).</summary>
            public string Value { get; set; }

            /// <summary>
            /// Documents containing the value; <c>null</c> when the entry was flagged as lazy-load,
            /// in which case the bit set is recalculated when needed.
            /// </summary>
            public OpenBitSetDISI Bitset { get; set; }

            /// <summary>Cardinality of the bit set at the time the entry was built.</summary>
            public long Count { get; set; }
        }
    }
}
}
10 changes: 10 additions & 0 deletions MultiFacetLuceneCore/MultiFacetLuceneCore.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Target framework the library is built against. -->
<TargetFramework>netcoreapp3.1</TargetFramework>
</PropertyGroup>
<ItemGroup>
<!-- Lucene.Net 4.8 pre-release (beta) package providing the index/search/util types used by this project. -->
<PackageReference Include="Lucene.Net" Version="4.8.0-beta00013" />
<!-- NOTE(review): no use of SharpZipLib is visible in the source files shown alongside this project; confirm the reference is required. -->
<PackageReference Include="SharpZipLib" Version="1.3.1" />
</ItemGroup>
</Project>
Loading