Skip to content

Commit

Permalink
Improved search queries
Browse files Browse the repository at this point in the history
  • Loading branch information
shartte committed Feb 9, 2025
1 parent 30d2862 commit 0e68bae
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 47 deletions.
30 changes: 19 additions & 11 deletions docs/docs/02-changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@ import Video from '@site/src/components/Video';

# Changelog

## 2.6.0
## 21.1.0 (Minecraft 1.21.1)

- Switched to the NeoForge versioning scheme. This version is equivalent to version 2.6.0, except for the following changes.
- Improved query parsing for full-text search. Search will now always apply "incremental" search for the last entered word,
assuming the user might not have entered it fully yet. This means searching for "io po" will search for both "io po"
and "io po*", although it will score an exact hit for "po" higher than a hit for "port" (for example).

## 2.6.0 (Minecraft 1.21.1)

- Change the default layout of guides to be a centered column, and add a toolbar button to toggle between
full-width and centered-column layout.
Expand All @@ -15,16 +22,16 @@ import Video from '@site/src/components/Video';
- Added support for blast furnace recipes
- Do not show a navigation bar for guides that do not have any navigation items

## 2.5.1
## 2.5.1 (Minecraft 1.21.1)

- Fix shared recipe types not being collected correctly from the service loader

## 2.5.0
## 2.5.0 (Minecraft 1.21.1)

- Added an extension point for mods to add support for [custom recipe types](20-integration/recipe-types.md) to all guides
- Fixed an issue with navigating to the search screen

## 2.4.0
## 2.4.0 (Minecraft 1.21.1)

- Add missing Markdown node classes to API jar
- Add structure editing commands that only work in singleplayer:
Expand All @@ -36,17 +43,17 @@ import Video from '@site/src/components/Video';
- Added op command `/guideme give <target> <guide>` to quickly give a guide item to an entity target (i.e. `@s`)
- Fix guidebook navbar closing when clicking links

## 2.3.1
## 2.3.1 (Minecraft 1.21.1)

- Fixes a crash with the generic guide item if it has no guide id attached

## 2.3.0
## 2.3.0 (Minecraft 1.21.1)

- GuideME is now published on Maven Central instead of Modmaven
- The group id of the Maven artifact has changed from `appeng` to `org.appliedenergistics`
to enable publishing on Maven Central

## 2.2.0
## 2.2.0 (Minecraft 1.21.1)

- Added full-text search based on Apache Lucene, which is enabled for all guides:
<Video src="guide-search.mp4" />
Expand All @@ -65,15 +72,16 @@ import Video from '@site/src/components/Video';
By default, all custom tags simply add their children to the indexer
- Added the ability to set borders for `LytBox`
- Generalized `GuideUiHost` into `DocumentUiHost`
## 2.1.2

## 2.1.2 (Minecraft 1.21.1)

- Skip fully invisible blocks (without block entities) when calculating the bounding box of a game scene. Fixes inexplicably larger bounds when blocks like `minecraft:light` were included in the exported structure.

## 2.1.1
## 2.1.1 (Minecraft 1.21.1)

- Fix race-condition crash when local file-system changes were processed before the resource reload was finished.

## 2.1.0
## 2.1.0 (Minecraft 1.21.1)

- Adds API to open guides for players from both server- and client-side
- `GuidesCommon.openGuide(Player player, ResourceLocation guideId)` to open the last opened (or start-page if none) page of a guide for the given player.
Expand All @@ -86,6 +94,6 @@ import Video from '@site/src/components/Video';
or `/guideme open @s testmod:guide page.md#anchor` to open a specific page at an anchor.
- Fix mod version being shown as 0.0.0

## 2.0.1
## 2.0.1 (Minecraft 1.21.1)

- Removes superfluous log spam when opening the creative menu.
83 changes: 83 additions & 0 deletions src/main/java/guideme/internal/search/GuideQueryParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
package guideme.internal.search;

import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;

/**
 * Builds the Lucene query used for the guide full-text search from a raw, user-entered search string.
 */
public class GuideQueryParser {
    private GuideQueryParser() {
    }

    /**
     * Creates a query consisting of four optional (SHOULD) sub-queries over the "en" title and text fields:
     * <ul>
     * <li>The tokens as entered, matched against the title field, boosted to 1.2.</li>
     * <li>The tokens as entered, matched against the text field, unboosted.</li>
     * <li>The tokens with the last one expanded to a prefix wildcard, matched against the title field, boosted to
     * 0.4.</li>
     * <li>The tokens with the last one expanded to a prefix wildcard, matched against the text field, boosted to
     * 0.2.</li>
     * </ul>
     * Within each sub-query the individual tokens are also combined with SHOULD, so a document matching any token is
     * a hit.
     *
     * @param queryString the search text as entered by the user
     * @return the combined query
     */
    public static Query parse(String queryString) {
        var tokens = QueryStringSplitter.split(queryString);

        var textField = IndexSchema.getTextField("en");
        var titleField = IndexSchema.getTitleField("en");

        var builder = new BooleanQuery.Builder();

        // Exact occurrences in the title are scored with 20% boost
        builder.add(new BoostQuery(buildFieldQuery(titleField, tokens, false, BooleanClause.Occur.SHOULD), 1.2f),
                BooleanClause.Occur.SHOULD);
        // Exact occurrences in the body are scored normally
        builder.add(buildFieldQuery(textField, tokens, false, BooleanClause.Occur.SHOULD), BooleanClause.Occur.SHOULD);
        // Occurrences in the title, where the last token is expanded to a wildcard are scored at 40%
        builder.add(new BoostQuery(buildFieldQuery(titleField, tokens, true, BooleanClause.Occur.SHOULD), 0.4f),
                BooleanClause.Occur.SHOULD);
        // Occurrences in the body, where the last token is expanded to a wildcard are scored at 20%
        builder.add(new BoostQuery(buildFieldQuery(textField, tokens, true, BooleanClause.Occur.SHOULD), 0.2f),
                BooleanClause.Occur.SHOULD);

        return builder.build();
    }

    /**
     * Builds the query for a single field: one clause per token, each added with the given occurrence.
     *
     * @param fieldName             the index field to search
     * @param tokens                the split search tokens; tokens containing whitespace are treated as phrases
     * @param makeLastTokenWildcard if true, a trailing {@code *} is appended to the last token (unless it is a
     *                              phrase or already ends with {@code *})
     * @param clause                how each token clause is combined into the resulting query
     */
    private static BooleanQuery buildFieldQuery(String fieldName, List<String> tokens, boolean makeLastTokenWildcard,
            BooleanClause.Occur clause) {
        var booleanQueryBuilder = new BooleanQuery.Builder();

        // NOTE(review): tokens are used verbatim as index terms and are not run through the analyzer;
        // confirm this matches how the fields are analyzed at index time (e.g. lower-casing).
        for (int i = 0; i < tokens.size(); i++) {
            String token = tokens.get(i);

            // Skip empty tokens: expanded to a wildcard they would become "*" and match every
            // document that has the field at all.
            if (token.isEmpty()) {
                continue;
            }

            if (containsWhitespace(token)) {
                // Phrase query
                var splitToken = QueryStringSplitter.split(token);
                // A whitespace-only phrase has no terms; there is nothing to search for.
                if (splitToken.isEmpty()) {
                    continue;
                }
                booleanQueryBuilder.add(new PhraseQuery(fieldName, splitToken.toArray(String[]::new)), clause);
                continue;
            }

            // Make the last token a wildcard
            if (makeLastTokenWildcard && i == tokens.size() - 1 && !token.endsWith("*")) {
                token += "*";
            }

            Term term = new Term(fieldName, token);

            // Tokens with a user-entered or appended '*' need a wildcard query, others match exactly.
            Query q;
            if (token.contains("*")) {
                q = new WildcardQuery(term);
            } else {
                q = new TermQuery(term);
            }

            booleanQueryBuilder.add(q, clause);
        }

        return booleanQueryBuilder.build();
    }

    /**
     * Checks whether the token contains any whitespace character, which marks it as a quoted phrase.
     */
    private static boolean containsWhitespace(String token) {
        for (int i = 0; i < token.length(); i++) {
            if (Character.isWhitespace(token.charAt(i))) {
                return true;
            }
        }
        return false;
    }

}
53 changes: 17 additions & 36 deletions src/main/java/guideme/internal/search/GuideSearch.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import net.minecraft.resources.ResourceLocation;
Expand All @@ -31,8 +30,6 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
Expand All @@ -58,16 +55,6 @@
public class GuideSearch implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(GuideSearch.class);

private static final String FIELD_GUIDE_ID = "guide_id";
private static final String FIELD_PAGE_ID = "page_id";

private static final String FIELD_TEXT = "page_content";
private static final String FIELD_TITLE = "page_title";

// Fields for analyzed text
private static final String FIELD_TITLE_EN = "page_title_en";
private static final String FIELD_TEXT_EN = "page_content_en";

/**
* Maximum time spent indexing per tick.
*/
Expand Down Expand Up @@ -99,7 +86,7 @@ public GuideSearch() {

public void index(Guide guide) {
try {
indexWriter.deleteDocuments(new PhraseQuery(FIELD_GUIDE_ID, guide.getId().toString()));
indexWriter.deleteDocuments(new PhraseQuery(IndexSchema.FIELD_GUIDE_ID, guide.getId().toString()));
} catch (IOException e) {
LOG.error("Failed to delete all documents before re-indexing.", e);
}
Expand Down Expand Up @@ -182,27 +169,20 @@ public List<SearchResult> searchGuide(String queryText, @Nullable Guide onlyFrom

var indexSearcher = new IndexSearcher(indexReader);

var parser = new StandardQueryParser(analyzer);
parser.setMultiFields(new String[] {
FIELD_TITLE_EN,
FIELD_TEXT_EN
});
parser.setFieldsBoost(Map.of(FIELD_TITLE_EN, 1.2f));

Query query;
try {
query = parser.parse(queryText, null);
} catch (QueryNodeException e) {
LOG.debug("Failed to parse Lucene query: '{}'", queryText, e);
query = GuideQueryParser.parse(queryText);
} catch (Exception e) {
LOG.debug("Failed to parse search query: '{}'", queryText, e);
return List.of();
}

// Filter by guide if given one
if (onlyFromGuide != null) {
query = new BooleanQuery.Builder()
.add(query, BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(FIELD_GUIDE_ID, onlyFromGuide.getId().toString())),
BooleanClause.Occur.MUST)
.add(new TermQuery(new Term(IndexSchema.FIELD_GUIDE_ID, onlyFromGuide.getId().toString())),
BooleanClause.Occur.FILTER)
.build();
}

Expand All @@ -224,8 +204,8 @@ public List<SearchResult> searchGuide(String queryText, @Nullable Guide onlyFrom

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
var document = storedFields.document(scoreDoc.doc);
var guideId = ResourceLocation.parse(document.get(FIELD_GUIDE_ID));
var pageId = ResourceLocation.parse(document.get(FIELD_PAGE_ID));
var guideId = ResourceLocation.parse(document.get(IndexSchema.FIELD_GUIDE_ID));
var pageId = ResourceLocation.parse(document.get(IndexSchema.FIELD_PAGE_ID));

var guide = Guides.getById(guideId);
if (guide == null) {
Expand All @@ -241,14 +221,15 @@ public List<SearchResult> searchGuide(String queryText, @Nullable Guide onlyFrom

String bestFragment = "";
try {
bestFragment = highlighter.getBestFragment(analyzer, FIELD_TEXT_EN, document.get(FIELD_TEXT));
bestFragment = highlighter.getBestFragment(analyzer, IndexSchema.FIELD_TEXT_EN,
document.get(IndexSchema.FIELD_TEXT));
} catch (InvalidTokenOffsetsException e) {
LOG.error("Cannot determine text to highlight for result", e);
}

// This is kinda shit, but the Lucene highlighter isn't exactly flexible with its return type
// it only supports strings.
var pageTitle = document.get(FIELD_TITLE);
var pageTitle = document.get(IndexSchema.FIELD_TITLE);

var startOfSegment = 0;
LytFlowSpan currentSpan = new LytFlowSpan();
Expand Down Expand Up @@ -294,15 +275,15 @@ private Document createPageDocument(Guide guide, ParsedGuidePage page) {
var pageTitle = getPageTitle(guide, page);

var doc = new Document();
doc.add(new StringField(FIELD_GUIDE_ID, guide.getId().toString(), Field.Store.YES));
doc.add(new StoredField(FIELD_PAGE_ID, page.getId().toString()));
doc.add(new StringField(IndexSchema.FIELD_GUIDE_ID, guide.getId().toString(), Field.Store.YES));
doc.add(new StoredField(IndexSchema.FIELD_PAGE_ID, page.getId().toString()));

// Store original text for highlighting and display purposes
doc.add(new StoredField(FIELD_TITLE, pageTitle));
doc.add(new StoredField(FIELD_TEXT, pageText));
doc.add(new StoredField(IndexSchema.FIELD_TITLE, pageTitle));
doc.add(new StoredField(IndexSchema.FIELD_TEXT, pageText));

doc.add(new TextField(FIELD_TITLE_EN, pageTitle, Field.Store.NO));
doc.add(new TextField(FIELD_TEXT_EN, pageText, Field.Store.NO));
doc.add(new TextField(IndexSchema.FIELD_TITLE_EN, pageTitle, Field.Store.NO));
doc.add(new TextField(IndexSchema.FIELD_TEXT_EN, pageText, Field.Store.NO));
return doc;
}

Expand Down
29 changes: 29 additions & 0 deletions src/main/java/guideme/internal/search/IndexSchema.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package guideme.internal.search;

import java.util.Map;

/**
 * Names of the fields stored in the search index, plus lookups for the analyzed per-language fields.
 */
final class IndexSchema {
    static final String FIELD_GUIDE_ID = "guide_id";
    static final String FIELD_PAGE_ID = "page_id";
    static final String FIELD_TEXT = "page_content";
    static final String FIELD_TITLE = "page_title";
    // Fields for analyzed text
    static final String FIELD_TITLE_EN = "page_title_en";
    static final String FIELD_TEXT_EN = "page_content_en";

    // Analyzed title field per language code
    private static final Map<String, String> TITLE_FIELD_BY_LANGUAGE = Map.of("en", FIELD_TITLE_EN);
    // Analyzed text field per language code
    private static final Map<String, String> TEXT_FIELD_BY_LANGUAGE = Map.of("en", FIELD_TEXT_EN);

    private IndexSchema() {
    }

    /**
     * Returns the analyzed title field for the given language, or the English one if the language is unknown.
     */
    public static String getTitleField(String language) {
        return resolve(TITLE_FIELD_BY_LANGUAGE, language, FIELD_TITLE_EN);
    }

    /**
     * Returns the analyzed text field for the given language, or the English one if the language is unknown.
     */
    public static String getTextField(String language) {
        return resolve(TEXT_FIELD_BY_LANGUAGE, language, FIELD_TEXT_EN);
    }

    /**
     * Looks up the field registered for a language and falls back to the given field if there is none.
     */
    private static String resolve(Map<String, String> fieldsByLanguage, String language, String fallback) {
        String field = fieldsByLanguage.get(language);
        if (field == null) {
            return fallback;
        }
        return field;
    }
}
56 changes: 56 additions & 0 deletions src/main/java/guideme/internal/search/QueryStringSplitter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package guideme.internal.search;

import java.util.ArrayList;
import java.util.List;

/**
 * Splits a user-entered search string into terms, keeping quoted phrases together as a single term.
 */
public final class QueryStringSplitter {
    /** Marker for "not currently inside a quoted phrase". */
    private static final char NO_QUOTE = '\0';

    private QueryStringSplitter() {
    }

    /**
     * Splits the query on whitespace. Text enclosed in double or single quotes is returned as one term with its
     * inner whitespace preserved; the quotes themselves are dropped.
     * <ul>
     * <li>A phrase is only closed by the same quote character that opened it, so {@code "it's here"} yields the
     * single term {@code it's here}.</li>
     * <li>A single quote in the middle of a word is treated as an apostrophe and kept, so {@code don't} stays one
     * term.</li>
     * <li>A double quote directly following a word ends that word and starts a phrase.</li>
     * <li>An unclosed quote extends to the end of the input.</li>
     * <li>Empty or whitespace-only phrases are dropped.</li>
     * </ul>
     *
     * @param query the raw query string
     * @return the terms in order of appearance; never contains empty or blank terms
     */
    public static List<String> split(String query) {
        List<String> terms = new ArrayList<>();
        StringBuilder currentTerm = new StringBuilder();
        char openQuote = NO_QUOTE; // The quote character of the phrase we're inside of, if any

        for (int i = 0; i < query.length(); i++) {
            char ch = query.charAt(i);

            if (openQuote != NO_QUOTE) {
                if (ch == openQuote) {
                    // Matching closing quote: the phrase is complete
                    flush(terms, currentTerm);
                    openQuote = NO_QUOTE;
                } else {
                    // Everything else, including whitespace and the other quote character, belongs to the phrase
                    currentTerm.append(ch);
                }
            } else if (ch == '"' || (ch == '\'' && currentTerm.isEmpty())) {
                // Opening quote. A word directly in front of a double quote is emitted on its own.
                flush(terms, currentTerm);
                openQuote = ch;
            } else if (Character.isWhitespace(ch)) {
                // Whitespace outside of quotes separates terms
                flush(terms, currentTerm);
            } else {
                // Regular character, or an apostrophe inside a word
                currentTerm.append(ch);
            }
        }

        // Add the last term (or unclosed phrase) if there's any remaining text
        flush(terms, currentTerm);

        return terms;
    }

    /**
     * Appends the accumulated term to the result unless it is blank, then resets the accumulator.
     */
    private static void flush(List<String> terms, StringBuilder currentTerm) {
        String term = currentTerm.toString();
        if (!term.isBlank()) {
            terms.add(term);
        }
        currentTerm.setLength(0);
    }
}
Loading

0 comments on commit 0e68bae

Please sign in to comment.