Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Search feature #6

Merged
merged 8 commits into from
Mar 29, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions AhoCorasick.Net.sln
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".nuget", ".nuget", "{9EBF4F
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{311D94BD-6DCE-46D8-862C-71AB38D19C14}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{389268B9-F389-4506-B79D-B2F4889F41D5}"
ProjectSection(SolutionItems) = preProject
build\AhoCorasick.Net.nuspec = build\AhoCorasick.Net.nuspec
build\AhoCorasick.Net.Source.nuspec = build\AhoCorasick.Net.Source.nuspec
Expand Down
42 changes: 41 additions & 1 deletion src/AhoCorasick.Net/AhoCorasickTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,43 @@ public bool Contains(string text)
return false;
}

// TODO: this traversal is copy-pasted from the Contains method; refactor the shared logic.
// TODO: measure the performance of Search.
/// <summary>
/// Scans <paramref name="text"/> from left to right and yields each keyword
/// recorded on the tree nodes reached during the scan, together with the
/// position at which that keyword starts.
/// </summary>
/// <param name="text">The text to scan. It is not validated here.</param>
/// <returns>
/// A lazily evaluated sequence of pairs whose key is the matched keyword and
/// whose value is the zero-based index of the keyword's first character in
/// <paramref name="text"/>.
/// </returns>
/// <remarks>
/// This method is an iterator, so no work is done until the returned
/// sequence is enumerated.
/// </remarks>
public IEnumerable<KeyValuePair<string, int>> Search(string text)
{
    var currentNode = _rootNode;

    var length = text.Length;
    for (var i = 0; i < length; i++)
    {
        var symbol = text[i];

        while (true)
        {
            var node = currentNode.GetNode(symbol);
            if (node != null)
            {
                if (node.IsFinished)
                {
                    foreach (var result in node.Results)
                    {
                        // i is the index of the match's last character, so
                        // step back by the keyword length to get its start.
                        yield return new KeyValuePair<string, int>(result, i - result.Length + 1);
                    }
                }

                currentNode = node;
                break;
            }

            // No transition for this character. Only give up on it once the
            // root itself has been tried; previously the loop stopped as soon
            // as the failure link led to the root, so a keyword beginning at
            // this character was never started.
            if (currentNode == _rootNode)
            {
                break;
            }

            currentNode = currentNode.Failure;
        }
    }
}

private void AddPatternToTree(string pattern)
{
var latestNode = _rootNode;
Expand All @@ -69,6 +106,7 @@ private void AddPatternToTree(string pattern)
}

latestNode.IsFinished = true;
latestNode.Results.Add(pattern);
}

private void SetFailures()
Expand Down Expand Up @@ -107,8 +145,8 @@ private void SetFailures()
if (!currentNode.IsFinished)
{
currentNode.IsFinished = failure.IsFinished;
currentNode.Results.AddRange(failure.Results);
}

}
}

Expand All @@ -117,6 +155,7 @@ private class AhoCorasickTreeNode
public readonly AhoCorasickTreeNode Parent;
public AhoCorasickTreeNode Failure;
public bool IsFinished;
public List<string> Results;
public readonly char Key;

private int[] _buckets;
Expand All @@ -135,6 +174,7 @@ private AhoCorasickTreeNode(AhoCorasickTreeNode parent, char key)

_buckets = new int[0];
_entries = new Entry[0];
Results = new List<string>();
}

public AhoCorasickTreeNode[] Nodes
Expand Down
15 changes: 15 additions & 0 deletions tests/AhoCorasick.Net.Tests/AhoCorasickTreeTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

using Xunit;
using Xunit.Extensions;
Expand Down Expand Up @@ -32,6 +34,19 @@ public void InvalidKeywords(string[] keywords, Type exceptionType)
Assert.Throws(exceptionType, () => new AhoCorasickTree(keywords));
}

[Fact]
public void FindKeywordAndPosition()
{
    // Arrange: three of the four keywords occur in the user-agent string;
    // "someKeyword" does not and must not be reported.
    var tree = new AhoCorasickTree(new[] { "Mozilla", "6.3", "KHTML", "someKeyword" });
    const string userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36";
    var expectedHits = new[]
    {
        new KeyValuePair<string, int>("Mozilla", 0),
        new KeyValuePair<string, int>("6.3", 24),
        new KeyValuePair<string, int>("KHTML", 56)
    };

    // Act
    var actualHits = tree.Search(userAgent).ToList();

    // Assert: exactly the expected keywords, each at its start index.
    Assert.Equal(3, actualHits.Count);
    foreach (var hit in expectedHits)
    {
        Assert.Contains(hit, actualHits);
    }
}

[Fact]
public void Performance()
{
Expand Down