Skip to content

Commit

Permalink
Merge pull request #6 from alexandrnikitin/Issue5_AddSearch
Browse files Browse the repository at this point in the history
Add Search feature
  • Loading branch information
alexandrnikitin committed Mar 29, 2015
2 parents 7ead802 + 5371831 commit de453de
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 3 deletions.
2 changes: 0 additions & 2 deletions AhoCorasick.Net.sln
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".nuget", ".nuget", "{9EBF4F
EndProjectSection
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{311D94BD-6DCE-46D8-862C-71AB38D19C14}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{389268B9-F389-4506-B79D-B2F4889F41D5}"
ProjectSection(SolutionItems) = preProject
build\AhoCorasick.Net.nuspec = build\AhoCorasick.Net.nuspec
build\AhoCorasick.Net.Source.nuspec = build\AhoCorasick.Net.Source.nuspec
Expand Down
42 changes: 41 additions & 1 deletion src/AhoCorasick.Net/AhoCorasickTree.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,43 @@ public bool Contains(string text)
return false;
}

// todo copy paste from Contains method: Refactor!
// todo check performance
/// <summary>
/// Scans <paramref name="text"/> from left to right and lazily yields every
/// keyword occurrence recorded on the tree nodes that are reached.
/// </summary>
/// <param name="text">The text to scan. Must not be null.</param>
/// <returns>
/// A lazily evaluated sequence of pairs whose key is the matched keyword and whose
/// value is the zero-based index in <paramref name="text"/> where that keyword starts.
/// Pairs are produced in order of the position at which each match ends.
/// </returns>
public IEnumerable<KeyValuePair<string, int>> Search(string text)
{
    var currentNode = _rootNode;

    var length = text.Length;
    for (var i = 0; i < length; i++)
    {
        while (true)
        {
            var node = currentNode.GetNode(text[i]);
            if (node != null)
            {
                if (node.IsFinished)
                {
                    foreach (var result in node.Results)
                    {
                        // i is the index of the match's last character, so step back to its first one.
                        yield return new KeyValuePair<string, int>(result, i - result.Length + 1);
                    }
                }

                currentNode = node;
                break;
            }

            // No transition even from the root: this character cannot start or
            // continue any keyword, so move on to the next character.
            if (currentNode == _rootNode)
            {
                break;
            }

            // Fall back along the failure link and retry the SAME character there.
            // The retry must also happen when the fallback lands on the root;
            // otherwise a keyword starting at this character would be skipped.
            currentNode = currentNode.Failure;
        }
    }
}

private void AddPatternToTree(string pattern)
{
var latestNode = _rootNode;
Expand All @@ -69,6 +106,7 @@ private void AddPatternToTree(string pattern)
}

latestNode.IsFinished = true;
latestNode.Results.Add(pattern);
}

private void SetFailures()
Expand Down Expand Up @@ -107,8 +145,8 @@ private void SetFailures()
if (!currentNode.IsFinished)
{
currentNode.IsFinished = failure.IsFinished;
currentNode.Results.AddRange(failure.Results);
}

}
}

Expand All @@ -117,6 +155,7 @@ private class AhoCorasickTreeNode
public readonly AhoCorasickTreeNode Parent;
public AhoCorasickTreeNode Failure;
public bool IsFinished;
public List<string> Results;
public readonly char Key;

private int[] _buckets;
Expand All @@ -135,6 +174,7 @@ private AhoCorasickTreeNode(AhoCorasickTreeNode parent, char key)

_buckets = new int[0];
_entries = new Entry[0];
Results = new List<string>();
}

public AhoCorasickTreeNode[] Nodes
Expand Down
15 changes: 15 additions & 0 deletions tests/AhoCorasick.Net.Tests/AhoCorasickTreeTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;

using Xunit;
using Xunit.Extensions;
Expand Down Expand Up @@ -32,6 +34,19 @@ public void InvalidKeywords(string[] keywords, Type exceptionType)
Assert.Throws(exceptionType, () => new AhoCorasickTree(keywords));
}

[Fact]
public void FindKeywordAndPosition()
{
    // Arrange: "someKeyword" is deliberately absent from the scanned text.
    var userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.99 Safari/537.36";
    var tree = new AhoCorasickTree(new[] { "Mozilla", "6.3", "KHTML", "someKeyword" });
    var expectedMatches = new[]
    {
        new KeyValuePair<string, int>("Mozilla", 0),
        new KeyValuePair<string, int>("6.3", 24),
        new KeyValuePair<string, int>("KHTML", 56)
    };

    // Act
    var foundMatches = tree.Search(userAgent).ToList();

    // Assert: exactly the three present keywords, each at its zero-based start offset.
    Assert.Equal(3, foundMatches.Count);
    foreach (var expected in expectedMatches)
    {
        Assert.Contains(expected, foundMatches);
    }
}

[Fact]
public void Performance()
{
Expand Down

0 comments on commit de453de

Please sign in to comment.