forked from huaban/jieba-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix:提供新的方法处理自定义用户词典分词带空格的情况(huaban#137)
- Loading branch information
1 parent
e46e44b
commit 6d7c24a
Showing
5 changed files
with
212 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
弹性公网IP,3 | ||
IPSEC VPN,3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
src/test/java/com/huaban/analysis/jieba/JiebaSegmenterExtendTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/** | ||
* | ||
*/ | ||
package com.huaban.analysis.jieba; | ||
|
||
import com.huaban.analysis.jieba.JiebaSegmenter.SegMode; | ||
import junit.framework.TestCase; | ||
import org.junit.Test; | ||
|
||
import java.nio.file.Paths; | ||
import java.util.List; | ||
import java.util.Locale; | ||
|
||
|
||
/** | ||
* @author matrix | ||
* | ||
*/ | ||
public class JiebaSegmenterExtendTest extends TestCase { | ||
private JiebaSegmenter segmenter = new JiebaSegmenter(); | ||
String[] sentences = | ||
new String[] { | ||
"订购弹性公网IP", | ||
"订购IPSEC VPN" | ||
}; | ||
|
||
@Override | ||
protected void setUp() throws Exception { | ||
WordDictionary.getInstance().init(Paths.get("conf"), ","); | ||
} | ||
|
||
|
||
@Override | ||
protected void tearDown() throws Exception { | ||
super.tearDown(); | ||
} | ||
|
||
|
||
@Test | ||
public void testCutForSearch() { | ||
for (String sentence : sentences) { | ||
List<SegToken> tokens = segmenter.processExtend(sentence, SegMode.SEARCH); | ||
System.out.print(String.format(Locale.getDefault(), "\n%s\n%s", sentence, tokens.toString())); | ||
} | ||
} | ||
|
||
|
||
@Test | ||
public void testCutForIndex() { | ||
for (String sentence : sentences) { | ||
List<SegToken> tokens = segmenter.processExtend(sentence, SegMode.INDEX); | ||
System.out.print(String.format(Locale.getDefault(), "\n%s\n%s", sentence, tokens.toString())); | ||
} | ||
} | ||
} |