Skip to content

Commit

Permalink
up a demo
Browse files Browse the repository at this point in the history
  • Loading branch information
ansjsun committed Jan 20, 2016
1 parent 710e6a2 commit e66d2ea
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
58 changes: 58 additions & 0 deletions src/HotFinder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Collection;
import java.util.List;
import java.util.Map.Entry;

import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.nlpcn.commons.lang.util.CollectionUtil;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.MapCount;
import org.nlpcn.commons.lang.util.WordWeight;

/**
 * Demo for finding "hot" terms.
 *
 * <p>Every line of {@code test_data/fl.txt} is segmented and its terms are added to a
 * {@link WordWeight} under the category {@code "all"}; every line of
 * {@code test_data/corpus.txt} is handled the same way under the category {@code "sport"}.
 * The chi-square export for {@code "sport"} is then sorted and its leading entries are
 * printed to standard output.
 */
public class HotFinder {

    /** File whose terms are counted under {@link #BACKGROUND_CATEGORY}. */
    private static final String BACKGROUND_FILE = "test_data/fl.txt";

    /** File whose terms are counted under {@link #TARGET_CATEGORY}. */
    private static final String TARGET_FILE = "test_data/corpus.txt";

    private static final String BACKGROUND_CATEGORY = "all";

    private static final String TARGET_CATEGORY = "sport";

    /**
     * Upper bound on printed entries. The original loop printed an entry and then evaluated
     * {@code i++ > 20}, which lets through up to 22 entries; that count is kept unchanged.
     */
    private static final int MAX_PRINTED_ENTRIES = 22;

    /**
     * Runs the demo: collects term counts from both files and prints the top-ranked
     * chi-square entries for the {@code "sport"} category.
     *
     * @param args ignored
     * @throws IOException if either input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        // NOTE(review): the meaning of the two constructor arguments is defined by WordWeight
        // (presumably size limits for its internal counters) - confirm against the library.
        WordWeight ww = new WordWeight(500000, 200000);

        addTerms(ww, BACKGROUND_FILE, BACKGROUND_CATEGORY);
        addTerms(ww, TARGET_FILE, TARGET_CATEGORY);

        MapCount<String> targetScores = ww.exportChiSquare().get(TARGET_CATEGORY);
        if (targetScores == null) {
            // Nothing was exported for the category; report it instead of failing
            // with a bare NullPointerException on the next line.
            System.err.println("No chi-square result for category: " + TARGET_CATEGORY);
            return;
        }

        // NOTE(review): the second argument selects the sort order used by
        // CollectionUtil.sortMapByValue (presumably descending) - confirm against the library.
        List<Entry<String, Double>> ranked = CollectionUtil.sortMapByValue(targetScores.get(), 2);

        int printed = 0;
        for (Entry<String, Double> entry : ranked) {
            if (printed >= MAX_PRINTED_ENTRIES) {
                break;
            }
            System.out.println(entry);
            printed++;
        }
    }

    /**
     * Segments every line of a UTF-8 text file and adds each resulting term name to
     * {@code ww} under the given category.
     *
     * @param ww       accumulator that receives the (term, category) pairs
     * @param path     path of the text file to read
     * @param category category label passed along with every term
     * @throws IOException if the file cannot be opened or read
     */
    private static void addTerms(WordWeight ww, String path, String category) throws IOException {
        try (BufferedReader reader = IOUtil.getReader(path, IOUtil.UTF8)) {
            String line;
            while ((line = reader.readLine()) != null) {
                List<Term> terms = NlpAnalysis.parse(line);
                for (Term term : terms) {
                    ww.add(term.getName(), category);
                }
            }
        }
    }
}
7 changes: 2 additions & 5 deletions src/Test3.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,12 @@
public class Test3 {

public static void main(String[] args) throws IOException {
LDA lda = new LDA(AnsjAnalysis.DEFAUlT,new LDAGibbsModel(20, 50/(double)20, 0.1, 100, Integer.MAX_VALUE, Integer.MAX_VALUE));
BufferedReader newReader = Files.newReader(new File("test_data/corpus.txt"), Charsets.UTF_8);
LDA lda = new LDA(AnsjAnalysis.DEFAUlT,new LDAGibbsModel(12, 50/(double)12, 0.1, 100, Integer.MAX_VALUE, Integer.MAX_VALUE));
BufferedReader newReader = Files.newReader(new File("test_data/fl.txt"), Charsets.UTF_8);
String temp =null ;
int i = 0 ;
while((temp=newReader.readLine())!=null){
lda.addDoc(String.valueOf(++i),temp) ;
if(i>1000){
break ;
}
}

lda.trainAndSave("result/news/", "utf-8") ;
Expand Down

0 comments on commit e66d2ea

Please sign in to comment.