Skip to content

Commit

Permalink
add positionLength
Browse files (browse the repository at this point in the history)
  • Loading branch information
phylieac committed Mar 13, 2018
1 parent b8a6c0d commit 4167a52
Show file tree
Hide file tree
Showing 14 changed files with 15 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ public boolean incrementToken() throws IOException {
end = start + length;
termAtt.copyBuffer(buffer[current].toCharArray(), 0, length);
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
posArr.setPositionLength(current+1);
typeAtt.setType("word");
start = end;
current += 1;
Expand Down
22 changes: 12 additions & 10 deletions src/test/java/org/nlpir/lucene/cn/ictclas/NLPIRTokenizerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,30 @@
public class NLPIRTokenizerTest {

public static void main(String[] args) throws Exception {
//NLPIR
// NLPIR
NLPIRTokenizerAnalyzer nta = new NLPIRTokenizerAnalyzer("", 1, "", "", false);
//Index
IndexWriterConfig inconf=new IndexWriterConfig(nta);
// Index
IndexWriterConfig inconf = new IndexWriterConfig(nta);
inconf.setOpenMode(OpenMode.CREATE_OR_APPEND);
IndexWriter index=new IndexWriter(FSDirectory.open(Paths.get("index/")),inconf);
IndexWriter index = new IndexWriter(FSDirectory.open(Paths.get("index/")), inconf);
Document doc = new Document();
doc.add(new TextField("contents", "特朗普表示,很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前,美中关系发展态势良好,我相信可以发展得更好。我期待着对中国进行国事访问。",Field.Store.YES));
doc.add(new TextField("contents",
"特朗普表示,很高兴汉堡会晤后再次同习近平主席通话。我同习主席就重大问题保持沟通和协调、两国加强各层级和各领域交往十分重要。当前,美中关系发展态势良好,我相信可以发展得更好。我期待着对中国进行国事访问。",
Field.Store.YES));
index.addDocument(doc);
index.flush();
index.close();
//Search
// Search
String field = "contents";
IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("index/")));
IndexSearcher searcher = new IndexSearcher(reader);
QueryParser parser = new QueryParser(field, nta);
Query query = parser.parse("特朗普习近平");
TopDocs top=searcher.search(query, 100);
System.out.println("总条数:"+top.totalHits);
TopDocs top = searcher.search(query, 100);
System.out.println("总条数:" + top.totalHits);
ScoreDoc[] hits = top.scoreDocs;
for(int i=0;i<hits.length;i++) {
System.out.println("doc="+hits[i].doc+" score="+hits[i].score);
for (int i = 0; i < hits.length; i++) {
System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
Document d = searcher.doc(hits[i].doc);
System.out.println(d.get("contents"));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#Generated by Maven Integration for Eclipse
#Mon Mar 12 11:49:08 CST 2018
#Tue Mar 13 13:25:11 CST 2018
version=6.6.0
groupId=lucene-analyzers-nlpir-ictclas
m2e.projectName=nlpir-analysis-cn-ictclas
Expand Down
Binary file modified target/classes/org/nlpir/lucene/cn/ictclas/NLPIRTokenizer.class
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified target/classes/org/nlpir/segment/CNLPIRLibrary.class
Binary file not shown.
Binary file modified target/classes/org/nlpir/segment/exception/NLPIRException.class
Binary file not shown.
Binary file modified target/lucene-analyzers-nlpir-ictclas-6.6.0.jar
Binary file not shown.
2 changes: 1 addition & 1 deletion target/maven-archiver/pom.properties
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#Generated by Maven
#Mon Mar 12 11:35:22 CST 2018
#Tue Mar 13 12:09:58 CST 2018
version=6.6.0
groupId=lucene-analyzers-nlpir-ictclas
artifactId=lucene-analyzers-nlpir-ictclas
Binary file not shown.

0 comments on commit 4167a52

Please sign in to comment.