-
Notifications
You must be signed in to change notification settings - Fork 209
/
train_ngram_tfidf.py
38 lines (29 loc) · 1.33 KB
/
train_ngram_tfidf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# -*- coding:utf-8 -*-
'''
-------------------------------------------------
Description : TF-IDF implementation
Author : machinelp
Date : 2020-06-04
-------------------------------------------------
'''
import sys
from textmatch.models.text_embedding.ngram_tf_idf_sklearn import NgramTfIdf
from textmatch.config.constant import Constant as const
def _build_model():
    """Return a new NgramTfIdf wired to the dictionary/model/index paths from Constant."""
    return NgramTfIdf(
        dic_path=const.NGRAM_TFIDF_DIC_PATH,
        tfidf_model_path=const.NGRAM_TFIDF_MODEL_PATH,
        tfidf_index_path=const.NGRAM_TFIDF_INDEX_PATH,
    )


def main():
    """Fit an n-gram TF-IDF model on a toy corpus, then query it.

    Two phases, each using its own NgramTfIdf instance built from the same
    configured paths:

    1. Train: ``fit`` is called on the training sentences.
    2. Query: a new instance is initialised with the candidate documents
       (``update=False``) and a test sentence is passed to both ``predict``
       and ``_predict``; each result is printed.

    NOTE(review): the second instance presumably reloads the artifacts that
    ``fit`` persisted at the configured paths -- confirm against NgramTfIdf.
    """
    # Training set.
    train_sentences = [
        "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
        "我在玉龙雪山并且喜欢玉龙雪山",
        "我在九寨沟",
    ]
    # Candidate documents for the query phase (training set plus one extra entry).
    candidate_docs = [
        "我去玉龙雪山并且喜欢玉龙雪山玉龙雪山",
        "我在玉龙雪山并且喜欢玉龙雪山",
        "我在九寨沟",
        "哈哈哈哈",
    ]

    # Train.
    tfidf = _build_model()
    tfidf.fit(train_sentences)

    # Query: start from a fresh instance rather than reusing the trained one.
    tfidf = _build_model()
    tfidf.init(candidate_docs, update=False)

    query = "我在九寨沟,很喜欢"
    # Result of the public prediction entry point.
    print('pre>>>>>', tfidf.predict(query))
    # First element of what the underscore-prefixed variant returns.
    print('pre>>>>>', tfidf._predict(query)[0])


if __name__ == '__main__':
    main()