from sklearn import feature_extraction # 导入sklearn库, 以获取文本的tf-idf值
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
def getVector(get_texts):
    """Return the TF-IDF weight matrix for a collection of texts.

    The texts are first turned into a term-count matrix with
    ``CountVectorizer`` and then re-weighted with ``TfidfTransformer``,
    both using their default settings.

    Args:
        get_texts: Iterable of raw text documents (strings) passed
            directly to ``CountVectorizer.fit_transform``.

    Returns:
        The dense array produced by calling ``toarray()`` on the TF-IDF
        matrix. Rows correspond to the input documents and columns to
        the vocabulary terms learned from them, so a single input
        document yields a ``(1, n)`` matrix.
    """
    # NOTE: the vocabulary lookup (`get_feature_names()`) that used to live
    # here was never used and that method no longer exists in
    # scikit-learn >= 1.2, so it has been dropped rather than renamed.
    counts = CountVectorizer().fit_transform(get_texts)
    tfidf = TfidfTransformer().fit_transform(counts)
    # Densify the sparse result so callers receive a plain array.
    return tfidf.toarray()