import re

import jieba.posseg as jp

# --- Remove punctuation ---
# Delete every character outside the range U+4E00..U+9FA5.  This strips
# punctuation, but also Latin letters, digits and whitespace, because the
# pattern is a negated character class over that single range.
# `old` is the input string; it is expected to be defined before this point.
new = re.sub(r'[^\u4e00-\u9fa5]', '', old)

# --- Word segmentation ---
# Segment `text` into tokens; each item yielded exposes `.word` and `.flag`.
# `text` is expected to be defined before this point.
posseg = jp.cut(text)
for i in posseg:
    # `flag` is the part-of-speech tag of the token.
    # NOTE(review): `dic` is rebound on every iteration, so only the last
    # token's dict is left after the loop; collect into a list if all
    # tokens are needed.
    dic = {'word': i.word, 'flag': i.flag}

# --- Part of speech ---