From 45dbc58cd4f85fa0355ebc27f868f3e40a79f34c Mon Sep 17 00:00:00 2001 From: Yucheng Huo Date: Fri, 26 Apr 2024 19:12:43 -0400 Subject: [PATCH] =?UTF-8?q?Word=20Cloud=E6=AD=A3=E7=A1=AE=E8=AF=86?= =?UTF-8?q?=E5=88=AB@=E7=94=A8=E6=88=B7=E5=90=8D=20#367?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/analysis/analysis.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/analysis/analysis.py b/app/analysis/analysis.py index 2690776d..148b0654 100644 --- a/app/analysis/analysis.py +++ b/app/analysis/analysis.py @@ -1,4 +1,5 @@ import os +import re from collections import Counter import sys from datetime import datetime @@ -28,6 +29,7 @@ def wordcloud_(wxid, time_range=None): } # text = ''.join(map(lambda x: x[7], txt_messages)) text = ''.join(map(lambda x: x[7], txt_messages)) # 1“我”说的话,0“Ta”说的话 + text = re.sub(r'^@\w+\s', '', text) # 去掉@的人 total_msg_len = len(text) # 使用jieba进行分词,并加入停用词