From 644f784686392a3bde8faa65821e80f152d20198 Mon Sep 17 00:00:00 2001 From: l00242672 <45627193+l00242672@users.noreply.github.com> Date: Mon, 6 Nov 2023 15:02:01 +0800 Subject: [PATCH] Update Lines.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix unwanted spaces inserted when concatenating Chinese characters. 解决中文字符拼接有空格的问题 --- pdf2docx/text/Lines.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdf2docx/text/Lines.py b/pdf2docx/text/Lines.py index 5418c12..c9223e6 100644 --- a/pdf2docx/text/Lines.py +++ b/pdf2docx/text/Lines.py @@ -110,7 +110,7 @@ def adjust_last_word(self, delete_end_line_hyphen:bool): ''' punc_ex_hyphen = ''.join(c for c in string.punctuation if c!='-') def is_end_of_english_word(c): - return c.isalnum() or (c and c in punc_ex_hyphen) + return c.encode().isalnum() or (c and c in punc_ex_hyphen) for i, line in enumerate(self._instances[:-1]): # last char in this line @@ -265,4 +265,4 @@ def tab_position(pos): # tab stop index of given position # update stop reference position if line==self._instances[-1]: break - ref = line.bbox[idx1] if line.in_same_row(self._instances[i+1]) else block.bbox[idx0] \ No newline at end of file + ref = line.bbox[idx1] if line.in_same_row(self._instances[i+1]) else block.bbox[idx0]