Skip to content

Commit

Permalink
Update Lines.py
Browse files Browse the repository at this point in the history
Fix unwanted spaces being inserted when concatenating Chinese characters. 解决中文字符拼接有空格的问题
  • Loading branch information
l00242672 authored and dothinking committed Jan 13, 2024
1 parent fb6c96a commit 644f784
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions pdf2docx/text/Lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def adjust_last_word(self, delete_end_line_hyphen:bool):
'''
punc_ex_hyphen = ''.join(c for c in string.punctuation if c!='-')
def is_end_of_english_word(c):
- return c.isalnum() or (c and c in punc_ex_hyphen)
+ return c.encode().isalnum() or (c and c in punc_ex_hyphen)

for i, line in enumerate(self._instances[:-1]):
# last char in this line
Expand Down Expand Up @@ -265,4 +265,4 @@ def tab_position(pos): # tab stop index of given position

# update stop reference position
if line==self._instances[-1]: break
- ref = line.bbox[idx1] if line.in_same_row(self._instances[i+1]) else block.bbox[idx0]
+ ref = line.bbox[idx1] if line.in_same_row(self._instances[i+1]) else block.bbox[idx0]

0 comments on commit 644f784

Please sign in to comment.