diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 957298f..b228d6e 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -1,6 +1,6 @@ name: Bug Report description: | - Create a bug report for pdf2docx + Create a bug report for pdf2docx. Please use English only in posts and issues to aid maintainers and benefit all users. We will immediately reject and close non-English posts. # We omit `title: "..."` so that the field defaults to blank. If we set it to diff --git a/pdf2docx/table/Cell.py b/pdf2docx/table/Cell.py index cff0124..1b7d421 100644 --- a/pdf2docx/table/Cell.py +++ b/pdf2docx/table/Cell.py @@ -76,7 +76,7 @@ def make_docx(self, table, indexes): n_row, n_col = self.merged_cells i, j = indexes docx_cell = table.cell(i, j) - if n_row*n_col!=1: + if n_row*n_col != 1 and ((i+n_row-1) * table._column_count + j+n_col-1) < len(table._cells): # check whether index is over length of cells _cell = table.cell(i+n_row-1, j+n_col-1) try: docx_cell.merge(_cell) @@ -133,7 +133,8 @@ def _set_style(self, table, indexes): # merged cells are assumed to have same borders with the main cell for m in range(i, i+n_row): for n in range(j, j+n_col): - docx.set_cell_border(table.cell(m, n), **kwargs) + if len(table._cells) > m * table._column_count + n: # check whether index is over length of cells + docx.set_cell_border(table.cell(m, n), **kwargs) # --------------------- # cell bg-color diff --git a/pdf2docx/table/Row.py b/pdf2docx/table/Row.py index e1a442c..2dde91e 100644 --- a/pdf2docx/table/Row.py +++ b/pdf2docx/table/Row.py @@ -66,7 +66,10 @@ def make_docx(self, table, idx_row:int): # to control the layout precisely, set `exact` value, rather than `at least` value # the associated steps in MS word: Table Properties -> Row -> Row height -> exactly docx_row.height_rule = WD_ROW_HEIGHT.EXACTLY - + + if self.height < 0: # to prevent negative height validation + self.height = 0 + # NOTE: row height is counted from center-line of top border to center line of bottom border docx_row.height = Pt(self.height) diff --git a/pdf2docx/text/Lines.py b/pdf2docx/text/Lines.py index 20b5ae9..b97a33a 100644 --- a/pdf2docx/text/Lines.py +++ b/pdf2docx/text/Lines.py @@ -60,6 +60,9 @@ def split_vertically_by_text(self, line_break_free_space_ratio:float, new_paragr ''' rows = self.group_by_physical_rows() + for row in rows: + row.sort_in_line_order() # sort lines in row + # skip if only one row num = len(rows) if num==1: return rows