Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #308, #317, #318

Merged
merged 3 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Bug Report
description: |
Create a bug report for pdf2docx
Create a bug report for pdf2docx.
Please use English only in posts and issues to aid maintainers and benefit all users. We will immediately reject and close non-English posts.

# We omit `title: "..."` so that the field defaults to blank. If we set it to
Expand Down
5 changes: 3 additions & 2 deletions pdf2docx/table/Cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def make_docx(self, table, indexes):
n_row, n_col = self.merged_cells
i, j = indexes
docx_cell = table.cell(i, j)
if n_row*n_col!=1:
if n_row*n_col != 1 and ((i+n_row-1) * table._column_count + j+n_col-1) < len(table._cells): # check whether index is over length of cells
_cell = table.cell(i+n_row-1, j+n_col-1)
try:
docx_cell.merge(_cell)
Expand Down Expand Up @@ -133,7 +133,8 @@ def _set_style(self, table, indexes):
# merged cells are assumed to have the same borders as the main cell
for m in range(i, i+n_row):
for n in range(j, j+n_col):
docx.set_cell_border(table.cell(m, n), **kwargs)
if len(table._cells) > m * table._column_count + n: # check whether index is over length of cells
docx.set_cell_border(table.cell(m, n), **kwargs)

# ---------------------
# cell bg-color
Expand Down
5 changes: 4 additions & 1 deletion pdf2docx/table/Row.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,10 @@ def make_docx(self, table, idx_row:int):
# to control the layout precisely, set the `exact` value rather than the `at least` value
# the associated steps in MS Word: Table Properties -> Row -> Row height -> exactly
docx_row.height_rule = WD_ROW_HEIGHT.EXACTLY


if self.height < 0: # to prevent negative height validation
self.height = 0

# NOTE: row height is measured from the center line of the top border to the center line of the bottom border
docx_row.height = Pt(self.height)

Expand Down
3 changes: 3 additions & 0 deletions pdf2docx/text/Lines.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ def split_vertically_by_text(self, line_break_free_space_ratio:float, new_paragr
'''
rows = self.group_by_physical_rows()

for row in rows:
row.sort_in_line_order() # sort lines in row

# skip if only one row
num = len(rows)
if num==1: return rows
Expand Down
Loading