Skip to content

Commit

Permalink
Confluence fixes (#4220)
Browse files Browse the repository at this point in the history
* Confluence fixes

* Small tweak

* Address greptile comments
  • Loading branch information
Weves authored Mar 6, 2025
1 parent facc8cc commit a7a374d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
2 changes: 1 addition & 1 deletion backend/onyx/connectors/confluence/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def _convert_page_to_document(self, page: dict[str, Any]) -> Document | None:
# Extract basic page information
page_id = page["id"]
page_title = page["title"]
- page_url = f"{self.wiki_base}/wiki{page['_links']['webui']}"
+ page_url = f"{self.wiki_base}{page['_links']['webui']}"

# Get the page content
page_content = extract_text_from_confluence_html(
Expand Down
23 changes: 20 additions & 3 deletions backend/onyx/indexing/indexing_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,12 +464,29 @@ def index_doc_batch(
),
)

- successful_doc_ids = {record.document_id for record in insertion_records}
- if successful_doc_ids != set(updatable_ids):
+ all_returned_doc_ids = (
+ {record.document_id for record in insertion_records}
+ .union(
+ {
+ record.failed_document.document_id
+ for record in vector_db_write_failures
+ if record.failed_document
+ }
+ )
+ .union(
+ {
+ record.failed_document.document_id
+ for record in embedding_failures
+ if record.failed_document
+ }
+ )
+ )
+ if all_returned_doc_ids != set(updatable_ids):
  raise RuntimeError(
  f"Some documents were not successfully indexed. "
  f"Updatable IDs: {updatable_ids}, "
- f"Successful IDs: {successful_doc_ids}"
+ f"Returned IDs: {all_returned_doc_ids}. "
+ "This should never happen."
  )

last_modified_ids = []
Expand Down

0 comments on commit a7a374d

Please sign in to comment.