0xabu · 0xabu · Dec 30, 2024 · Dec 30, 2024
diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py
@@ -290,9 +290,14 @@ def capture_char(self, text: str) -> None:
                         assert last_charseq != 0
                         i = bisect.bisect_left(self.context_subscribers, (last_charseq,))
                         assert 0 <= i < len(self.context_subscribers)
-                        (found_charseq, found_annot) = self.context_subscribers.pop(i)
-                        assert found_charseq == last_charseq
-                        assert found_annot is a
+                        while True:
+                            (found_charseq, found_annot) = self.context_subscribers[i]
+                            assert found_charseq == last_charseq
+                            if found_annot is a:
+                                self.context_subscribers.pop(i)
+                                break
+                            i += 1
+                            assert i < len(self.context_subscribers)
 
                     else:
                         # This is the first hit for the annotation, so set the pre-context.

diff --git a/pdfannots/printer/markdown.py b/pdfannots/printer/markdown.py
@@ -198,8 +198,8 @@ def format_bullet(
 
         return ret
 
-    def merge_strikeout_context(self, annot: Annotation, text: str) -> str:
-        """Merge the context for a strikeout annotation into the text."""
+    def merge_context(self, annot: Annotation, text: str) -> str:
+        """Merge the context for a strikeout or caret annotation into the text."""
         (pre, post) = annot.get_context(self.remove_hyphens)
 
         if pre:
@@ -208,7 +208,12 @@ def merge_strikeout_context(self, annot: Annotation, text: str) -> str:
         if post:
             post = trim_context(post, keep_right=False)
 
-        return pre + '~~' + text + '~~' + post
+        if annot.subtype == AnnotationType.StrikeOut:
+            return pre + '~~' + text + '~~' + post
+        else:
+            assert annot.subtype == AnnotationType.Caret
+            assert text.isspace()
+            return pre.rstrip(' ') + ' ^ ' + post.lstrip(' ')
 
     def format_annot(
         self,
@@ -229,8 +234,7 @@ def format_annot(
         comment = [l for l in contents.splitlines() if l] if contents else []
 
         if annot.has_context():
-            assert annot.subtype == AnnotationType.StrikeOut
-            text = self.merge_strikeout_context(annot, text)
+            text = self.merge_context(annot, text)
 
         # we are either printing: item text and item contents, or one of the two
         # if we see an annotation with neither, something has gone wrong

diff --git a/pdfannots/types.py b/pdfannots/types.py
@@ -346,6 +346,11 @@ def __init__(
                 box = Box(min(xvals), min(yvals), max(xvals), max(yvals))
                 boxes.append(box)
 
+        # Kludge: Caret annotations without quadpoints still need context, so use rect as the box
+        if quadpoints is None and subtype == AnnotationType.Caret:
+            assert rect is not None
+            boxes.append(Box.from_coords(rect))
+
         # Compute a meaningful position of this annotation on the page
         assert rect or boxes
         (x0, y0, x1, y1) = rect if rect else boxes[0].get_coords()
@@ -399,7 +404,7 @@ def gettext(self, remove_hyphens: bool = False) -> typ.Optional[str]:
 
     def wants_context(self) -> bool:
         """Returns true if this annotation type should include context."""
-        return self.subtype == AnnotationType.StrikeOut
+        return self.subtype in {AnnotationType.Caret, AnnotationType.StrikeOut}
 
     def set_pre_context(self, pre_context: str) -> None:
         assert self.pre_context is None

diff --git a/tests.py b/tests.py
@@ -160,6 +160,17 @@ def test(self) -> None:
         self.assertEqual(self.annots[2].gettext(), 'This was a novel idea at the time')
 
 
+class Issue61(ExtractionTestBase):
+    filename = 'issue61.pdf'
+
+    def test(self) -> None:
+        self.assertEqual(len(self.annots), 1)
+        a = self.annots[0]
+        self.assertEqual(a.subtype, AnnotationType.Caret)
+        self.assertEqual(a.contents, 'and machine learning')
+        self.assertTrue(a.has_context())
+
+
 class Pr24(ExtractionTestBase):
     filename = 'pr24.pdf'
 

diff --git a/tests/issue61.pdf b/tests/issue61.pdf