Skip to content

Commit

Permalink
feat: optionally split alignment visualization (#110)
Browse files Browse the repository at this point in the history
* feat: optionally split alignment visualization
  • Loading branch information
nikvaessen authored Feb 2, 2025
1 parent 4113e3f commit 2a036a1
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 4 deletions.
37 changes: 33 additions & 4 deletions src/jiwer/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
pairs.
"""

from typing import List, Union
from typing import List, Union, Optional

from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk

Expand All @@ -32,6 +32,7 @@ def visualize_alignment(
output: Union[WordOutput, CharacterOutput],
show_measures: bool = True,
skip_correct: bool = True,
line_width: Optional[int] = None,
) -> str:
"""
Visualize the output of [jiwer.process_words][process.process_words] and
Expand All @@ -45,6 +46,7 @@ def visualize_alignment(
show_measures: If enabled, the visualization will include measures like the WER
or CER
skip_correct: If enabled, the visualization will exclude correct reference and hypothesis pairs
line_width: If set, try, at best effort, to spit sentences into multiple lines if they exceed the width.
Returns:
(str): The visualization as a string
Expand Down Expand Up @@ -95,6 +97,18 @@ def visualize_alignment(
HYP: quite * bit of an even longest sentence here
D I I S I
```
When setting `line_width=80`, the following output will be split into multiple lines:
```txt
sentence 1
REF: This is a very long sentence that is *** much longer than the previous one
HYP: This is a very loong sentence that is not much longer than the previous one
S I
REF: or the one before that
HYP: or *** one before that
D
```
"""
references = output.references
hypothesis = output.hypotheses
Expand All @@ -110,7 +124,7 @@ def visualize_alignment(

final_str += f"sentence {idx+1}\n"
final_str += _construct_comparison_string(
gt, hp, chunks, include_space_seperator=not is_cer
gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width
)
final_str += "\n"

Expand Down Expand Up @@ -140,10 +154,12 @@ def _construct_comparison_string(
hypothesis: List[str],
ops: List[AlignmentChunk],
include_space_seperator: bool = False,
line_width: Optional[int] = None,
) -> str:
ref_str = "REF: "
hyp_str = "HYP: "
op_str = " "
agg_str = "" # aggregate string for max_chars split

for op in ops:
if op.type == "equal" or op.type == "substitute":
Expand All @@ -165,6 +181,19 @@ def _construct_comparison_string(
for rf, hp, c in zip(ref, hyp, op_chars):
str_len = max(len(rf), len(hp), len(c))

if line_width is not None:
if len(ref_str) + str_len > line_width:
# aggregate the strings
if include_space_seperator:
agg_str += f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n\n"
else:
agg_str += f"{ref_str}\n{hyp_str}\n{op_str}\n\n"

# reset the strings
ref_str = "REF: "
hyp_str = "HYP: "
op_str = " "

if rf == "*":
rf = "".join(["*"] * str_len)
elif hp == "*":
Expand All @@ -181,6 +210,6 @@ def _construct_comparison_string(

if include_space_seperator:
# remove last space
return f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"
return agg_str + f"{ref_str[:-1]}\n{hyp_str[:-1]}\n{op_str[:-1]}\n"
else:
return f"{ref_str}\n{hyp_str}\n{op_str}\n"
return agg_str + f"{ref_str}\n{hyp_str}\n{op_str}\n"
39 changes: 39 additions & 0 deletions tests/test_alignment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest
import jiwer
from jiwer import visualize_alignment


class TestAlignmentVisualizationWords(unittest.TestCase):
Expand Down Expand Up @@ -143,6 +144,25 @@ def test_empty_ref_with_hyp_deletion(self):
)
self.assertEqual(alignment, correct_alignment)

def test_line_width(self):
correct = """sentence 1
REF: this sentence could be
HYP: this sentence will be
S
REF: split ** **
HYP: split by ai
I I
"""
alignment = visualize_alignment(
jiwer.process_words(
"this sentence could be split", "this sentence will be split by ai"
),
line_width=30,
show_measures=False,
)
self.assertEqual(correct, alignment)


class TestAlignmentVisualizationCharacters(unittest.TestCase):
def test_insertion(self):
Expand Down Expand Up @@ -263,3 +283,22 @@ def test_empty_ref_with_hyp_deletion(self):
show_measures=False,
)
self.assertEqual(alignment, correct_alignment)

def test_line_width(self):
correct = """sentence 1
REF: this sentence could be sp
HYP: this sentence will* be sp
SSS D
REF: lit******
HYP: lit by ai
IIIIII
"""
alignment = visualize_alignment(
jiwer.process_characters(
"this sentence could be split", "this sentence will be split by ai"
),
line_width=30,
show_measures=False,
)
self.assertEqual(correct, alignment)

0 comments on commit 2a036a1

Please sign in to comment.