add more docs on new features (#113)

jitsi · Feb 3, 2025 · bac3b8a · bac3b8a
1 parent be6b690
commit bac3b8a
Show file tree

Hide file tree

Showing 2 changed files with 104 additions and 6 deletions.
diff --git a/docs/usage.md b/docs/usage.md
@@ -1,5 +1,7 @@
 # Usage
 
+## Word error rate  
+
 The simplest use case is computing the word error rate between two strings:
 
 ```python
@@ -41,6 +43,8 @@ hypothesis = ["hello duck", "i like python"]
 error = wer(reference, hypothesis)
 ```
 
+## Character error rate
+
 We also provide the character error rate:
 
 ```python
@@ -56,7 +60,7 @@ output = jiwer.process_characters(reference, hypothesis)
 error = output.cer
 ```
 
-# Alignment
+## Alignment
 
 With `jiwer.process_words` and `jiwer.process_characters`, you get the alignment between the reference and hypothesis.
 
@@ -88,16 +92,19 @@ print(jiwer.visualize_alignment(out))
 ```
 Gives the following output
 ```text
-sentence 1
+=== SENTENCE 1 === 
+
 REF: **** short one here
 HYP: shoe order one ****
         I     S        D
 
-sentence 2
+=== SENTENCE 2 ===
+
 REF: quite a bit of ** ****  longer sentence ****
 HYP: quite * bit of an even longest sentence here
            D         I    I       S             I
 
+=== SUMMARY ===
 number of sentences: 2
 substitutions=2 deletions=2 insertions=4 hits=5
 
@@ -108,3 +115,60 @@ wer=88.89%
 ```
 
 Note that it is also possible to visualize the character-level alignment; simply use the output of `jiwer.process_characters()` instead.
+
+## Error frequencies
+
+You can list all the substitutions, insertions, and deletions, along with their frequencies:
+
+```python3
+import jiwer
+
+out = jiwer.process_words(
+    ["short one here", "quite a bit of longer sentence"],
+    ["shoe order one", "quite bit of an even longest sentence here"],
+)
+
+print(jiwer.visualize_error_counts(out))
+```
+
+Will return
+```text
+=== SUBSTITUTIONS ===
+short   --> order   = 1x
+longer  --> longest = 1x
+
+=== INSERTIONS ===
+shoe    = 1x
+an even = 1x
+here    = 1x
+
+=== DELETIONS ===
+here = 1x
+a    = 1x
+```
+
+## Transformations
+
+You can apply transformations to reference or hypothesis strings before the calculation of various metrics
+with the transform API. For all available transformations, see [here](/jiwer/reference/transforms/).
+For the default transformations, see [here](/jiwer/reference/transformations/).
+
+An example of the transformation API:
+
+```python3
+import jiwer
+
+tr = jiwer.Compose([
+    jiwer.RemoveMultipleSpaces(),
+    jiwer.Strip(),
+    jiwer.SubstituteWords({"I'm": 'i am'}),
+    jiwer.ReduceToListOfListOfWords()
+])
+
+out = jiwer.process_words(
+    "I'm good", 
+    "i am bad", 
+    reference_transform=tr, 
+    hypothesis_transform=tr
+)
+```
diff --git a/src/jiwer/alignment.py b/src/jiwer/alignment.py
@@ -53,6 +53,7 @@ def visualize_alignment(
 
     Example:
         This code snippet
+
         ```python
         import jiwer
 
@@ -63,15 +64,17 @@ def visualize_alignment(
 
         print(jiwer.visualize_alignment(out))
         ```
+
         will produce this visualization:
+
         ```txt
         === SENTENCE 1 ===
 
         REF:    # short one here
         HYP: shoe order one    *
                 I     S        D
 
-        === sentence 2 ===
+        === SENTENCE 2 ===
 
         REF: quite a bit of  #    #  longer sentence    #
         HYP: quite * bit of an even longest sentence here
@@ -97,6 +100,7 @@ def visualize_alignment(
                 I     S        D
 
         === SENTENCE 2 ===
+
         REF: quite a bit of  #    #  longer sentence    #
         HYP: quite * bit of an even longest sentence here
                    D         I    I       S             I
@@ -106,6 +110,7 @@ def visualize_alignment(
 
         ```txt
         === SENTENCE 1 ===
+
         REF: This is a very  long sentence that is *** much longer than the previous one
         HYP: This is a very loong sentence that is not much longer than the previous one
                                 S                    I
@@ -268,14 +273,43 @@ def visualize_error_counts(
     Visualize which words (or characters), and how often, were substituted, inserted, or deleted.
 
     Args:
-        output:
+        output: The processed output of reference and hypothesis pair(s).
         show_substitutions: If true, visualize substitution errors.
         show_insertions: If true, visualize insertion errors.
         show_deletions: If true, visualize deletion errors.
         top_k: If set, only visualize the k most frequent errors.
 
-    Returns: A string which visualizes the words/characters and their frequencies.
+    Returns:
+         (str): A string which visualizes the words/characters and their frequencies.
+
+    Example:
+        The code snippet
+        ```python3
+        import jiwer
+
+        out = jiwer.process_words(
+            ["short one here", "quite a bit of longer sentence"],
+            ["shoe order one", "quite bit of an even longest sentence here"],
+        )
+        print(jiwer.visualize_error_counts(out))
+        ```
+
+        will print the following:
 
+        ```txt
+        === SUBSTITUTIONS ===
+        short   --> order   = 1x
+        longer  --> longest = 1x
+
+        === INSERTIONS ===
+        shoe    = 1x
+        an even = 1x
+        here    = 1x
+
+        === DELETIONS ===
+        here = 1x
+        a    = 1x
+        ```
     """
     s, i, d = collect_error_counts(output)