From 0ee64bc6d84ada9a6033ae4427e5caaffec1405d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:50:38 -0500 Subject: [PATCH 01/27] CDL: initial pass at a signclip-based metric, I cannot embed on the fly but I can load in the .npy files --- pose_evaluation/metrics/signclip_metric.py | 102 +++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 pose_evaluation/metrics/signclip_metric.py diff --git a/pose_evaluation/metrics/signclip_metric.py b/pose_evaluation/metrics/signclip_metric.py new file mode 100644 index 0000000..a82d23d --- /dev/null +++ b/pose_evaluation/metrics/signclip_metric.py @@ -0,0 +1,102 @@ +from pose_evaluation.metrics.base_pose_metric import PoseMetric +from pose_format import Pose +from pathlib import Path +from typing import Literal +import numpy as np +import itertools +from tqdm import tqdm +from scipy.spatial.distance import cosine +import math +import pandas as pd + +class SignCLIPEmbeddingDistanceMetric(PoseMetric): + def __init__(self, + model_id="baseline_temporal", + kind: Literal["cosine", "l2"] = "cosine", + higher_is_better: bool = False): + super().__init__(name=f"SignCLIPDistanceMetric {kind}", higher_is_better=False) + + self.kind = kind + + def load_precalculated_embedding(self, saved_embedding_path:Path) -> np.ndarray: + + embedding = np.load(saved_embedding_path) # typically (1, 768) + if embedding.ndim == 2 and embedding.shape[0] == 1: + embedding = embedding[0] # new shape:(768, ) + return embedding + + def embed_pose(self, pose:Pose)->np.ndarray: + # blocked by the fact that embedding with SignCLIP is nontrivial. + # See https://github.com/sign-language-processing/pose-evaluation/issues/1 + raise NotImplementedError + + def get_embedding(self, input: Path|np.ndarray|Pose)->np.ndarray: + if isinstance(input, np.ndarray): + # often (1, 768) + if input.ndim == 2 and input.shape[0] == 1: + input = input[0] # new shape:(768, ) + elif isinstance(input, Path): + input = self.load_precalculated_embedding(input) + elif isinstance(input, Pose): + input = self.embed_pose(pose=input) + + return input + + + def score(self, hypothesis: Path|np.ndarray|Pose, reference: Path|np.ndarray|Pose) -> float: + hypothesis = self.get_embedding(hypothesis) + reference = self.get_embedding(reference) + + return cosine(hypothesis, reference) + + + + + + +if __name__ =="__main__": + metric = SignCLIPEmbeddingDistanceMetric() + + # embeddings_path = Path.cwd()/"ASL_Citizen_curated_sample_with_embeddings_from_all_models"/"embeddings" + embeddings_path = Path("/media/aqsa/Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex") + embeddings_files = list(embeddings_path.glob("*.npy")) + # embeddings= [metric.load_precalculated_embedding(npy_file) for npy_file in embeddings_path.glob("*.npy")] + + print(f"Found {len(embeddings_files)} embeddings") + + + # loaded = metric.load_precalculated_embedding('pose_evaluation/metrics/test_poses/241481900450897-HOUSE-using-model-sem-lex.npy') + + # print(f"That makes for {len(combinations)} combinations") + i = 0 + entries =[] + out_file = Path.cwd()/"signclip_scores.csv" + pd.DataFrame(columns=["hyp","ref","score"]).to_csv(out_file, index=False) + for embedding, other_embedding in tqdm(itertools.combinations(embeddings_files, 2), + total=math.comb(len(embeddings_files), 2), + desc=f"Calculating scores, writing to {out_file}"): + score = metric.score(embedding, other_embedding) + entry = { + "hyp":embedding.stem.split("-")[0], # e.g. 
0031311305138936874-FATHER-using-model-sem-lex.npy becomes 0031311305138936874 + "ref":other_embedding.stem.split("-")[0], + "score":score + } + entries.append(entry) + i = i+1 + if i%1000 == 0: + # print(f"Collected {len(entries)} scores. Writing to {out_file} resetting") + df = pd.DataFrame.from_dict(entries) + df.to_csv(out_file, mode="a", index=False, header=False) + entries = [] + + + # i = i+1 + # if i % 1000 == 0: + # print(i) + # exit() + # print(f"Score between {embedding.stem} and {other_embedding.stem}: {score}") + + + + + From e3241fbd18ffe3a729cb8b05a3342c975a52aaf3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:41:09 -0500 Subject: [PATCH 02/27] initial attempt at an evaluation script --- .../evaluation/evaluate_signclip.py | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 pose_evaluation/evaluation/evaluate_signclip.py diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py new file mode 100644 index 0000000..a67b684 --- /dev/null +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -0,0 +1,114 @@ +import argparse +from pathlib import Path +import pandas as pd +import numpy as np +from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric +from tqdm import tqdm + +def load_embedding(file_path: Path) -> np.ndarray: + """ + Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. + + Args: + file_path (Path): Path to the .npy file. + + Returns: + np.ndarray: The embedding with shape (768,). + """ + embedding = np.load(file_path) + if embedding.ndim == 2 and embedding.shape[0] == 1: + embedding = embedding[0] # Reduce shape from (1, 768) to (768,) + return embedding + +def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.DataFrame: + """ + Match .npy embeddings to the corresponding glosses based on the numerical ID. + + Args: + emb_dir (Path): Directory containing the .npy files. + split_df (pd.DataFrame): DataFrame containing the split file with the "Video file" column. + + Returns: + pd.DataFrame: Updated DataFrame with an additional column for embeddings. + """ + # Map video file IDs to embeddings + embeddings_map = {} + for npy_file in emb_dir.glob("*.npy"): + numerical_id = npy_file.stem.split("-")[0] + embeddings_map[numerical_id] = npy_file + + # Match embeddings to glosses + embeddings = [] + for _, row in split_df.iterrows(): + video_file = row["Video file"] + numerical_id = video_file.split("-")[0] + npy_file = embeddings_map.get(numerical_id) + + if npy_file is not None: + embeddings.append(load_embedding(npy_file)) + else: + embeddings.append(None) # Placeholder if no matching file + + split_df["embedding"] = embeddings + return split_df + +def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): + """ + Evaluate SignCLIP embeddings using score_all. + + Args: + emb_dir (Path): Directory containing .npy embeddings. + split_file (Path): Path to the split CSV file. + kind (str): Metric type ("cosine" or "l2"). Default is "cosine". 
+ """ + # Load split file + split_df = pd.read_csv(split_file) + + # Match embeddings + split_df = match_embeddings_to_glosses(emb_dir, split_df) + + # Filter out rows without embeddings + valid_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) + embeddings = valid_df["embedding"].tolist() + + # Initialize metric + metric = SignCLIPEmbeddingDistanceMetric(kind=kind) + + # Compute all pairwise scores + print(f"Computing {kind} distances for {len(embeddings)} embeddings...") + + scores = metric.score_all(embeddings, embeddings) + + # Save scores to a CSV file + output_file = emb_dir / "signclip_scores.csv" + results = [] + for i, hyp_row in valid_df.iterrows(): + for j, ref_row in valid_df.iterrows(): + results.append({ + "hyp": hyp_row["Video file"], + "ref": ref_row["Video file"], + "score": scores[i, j] + }) + results_df = pd.DataFrame(results) + results_df.to_csv(output_file, index=False) + print(f"Scores saved to {output_file}") + +def main(): + parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") + parser.add_argument( + "emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files" + ) + parser.add_argument( + "--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)" + ) + parser.add_argument( + "--kind", type=str, choices=["cosine", "l2"], default="cosine", + help="Type of distance metric to use (default: cosine)" + ) + args = parser.parse_args() + + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split, kind=args.kind) + +if __name__ == "__main__": + main() + print(f"THIS SCRIPT NEEDS TESTING") From 41f75ed1a52af3887cb3d126f6ab766dc3baadc3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:41:33 -0500 Subject: [PATCH 03/27] initial attempt at pytest for signclip metric --- .../metrics/test_signclip_distance_metric.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 pose_evaluation/metrics/test_signclip_distance_metric.py diff --git a/pose_evaluation/metrics/test_signclip_distance_metric.py b/pose_evaluation/metrics/test_signclip_distance_metric.py new file mode 100644 index 0000000..54f62cf --- /dev/null +++ b/pose_evaluation/metrics/test_signclip_distance_metric.py @@ -0,0 +1,65 @@ +import pytest +import numpy as np +from pose_format import Pose +from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric + +# Mock a simple Pose object for compatibility (if not already available) +class MockPose: + def __init__(self, data): + self.data = data + +@pytest.fixture +def metric(): + """Fixture to create a SignCLIPEmbeddingDistanceMetric instance.""" + return SignCLIPEmbeddingDistanceMetric(kind="cosine") + +@pytest.fixture +def embeddings(): + """Fixture to create dummy embeddings for testing.""" + # Generate 5 random 768-dimensional embeddings + return [np.random.rand(768) for _ in range(5)] + +def test_score_symmetric(metric): + """Test that the metric is symmetric for cosine distance.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + score1 = metric.score(emb1, emb2) + score2 = metric.score(emb2, emb1) + + assert pytest.approx(score1) == score2, "Score should be symmetric." 
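+    # Cosine distance is symmetric by construction (the dot product commutes), so any
+    # asymmetry observed here would point to a preprocessing bug rather than to the metric itself.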
+ +def test_score_with_path(metric, tmp_path): + """Test that score works with embeddings loaded from paths.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + # Save embeddings to temporary files + file1 = tmp_path / "emb1.npy" + file2 = tmp_path / "emb2.npy" + np.save(file1, emb1) + np.save(file2, emb2) + + score = metric.score(file1, file2) + expected_score = metric.score(emb1, emb2) + + assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." + +def test_score_all(metric, embeddings): + """Test the score_all function.""" + scores = metric.score_all(embeddings, embeddings) + assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + assert np.allclose(scores.diagonal(), 0), "Self-comparison scores should be zero for cosine distance." + +def test_score_all_with_different_sizes(metric): + """Test score_all with different sizes for hypotheses and references.""" + hyps = [np.random.rand(768) for _ in range(3)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = metric.score_all(hyps, refs) + assert scores.shape == (len(hyps), len(refs)), "Output shape mismatch for score_all with different sizes." + +def test_score_all_edge_case(metric): + """Test score_all with empty inputs.""" + scores = metric.score_all([], []) + assert scores.size == 0, "Score_all should return an empty array for empty inputs." From da881d552a0d654d7667cb5a9ea02a79f1169c7e Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:42:55 -0500 Subject: [PATCH 04/27] SignClip distances are just embedding distances. Make a base embedding metric --- .../metrics/base_embedding_metric.py | 23 ++++ .../metrics/signclip_distance_metric.py | 51 +++++++++ pose_evaluation/metrics/signclip_metric.py | 102 ------------------ 3 files changed, 74 insertions(+), 102 deletions(-) create mode 100644 pose_evaluation/metrics/base_embedding_metric.py create mode 100644 pose_evaluation/metrics/signclip_distance_metric.py delete mode 100644 pose_evaluation/metrics/signclip_metric.py diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py new file mode 100644 index 0000000..355fc2a --- /dev/null +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -0,0 +1,23 @@ +from numpy import ndarray +import torch +import torch.nn.functional as F +from pose_evaluation.metrics.base import BaseMetric + +class NumpyArrayEmbeddingMetric(BaseMetric[ndarray]): + def __init__(self, name: str, higher_is_better: bool = True, kind: str = "cosine", device: torch.device | str = None): + # Call the base class __init__ to initialize 'name' and 'higher_is_better' + super().__init__(name, higher_is_better) + + self.kind = kind + + if device is None: + if torch.cuda.is_available(): + self.cuda() + else: + self.device = torch.device(device) if isinstance(device, str) else device + + def score(self, hypothesis: ndarray, reference: ndarray) -> float: + if self.kind == "cosine": + return F.cosine_similarity(hypothesis, reference) + elif self.kind == "l2": + return F.pairwise_distance(hypothesis, reference, p=2) diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py new file mode 100644 index 0000000..3d0fd31 --- /dev/null +++ b/pose_evaluation/metrics/signclip_distance_metric.py @@ -0,0 +1,51 @@ +from pose_evaluation.metrics.base_embedding_metric import NumpyArrayEmbeddingMetric +from typing 
import Literal +import numpy as np +from tqdm import tqdm +from scipy.spatial.distance import cosine +import torch +import torch.nn.functional as F + +class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): + def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): + """ + Initializes the metric with the specified distance type and device. + + Args: + kind (str): The type of distance metric, either 'cosine' or 'l2'. + device (torch.device | str): The device to use ('cuda' or 'cpu'). + """ + self.kind = kind + self.device = torch.device(device) if isinstance(device, str) else device + + + + def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: + """ + Computes the pairwise distance matrix for the provided embeddings. + + Args: + embeddings (torch.Tensor): A 2D tensor of shape (N, D), where N is the number + of embeddings and D is the feature dimension. + + Returns: + torch.Tensor: A 2D tensor of shape (N, N) containing pairwise distances. + """ + # Move embeddings to the specified device + embeddings = embeddings.to(self.device) + + if self.kind == "cosine": + # Normalize embeddings to unit norm + embeddings = F.normalize(embeddings, p=2, dim=1) + # Compute pairwise cosine similarity + similarity_matrix = torch.matmul(embeddings, embeddings.T) # Shape: (N, N) + distance_matrix = 1 - similarity_matrix # Cosine distance = 1 - cosine similarity + elif self.kind == "l2": + # Compute pairwise L2 distance using broadcasting + diff = embeddings[:, None, :] - embeddings[None, :, :] # Shape: (N, N, D) + distance_matrix = torch.norm(diff, dim=2) # Shape: (N, N) + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + return distance_matrix + diff --git a/pose_evaluation/metrics/signclip_metric.py b/pose_evaluation/metrics/signclip_metric.py deleted file mode 100644 index a82d23d..0000000 --- a/pose_evaluation/metrics/signclip_metric.py +++ /dev/null @@ -1,102 +0,0 @@ -from pose_evaluation.metrics.base_pose_metric import PoseMetric -from pose_format import Pose -from pathlib import Path -from typing import Literal -import numpy as np -import itertools -from tqdm import tqdm -from scipy.spatial.distance import cosine -import math -import pandas as pd - -class SignCLIPEmbeddingDistanceMetric(PoseMetric): - def __init__(self, - model_id="baseline_temporal", - kind: Literal["cosine", "l2"] = "cosine", - higher_is_better: bool = False): - super().__init__(name=f"SignCLIPDistanceMetric {kind}", higher_is_better=False) - - self.kind = kind - - def load_precalculated_embedding(self, saved_embedding_path:Path) -> np.ndarray: - - embedding = np.load(saved_embedding_path) # typically (1, 768) - if embedding.ndim == 2 and embedding.shape[0] == 1: - embedding = embedding[0] # new shape:(768, ) - return embedding - - def embed_pose(self, pose:Pose)->np.ndarray: - # blocked by the fact that embedding with SignCLIP is nontrivial. 
- # See https://github.com/sign-language-processing/pose-evaluation/issues/1 - raise NotImplementedError - - def get_embedding(self, input: Path|np.ndarray|Pose)->np.ndarray: - if isinstance(input, np.ndarray): - # often (1, 768) - if input.ndim == 2 and input.shape[0] == 1: - input = input[0] # new shape:(768, ) - elif isinstance(input, Path): - input = self.load_precalculated_embedding(input) - elif isinstance(input, Pose): - input = self.embed_pose(pose=input) - - return input - - - def score(self, hypothesis: Path|np.ndarray|Pose, reference: Path|np.ndarray|Pose) -> float: - hypothesis = self.get_embedding(hypothesis) - reference = self.get_embedding(reference) - - return cosine(hypothesis, reference) - - - - - - -if __name__ =="__main__": - metric = SignCLIPEmbeddingDistanceMetric() - - # embeddings_path = Path.cwd()/"ASL_Citizen_curated_sample_with_embeddings_from_all_models"/"embeddings" - embeddings_path = Path("/media/aqsa/Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex") - embeddings_files = list(embeddings_path.glob("*.npy")) - # embeddings= [metric.load_precalculated_embedding(npy_file) for npy_file in embeddings_path.glob("*.npy")] - - print(f"Found {len(embeddings_files)} embeddings") - - - # loaded = metric.load_precalculated_embedding('pose_evaluation/metrics/test_poses/241481900450897-HOUSE-using-model-sem-lex.npy') - - # print(f"That makes for {len(combinations)} combinations") - i = 0 - entries =[] - out_file = Path.cwd()/"signclip_scores.csv" - pd.DataFrame(columns=["hyp","ref","score"]).to_csv(out_file, index=False) - for embedding, other_embedding in tqdm(itertools.combinations(embeddings_files, 2), - total=math.comb(len(embeddings_files), 2), - desc=f"Calculating scores, writing to {out_file}"): - score = metric.score(embedding, other_embedding) - entry = { - "hyp":embedding.stem.split("-")[0], # e.g. 0031311305138936874-FATHER-using-model-sem-lex.npy becomes 0031311305138936874 - "ref":other_embedding.stem.split("-")[0], - "score":score - } - entries.append(entry) - i = i+1 - if i%1000 == 0: - # print(f"Collected {len(entries)} scores. Writing to {out_file} resetting") - df = pd.DataFrame.from_dict(entries) - df.to_csv(out_file, mode="a", index=False, header=False) - entries = [] - - - # i = i+1 - # if i % 1000 == 0: - # print(i) - # exit() - # print(f"Score between {embedding.stem} and {other_embedding.stem}: {score}") - - - - - From a89aab883bfba18566993dc5e0efb88257f83ebd Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:05:24 -0500 Subject: [PATCH 05/27] CDL: Got some pytest tests running! 
--- pose_evaluation/metrics/.gitignore | 1 + .../metrics/base_embedding_metric.py | 25 +- pose_evaluation/metrics/conftest.py | 35 ++ .../metrics/embedding_distance_metric.py | 104 ++++++ .../metrics/test_embedding_distance_metric.py | 313 ++++++++++++++++++ .../metrics/test_signclip_distance_metric.py | 65 ---- 6 files changed, 458 insertions(+), 85 deletions(-) create mode 100644 pose_evaluation/metrics/.gitignore create mode 100644 pose_evaluation/metrics/conftest.py create mode 100644 pose_evaluation/metrics/embedding_distance_metric.py create mode 100644 pose_evaluation/metrics/test_embedding_distance_metric.py delete mode 100644 pose_evaluation/metrics/test_signclip_distance_metric.py diff --git a/pose_evaluation/metrics/.gitignore b/pose_evaluation/metrics/.gitignore new file mode 100644 index 0000000..3d0dbe4 --- /dev/null +++ b/pose_evaluation/metrics/.gitignore @@ -0,0 +1 @@ +tests/ \ No newline at end of file diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 355fc2a..83ad763 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -1,23 +1,8 @@ -from numpy import ndarray -import torch -import torch.nn.functional as F +from typing import TypeVar from pose_evaluation.metrics.base import BaseMetric +import torch -class NumpyArrayEmbeddingMetric(BaseMetric[ndarray]): - def __init__(self, name: str, higher_is_better: bool = True, kind: str = "cosine", device: torch.device | str = None): - # Call the base class __init__ to initialize 'name' and 'higher_is_better' - super().__init__(name, higher_is_better) - - self.kind = kind - - if device is None: - if torch.cuda.is_available(): - self.cuda() - else: - self.device = torch.device(device) if isinstance(device, str) else device +# Define a type alias for embeddings (e.g., torch.Tensor) +Embedding = TypeVar("Embedding", bound=torch.Tensor) - def score(self, hypothesis: ndarray, reference: ndarray) -> float: - if self.kind == "cosine": - return F.cosine_similarity(hypothesis, reference) - elif self.kind == "l2": - return F.pairwise_distance(hypothesis, reference, p=2) +EmbeddingMetric = BaseMetric[Embedding] \ No newline at end of file diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py new file mode 100644 index 0000000..4b1129c --- /dev/null +++ b/pose_evaluation/metrics/conftest.py @@ -0,0 +1,35 @@ +# conftest.py +import pytest +import shutil +from pathlib import Path +from typing import Callable, Union +import torch +import numpy as np + +@pytest.fixture(scope="session", autouse=True) +def clean_test_artifacts(): + """Fixture to clean up test artifacts before each test session.""" + test_artifacts_dir = Path(__file__).parent / "tests" # Using Path + if test_artifacts_dir.exists(): + shutil.rmtree(test_artifacts_dir) # shutil.rmtree still works with Path + test_artifacts_dir.mkdir(parents=True, exist_ok=True) # Using Path.mkdir + yield # This allows the test session to run + # (Optional) You can add cleanup logic here to run after the session if needed + + +# conftest.py +from typing import Callable, Union +import torch +import numpy as np + +@pytest.fixture +def distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: + def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: + max_distance = distances.max().item() + min_distance = distances.min().item() + + # Use np.isclose for 
comparisons with tolerance + assert np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val, f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, {max_val}]" + assert np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val, f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" + + return _check_range \ No newline at end of file diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py new file mode 100644 index 0000000..84d6df1 --- /dev/null +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -0,0 +1,104 @@ +from typing import Literal, Union, List +import torch +import torch.nn.functional as F +import numpy as np + +from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric + + +class EmbeddingDistanceMetric(EmbeddingMetric): + def __init__(self, kind: Literal["cosine", "l2"] = "cosine", device: Union[torch.device, str] = None): + """ + Initialize the embedding distance metric. + + Args: + kind (Literal["cosine", "l2"]): The type of distance metric. + device (torch.device | str): The device to use for computation. If None, automatically detects. + """ + super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) + self.kind = kind + if device is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = torch.device(device) if isinstance(device, str) else device + + def _to_tensor(self, data: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + """ + Convert input to a PyTorch tensor if it is a NumPy array. + + Args: + data (np.ndarray | torch.Tensor): Input data. + + Returns: + torch.Tensor: Tensor on the correct device. + """ + if isinstance(data, np.ndarray): + data = torch.tensor(data, dtype=torch.float32) + return data.to(self.device) + + def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np.ndarray, torch.Tensor]) -> float: + """ + Compute the distance between two embeddings. + + Args: + hypothesis (np.ndarray | torch.Tensor): A single embedding vector. + reference (np.ndarray | torch.Tensor): Another single embedding vector. + + Returns: + float: The calculated distance. + """ + hypothesis = self._to_tensor(hypothesis) + reference = self._to_tensor(reference) + + if self.kind == "cosine": + # Normalize both embeddings to unit length + hypothesis = F.normalize(hypothesis, p=2, dim=0) + reference = F.normalize(reference, p=2, dim=0) + # Cosine similarity, converted to distance + similarity = torch.dot(hypothesis, reference).item() + return 1 - similarity + elif self.kind == "l2": + # L2 distance + return torch.norm(hypothesis - reference).item() + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + def score_all( + self, + hypotheses: List[Union[np.ndarray, torch.Tensor]], + references: List[Union[np.ndarray, torch.Tensor]], + progress_bar: bool = True, + ) -> torch.Tensor: + """ + Compute the pairwise distance between all hypotheses and references. Expects 2D inputs. + + Args: + hypotheses (list[np.ndarray | torch.Tensor]): List of hypothesis embeddings. + references (list[np.ndarray | torch.Tensor]): List of reference embeddings. + progress_bar (bool): Whether to display a progress bar. + + Returns: + torch.Tensor, distance matrix. 
Row i is the distances of hypotheses[i] to all rows of references + """ + # Convert inputs to tensors and stack + hypotheses = torch.stack([self._to_tensor(h) for h in hypotheses]) + references = torch.stack([self._to_tensor(r) for r in references]) + + if self.kind == "cosine": + # Normalize the tensors along the feature dimension (dim=1) + normalized_hypotheses = F.normalize(hypotheses, dim=1) + normalized_references = F.normalize(references, dim=1) + + # Calculate cosine similarity between all hypothesis-reference pairs + cosine_similarities = torch.matmul(normalized_hypotheses, normalized_references.T) + + # Convert cosine similarities to cosine distances + distance_matrix = 1 - cosine_similarities + elif self.kind == "l2": + # Use broadcasting to calculate pairwise L2 distances + diff = hypotheses[:, None, :] - references[None, :, :] + distance_matrix = torch.norm(diff, dim=2) + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + return distance_matrix.cpu() diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py new file mode 100644 index 0000000..c352386 --- /dev/null +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -0,0 +1,313 @@ +import pytest +import numpy as np +import torch +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric +from pose_evaluation.metrics.conftest import distance_range_checker +import matplotlib.pyplot as plt +import logging +from typing import List +from pathlib import Path + +# TODO: many fixes. Including the fact that we test cosine but not Euclidean, + + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Device configuration for PyTorch +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +@pytest.fixture +def cosine_metric(): + """Fixture to create an EmbeddingDistanceMetric instance.""" + return EmbeddingDistanceMetric(kind="cosine") + + +@pytest.fixture +def embeddings() -> List[torch.Tensor]: + """Fixture to create dummy embeddings for testing.""" + return [random_tensor(768) for _ in range(5)] + + +def save_and_plot_distances(distances, matrix_name, num_points, dim): + """Helper function to save distance matrix and plot distances.""" + test_artifacts_dir = Path(__file__).parent / "tests" + output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" + np.savetxt(output_path, distances.numpy(), delimiter=",", fmt="%.4f") + print(f"Distance matrix saved to {output_path}") + + # Generate plot + plt.figure(figsize=(10, 6)) + for i, row in enumerate(distances.numpy()): + plt.plot(row, label=f"Point {i}") + plt.title(f"Distance Matrix Rows ({matrix_name})") + plt.xlabel("Point Index") + plt.ylabel("Distance") + plt.legend() + plot_path = output_path.with_suffix(".png") + plt.savefig(plot_path) + print(f"Distances plot saved to {plot_path}") + plt.close() + +def random_tensor(size: int) -> torch.Tensor: + """Generate a random tensor on the appropriate device.""" + return torch.rand(size, dtype=torch.float32, device=DEVICE) + +def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: + angles = torch.linspace(0, 2 * np.pi, num_points + 1)[:-1] + x_coords = torch.cos(angles) + y_coords = torch.sin(angles) + points = torch.stack([x_coords, y_coords], dim=1) + if dim > 2: + padding = torch.zeros((num_points, dim - 2)) + points = torch.cat([points, padding], dim=1) + return points + +def 
generate_orthogonal_rows_with_repeats(num_rows: int, dim: int) -> torch.Tensor: + orthogonal_rows = torch.empty(0, dim) + for _ in range(min(num_rows, dim)): + random_vector = torch.randn(1, dim) + if orthogonal_rows.shape[0] > 0: + random_vector -= ( + torch.matmul(random_vector, orthogonal_rows.T) @ orthogonal_rows + / torch.norm(orthogonal_rows, dim=1, keepdim=True) ** 2 + ) + orthogonal_rows = torch.cat( + [orthogonal_rows, random_vector / torch.norm(random_vector)] + ) + if num_rows > dim: + orthogonal_rows = orthogonal_rows.repeat(num_rows // dim + 1, 1)[:num_rows] + return orthogonal_rows + + +def generate_orthogonal_rows_in_pairs(num_pairs: int, dim: int) -> torch.Tensor: + """ + Generates a tensor with orthogonal rows in pairs. + The first row of each pair is orthogonal to the second row of the same pair. + + Args: + num_pairs: The number of orthogonal pairs to generate. + dim: The dimensionality of the vectors. + + Returns: + A PyTorch tensor with orthogonal rows in pairs. + """ + + orthogonal_rows = torch.empty(0, dim) + for _ in range(num_pairs): + # Generate the first vector of the pair + first_vector = torch.randn(1, dim) + first_vector = first_vector / torch.norm(first_vector) # Normalize + + # Generate the second vector orthogonal to the first + second_vector = torch.randn(1, dim) + second_vector = second_vector - (second_vector @ first_vector.T) * first_vector + second_vector = second_vector / torch.norm(second_vector) # Normalize + + # Concatenate the pair to the result + orthogonal_rows = torch.cat([orthogonal_rows, first_vector, second_vector], dim=0) + + return orthogonal_rows + +def generate_ones_tensor(rows: int, dims: int) -> torch.Tensor: + """Generates a tensor with all elements equal to 1.0 (float).""" + return torch.ones(rows, dims, dtype=torch.float32) + +def generate_identity_matrix_rows(rows, cols): + """ + Returns an identity matrix with the specified number of rows and columns. + """ + identity = torch.eye(max(rows, cols)) + return identity[:rows, :cols] + +def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: + """ + Creates a tensor where every row has identical values all the way across, + but increasing row by row. + + Args: + num_rows: The number of rows in the tensor. + num_cols: The number of columns in the tensor. + + Returns: + A PyTorch tensor with the specified properties. + """ + + tensor = torch.arange(1.0, num_rows + 1).unsqueeze(1).repeat(1, num_cols) + return tensor + + + + +def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: + """Test that the metric is symmetric for cosine distance.""" + emb1 = random_tensor(768) + emb2 = random_tensor(768) + + score1 = cosine_metric.score(emb1, emb2) + score2 = cosine_metric.score(emb2, emb1) + + logger.info(f"Score 1: {score1}, Score 2: {score2}") + assert pytest.approx(score1) == score2, "Score should be symmetric." 
+ + +def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: + """Test that score works with embeddings loaded from file paths.""" + emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage + emb2 = random_tensor(768).cpu().numpy() + + # Save embeddings to temporary files + file1 = tmp_path / "emb1.npy" + file2 = tmp_path / "emb2.npy" + np.save(file1, emb1) + np.save(file2, emb2) + + # Load files as PyTorch tensors + emb1_loaded = torch.tensor(np.load(file1), dtype=torch.float32, device=DEVICE) + emb2_loaded = torch.tensor(np.load(file2), dtype=torch.float32, device=DEVICE) + + score = cosine_metric.score(emb1_loaded, emb2_loaded) + expected_score = cosine_metric.score(torch.tensor(emb1, device=DEVICE), torch.tensor(emb2, device=DEVICE)) + + logger.info(f"Score from file: {score}, Direct score: {expected_score}") + assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." + + +def test_score_all_against_self(cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker) -> None: + """Test the score_all function.""" + scores = cosine_metric.score_all(embeddings, embeddings) + assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + assert torch.allclose(torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6), ( + "Self-comparison scores should be zero for cosine distance." + ) + distance_range_checker(scores, min_val=0, max_val=2) + logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") + +def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): + """Test score_all with different sizes for hypotheses and references.""" + hyps = [np.random.rand(768) for _ in range(3)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert scores.shape == (len(hyps), len(refs)), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " + distance_range_checker(scores, min_val=0, max_val=2) + + +# def test_score_all_with_empty_inputs(metric): +# """Test score_all with empty inputs.""" +# scores = metric.score_all([], []) +# assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" + +def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: + """Test the metric with invalid inputs.""" + emb1 = random_tensor(768) + invalid_inputs = ["invalid_input", None, -1, 1] + + for invalid_input in invalid_inputs: + with pytest.raises((TypeError, AttributeError)): + cosine_metric.score(emb1, invalid_input) + + logger.info("Invalid input test passed.") + +def test_score_tensor_input(cosine_metric): + """Test score function with torch.Tensor inputs.""" + emb1 = torch.rand(768) + emb2 = torch.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." + + +def test_score_ndarray_input(cosine_metric): + """Test score function with np.ndarray inputs.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." 
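+    # np.random.rand yields float64 arrays; the metric's _to_tensor casts them to float32
+    # tensors, and the .item() call inside score() returns a plain Python float.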
+ + +def test_score_all_tensor_input(cosine_metric): + """Test score_all function with torch.Tensor inputs.""" + hyps = [torch.rand(768) for _ in range(5)] + refs = [torch.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + + +def test_device_handling(cosine_metric): + """Test device handling for the metric.""" + assert cosine_metric.device.type in ["cuda", "cpu"], "Device should be either 'cuda' or 'cpu'." + if torch.cuda.is_available(): + assert cosine_metric.device.type == "cuda", "Should use 'cuda' when available." + else: + assert cosine_metric.device.type == "cpu", "Should use 'cpu' when CUDA is unavailable." + + +def test_mixed_input(cosine_metric): + """Test score function with mixed input types.""" + emb1 = np.random.rand(768) + emb2 = torch.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." + +@pytest.mark.parametrize("num_points, dim", [(16, 2)]) +def test_unit_circle_points(cosine_metric, num_points, dim): + embeddings = generate_unit_circle_points(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances=distances, matrix_name="Unit Circle", num_points=num_points, dim=dim) + + +@pytest.mark.parametrize("num_points, dim", [(20, 2)]) +def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): + embeddings = generate_orthogonal_rows_with_repeats(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim) + + # Create expected pattern directly within the test function + expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) + for i in range(num_points): + for j in range(num_points): + if (i + j) % 2 != 0: + expected_pattern[i, j] = 1 + + # We expect 0 1 0 across and down + assert torch.allclose(distances, expected_pattern, atol=1e-6), "Output does not match the expected alternating pattern" + + +@pytest.mark.parametrize("num_points, dim", [(20, 2)]) +def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_orthogonal_rows_in_pairs(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) + distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + +@pytest.mark.parametrize("num_points, dim", [(10, 5)]) +def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_ones_tensor(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "ones_tensor", num_points, dim) + distance_range_checker(distances, min_val=0, max_val=0) # Expect all distances to be 0 + + +@pytest.mark.parametrize("num_points, dim", [(15, 15)]) # dim should be equal to num_points for identity matrix +def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_identity_matrix_rows(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "identity_matrix_rows", num_points, dim) + 
distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + + +# def test_progress_bar(cosine_metric): +# """Test score_all with progress_bar argument.""" +# hyps = [np.random.rand(768) for _ in range(5)] +# refs = [np.random.rand(768) for _ in range(5)] + +# # Disable progress bar +# scores = cosine_metric.score_all(hyps, refs, progress_bar=False) +# assert len(scores) == len(hyps), "Output row count mismatch with progress_bar=False." +# assert len(scores[0]) == len(refs), "Output column count mismatch with progress_bar=False." diff --git a/pose_evaluation/metrics/test_signclip_distance_metric.py b/pose_evaluation/metrics/test_signclip_distance_metric.py deleted file mode 100644 index 54f62cf..0000000 --- a/pose_evaluation/metrics/test_signclip_distance_metric.py +++ /dev/null @@ -1,65 +0,0 @@ -import pytest -import numpy as np -from pose_format import Pose -from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric - -# Mock a simple Pose object for compatibility (if not already available) -class MockPose: - def __init__(self, data): - self.data = data - -@pytest.fixture -def metric(): - """Fixture to create a SignCLIPEmbeddingDistanceMetric instance.""" - return SignCLIPEmbeddingDistanceMetric(kind="cosine") - -@pytest.fixture -def embeddings(): - """Fixture to create dummy embeddings for testing.""" - # Generate 5 random 768-dimensional embeddings - return [np.random.rand(768) for _ in range(5)] - -def test_score_symmetric(metric): - """Test that the metric is symmetric for cosine distance.""" - emb1 = np.random.rand(768) - emb2 = np.random.rand(768) - - score1 = metric.score(emb1, emb2) - score2 = metric.score(emb2, emb1) - - assert pytest.approx(score1) == score2, "Score should be symmetric." - -def test_score_with_path(metric, tmp_path): - """Test that score works with embeddings loaded from paths.""" - emb1 = np.random.rand(768) - emb2 = np.random.rand(768) - - # Save embeddings to temporary files - file1 = tmp_path / "emb1.npy" - file2 = tmp_path / "emb2.npy" - np.save(file1, emb1) - np.save(file2, emb2) - - score = metric.score(file1, file2) - expected_score = metric.score(emb1, emb2) - - assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." - -def test_score_all(metric, embeddings): - """Test the score_all function.""" - scores = metric.score_all(embeddings, embeddings) - assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." - assert np.allclose(scores.diagonal(), 0), "Self-comparison scores should be zero for cosine distance." - -def test_score_all_with_different_sizes(metric): - """Test score_all with different sizes for hypotheses and references.""" - hyps = [np.random.rand(768) for _ in range(3)] - refs = [np.random.rand(768) for _ in range(5)] - - scores = metric.score_all(hyps, refs) - assert scores.shape == (len(hyps), len(refs)), "Output shape mismatch for score_all with different sizes." - -def test_score_all_edge_case(metric): - """Test score_all with empty inputs.""" - scores = metric.score_all([], []) - assert scores.size == 0, "Score_all should return an empty array for empty inputs." 
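The EmbeddingDistanceMetric introduced above computes a full pairwise distance matrix between hypothesis and reference embeddings. As a rough standalone sketch of that math (assuming only torch, and mirroring the cosine/l2 branches of score_all rather than reproducing the class itself):

import torch
import torch.nn.functional as F

def pairwise_distances(hyps: torch.Tensor, refs: torch.Tensor, kind: str = "cosine") -> torch.Tensor:
    # Returns an (N, M) matrix: row i holds the distances from hyps[i] to every row of refs.
    if kind == "cosine":
        # Row-normalise, then a matrix product gives cosine similarity; 1 - similarity lies in [0, 2].
        return 1 - F.normalize(hyps, p=2, dim=1) @ F.normalize(refs, p=2, dim=1).T
    if kind == "l2":
        # Broadcast the difference and take norms along the feature axis for pairwise Euclidean distances.
        return torch.norm(hyps[:, None, :] - refs[None, :, :], dim=2)
    raise ValueError(f"Unsupported distance metric: {kind}")

hyps = torch.rand(3, 768)
refs = torch.rand(5, 768)
print(pairwise_distances(hyps, refs).shape)  # torch.Size([3, 5])

For the l2 branch, torch.cdist(hyps, refs, p=2) would be an equivalent and more memory-friendly alternative to the broadcasted difference.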
From 83f9153abfef99c5910d66d340571e69ccce41a8 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:10:50 -0500 Subject: [PATCH 06/27] some updates to evaluate signclip script --- .../evaluation/evaluate_signclip.py | 86 +++++++++++++++++-- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index a67b684..60befb8 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -2,9 +2,12 @@ from pathlib import Path import pandas as pd import numpy as np -from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric from tqdm import tqdm +import time +# python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv +# (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: """ Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. @@ -72,26 +75,95 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): embeddings = valid_df["embedding"].tolist() # Initialize metric - metric = SignCLIPEmbeddingDistanceMetric(kind=kind) + metric = EmbeddingDistanceMetric(kind=kind, device="cpu") # Compute all pairwise scores print(f"Computing {kind} distances for {len(embeddings)} embeddings...") + start_time = time.perf_counter() scores = metric.score_all(embeddings, embeddings) + score_duration = time.perf_counter() - start_time + print(f"Score_all took {score_duration:.3f} seconds") + + + + # Extract the "Video file" column + files = valid_df["Video file"].tolist() + + # Create output file path + output_file = Path("signclip_scores.csv") + + # Start timer + start_time = time.perf_counter() + + # Create the Cartesian product of `files` with itself + n = len(files) + data = { + "hyp": [files[i] for i in range(n) for j in range(n)], + "ref": [files[j] for i in range(n) for j in range(n)], + "score": scores.flatten() # Flatten the 2D score matrix into a 1D array + } + + + # Construct the DataFrame + results_df = pd.DataFrame(data) + + # Save to CSV + results_df.to_csv(output_file, index=False) + + # End timer + end_time = time.perf_counter() + print(f"Saving DataFrame and writing to CSV took {end_time - start_time:.2f} seconds") + + # Save scores to a CSV file - output_file = emb_dir / "signclip_scores.csv" + output_file = Path("signclip_scores.csv") results = [] - for i, hyp_row in valid_df.iterrows(): + for i, hyp_row in tqdm(valid_df.iterrows(), total=valid_df.shape[0]): for j, ref_row in valid_df.iterrows(): results.append({ "hyp": hyp_row["Video file"], "ref": ref_row["Video file"], - "score": scores[i, j] + "score": scores[i, j].item() }) + + df_start = time.perf_counter() results_df = pd.DataFrame(results) + df_end = time.perf_counter() + df_duration = df_end - df_start + print(f"df took {df_duration}") + + + + + + + csv_start = time.perf_counter() results_df.to_csv(output_file, 
index=False) - print(f"Scores saved to {output_file}") + csv_end = time.perf_counter() + csv_duration = csv_end - csv_start + print(f"CSV took {csv_duration}") + + json_start = time.perf_counter() + results_df.to_json(output_file.with_suffix(".json"), index=False) + json_end = time.perf_counter() + json_duration = json_end - json_start + print(f"JSON took {json_duration}") + + np_start = time.perf_counter() + np.save(output_file.with_suffix(".npy"), scores) + np_end = time.perf_counter() + np_duration = np_end-np_start + print(f"np took {np_duration}") + + + + + print(f"Scores of shape {scores.shape} saved to {output_file}") + read_back_in = np.load(output_file.with_suffix(".npy")) + if np.allclose(read_back_in, scores): + print("yay! All the same!") def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") @@ -107,7 +179,7 @@ def main(): ) args = parser.parse_args() - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split, kind=args.kind) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind) if __name__ == "__main__": main() From 8680048e63248ee3d25676b07e50047c51188446 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:11:35 -0500 Subject: [PATCH 07/27] CDL: messing around with in-class and out-of-class means --- .../evaluation/evaluate_signclip.py | 247 ++++++++++-------- 1 file changed, 144 insertions(+), 103 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 60befb8..76a2134 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -34,28 +34,35 @@ def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.Dat Returns: pd.DataFrame: Updated DataFrame with an additional column for embeddings. """ - # Map video file IDs to embeddings - embeddings_map = {} - for npy_file in emb_dir.glob("*.npy"): - numerical_id = npy_file.stem.split("-")[0] - embeddings_map[numerical_id] = npy_file - - # Match embeddings to glosses - embeddings = [] - for _, row in split_df.iterrows(): - video_file = row["Video file"] + import time + + # Step 1: Create a mapping of numerical IDs to .npy files + map_start = time.perf_counter() + embeddings_map = { + npy_file.stem.split("-")[0]: npy_file + for npy_file in emb_dir.glob("*.npy") + } + map_end = time.perf_counter() + print(f"Creating embeddings map took {map_end - map_start:.4f} seconds") + + # Step 2: Vectorized matching of embeddings + match_start = time.perf_counter() + + def get_embedding(video_file): numerical_id = video_file.split("-")[0] npy_file = embeddings_map.get(numerical_id) - if npy_file is not None: - embeddings.append(load_embedding(npy_file)) - else: - embeddings.append(None) # Placeholder if no matching file + return load_embedding(npy_file) + return None + + split_df["embedding"] = split_df["Video file"].apply(get_embedding) + match_end = time.perf_counter() + print(f"Matching embeddings to glosses took {match_end - match_start:.4f} seconds") - split_df["embedding"] = embeddings return split_df -def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): + +def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out_path=None): """ Evaluate SignCLIP embeddings using score_all. 
@@ -64,106 +71,136 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): split_file (Path): Path to the split CSV file. kind (str): Metric type ("cosine" or "l2"). Default is "cosine". """ - # Load split file + overall_start = time.perf_counter() # Start overall benchmarking + + # Step 1: Load split file + split_load_start = time.perf_counter() split_df = pd.read_csv(split_file) - - # Match embeddings - split_df = match_embeddings_to_glosses(emb_dir, split_df) - - # Filter out rows without embeddings - valid_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) - embeddings = valid_df["embedding"].tolist() + split_load_end = time.perf_counter() + print(f"Loading split file took {split_load_end - split_load_start:.4f} seconds") + # print(f"{split_df.info()}") - # Initialize metric + # Step 2: Match embeddings to glosses + match_start = time.perf_counter() + split_df = match_embeddings_to_glosses(emb_dir, split_df) + match_end = time.perf_counter() + print(f"Matching embeddings to glosses took {match_end - match_start:.4f} seconds") + # print(split_df.info()) + + # Step 3: Filter out rows without embeddings + filter_start = time.perf_counter() + items_with_embeddings_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) + embeddings = items_with_embeddings_df["embedding"].tolist() + filter_end = time.perf_counter() + print(f"Filtering embeddings took {filter_end - filter_start:.4f} seconds") + print(items_with_embeddings_df.info()) + + # Step 4: Initialize the distance metric + metric_start = time.perf_counter() metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + metric_end = time.perf_counter() + print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") - # Compute all pairwise scores + # Step 5: Compute all pairwise scores + score_start = time.perf_counter() print(f"Computing {kind} distances for {len(embeddings)} embeddings...") - - start_time = time.perf_counter() scores = metric.score_all(embeddings, embeddings) - score_duration = time.perf_counter() - start_time - print(f"Score_all took {score_duration:.3f} seconds") - - - - # Extract the "Video file" column - files = valid_df["Video file"].tolist() - - # Create output file path - output_file = Path("signclip_scores.csv") - - # Start timer - start_time = time.perf_counter() - - # Create the Cartesian product of `files` with itself - n = len(files) - data = { - "hyp": [files[i] for i in range(n) for j in range(n)], - "ref": [files[j] for i in range(n) for j in range(n)], - "score": scores.flatten() # Flatten the 2D score matrix into a 1D array - } - - - # Construct the DataFrame - results_df = pd.DataFrame(data) - - # Save to CSV - results_df.to_csv(output_file, index=False) - - # End timer - end_time = time.perf_counter() - print(f"Saving DataFrame and writing to CSV took {end_time - start_time:.2f} seconds") - + score_end = time.perf_counter() + print(f"Score_all took {score_end - score_start:.3f} seconds") + + # Step 6: Create output file path + output_file = out_path + if out_path is None: + output_file = Path(f"signclip_scores_{split_file.name}").with_suffix(".npz") + + if not output_file.suffix == ".npz": + output_file = Path(f"{output_file}.npz") + + + print(f"Scores will be saved to {output_file}") + + + + # Step 7: Extract file list from DataFrame + files_start = time.perf_counter() + files = items_with_embeddings_df["Video file"].tolist() + files_end = time.perf_counter() + print(f"Extracting file list took {files_end - files_start:.4f} seconds") + + + 
analysis_start = time.perf_counter() + index_to_check = 0 + number_to_check = 10 + print(f"The first {number_to_check} scores for {files[index_to_check]} to...") + for ref, score in list(zip(files, scores[index_to_check]))[:number_to_check]: + print("\t*------------->", f"{ref}".ljust(35), "\t", score.item()) + + unique_glosses = items_with_embeddings_df['Gloss'].unique() + print(f"We have a vocabulary of {len(unique_glosses)} glosses") + gloss_indices = {} + for gloss in items_with_embeddings_df['Gloss'].unique(): + gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() + + for gloss, indices in gloss_indices.items(): + print(f"Here are the {len(indices)} indices for {gloss}:{indices}") + + # Assuming 'scores' is your distance matrix and 'gloss_indices' is your dictionary of gloss indices + find_class_distances_start = time.perf_counter() + all_within_class_distances = np.array([]) # Initialize as empty NumPy array + all_between_class_distances = np.array([]) # Initialize as empty NumPy array + + for gloss, indices in tqdm(gloss_indices.items()): + # Within-class distances + within_class_distances = scores[np.ix_(indices, indices)] + within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] + all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) + + # Between-class distances + other_indices = np.setdiff1d(np.arange(len(scores)), indices) + between_class_distances = scores[np.ix_(indices, other_indices)] + all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) + find_class_distances_end = time.perf_counter() + print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") + - # Save scores to a CSV file - output_file = Path("signclip_scores.csv") - results = [] - for i, hyp_row in tqdm(valid_df.iterrows(), total=valid_df.shape[0]): - for j, ref_row in valid_df.iterrows(): - results.append({ - "hyp": hyp_row["Video file"], - "ref": ref_row["Video file"], - "score": scores[i, j].item() - }) + print(f"Mean within classes: {np.mean(all_within_class_distances)}") + print(f"Mean between classes: {np.mean(all_between_class_distances)}") - df_start = time.perf_counter() - results_df = pd.DataFrame(results) - df_end = time.perf_counter() - df_duration = df_end - df_start - print(f"df took {df_duration}") + + analysis_end = time.perf_counter() + analysis_duration = analysis_end - analysis_start + print(f"Analysis took {analysis_duration} seconds") + + + # Step 8: Save the scores and files to a compressed file + save_start = time.perf_counter() + np.savez(output_file, scores=scores, files=files) + save_end = time.perf_counter() + print(f"Saving scores and files took {save_end - save_start:.4f} seconds") + print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {output_file}") + # Step 9: Read back the saved scores + read_start = time.perf_counter() + read_back_in = np.load(f"{output_file}") + read_end = time.perf_counter() + print(f"Reading back the file took {read_end - read_start:.4f} seconds") - + # Step 10: Verify if the read data matches the original scores + verify_start = time.perf_counter() + if np.allclose(read_back_in["scores"], scores): + print("Yay! 
All the same!") + else: + print("Mismatch found!") + verify_end = time.perf_counter() + print(f"Verification step took {verify_end - verify_start:.4f} seconds") + # Overall time + overall_end = time.perf_counter() + print(f"Total script runtime: {overall_end - overall_start:.4f} seconds") - csv_start = time.perf_counter() - results_df.to_csv(output_file, index=False) - csv_end = time.perf_counter() - csv_duration = csv_end - csv_start - print(f"CSV took {csv_duration}") - json_start = time.perf_counter() - results_df.to_json(output_file.with_suffix(".json"), index=False) - json_end = time.perf_counter() - json_duration = json_end - json_start - print(f"JSON took {json_duration}") - - np_start = time.perf_counter() - np.save(output_file.with_suffix(".npy"), scores) - np_end = time.perf_counter() - np_duration = np_end-np_start - print(f"np took {np_duration}") - - - - - print(f"Scores of shape {scores.shape} saved to {output_file}") - read_back_in = np.load(output_file.with_suffix(".npy")) - if np.allclose(read_back_in, scores): - print("yay! All the same!") def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") @@ -177,10 +214,14 @@ def main(): "--kind", type=str, choices=["cosine", "l2"], default="cosine", help="Type of distance metric to use (default: cosine)" ) + + parser.add_argument("--out_path", + type=Path, + help="Where to save output distance npz matrix+file list") + args = parser.parse_args() - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind, out_path=args.out_path) if __name__ == "__main__": main() - print(f"THIS SCRIPT NEEDS TESTING") From a6b22c3ed85eb9f78370fd2e1d8170cc82b533c8 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:23:57 -0500 Subject: [PATCH 08/27] CDL: testing out in/out of class mean distance --- pose_evaluation/evaluation/evaluate_signclip.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 76a2134..031716b 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -5,7 +5,7 @@ from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric from tqdm import tqdm import time - +import torch # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: @@ -149,9 +149,12 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out all_within_class_distances = np.array([]) # Initialize as empty NumPy array all_between_class_distances = np.array([]) # Initialize as empty NumPy array - for gloss, indices in tqdm(gloss_indices.items()): + within_class_means_by_gloss = {} + for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): # Within-class distances 
within_class_distances = scores[np.ix_(indices, indices)] + within_class_mean = torch.mean(within_class_distances) + within_class_means_by_gloss[gloss] = within_class_mean within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) @@ -160,9 +163,12 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out between_class_distances = scores[np.ix_(indices, other_indices)] all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) find_class_distances_end = time.perf_counter() + + print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - + for gloss, mean in within_class_means_by_gloss.items(): + print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") print(f"Mean within classes: {np.mean(all_within_class_distances)}") print(f"Mean between classes: {np.mean(all_between_class_distances)}") From b89349642db0e6872ba836793a4c4c89492e0cbb Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:59:54 -0500 Subject: [PATCH 09/27] CDL: trying to batch-process calculation of means --- .../evaluation/evaluate_signclip.py | 161 +++++++++++++----- 1 file changed, 122 insertions(+), 39 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 031716b..41bcab6 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -6,6 +6,7 @@ from tqdm import tqdm import time import torch +from typing import List, Tuple # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: @@ -62,7 +63,105 @@ def get_embedding(video_file): return split_df -def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out_path=None): +def calculate_mean_distances( + distance_matrix: torch.Tensor, + indices_a: torch.Tensor, + indices_b: torch.Tensor, + exclude_self: bool = False +) -> float: + """ + Calculate the mean of distances between two sets of indices in a 2D distance matrix. + + Args: + distance_matrix (torch.Tensor): A 2D tensor representing pairwise distances. + indices_a (torch.Tensor): A tensor of row indices. + indices_b (torch.Tensor): A tensor of column indices. + exclude_self (bool): Whether to exclude distances where indices_a == indices_b. + + Returns: + float: The mean distance between all pairs of (indices_a, indices_b). 
+ """ + # Create all pair combinations + row_indices, col_indices = torch.meshgrid(indices_a, indices_b, indexing="ij") + + if exclude_self: + # Apply a mask to remove self-distances + mask = row_indices != col_indices + row_indices = row_indices[mask] + col_indices = col_indices[mask] + + # Gather distances + selected_distances = distance_matrix[row_indices.flatten(), col_indices.flatten()] + + # Return the mean + return selected_distances.mean().item() + +def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): + import random + torch.manual_seed(42) + random.seed(42) + # distance_matrix = torch.rand((num_items, num_items)) * 100 + distance_matrix = torch.full((num_items, num_items), 10.0) + distance_matrix.fill_diagonal_(0) + indices = list(range(num_items)) + random.shuffle(indices) + + classes = {f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes)} + # Assign intra-class distances + mean_values_by_class ={} + for i, class_name in enumerate(classes.keys()): + mean_value = i+1 + mean_values_by_class[class_name] = mean_value + for class_name, indices in classes.items(): + mean_value = mean_values_by_class[class_name] + for i in indices: + for j in indices: + if i != j: # Exclude self-distances + distance_matrix[i, j] = mean_value + return classes, distance_matrix + +def calculate_class_means(gloss_indices, scores): + class_means_by_gloss = {} + all_indices = torch.arange(scores.size(0), dtype=int) + + for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): + indices = torch.LongTensor(indices) + class_means_by_gloss[gloss] ={} + within_class_mean = calculate_mean_distances(scores, indices, indices, exclude_self=True) + + class_means_by_gloss[gloss]["in_class"] = within_class_mean + + complement_indices = all_indices[~torch.isin(all_indices, indices)] + without_class_mean = calculate_mean_distances(scores, indices, complement_indices) + class_means_by_gloss[gloss]["out_of_class"]=without_class_mean + + return class_means_by_gloss + +#def calculate_class_means(gloss_indices, scores): +# all_within_class_distances = np.array([]) # Initialize as empty NumPy array +# all_between_class_distances = np.array([]) # Initialize as empty NumPy array +# within_class_means_by_gloss = {} +# for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): +# # Within-class distances +# within_class_distances = scores[np.ix_(indices, indices)] +# within_class_mean = torch.mean(within_class_distances) +# within_class_means_by_gloss[gloss] = within_class_mean +# within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] +# all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) +# +# # Between-class distances +# other_indices = np.setdiff1d(np.arange(len(scores)), indices) +# between_class_distances = scores[np.ix_(indices, other_indices)] +# all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) +# +# for gloss, mean in within_class_means_by_gloss.items(): +# print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") +# +# print(f"Mean within classes: {np.mean(all_within_class_distances)}") +# print(f"Mean between classes: {np.mean(all_between_class_distances)}") +# return within_class_means_by_gloss + +def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = "cosine"): """ Evaluate SignCLIP embeddings using score_all. 
@@ -108,18 +207,6 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out score_end = time.perf_counter() print(f"Score_all took {score_end - score_start:.3f} seconds") - # Step 6: Create output file path - output_file = out_path - if out_path is None: - output_file = Path(f"signclip_scores_{split_file.name}").with_suffix(".npz") - - if not output_file.suffix == ".npz": - output_file = Path(f"{output_file}.npz") - - - print(f"Scores will be saved to {output_file}") - - # Step 7: Extract file list from DataFrame files_start = time.perf_counter() @@ -144,34 +231,16 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out for gloss, indices in gloss_indices.items(): print(f"Here are the {len(indices)} indices for {gloss}:{indices}") - # Assuming 'scores' is your distance matrix and 'gloss_indices' is your dictionary of gloss indices find_class_distances_start = time.perf_counter() - all_within_class_distances = np.array([]) # Initialize as empty NumPy array - all_between_class_distances = np.array([]) # Initialize as empty NumPy array - within_class_means_by_gloss = {} - for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): - # Within-class distances - within_class_distances = scores[np.ix_(indices, indices)] - within_class_mean = torch.mean(within_class_distances) - within_class_means_by_gloss[gloss] = within_class_mean - within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] - all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) - - # Between-class distances - other_indices = np.setdiff1d(np.arange(len(scores)), indices) - between_class_distances = scores[np.ix_(indices, other_indices)] - all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) - find_class_distances_end = time.perf_counter() + #synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) + #class_means = calculate_class_means(synthetic_classes, synthetic_distances) + class_means = calculate_class_means(gloss_indices, scores) + find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - for gloss, mean in within_class_means_by_gloss.items(): - print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") - - print(f"Mean within classes: {np.mean(all_within_class_distances)}") - print(f"Mean between classes: {np.mean(all_between_class_distances)}") analysis_end = time.perf_counter() @@ -179,17 +248,19 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out print(f"Analysis took {analysis_duration} seconds") + for gloss, means in class_means.items(): + print(gloss, means) # Step 8: Save the scores and files to a compressed file save_start = time.perf_counter() - np.savez(output_file, scores=scores, files=files) + np.savez(out_path, scores=scores, files=files) save_end = time.perf_counter() print(f"Saving scores and files took {save_end - save_start:.4f} seconds") - print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {output_file}") + print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {out_path}") # Step 9: Read back the saved scores read_start = time.perf_counter() - read_back_in = np.load(f"{output_file}") + read_back_in = np.load(f"{out_path}") read_end = time.perf_counter() print(f"Reading back the file 
took {read_end - read_start:.4f} seconds") @@ -225,9 +296,21 @@ def main(): type=Path, help="Where to save output distance npz matrix+file list") + + args = parser.parse_args() + + + output_file = args.out_path + if output_file is None: + output_file = Path(f"signclip_scores_{args.split_file.name}").with_suffix(".npz") + + if not output_file.suffix == ".npz": + output_file = Path(f"{output_file}.npz") + + print(f"Scores will be saved to {output_file}") - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind, out_path=args.out_path) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, out_path=output_file, kind=args.kind) if __name__ == "__main__": main() From 20bcba2bcf7ec20deabfd6bea7c703f12ac8c28f Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:51:41 -0500 Subject: [PATCH 10/27] CDL: saving off the class means --- .../evaluation/evaluate_signclip.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 41bcab6..67d6be4 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -6,6 +6,7 @@ from tqdm import tqdm import time import torch +import json from typing import List, Tuple # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv @@ -196,7 +197,8 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = # Step 4: Initialize the distance metric metric_start = time.perf_counter() - metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + #metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + metric = EmbeddingDistanceMetric(kind=kind) metric_end = time.perf_counter() print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") @@ -228,7 +230,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = for gloss in items_with_embeddings_df['Gloss'].unique(): gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() - for gloss, indices in gloss_indices.items(): + for gloss, indices in list(gloss_indices.items())[:10]: print(f"Here are the {len(indices)} indices for {gloss}:{indices}") find_class_distances_start = time.perf_counter() @@ -237,6 +239,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = #class_means = calculate_class_means(synthetic_classes, synthetic_distances) class_means = calculate_class_means(gloss_indices, scores) + find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") @@ -245,14 +248,28 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = analysis_end = time.perf_counter() analysis_duration = analysis_end - analysis_start + + in_class_means = [mean_dict["in_class"] for mean_dict in class_means.values()] + out_class_means = 
[mean_dict["out_of_class"] for mean_dict in class_means.values()] + + + for gloss, means in list(class_means.items())[:10]: + print(gloss, means) + + print(f"Mean of in-class means: {np.mean(in_class_means)}") + print(f"Mean of out-of-class means: {np.mean(out_class_means)}") + print(f"Analysis took {analysis_duration} seconds") - for gloss, means in class_means.items(): - print(gloss, means) # Step 8: Save the scores and files to a compressed file + save_start = time.perf_counter() + class_means_json = out_path.with_name(f"{out_path.stem}_class_means").with_suffix(".json") + with open(class_means_json, "w") as f: + print(f"Writing class means to {f}") + json.dump(class_means, f) np.savez(out_path, scores=scores, files=files) save_end = time.perf_counter() print(f"Saving scores and files took {save_end - save_start:.4f} seconds") From d15b92367e2939fdf9f6059ad9d13246ea96db06 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:05:25 -0500 Subject: [PATCH 11/27] a bit of code cleanup --- .../evaluation/evaluate_signclip.py | 117 ++++++++---------- 1 file changed, 51 insertions(+), 66 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 67d6be4..d293c05 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -1,22 +1,21 @@ import argparse from pathlib import Path +import time +import json +import random import pandas as pd import numpy as np -from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -from tqdm import tqdm -import time import torch -import json -from typing import List, Tuple -# python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv -# (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv +from tqdm import tqdm +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric + def load_embedding(file_path: Path) -> np.ndarray: """ Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. - + Args: file_path (Path): Path to the .npy file. - + Returns: np.ndarray: The embedding with shape (768,). """ @@ -25,25 +24,22 @@ def load_embedding(file_path: Path) -> np.ndarray: embedding = embedding[0] # Reduce shape from (1, 768) to (768,) return embedding + def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.DataFrame: """ Match .npy embeddings to the corresponding glosses based on the numerical ID. - + Args: emb_dir (Path): Directory containing the .npy files. split_df (pd.DataFrame): DataFrame containing the split file with the "Video file" column. - + Returns: pd.DataFrame: Updated DataFrame with an additional column for embeddings. 
""" - import time # Step 1: Create a mapping of numerical IDs to .npy files map_start = time.perf_counter() - embeddings_map = { - npy_file.stem.split("-")[0]: npy_file - for npy_file in emb_dir.glob("*.npy") - } + embeddings_map = {npy_file.stem.split("-")[0]: npy_file for npy_file in emb_dir.glob("*.npy")} map_end = time.perf_counter() print(f"Creating embeddings map took {map_end - map_start:.4f} seconds") @@ -65,40 +61,38 @@ def get_embedding(video_file): def calculate_mean_distances( - distance_matrix: torch.Tensor, - indices_a: torch.Tensor, - indices_b: torch.Tensor, - exclude_self: bool = False + distance_matrix: torch.Tensor, indices_a: torch.Tensor, indices_b: torch.Tensor, exclude_self: bool = False ) -> float: """ Calculate the mean of distances between two sets of indices in a 2D distance matrix. - + Args: distance_matrix (torch.Tensor): A 2D tensor representing pairwise distances. indices_a (torch.Tensor): A tensor of row indices. indices_b (torch.Tensor): A tensor of column indices. exclude_self (bool): Whether to exclude distances where indices_a == indices_b. - + Returns: float: The mean distance between all pairs of (indices_a, indices_b). """ # Create all pair combinations row_indices, col_indices = torch.meshgrid(indices_a, indices_b, indexing="ij") - + if exclude_self: # Apply a mask to remove self-distances mask = row_indices != col_indices row_indices = row_indices[mask] col_indices = col_indices[mask] - + # Gather distances selected_distances = distance_matrix[row_indices.flatten(), col_indices.flatten()] - + # Return the mean return selected_distances.mean().item() + def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): - import random + torch.manual_seed(42) random.seed(42) # distance_matrix = torch.rand((num_items, num_items)) * 100 @@ -107,11 +101,13 @@ def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): indices = list(range(num_items)) random.shuffle(indices) - classes = {f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes)} + classes = { + f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes) + } # Assign intra-class distances - mean_values_by_class ={} + mean_values_by_class = {} for i, class_name in enumerate(classes.keys()): - mean_value = i+1 + mean_value = i + 1 mean_values_by_class[class_name] = mean_value for class_name, indices in classes.items(): mean_value = mean_values_by_class[class_name] @@ -121,24 +117,26 @@ def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): distance_matrix[i, j] = mean_value return classes, distance_matrix + def calculate_class_means(gloss_indices, scores): class_means_by_gloss = {} all_indices = torch.arange(scores.size(0), dtype=int) for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): indices = torch.LongTensor(indices) - class_means_by_gloss[gloss] ={} + class_means_by_gloss[gloss] = {} within_class_mean = calculate_mean_distances(scores, indices, indices, exclude_self=True) class_means_by_gloss[gloss]["in_class"] = within_class_mean complement_indices = all_indices[~torch.isin(all_indices, indices)] without_class_mean = calculate_mean_distances(scores, indices, complement_indices) - class_means_by_gloss[gloss]["out_of_class"]=without_class_mean + class_means_by_gloss[gloss]["out_of_class"] = without_class_mean return class_means_by_gloss -#def calculate_class_means(gloss_indices, scores): + +# def 
calculate_class_means(gloss_indices, scores): # all_within_class_distances = np.array([]) # Initialize as empty NumPy array # all_between_class_distances = np.array([]) # Initialize as empty NumPy array # within_class_means_by_gloss = {} @@ -162,10 +160,11 @@ def calculate_class_means(gloss_indices, scores): # print(f"Mean between classes: {np.mean(all_between_class_distances)}") # return within_class_means_by_gloss -def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = "cosine"): + +def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str = "cosine"): """ Evaluate SignCLIP embeddings using score_all. - + Args: emb_dir (Path): Directory containing .npy embeddings. split_file (Path): Path to the split CSV file. @@ -197,7 +196,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = # Step 4: Initialize the distance metric metric_start = time.perf_counter() - #metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + # metric = EmbeddingDistanceMetric(kind=kind, device="cpu") metric = EmbeddingDistanceMetric(kind=kind) metric_end = time.perf_counter() print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") @@ -209,59 +208,51 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = score_end = time.perf_counter() print(f"Score_all took {score_end - score_start:.3f} seconds") - # Step 7: Extract file list from DataFrame files_start = time.perf_counter() files = items_with_embeddings_df["Video file"].tolist() files_end = time.perf_counter() print(f"Extracting file list took {files_end - files_start:.4f} seconds") - - analysis_start = time.perf_counter() + analysis_start = time.perf_counter() index_to_check = 0 number_to_check = 10 print(f"The first {number_to_check} scores for {files[index_to_check]} to...") for ref, score in list(zip(files, scores[index_to_check]))[:number_to_check]: print("\t*------------->", f"{ref}".ljust(35), "\t", score.item()) - unique_glosses = items_with_embeddings_df['Gloss'].unique() + unique_glosses = items_with_embeddings_df["Gloss"].unique() print(f"We have a vocabulary of {len(unique_glosses)} glosses") gloss_indices = {} - for gloss in items_with_embeddings_df['Gloss'].unique(): - gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() + for gloss in items_with_embeddings_df["Gloss"].unique(): + gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df["Gloss"] == gloss].tolist() for gloss, indices in list(gloss_indices.items())[:10]: print(f"Here are the {len(indices)} indices for {gloss}:{indices}") find_class_distances_start = time.perf_counter() - #synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) - #class_means = calculate_class_means(synthetic_classes, synthetic_distances) + # synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) + # class_means = calculate_class_means(synthetic_classes, synthetic_distances) class_means = calculate_class_means(gloss_indices, scores) - find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - - analysis_end = time.perf_counter() analysis_duration = analysis_end - analysis_start in_class_means = [mean_dict["in_class"] for mean_dict in class_means.values()] out_class_means = [mean_dict["out_of_class"] for mean_dict in class_means.values()] - for gloss, means in 
list(class_means.items())[:10]: print(gloss, means) print(f"Mean of in-class means: {np.mean(in_class_means)}") print(f"Mean of out-of-class means: {np.mean(out_class_means)}") - + print(f"Analysis took {analysis_duration} seconds") - - # Step 8: Save the scores and files to a compressed file @@ -295,39 +286,33 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = print(f"Total script runtime: {overall_end - overall_start:.4f} seconds") - def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") + parser.add_argument("emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files") + parser.add_argument("--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)") parser.add_argument( - "emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files" - ) - parser.add_argument( - "--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)" - ) - parser.add_argument( - "--kind", type=str, choices=["cosine", "l2"], default="cosine", - help="Type of distance metric to use (default: cosine)" + "--kind", + type=str, + choices=["cosine", "l2"], + default="cosine", + help="Type of distance metric to use (default: cosine)", ) - parser.add_argument("--out_path", - type=Path, - help="Where to save output distance npz matrix+file list") + parser.add_argument("--out_path", type=Path, help="Where to save output distance npz matrix+file list") - - args = parser.parse_args() - output_file = args.out_path if output_file is None: output_file = Path(f"signclip_scores_{args.split_file.name}").with_suffix(".npz") - if not output_file.suffix == ".npz": + if output_file.suffix != ".npz": output_file = Path(f"{output_file}.npz") print(f"Scores will be saved to {output_file}") evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, out_path=output_file, kind=args.kind) + if __name__ == "__main__": main() From 5f3b1ba402f857a00702faa8ffd8f2af33324e5a Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:16:02 -0500 Subject: [PATCH 12/27] some code cleanup --- .../metrics/base_embedding_metric.py | 2 +- .../metrics/embedding_distance_metric.py | 6 +- .../metrics/signclip_distance_metric.py | 4 +- .../metrics/test_embedding_distance_metric.py | 64 ++++++++++++------- 4 files changed, 46 insertions(+), 30 deletions(-) diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 83ad763..78aeb0e 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -5,4 +5,4 @@ # Define a type alias for embeddings (e.g., torch.Tensor) Embedding = TypeVar("Embedding", bound=torch.Tensor) -EmbeddingMetric = BaseMetric[Embedding] \ No newline at end of file +EmbeddingMetric = BaseMetric[Embedding] diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 84d6df1..b362404 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -57,11 +57,11 @@ def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np # Cosine similarity, converted to distance similarity = torch.dot(hypothesis, reference).item() return 1 - similarity - elif self.kind == "l2": + if self.kind == "l2": # L2 distance return torch.norm(hypothesis - 
reference).item() - else: - raise ValueError(f"Unsupported distance metric: {self.kind}") + + raise ValueError(f"Unsupported distance metric: {self.kind}") def score_all( self, diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py index 3d0fd31..382be52 100644 --- a/pose_evaluation/metrics/signclip_distance_metric.py +++ b/pose_evaluation/metrics/signclip_distance_metric.py @@ -6,6 +6,7 @@ import torch import torch.nn.functional as F + class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): """ @@ -18,8 +19,6 @@ def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): self.kind = kind self.device = torch.device(device) if isinstance(device, str) else device - - def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: """ Computes the pairwise distance matrix for the provided embeddings. @@ -48,4 +47,3 @@ def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: raise ValueError(f"Unsupported distance metric: {self.kind}") return distance_matrix - diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index c352386..4727029 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -8,7 +8,7 @@ from typing import List from pathlib import Path -# TODO: many fixes. Including the fact that we test cosine but not Euclidean, +# TODO: many fixes. Including the fact that we test cosine but not Euclidean, # Configure logging @@ -18,6 +18,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + @pytest.fixture def cosine_metric(): """Fixture to create an EmbeddingDistanceMetric instance.""" @@ -50,10 +51,12 @@ def save_and_plot_distances(distances, matrix_name, num_points, dim): print(f"Distances plot saved to {plot_path}") plt.close() + def random_tensor(size: int) -> torch.Tensor: """Generate a random tensor on the appropriate device.""" return torch.rand(size, dtype=torch.float32, device=DEVICE) + def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: angles = torch.linspace(0, 2 * np.pi, num_points + 1)[:-1] x_coords = torch.cos(angles) @@ -64,18 +67,18 @@ def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: points = torch.cat([points, padding], dim=1) return points + def generate_orthogonal_rows_with_repeats(num_rows: int, dim: int) -> torch.Tensor: orthogonal_rows = torch.empty(0, dim) for _ in range(min(num_rows, dim)): random_vector = torch.randn(1, dim) if orthogonal_rows.shape[0] > 0: random_vector -= ( - torch.matmul(random_vector, orthogonal_rows.T) @ orthogonal_rows + torch.matmul(random_vector, orthogonal_rows.T) + @ orthogonal_rows / torch.norm(orthogonal_rows, dim=1, keepdim=True) ** 2 ) - orthogonal_rows = torch.cat( - [orthogonal_rows, random_vector / torch.norm(random_vector)] - ) + orthogonal_rows = torch.cat([orthogonal_rows, random_vector / torch.norm(random_vector)]) if num_rows > dim: orthogonal_rows = orthogonal_rows.repeat(num_rows // dim + 1, 1)[:num_rows] return orthogonal_rows @@ -110,16 +113,19 @@ def generate_orthogonal_rows_in_pairs(num_pairs: int, dim: int) -> torch.Tensor: return orthogonal_rows + def generate_ones_tensor(rows: int, dims: int) -> torch.Tensor: """Generates a tensor with all elements equal to 1.0 (float).""" 
return torch.ones(rows, dims, dtype=torch.float32) + def generate_identity_matrix_rows(rows, cols): - """ - Returns an identity matrix with the specified number of rows and columns. - """ - identity = torch.eye(max(rows, cols)) - return identity[:rows, :cols] + """ + Returns an identity matrix with the specified number of rows and columns. + """ + identity = torch.eye(max(rows, cols)) + return identity[:rows, :cols] + def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: """ @@ -138,8 +144,6 @@ def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: return tensor - - def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: """Test that the metric is symmetric for cosine distance.""" emb1 = random_tensor(768) @@ -150,9 +154,9 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: logger.info(f"Score 1: {score1}, Score 2: {score2}") assert pytest.approx(score1) == score2, "Score should be symmetric." - -def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: + +def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: """Test that score works with embeddings loaded from file paths.""" emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage emb2 = random_tensor(768).cpu().numpy() @@ -174,24 +178,30 @@ def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." -def test_score_all_against_self(cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker) -> None: +def test_score_all_against_self( + metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker +) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." - assert torch.allclose(torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6), ( - "Self-comparison scores should be zero for cosine distance." - ) - distance_range_checker(scores, min_val=0, max_val=2) + assert torch.allclose( + torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6 + ), "Self-comparison scores should be zero for cosine distance." + distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] refs = [np.random.rand(768) for _ in range(5)] scores = cosine_metric.score_all(hyps, refs) - assert scores.shape == (len(hyps), len(refs)), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " - distance_range_checker(scores, min_val=0, max_val=2) + assert scores.shape == ( + len(hyps), + len(refs), + ), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. 
" + distance_range_checker(scores, min_val=0, max_val=2) # def test_score_all_with_empty_inputs(metric): @@ -199,6 +209,7 @@ def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): # scores = metric.score_all([], []) # assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" + def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) @@ -210,6 +221,7 @@ def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: logger.info("Invalid input test passed.") + def test_score_tensor_input(cosine_metric): """Test score function with torch.Tensor inputs.""" emb1 = torch.rand(768) @@ -255,6 +267,7 @@ def test_mixed_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." + @pytest.mark.parametrize("num_points, dim", [(16, 2)]) def test_unit_circle_points(cosine_metric, num_points, dim): embeddings = generate_unit_circle_points(num_points, dim) @@ -266,7 +279,9 @@ def test_unit_circle_points(cosine_metric, num_points, dim): def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): embeddings = generate_orthogonal_rows_with_repeats(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) - save_and_plot_distances(distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim) + save_and_plot_distances( + distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim + ) # Create expected pattern directly within the test function expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) @@ -276,7 +291,9 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): expected_pattern[i, j] = 1 # We expect 0 1 0 across and down - assert torch.allclose(distances, expected_pattern, atol=1e-6), "Output does not match the expected alternating pattern" + assert torch.allclose( + distances, expected_pattern, atol=1e-6 + ), "Output does not match the expected alternating pattern" @pytest.mark.parametrize("num_points, dim", [(20, 2)]) @@ -286,6 +303,7 @@ def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + @pytest.mark.parametrize("num_points, dim", [(10, 5)]) def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): embeddings = generate_ones_tensor(num_points, dim) From 00ec4b8d8e6ab1e41b6d1dda6be09ca814960c4d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:22:28 -0500 Subject: [PATCH 13/27] Fixed a few pytests --- pose_evaluation/metrics/test_embedding_distance_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 4727029..147b98e 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -156,7 +156,7 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: assert pytest.approx(score1) == score2, "Score should be symmetric." 
-def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: +def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: """Test that score works with embeddings loaded from file paths.""" emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage emb2 = random_tensor(768).cpu().numpy() @@ -179,13 +179,13 @@ def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> Non def test_score_all_against_self( - metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker + cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker ) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings)), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") From 03066be9bd9ad76f4ebcd9ccb52d452a62afe2ca Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 27 Nov 2024 10:16:00 -0500 Subject: [PATCH 14/27] Remove unneeded SignCLIP file --- .../metrics/signclip_distance_metric.py | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 pose_evaluation/metrics/signclip_distance_metric.py diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py deleted file mode 100644 index 382be52..0000000 --- a/pose_evaluation/metrics/signclip_distance_metric.py +++ /dev/null @@ -1,49 +0,0 @@ -from pose_evaluation.metrics.base_embedding_metric import NumpyArrayEmbeddingMetric -from typing import Literal -import numpy as np -from tqdm import tqdm -from scipy.spatial.distance import cosine -import torch -import torch.nn.functional as F - - -class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): - def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): - """ - Initializes the metric with the specified distance type and device. - - Args: - kind (str): The type of distance metric, either 'cosine' or 'l2'. - device (torch.device | str): The device to use ('cuda' or 'cpu'). - """ - self.kind = kind - self.device = torch.device(device) if isinstance(device, str) else device - - def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: - """ - Computes the pairwise distance matrix for the provided embeddings. - - Args: - embeddings (torch.Tensor): A 2D tensor of shape (N, D), where N is the number - of embeddings and D is the feature dimension. - - Returns: - torch.Tensor: A 2D tensor of shape (N, N) containing pairwise distances. 
- """ - # Move embeddings to the specified device - embeddings = embeddings.to(self.device) - - if self.kind == "cosine": - # Normalize embeddings to unit norm - embeddings = F.normalize(embeddings, p=2, dim=1) - # Compute pairwise cosine similarity - similarity_matrix = torch.matmul(embeddings, embeddings.T) # Shape: (N, N) - distance_matrix = 1 - similarity_matrix # Cosine distance = 1 - cosine similarity - elif self.kind == "l2": - # Compute pairwise L2 distance using broadcasting - diff = embeddings[:, None, :] - embeddings[None, :, :] # Shape: (N, N, D) - distance_matrix = torch.norm(diff, dim=2) # Shape: (N, N) - else: - raise ValueError(f"Unsupported distance metric: {self.kind}") - - return distance_matrix From 4a1b9f22aef55838b9846f74e32891e14f3a2537 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:49:47 -0500 Subject: [PATCH 15/27] Use sentence-transformers utils for embedding distances --- .../metrics/embedding_distance_metric.py | 184 ++++++++++++------ pyproject.toml | 8 +- 2 files changed, 132 insertions(+), 60 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index b362404..fdfa712 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -1,104 +1,170 @@ -from typing import Literal, Union, List +from typing import Literal, List import torch -import torch.nn.functional as F +from torch import Tensor import numpy as np - +from sentence_transformers import util as st_util from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric +# Useful reference: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L31 +# * Helper functions such as batch_to_device, _convert_to_tensor, _convert_to_batch, _convert_to_batch_tensor +# * a whole semantic search function, with chunking and top_k + +# See also pgvector's C implementation: https://github.com/pgvector/pgvector/blob/master/src/vector.c +# * cosine_distance: https://github.com/pgvector/pgvector/blob/master/src/vector.c#L658 +# * l2_distance https://github.com/pgvector/pgvector/blob/master/src/vector.c#L566 + + class EmbeddingDistanceMetric(EmbeddingMetric): - def __init__(self, kind: Literal["cosine", "l2"] = "cosine", device: Union[torch.device, str] = None): + def __init__( + self, + kind: Literal["cosine", "euclidean", "dot"] = "cosine", + device: torch.device | str = None, + dtype=torch.float64, + ): """ Initialize the embedding distance metric. Args: - kind (Literal["cosine", "l2"]): The type of distance metric. + kind (Literal["cosine", "euclidean"]): The type of distance metric. device (torch.device | str): The device to use for computation. If None, automatically detects. """ super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) self.kind = kind if device is None: - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.device = torch.device(st_util.get_device_name()) else: self.device = torch.device(device) if isinstance(device, str) else device - def _to_tensor(self, data: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: - """ - Convert input to a PyTorch tensor if it is a NumPy array. + self.dtype = dtype - Args: - data (np.ndarray | torch.Tensor): Input data. 
+ def _to_device_tensor(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + if dtype is None: + dtype = self.dtype + return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - Returns: - torch.Tensor: Tensor on the correct device. - """ - if isinstance(data, np.ndarray): - data = torch.tensor(data, dtype=torch.float32) - return data.to(self.device) + def _to_batch_tensor_on_device(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + if dtype is None: + dtype = self.dtype + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) - def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np.ndarray, torch.Tensor]) -> float: + def score( + self, + hypothesis: list | np.ndarray | Tensor, + reference: list | np.ndarray | Tensor, + ) -> float: """ Compute the distance between two embeddings. Args: - hypothesis (np.ndarray | torch.Tensor): A single embedding vector. - reference (np.ndarray | torch.Tensor): Another single embedding vector. + hypothesis (list| np.ndarray | Tensor): A single embedding vector. + reference (list| np.ndarray | Tensor): Another single embedding vector. Returns: float: The calculated distance. """ - hypothesis = self._to_tensor(hypothesis) - reference = self._to_tensor(reference) - - if self.kind == "cosine": - # Normalize both embeddings to unit length - hypothesis = F.normalize(hypothesis, p=2, dim=0) - reference = F.normalize(reference, p=2, dim=0) - # Cosine similarity, converted to distance - similarity = torch.dot(hypothesis, reference).item() - return 1 - similarity - if self.kind == "l2": - # L2 distance - return torch.norm(hypothesis - reference).item() + if hypothesis is None or reference is None: + raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") - raise ValueError(f"Unsupported distance metric: {self.kind}") + try: + hypothesis = self._to_batch_tensor_on_device(hypothesis) + reference = self._to_batch_tensor_on_device(reference) + except RuntimeError as e: + raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e + return self.score_all(hypothesis, reference).item() def score_all( self, - hypotheses: List[Union[np.ndarray, torch.Tensor]], - references: List[Union[np.ndarray, torch.Tensor]], + hypotheses: List[list | np.ndarray | Tensor], + references: List[list | np.ndarray | Tensor], progress_bar: bool = True, - ) -> torch.Tensor: + ) -> Tensor: """ - Compute the pairwise distance between all hypotheses and references. Expects 2D inputs. + Compute the pairwise distance between all hypotheses and references. + Expects 2D inputs, where each element in the second dimension is one embedding Args: - hypotheses (list[np.ndarray | torch.Tensor]): List of hypothesis embeddings. - references (list[np.ndarray | torch.Tensor]): List of reference embeddings. + hypotheses (list[list| np.ndarray | Tensor]): List of hypothesis embeddings. + references (list[list| np.ndarray | Tensor]): List of reference embeddings. progress_bar (bool): Whether to display a progress bar. Returns: - torch.Tensor, distance matrix. Row i is the distances of hypotheses[i] to all rows of references + Tensor, distance matrix. 
Row i is the distances of hypotheses[i] to all rows of references """ # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_tensor(h) for h in hypotheses]) - references = torch.stack([self._to_tensor(r) for r in references]) - - if self.kind == "cosine": - # Normalize the tensors along the feature dimension (dim=1) - normalized_hypotheses = F.normalize(hypotheses, dim=1) - normalized_references = F.normalize(references, dim=1) - - # Calculate cosine similarity between all hypothesis-reference pairs - cosine_similarities = torch.matmul(normalized_hypotheses, normalized_references.T) - - # Convert cosine similarities to cosine distances - distance_matrix = 1 - cosine_similarities - elif self.kind == "l2": - # Use broadcasting to calculate pairwise L2 distances - diff = hypotheses[:, None, :] - references[None, :, :] - distance_matrix = torch.norm(diff, dim=2) + hypotheses = torch.stack([self._to_device_tensor(h) for h in hypotheses]) + references = torch.stack([self._to_device_tensor(r) for r in references]) + + if self.kind == "dot": + distance_matrix = self.dot_product(hypotheses, references) + + elif self.kind == "cosine": + distance_matrix = self.cosine_distances(hypotheses, references) + + elif self.kind == "euclidean": + distance_matrix = self.euclidean_distances(hypotheses, references) + + elif self.kind == "manhattan": + distance_matrix = self.manhattan_distances(hypotheses, references) + else: raise ValueError(f"Unsupported distance metric: {self.kind}") - return distance_matrix.cpu() + return distance_matrix + + def dot_product(self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor) -> Tensor: + # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: + # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul + return st_util.dot_score(hypotheses, references) + + def euclidean_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Returns the negative L2 norm/euclidean distances, which is what sentence-transformers uses for similarities. + """ + return st_util.euclidean_sim(hypotheses, references) + + def euclidean_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Seeing as how sentence-transformers just negates the distances to get "similarities", + We can re-negate to get them positive again. + """ + return -self.euclidean_similarities(hypotheses, references) + + def cosine_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Calculates cosine similarities, which can be thought of as the angle between two embeddings. + The min value is -1 (least similar/pointing directly away), and the max is 1 (exactly the same angle). + """ + return st_util.cos_sim(hypotheses, references) + + def cosine_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Converts cosine similarities to distances by simply subtracting from 1. + Max distance is 2, min distance is 0. + """ + return 1 - self.cosine_similarities(hypotheses, references) + + def manhattan_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Get the L1/Manhattan similarities, aka negative distances. 
+ """ + return st_util.manhattan_sim(hypotheses, references) + + def manhattan_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Sentence transformers defines similarity as negative distances. + We can re-negate to recover the distances. + """ + return -self.manhattan_similarities(hypotheses, references) diff --git a/pyproject.toml b/pyproject.toml index b38c04e..2629a9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,14 @@ readme = "README.md" dependencies = [ "pose-format", "scipy", + "torch", + "numpy", # possibly could replace all with torch + # for various vector/tensor similarities and distances in torch + "sentence-transformers", + # For reading .csv files, etc + "pandas", # For segment similarity - "sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" + #"sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" ] [project.optional-dependencies] From 12f612c768163934d0aff2e6456a10e3ca6f807a Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:54:26 -0500 Subject: [PATCH 16/27] CDL: updating the tests a bit --- pose_evaluation/metrics/test_embedding_distance_metric.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 147b98e..24991c6 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -185,7 +185,7 @@ def test_score_all_against_self( scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings)), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings),dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") @@ -216,10 +216,10 @@ def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: invalid_inputs = ["invalid_input", None, -1, 1] for invalid_input in invalid_inputs: - with pytest.raises((TypeError, AttributeError)): + with pytest.raises((TypeError, AttributeError, ValueError)): cosine_metric.score(emb1, invalid_input) - logger.info("Invalid input test passed.") + logger.info("Invalid input successfully crashed as expected.") def test_score_tensor_input(cosine_metric): @@ -284,7 +284,7 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): ) # Create expected pattern directly within the test function - expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) + expected_pattern = torch.zeros(num_points, num_points, dtype=distances.dtype) for i in range(num_points): for j in range(num_points): if (i + j) % 2 != 0: From 3ca874ec668336700704f2159c5505b288f90bb5 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:16:54 -0500 Subject: [PATCH 17/27] Various pylint changes --- .../metrics/base_embedding_metric.py | 3 ++- pose_evaluation/metrics/conftest.py | 25 +++++++++---------- .../metrics/test_embedding_distance_metric.py | 25 +++++++++++-------- pyproject.toml | 3 ++- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 78aeb0e..2fb61c8 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -1,6 +1,7 @@ from typing import TypeVar -from pose_evaluation.metrics.base import BaseMetric import torch +from pose_evaluation.metrics.base import BaseMetric + # Define a type alias for embeddings (e.g., torch.Tensor) Embedding = TypeVar("Embedding", bound=torch.Tensor) diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py index 4b1129c..c0f44f7 100644 --- a/pose_evaluation/metrics/conftest.py +++ b/pose_evaluation/metrics/conftest.py @@ -1,12 +1,12 @@ -# conftest.py -import pytest import shutil from pathlib import Path from typing import Callable, Union import torch import numpy as np +import pytest -@pytest.fixture(scope="session", autouse=True) + +@pytest.fixture(scope="session", autouse=True) def clean_test_artifacts(): """Fixture to clean up test artifacts before each test session.""" test_artifacts_dir = Path(__file__).parent / "tests" # Using Path @@ -17,19 +17,18 @@ def clean_test_artifacts(): # (Optional) You can add cleanup logic here to run after the session if needed -# conftest.py -from typing import Callable, Union -import torch -import numpy as np - -@pytest.fixture -def distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: +@pytest.fixture(name="distance_range_checker") +def fixture_distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: max_distance = distances.max().item() min_distance = distances.min().item() # Use np.isclose for comparisons with tolerance - assert np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val, f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, 
{max_val}]" - assert np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val, f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" + assert ( + np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val + ), f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, {max_val}]" + assert ( + np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val + ), f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" - return _check_range \ No newline at end of file + return _check_range diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 24991c6..0f43b63 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,12 +1,15 @@ +from pathlib import Path +from typing import List +import logging import pytest import numpy as np +import matplotlib.pyplot as plt import torch from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -from pose_evaluation.metrics.conftest import distance_range_checker -import matplotlib.pyplot as plt -import logging -from typing import List -from pathlib import Path + +# no need to import. https://github.com/pylint-dev/pylint/issues/3493#issuecomment-616761997 +# from pose_evaluation.metrics.conftest import distance_range_checker + # TODO: many fixes. Including the fact that we test cosine but not Euclidean, @@ -19,14 +22,16 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -@pytest.fixture -def cosine_metric(): +# named the fixture this way to solve many pylint W0621 +# https://stackoverflow.com/questions/46089480/pytest-fixtures-redefining-name-from-outer-scope-pylint +@pytest.fixture(name="cosine_metric") +def fixture_cosine_metric(): """Fixture to create an EmbeddingDistanceMetric instance.""" return EmbeddingDistanceMetric(kind="cosine") -@pytest.fixture -def embeddings() -> List[torch.Tensor]: +@pytest.fixture(name="embeddings") +def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] @@ -185,7 +190,7 @@ def test_score_all_against_self( scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings),dtype=scores.dtype), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings), dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") diff --git a/pyproject.toml b/pyproject.toml index 2629a9d..893fa3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ # For reading .csv files, etc "pandas", # For segment similarity - #"sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" + "sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" ] [project.optional-dependencies] @@ -40,6 +40,7 @@ disable = [ "C0115", # Missing class docstring "C0116", # Missing function or method docstring "W0511", # TODO + "W1203", # use lazy % formatting in logging functions ] [tool.setuptools] From d7fb10e53f0758b34b8b3e31ed14291ccb746577 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:44:00 -0500 Subject: [PATCH 18/27] Various stylistic and commenting changes --- .../metrics/embedding_distance_metric.py | 164 ++++++++++++------ 1 file changed, 108 insertions(+), 56 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index fdfa712..875f7f1 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -1,8 +1,12 @@ -from typing import Literal, List +from typing import Literal, List, Union +import logging + import torch from torch import Tensor +from torch.types import Number import numpy as np from sentence_transformers import util as st_util + from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric @@ -14,20 +18,29 @@ # * cosine_distance: https://github.com/pgvector/pgvector/blob/master/src/vector.c#L658 # * l2_distance https://github.com/pgvector/pgvector/blob/master/src/vector.c#L566 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +ValidDistanceKinds = Literal["cosine", "euclidean", "manhattan", "dot"] +TensorConvertableType = Union[List, np.ndarray, Tensor] + class EmbeddingDistanceMetric(EmbeddingMetric): def __init__( self, - kind: Literal["cosine", "euclidean", "dot"] = "cosine", - device: torch.device | str = None, - dtype=torch.float64, + kind: ValidDistanceKinds = "cosine", + device: Union[torch.device, str] = None, + dtype=torch.float32, ): """ Initialize the embedding distance metric. Args: - kind (Literal["cosine", "euclidean"]): The type of distance metric. - device (torch.device | str): The device to use for computation. If None, automatically detects. + kind (ValidDistanceKinds): The type of distance metric. + device (Union[torch.device, str]): The device to use for computation. + If None, automatically detects. + dtype (torch.dtype): The data type to use for tensors. 
+ If None, uses torch.get_default_dtype() """ super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) self.kind = kind @@ -36,32 +49,77 @@ def __init__( else: self.device = torch.device(device) if isinstance(device, str) else device + if dtype is None: + dtype = torch.get_default_dtype() + + # Dispatch table for metric computations + self._metric_dispatch = { + "cosine": self.cosine_distances, + "euclidean": self.euclidean_distances, + "dot": self.dot_product, + "manhattan": self.manhattan_distances, + } + self.dtype = dtype - def _to_device_tensor(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + def set_device(self, device: Union[torch.device, str]) -> None: + """ + Explicitly set the device used for tensors. + + Args: + device (Union[torch.device, str]): The device to use for computation. + """ + self.device = torch.device(device) + logger.info(f"Device set to: {self.device}") + + def _to_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + """ + Convert input data to a tensor on the specified device. + + Args: + data (TensorConvertableType: The input data to convert. + dtype (torch.dtype): The data type for the tensor. + + Returns: + Tensor: Tensor representation of the data on the specified device. + """ if dtype is None: dtype = self.dtype return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - def _to_batch_tensor_on_device(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + """ + Convert input data to a batch tensor on the specified device. + + Args: + data (TensorConvertableType): The input data to convert. + dtype (torch.dtype): The data type for the tensor. + + Returns: + Tensor: Batch tensor representation of the data on the specified device. + """ if dtype is None: dtype = self.dtype return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) def score( self, - hypothesis: list | np.ndarray | Tensor, - reference: list | np.ndarray | Tensor, - ) -> float: + hypothesis: TensorConvertableType, + reference: TensorConvertableType, + ) -> Number: """ Compute the distance between two embeddings. Args: - hypothesis (list| np.ndarray | Tensor): A single embedding vector. - reference (list| np.ndarray | Tensor): Another single embedding vector. + hypothesis (TensorConvertableType): A single embedding vector. + reference (TensorConvertableType): Another single embedding vector. Returns: - float: The calculated distance. + Number: The calculated distance. + + Raises: + ValueError: If either input is None. + TypeError: If inputs cannot be converted to tensors. """ if hypothesis is None or reference is None: raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") @@ -75,95 +133,89 @@ def score( def score_all( self, - hypotheses: List[list | np.ndarray | Tensor], - references: List[list | np.ndarray | Tensor], + hypotheses: Union[List[TensorConvertableType], Tensor], + references: Union[List[TensorConvertableType], Tensor], progress_bar: bool = True, ) -> Tensor: """ - Compute the pairwise distance between all hypotheses and references. - Expects 2D inputs, where each element in the second dimension is one embedding + Compute the distance between all hypotheses and all references. + + Expects 2D inputs. If not already Tensors, will attempt to convert them. Args: - hypotheses (list[list| np.ndarray | Tensor]): List of hypothesis embeddings. 
- references (list[list| np.ndarray | Tensor]): List of reference embeddings. - progress_bar (bool): Whether to display a progress bar. + hypotheses (Union[List[TensorConvertableType], Tensor]): + List of hypothesis embeddings or a single tensor. + references (Union[List[TensorConvertableType], Tensor]): + List of reference embeddings or a single tensor. + progress_bar (bool): Whether to display a progress bar. (not implemented yet) Returns: - Tensor, distance matrix. Row i is the distances of hypotheses[i] to all rows of references + Tensor: Distance matrix. Row `i` is the distances of `hypotheses[i]` to all rows of `references`. + Shape is be NxM, where N is the number of hypotheses, and M is the number of references + + Raises: + ValueError: If the specified metric is unsupported. """ # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_device_tensor(h) for h in hypotheses]) - references = torch.stack([self._to_device_tensor(r) for r in references]) - - if self.kind == "dot": - distance_matrix = self.dot_product(hypotheses, references) - - elif self.kind == "cosine": - distance_matrix = self.cosine_distances(hypotheses, references) - - elif self.kind == "euclidean": - distance_matrix = self.euclidean_distances(hypotheses, references) + hypotheses = torch.stack([self._to_tensor_on_device(h) for h in hypotheses]) + references = torch.stack([self._to_tensor_on_device(r) for r in references]) - elif self.kind == "manhattan": - distance_matrix = self.manhattan_distances(hypotheses, references) - - else: + if self.kind not in self._metric_dispatch: + logger.error(f"Unsupported distance metric: {self.kind}") raise ValueError(f"Unsupported distance metric: {self.kind}") + distance_matrix = self._metric_dispatch[self.kind](hypotheses, references) return distance_matrix - def dot_product(self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor) -> Tensor: + def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: + """ + Compute the dot product between embeddings. + Uses sentence_transformers.util.dot_score + """ # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul return st_util.dot_score(hypotheses, references) - def euclidean_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def euclidean_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Returns the negative L2 norm/euclidean distances, which is what sentence-transformers uses for similarities. + Uses sentence_transformers.util.euclidean_sim """ return st_util.euclidean_sim(hypotheses, references) - def euclidean_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def euclidean_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Seeing as how sentence-transformers just negates the distances to get "similarities", We can re-negate to get them positive again. 
+ Uses sentence_transformers.util.euclidean_similarities """ return -self.euclidean_similarities(hypotheses, references) - def cosine_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def cosine_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Calculates cosine similarities, which can be thought of as the angle between two embeddings. The min value is -1 (least similar/pointing directly away), and the max is 1 (exactly the same angle). + Uses sentence_transformers.util.cos_sim """ return st_util.cos_sim(hypotheses, references) - def cosine_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def cosine_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Converts cosine similarities to distances by simply subtracting from 1. Max distance is 2, min distance is 0. """ return 1 - self.cosine_similarities(hypotheses, references) - def manhattan_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def manhattan_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Get the L1/Manhattan similarities, aka negative distances. + Uses sentence_transformers.util.manhattan_sim """ return st_util.manhattan_sim(hypotheses, references) - def manhattan_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def manhattan_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ + Convert Manhattan similarities to distances. Sentence transformers defines similarity as negative distances. We can re-negate to recover the distances. """ From 884deb9108215375e9acc41dab783b03c21adb37 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:19:33 -0500 Subject: [PATCH 19/27] Better handling of List to tensor conversions --- .../metrics/embedding_distance_metric.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 875f7f1..29793c6 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -100,6 +100,16 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> """ if dtype is None: dtype = self.dtype + + # better performance this way, see https://github.com/pytorch/pytorch/issues/13918 + if isinstance(data, list) and all(isinstance(x, np.ndarray) for x in data): + data = np.asanyarray(data) + + if isinstance(data, list) and all(isinstance(x, torch.Tensor) for x in data): + # prevents ValueError: only one element tensors can be converted to Python scalars + # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch + data = torch.stack(data) + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) def score( @@ -117,18 +127,8 @@ def score( Returns: Number: The calculated distance. - Raises: - ValueError: If either input is None. - TypeError: If inputs cannot be converted to tensors. 
""" - if hypothesis is None or reference is None: - raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") - try: - hypothesis = self._to_batch_tensor_on_device(hypothesis) - reference = self._to_batch_tensor_on_device(reference) - except RuntimeError as e: - raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e return self.score_all(hypothesis, reference).item() def score_all( @@ -154,11 +154,14 @@ def score_all( Shape is be NxM, where N is the number of hypotheses, and M is the number of references Raises: + TypeError: If either hypotheses or references cannot be converted to a batch tensor ValueError: If the specified metric is unsupported. """ - # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_tensor_on_device(h) for h in hypotheses]) - references = torch.stack([self._to_tensor_on_device(r) for r in references]) + try: + hypotheses = self._to_batch_tensor_on_device(hypotheses) + references = self._to_batch_tensor_on_device(references) + except RuntimeError as e: + raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e if self.kind not in self._metric_dispatch: logger.error(f"Unsupported distance metric: {self.kind}") @@ -172,7 +175,6 @@ def dot_product(self, hypotheses: TensorConvertableType, references: TensorConve Compute the dot product between embeddings. Uses sentence_transformers.util.dot_score """ - # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul return st_util.dot_score(hypotheses, references) From 4934c5d06f796a3e0c102919aa620d31d0ee255d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:21:13 -0500 Subject: [PATCH 20/27] Adding some tests, including for List handling --- .../metrics/test_embedding_distance_metric.py | 93 ++++++++++++++++--- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 0f43b63..7124151 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List +from typing import List, Callable, Tuple import logging import pytest import numpy as np @@ -20,6 +20,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +torch.set_default_device(DEVICE) # so that we get arrays on the same device # named the fixture this way to solve many pylint W0621 @@ -35,9 +36,16 @@ def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] +def call_and_call_with_inputs_swapped(hyp:torch.Tensor, ref:torch.Tensor, scoring_function:Callable[[torch.Tensor, torch.Tensor], torch.Tensor])->Tuple[torch.Tensor, torch.Tensor]: + score1 = scoring_function(hyp, ref) + score2 = scoring_function(ref, hyp) + return score1, score2 + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" + + distances = distances.cpu() test_artifacts_dir = Path(__file__).parent / "tests" output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" np.savetxt(output_path, 
distances.numpy(), delimiter=",", fmt="%.4f") @@ -154,8 +162,9 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: emb1 = random_tensor(768) emb2 = random_tensor(768) - score1 = cosine_metric.score(emb1, emb2) - score2 = cosine_metric.score(emb2, emb1) + # score1 = cosine_metric.score(emb1, emb2) + # score2 = cosine_metric.score(emb2, emb1) + score1, score2 = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) logger.info(f"Score 1: {score1}, Score 2: {score2}") assert pytest.approx(score1) == score2, "Score should be symmetric." @@ -196,6 +205,18 @@ def test_score_all_against_self( logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") +def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): + hyps = [np.random.rand(768) for _ in range(3)] + refs = np.random.rand(768) + + # scores = cosine_metric.score_all(hyps, refs) + scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) + + + assert scores.shape == (len(hyps), 1) + assert scores2.shape == (1, len(hyps)) + distance_range_checker(scores, min_val=0, max_val=2) + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] @@ -209,20 +230,50 @@ def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): distance_range_checker(scores, min_val=0, max_val=2) -# def test_score_all_with_empty_inputs(metric): -# """Test score_all with empty inputs.""" -# scores = metric.score_all([], []) -# assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" +def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + ref = random_tensor(769) + + with pytest.raises(RuntimeError): + # gives RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x768 and 769x1 + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) +def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + for ref in range (-2, 2): + with pytest.raises(IndexError): + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + + logger.info("Invalid input successfully crashed as expected.") -def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + invalid_inputs = [ + "invalid_input", + True + ] + for ref in invalid_inputs: + with pytest.raises((TypeError, IndexError)): + # TypeError: new(): invalid data type 'str' + # but True gives IndexError + # TODO: better TypeError, more descriptive + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + +def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) - invalid_inputs = ["invalid_input", None, -1, 1] + invalid_inputs = ["", list(), dict(), tuple(), set()] for invalid_input in invalid_inputs: - with pytest.raises((TypeError, AttributeError, ValueError)): - 
cosine_metric.score(emb1, invalid_input) + with pytest.raises((RuntimeError, TypeError, IndexError)): + # gives RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x768 and 0x1) + # "" gives TypeError: new(): invalid data type 'str' + # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + call_and_call_with_inputs_swapped(emb1, invalid_input, cosine_metric.score) logger.info("Invalid input successfully crashed as expected.") @@ -244,9 +295,16 @@ def test_score_ndarray_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." +def test_score_all_list_of_lists_of_floats(cosine_metric): + """Does a 2D list of floats work? """ + hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] + refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" -def test_score_all_tensor_input(cosine_metric): - """Test score_all function with torch.Tensor inputs.""" +def test_score_all_list_of_tensor_input(cosine_metric): + """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] refs = [torch.rand(768) for _ in range(5)] @@ -254,6 +312,15 @@ def test_score_all_tensor_input(cosine_metric): assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" +def test_score_all_list_of_ndarray_input(cosine_metric): + """Test score_all function with List of np.ndarray inputs.""" + hyps = [np.random.rand(768) for _ in range(5)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. 
Shape:{scores.shape}" + def test_device_handling(cosine_metric): """Test device handling for the metric.""" From a495c67866097e026d14b2bfd4e00f6dd7cb9e47 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:59:44 -0500 Subject: [PATCH 21/27] CDL: a few pylint changes --- .../metrics/test_embedding_distance_metric.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 7124151..479e4b2 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -20,7 +20,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -torch.set_default_device(DEVICE) # so that we get arrays on the same device +torch.set_default_device(DEVICE) # so that we get arrays on the same device # named the fixture this way to solve many pylint W0621 @@ -36,11 +36,14 @@ def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] -def call_and_call_with_inputs_swapped(hyp:torch.Tensor, ref:torch.Tensor, scoring_function:Callable[[torch.Tensor, torch.Tensor], torch.Tensor])->Tuple[torch.Tensor, torch.Tensor]: + +def call_and_call_with_inputs_swapped( + hyp: torch.Tensor, ref: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] +) -> Tuple[torch.Tensor, torch.Tensor]: score1 = scoring_function(hyp, ref) score2 = scoring_function(ref, hyp) return score1, score2 - + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" @@ -211,12 +214,12 @@ def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): # scores = cosine_metric.score_all(hyps, refs) scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) - assert scores.shape == (len(hyps), 1) assert scores2.shape == (1, len(hyps)) distance_range_checker(scores, min_val=0, max_val=2) + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] @@ -239,9 +242,10 @@ def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistan # TODO: we should probably raise a more descriptive/helpful error/ ValueError call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - for ref in range (-2, 2): + for ref in range(-2, 2): with pytest.raises(IndexError): # TODO: we should probably raise a more descriptive/helpful error/ ValueError # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) @@ -249,12 +253,10 @@ def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> logger.info("Invalid input successfully crashed as expected.") + def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - invalid_inputs = [ - "invalid_input", - True - ] + invalid_inputs = ["invalid_input", True] for ref in invalid_inputs: with pytest.raises((TypeError, IndexError)): # TypeError: new(): invalid data type 'str' @@ -262,10 +264,11 
@@ def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDista # TODO: better TypeError, more descriptive call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) - invalid_inputs = ["", list(), dict(), tuple(), set()] + invalid_inputs = ["", [], {}, tuple(), set()] for invalid_input in invalid_inputs: with pytest.raises((RuntimeError, TypeError, IndexError)): @@ -295,14 +298,16 @@ def test_score_ndarray_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." + def test_score_all_list_of_lists_of_floats(cosine_metric): - """Does a 2D list of floats work? """ + """Does a 2D list of floats work?""" hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] scores = cosine_metric.score_all(hyps, refs) assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + def test_score_all_list_of_tensor_input(cosine_metric): """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] @@ -312,6 +317,7 @@ def test_score_all_list_of_tensor_input(cosine_metric): assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + def test_score_all_list_of_ndarray_input(cosine_metric): """Test score_all function with List of np.ndarray inputs.""" hyps = [np.random.rand(768) for _ in range(5)] @@ -319,7 +325,7 @@ def test_score_all_list_of_ndarray_input(cosine_metric): scores = cosine_metric.score_all(hyps, refs) assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" def test_device_handling(cosine_metric): From 0e54bf99df1b5b2d832b7a46504c9b02bbdb5a63 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:52:25 -0500 Subject: [PATCH 22/27] CDL: some requested changes. Remove redundant variable, remove unused dtype arg, rename set_device, etc --- .../metrics/embedding_distance_metric.py | 43 ++++--------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 29793c6..b1ca669 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -30,13 +30,11 @@ def __init__( self, kind: ValidDistanceKinds = "cosine", device: Union[torch.device, str] = None, - dtype=torch.float32, + dtype=None, ): """ - Initialize the embedding distance metric. - Args: - kind (ValidDistanceKinds): The type of distance metric. + kind (ValidDistanceKinds): The type of distance metric, e.g. "cosine", or "euclidean". device (Union[torch.device, str]): The device to use for computation. If None, automatically detects. 
dtype (torch.dtype): The data type to use for tensors. @@ -52,6 +50,8 @@ def __init__( if dtype is None: dtype = torch.get_default_dtype() + self.dtype = dtype + # Dispatch table for metric computations self._metric_dispatch = { "cosine": self.cosine_distances, @@ -60,9 +60,7 @@ def __init__( "manhattan": self.manhattan_distances, } - self.dtype = dtype - - def set_device(self, device: Union[torch.device, str]) -> None: + def to(self, device: Union[torch.device, str]) -> None: """ Explicitly set the device used for tensors. @@ -72,35 +70,16 @@ def set_device(self, device: Union[torch.device, str]) -> None: self.device = torch.device(device) logger.info(f"Device set to: {self.device}") - def _to_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: - """ - Convert input data to a tensor on the specified device. - - Args: - data (TensorConvertableType: The input data to convert. - dtype (torch.dtype): The data type for the tensor. - - Returns: - Tensor: Tensor representation of the data on the specified device. - """ - if dtype is None: - dtype = self.dtype - return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - - def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: """ Convert input data to a batch tensor on the specified device. Args: data (TensorConvertableType): The input data to convert. - dtype (torch.dtype): The data type for the tensor. Returns: Tensor: Batch tensor representation of the data on the specified device. """ - if dtype is None: - dtype = self.dtype - # better performance this way, see https://github.com/pytorch/pytorch/issues/13918 if isinstance(data, list) and all(isinstance(x, np.ndarray) for x in data): data = np.asanyarray(data) @@ -110,7 +89,7 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch data = torch.stack(data) - return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) + return st_util._convert_to_batch_tensor(data).to(device=self.device) def score( self, @@ -128,7 +107,6 @@ def score( Number: The calculated distance. 
""" - return self.score_all(hypothesis, reference).item() def score_all( @@ -163,12 +141,7 @@ def score_all( except RuntimeError as e: raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e - if self.kind not in self._metric_dispatch: - logger.error(f"Unsupported distance metric: {self.kind}") - raise ValueError(f"Unsupported distance metric: {self.kind}") - - distance_matrix = self._metric_dispatch[self.kind](hypotheses, references) - return distance_matrix + return self._metric_dispatch[self.kind](hypotheses, references) def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ From cb45301b1b8d8cbea654e6c90b197eca9a973ecb Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:51:44 -0500 Subject: [PATCH 23/27] Add distance_matrix shape checker fixture --- pose_evaluation/metrics/conftest.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py index c0f44f7..c04f587 100644 --- a/pose_evaluation/metrics/conftest.py +++ b/pose_evaluation/metrics/conftest.py @@ -17,9 +17,25 @@ def clean_test_artifacts(): # (Optional) You can add cleanup logic here to run after the session if needed +@pytest.fixture(name="distance_matrix_shape_checker") +def fixture_distance_matrix_shape_checker() -> Callable[[torch.Tensor, torch.Tensor], None]: + def _check_shape(hyp_count: int, ref_count: int, distance_matrix: torch.Tensor): + + expected_shape = torch.Size([hyp_count, ref_count]) + assert ( + distance_matrix.shape == expected_shape + ), f"For M={hyp_count} hypotheses, N={ref_count} references, Distance Matrix should be MxN={expected_shape}. Instead, received {distance_matrix.shape}" + + return _check_shape + + @pytest.fixture(name="distance_range_checker") def fixture_distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: - def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: + def _check_range( + distances: Union[torch.Tensor, np.ndarray], + min_val: float = 0, + max_val: float = 2, + ) -> None: max_distance = distances.max().item() min_distance = distances.min().item() From 000e3468e5c3d33bdf5168627b4068d6ddf576f3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:25:54 -0500 Subject: [PATCH 24/27] Various pull request changes including an ndim assertion, use of distance_matrix_shape_checker, removing redundant args documentation, --- .../metrics/embedding_distance_metric.py | 21 +- .../metrics/test_embedding_distance_metric.py | 214 ++++++++++++------ 2 files changed, 155 insertions(+), 80 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index b1ca669..2c165b0 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -69,14 +69,12 @@ def to(self, device: Union[torch.device, str]) -> None: """ self.device = torch.device(device) logger.info(f"Device set to: {self.device}") + return self def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: """ Convert input data to a batch tensor on the specified device. - Args: - data (TensorConvertableType): The input data to convert. 
- Returns: Tensor: Batch tensor representation of the data on the specified device. """ @@ -89,7 +87,7 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch data = torch.stack(data) - return st_util._convert_to_batch_tensor(data).to(device=self.device) + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=self.dtype) def score( self, @@ -99,10 +97,6 @@ def score( """ Compute the distance between two embeddings. - Args: - hypothesis (TensorConvertableType): A single embedding vector. - reference (TensorConvertableType): Another single embedding vector. - Returns: Number: The calculated distance. @@ -120,13 +114,6 @@ def score_all( Expects 2D inputs. If not already Tensors, will attempt to convert them. - Args: - hypotheses (Union[List[TensorConvertableType], Tensor]): - List of hypothesis embeddings or a single tensor. - references (Union[List[TensorConvertableType], Tensor]): - List of reference embeddings or a single tensor. - progress_bar (bool): Whether to display a progress bar. (not implemented yet) - Returns: Tensor: Distance matrix. Row `i` is the distances of `hypotheses[i]` to all rows of `references`. Shape is be NxM, where N is the number of hypotheses, and M is the number of references @@ -141,6 +128,10 @@ def score_all( except RuntimeError as e: raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e + assert ( + hypotheses.ndim == 2 and references.ndim == 2 + ), f"score_all received non-2D input: hypotheses: {hypotheses.shape}, references: {references.shape}" + return self._metric_dispatch[self.kind](hypotheses, references) def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 479e4b2..0f08bb9 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,3 +1,4 @@ +import itertools from pathlib import Path from typing import List, Callable, Tuple import logging @@ -7,9 +8,6 @@ import torch from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -# no need to import. https://github.com/pylint-dev/pylint/issues/3493#issuecomment-616761997 -# from pose_evaluation.metrics.conftest import distance_range_checker - # TODO: many fixes. Including the fact that we test cosine but not Euclidean, @@ -37,14 +35,54 @@ def fixture_embeddings() -> List[torch.Tensor]: return [random_tensor(768) for _ in range(5)] +def test_shape_checker(distance_matrix_shape_checker): + emb_len = 768 + hyps = torch.rand((3, emb_len)) + refs = torch.rand((4, emb_len)) + + m = hyps.shape[0] + n = refs.shape[0] + + wrong_shapes = [1, m, n, emb_len] + wrong_shapes.extend(list(itertools.permutations(wrong_shapes, r=2))) + for wrong_shape in wrong_shapes: + if wrong_shape != (m, n): + distances_with_wrong_shape = torch.rand(wrong_shape) + with pytest.raises(AssertionError, match="Distance Matrix should be MxN"): + # This SHOULD happen. If this doesn't happen then the checker itself is not working. 
+ distance_matrix_shape_checker(m, n, distances_with_wrong_shape) + + def call_and_call_with_inputs_swapped( - hyp: torch.Tensor, ref: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] + hyps: torch.Tensor, refs: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] ) -> Tuple[torch.Tensor, torch.Tensor]: - score1 = scoring_function(hyp, ref) - score2 = scoring_function(ref, hyp) + score1 = scoring_function(hyps, refs) + score2 = scoring_function(refs, hyps) return score1, score2 +def call_with_both_input_orders_and_do_standard_checks( + hyps: torch.Tensor, + refs: torch.Tensor, + scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], + distance_range_checker, + distance_matrix_shape_checker, + expected_shape: Tuple = None, +): + scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, scoring_function) + if expected_shape is not None: + m, n = expected_shape + else: + m = hyps.shape[0] + n = refs.shape[0] + distance_range_checker(scores, min_val=0, max_val=2) + distance_range_checker(scores2, min_val=0, max_val=2) + distance_matrix_shape_checker(m, n, scores) + distance_matrix_shape_checker(n, m, scores2) + + return scores, scores2 + + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" @@ -165,8 +203,6 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: emb1 = random_tensor(768) emb2 = random_tensor(768) - # score1 = cosine_metric.score(emb1, emb2) - # score2 = cosine_metric.score(emb2, emb1) score1, score2 = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) logger.info(f"Score 1: {score1}, Score 2: {score2}") @@ -196,44 +232,46 @@ def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) def test_score_all_against_self( - cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker + cosine_metric: EmbeddingDistanceMetric, + embeddings: List[torch.Tensor], + distance_range_checker, + distance_matrix_shape_checker, ) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) - assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + distance_matrix_shape_checker(len(embeddings), len(embeddings), scores) + distance_range_checker(scores, min_val=0, max_val=2) + assert torch.allclose( torch.diagonal(scores), torch.zeros(len(embeddings), dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
- distance_range_checker(scores, min_val=0, max_val=2) + logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") -def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): +def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker, distance_matrix_shape_checker): hyps = [np.random.rand(768) for _ in range(3)] refs = np.random.rand(768) - # scores = cosine_metric.score_all(hyps, refs) - scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) + expected_shape = (len(hyps), 1) - assert scores.shape == (len(hyps), 1) - assert scores2.shape == (1, len(hyps)) - distance_range_checker(scores, min_val=0, max_val=2) + call_with_both_input_orders_and_do_standard_checks( + hyps, refs, cosine_metric.score_all, distance_range_checker, distance_matrix_shape_checker, expected_shape + ) -def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): +def test_score_all_with_different_sizes(cosine_metric, distance_range_checker, distance_matrix_shape_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] refs = [np.random.rand(768) for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert scores.shape == ( - len(hyps), - len(refs), - ), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " - distance_range_checker(scores, min_val=0, max_val=2) + expected_shape = (len(hyps), len(refs)) + call_with_both_input_orders_and_do_standard_checks( + hyps, refs, cosine_metric.score_all, distance_range_checker, distance_matrix_shape_checker, expected_shape + ) -def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) ref = random_tensor(769) @@ -243,10 +281,10 @@ def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistan call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) -def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) for ref in range(-2, 2): - with pytest.raises(IndexError): + with pytest.raises(AssertionError, match="score_all received non-2D input"): # TODO: we should probably raise a more descriptive/helpful error/ ValueError # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) @@ -254,18 +292,23 @@ def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> logger.info("Invalid input successfully crashed as expected.") -def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_string(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = "invalid input" + ref = random_tensor(768) + with pytest.raises(TypeError, match="invalid data type 'str'"): + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + + +def test_score_with_invalid_input_bool(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - invalid_inputs = ["invalid_input", True] + invalid_inputs = [True, False] for ref in invalid_inputs: - with pytest.raises((TypeError, IndexError)): - # TypeError: 
new(): invalid data type 'str' - # but True gives IndexError - # TODO: better TypeError, more descriptive + with pytest.raises(AssertionError, match="score_all received non-2D input"): call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + # TODO: why does a bool make it all the way there? -def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) invalid_inputs = ["", [], {}, tuple(), set()] @@ -299,33 +342,61 @@ def test_score_ndarray_input(cosine_metric): assert isinstance(score, float), "Output should be a float." -def test_score_all_list_of_lists_of_floats(cosine_metric): +def test_score_all_list_of_lists_of_floats( + cosine_metric, + distance_range_checker, + distance_matrix_shape_checker, +): """Does a 2D list of floats work?""" hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) -def test_score_all_list_of_tensor_input(cosine_metric): +def test_score_all_list_of_tensor_input(cosine_metric, distance_range_checker, distance_matrix_shape_checker): """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] refs = [torch.rand(768) for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) -def test_score_all_list_of_ndarray_input(cosine_metric): + +def test_score_all_list_of_ndarray_input( + cosine_metric, + distance_range_checker, + distance_matrix_shape_checker, +): """Test score_all function with List of np.ndarray inputs.""" hyps = [np.random.rand(768) for _ in range(5)] refs = [np.random.rand(768) for _ in range(5)] - - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) def test_device_handling(cosine_metric): @@ -337,20 +408,39 @@ def test_device_handling(cosine_metric): assert cosine_metric.device.type == "cpu", "Should use 'cpu' when CUDA is unavailable." 
-def test_mixed_input(cosine_metric): +def test_score_mixed_input_types(cosine_metric): """Test score function with mixed input types.""" emb1 = np.random.rand(768) emb2 = torch.rand(768) - score = cosine_metric.score(emb1, emb2) - assert isinstance(score, float), "Output should be a float." + all_scores = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) + assert all([isinstance(score, float) for score in all_scores]), "Output should be a float." + + +def test_score_all_mixed_input_types(cosine_metric, distance_range_checker, distance_matrix_shape_checker): + """Test score function with mixed input types.""" + hyps = np.random.rand(5, 768) + refs = torch.rand(3, 768) + + expected_shape = (5, 3) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) @pytest.mark.parametrize("num_points, dim", [(16, 2)]) -def test_unit_circle_points(cosine_metric, num_points, dim): +def test_unit_circle_points(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_unit_circle_points(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances=distances, matrix_name="Unit Circle", num_points=num_points, dim=dim) + distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(20, 2)]) @@ -375,35 +465,29 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): @pytest.mark.parametrize("num_points, dim", [(20, 2)]) -def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range_checker): +def test_orthogonal_rows_in_pairs( + cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker +): embeddings = generate_orthogonal_rows_in_pairs(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(10, 5)]) -def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): +def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_ones_tensor(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "ones_tensor", num_points, dim) distance_range_checker(distances, min_val=0, max_val=0) # Expect all distances to be 0 + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(15, 15)]) # dim should be equal to num_points for identity matrix -def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker): +def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_identity_matrix_rows(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "identity_matrix_rows", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range - - -# def 
test_progress_bar(cosine_metric): -# """Test score_all with progress_bar argument.""" -# hyps = [np.random.rand(768) for _ in range(5)] -# refs = [np.random.rand(768) for _ in range(5)] - -# # Disable progress bar -# scores = cosine_metric.score_all(hyps, refs, progress_bar=False) -# assert len(scores) == len(hyps), "Output row count mismatch with progress_bar=False." -# assert len(scores[0]) == len(refs), "Output column count mismatch with progress_bar=False." + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) From dca700b2de54d9e7b086e92940b0ff33ae02aef7 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:27:10 -0500 Subject: [PATCH 25/27] CDL: change test_artifacts_dir name to 'temp' --- pose_evaluation/metrics/test_embedding_distance_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 0f08bb9..ab275c6 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -87,7 +87,7 @@ def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" distances = distances.cpu() - test_artifacts_dir = Path(__file__).parent / "tests" + test_artifacts_dir = Path(__file__).parent / "temp" output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" np.savetxt(output_path, distances.numpy(), delimiter=",", fmt="%.4f") print(f"Distance matrix saved to {output_path}") From d9eb0b2cb9127ec1833c27651aaaa8cb1840a7d3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:28:03 -0500 Subject: [PATCH 26/27] Update gitignore --- pose_evaluation/metrics/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/.gitignore b/pose_evaluation/metrics/.gitignore index 3d0dbe4..cd78447 100644 --- a/pose_evaluation/metrics/.gitignore +++ b/pose_evaluation/metrics/.gitignore @@ -1 +1 @@ -tests/ \ No newline at end of file +temp/ \ No newline at end of file From 73ebd75a7566c737b02ec7a36e27e09ec0a18c5b Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:31:33 -0500 Subject: [PATCH 27/27] Took out one more redundant 'args' comment --- pose_evaluation/metrics/embedding_distance_metric.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 2c165b0..6044faa 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -63,9 +63,6 @@ def __init__( def to(self, device: Union[torch.device, str]) -> None: """ Explicitly set the device used for tensors. - - Args: - device (Union[torch.device, str]): The device to use for computation. """ self.device = torch.device(device) logger.info(f"Device set to: {self.device}")
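
For reference, a minimal usage sketch of the EmbeddingDistanceMetric API as it stands after PATCH 27/27. This is an illustration only, not part of the patches: it assumes pose_evaluation is installed with the embedding_distance_metric module shown above, and the embedding size (768) and the (len(hyps), len(refs)) distance-matrix shape follow the tests in test_embedding_distance_metric.py.

import numpy as np

from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric

# kind can be "cosine", "euclidean", "manhattan", or "dot"; the device is auto-detected,
# or can be set explicitly with metric.to("cpu") / metric.to("cuda").
metric = EmbeddingDistanceMetric(kind="cosine")

# Dummy 768-dim embeddings (e.g. precomputed SignCLIP vectors loaded from .npy files).
hyps = [np.random.rand(768) for _ in range(3)]
refs = [np.random.rand(768) for _ in range(5)]

# Pairwise distances: rows are hypotheses, columns are references.
distances = metric.score_all(hyps, refs)
print(distances.shape)  # torch.Size([3, 5]); cosine distances fall in [0, 2]

# Single pair: returns a plain Python float.
print(metric.score(hyps[0], refs[0]))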