From 0ee64bc6d84ada9a6033ae4427e5caaffec1405d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 14 Nov 2024 16:50:38 -0500 Subject: [PATCH 01/27] CDL: initial pass at a signclip-based metric, I cannot embed on the fly but I can load in the .npy files --- pose_evaluation/metrics/signclip_metric.py | 102 +++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 pose_evaluation/metrics/signclip_metric.py diff --git a/pose_evaluation/metrics/signclip_metric.py b/pose_evaluation/metrics/signclip_metric.py new file mode 100644 index 0000000..a82d23d --- /dev/null +++ b/pose_evaluation/metrics/signclip_metric.py @@ -0,0 +1,102 @@ +from pose_evaluation.metrics.base_pose_metric import PoseMetric +from pose_format import Pose +from pathlib import Path +from typing import Literal +import numpy as np +import itertools +from tqdm import tqdm +from scipy.spatial.distance import cosine +import math +import pandas as pd + +class SignCLIPEmbeddingDistanceMetric(PoseMetric): + def __init__(self, + model_id="baseline_temporal", + kind: Literal["cosine", "l2"] = "cosine", + higher_is_better: bool = False): + super().__init__(name=f"SignCLIPDistanceMetric {kind}", higher_is_better=False) + + self.kind = kind + + def load_precalculated_embedding(self, saved_embedding_path:Path) -> np.ndarray: + + embedding = np.load(saved_embedding_path) # typically (1, 768) + if embedding.ndim == 2 and embedding.shape[0] == 1: + embedding = embedding[0] # new shape:(768, ) + return embedding + + def embed_pose(self, pose:Pose)->np.ndarray: + # blocked by the fact that embedding with SignCLIP is nontrivial. + # See https://github.com/sign-language-processing/pose-evaluation/issues/1 + raise NotImplementedError + + def get_embedding(self, input: Path|np.ndarray|Pose)->np.ndarray: + if isinstance(input, np.ndarray): + # often (1, 768) + if input.ndim == 2 and input.shape[0] == 1: + input = input[0] # new shape:(768, ) + elif isinstance(input, Path): + input = self.load_precalculated_embedding(input) + elif isinstance(input, Pose): + input = self.embed_pose(pose=input) + + return input + + + def score(self, hypothesis: Path|np.ndarray|Pose, reference: Path|np.ndarray|Pose) -> float: + hypothesis = self.get_embedding(hypothesis) + reference = self.get_embedding(reference) + + return cosine(hypothesis, reference) + + + + + + +if __name__ =="__main__": + metric = SignCLIPEmbeddingDistanceMetric() + + # embeddings_path = Path.cwd()/"ASL_Citizen_curated_sample_with_embeddings_from_all_models"/"embeddings" + embeddings_path = Path("/media/aqsa/Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex") + embeddings_files = list(embeddings_path.glob("*.npy")) + # embeddings= [metric.load_precalculated_embedding(npy_file) for npy_file in embeddings_path.glob("*.npy")] + + print(f"Found {len(embeddings_files)} embeddings") + + + # loaded = metric.load_precalculated_embedding('pose_evaluation/metrics/test_poses/241481900450897-HOUSE-using-model-sem-lex.npy') + + # print(f"That makes for {len(combinations)} combinations") + i = 0 + entries =[] + out_file = Path.cwd()/"signclip_scores.csv" + pd.DataFrame(columns=["hyp","ref","score"]).to_csv(out_file, index=False) + for embedding, other_embedding in tqdm(itertools.combinations(embeddings_files, 2), + total=math.comb(len(embeddings_files), 2), + desc=f"Calculating scores, writing to {out_file}"): + score = metric.score(embedding, other_embedding) + entry = { + "hyp":embedding.stem.split("-")[0], # e.g. 
0031311305138936874-FATHER-using-model-sem-lex.npy becomes 0031311305138936874 + "ref":other_embedding.stem.split("-")[0], + "score":score + } + entries.append(entry) + i = i+1 + if i%1000 == 0: + # print(f"Collected {len(entries)} scores. Writing to {out_file} resetting") + df = pd.DataFrame.from_dict(entries) + df.to_csv(out_file, mode="a", index=False, header=False) + entries = [] + + + # i = i+1 + # if i % 1000 == 0: + # print(i) + # exit() + # print(f"Score between {embedding.stem} and {other_embedding.stem}: {score}") + + + + + From e3241fbd18ffe3a729cb8b05a3342c975a52aaf3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:41:09 -0500 Subject: [PATCH 02/27] initial attempt at an evaluation script --- .../evaluation/evaluate_signclip.py | 114 ++++++++++++++++++ 1 file changed, 114 insertions(+) create mode 100644 pose_evaluation/evaluation/evaluate_signclip.py diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py new file mode 100644 index 0000000..a67b684 --- /dev/null +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -0,0 +1,114 @@ +import argparse +from pathlib import Path +import pandas as pd +import numpy as np +from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric +from tqdm import tqdm + +def load_embedding(file_path: Path) -> np.ndarray: + """ + Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. + + Args: + file_path (Path): Path to the .npy file. + + Returns: + np.ndarray: The embedding with shape (768,). + """ + embedding = np.load(file_path) + if embedding.ndim == 2 and embedding.shape[0] == 1: + embedding = embedding[0] # Reduce shape from (1, 768) to (768,) + return embedding + +def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.DataFrame: + """ + Match .npy embeddings to the corresponding glosses based on the numerical ID. + + Args: + emb_dir (Path): Directory containing the .npy files. + split_df (pd.DataFrame): DataFrame containing the split file with the "Video file" column. + + Returns: + pd.DataFrame: Updated DataFrame with an additional column for embeddings. + """ + # Map video file IDs to embeddings + embeddings_map = {} + for npy_file in emb_dir.glob("*.npy"): + numerical_id = npy_file.stem.split("-")[0] + embeddings_map[numerical_id] = npy_file + + # Match embeddings to glosses + embeddings = [] + for _, row in split_df.iterrows(): + video_file = row["Video file"] + numerical_id = video_file.split("-")[0] + npy_file = embeddings_map.get(numerical_id) + + if npy_file is not None: + embeddings.append(load_embedding(npy_file)) + else: + embeddings.append(None) # Placeholder if no matching file + + split_df["embedding"] = embeddings + return split_df + +def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): + """ + Evaluate SignCLIP embeddings using score_all. + + Args: + emb_dir (Path): Directory containing .npy embeddings. + split_file (Path): Path to the split CSV file. + kind (str): Metric type ("cosine" or "l2"). Default is "cosine". 
+ """ + # Load split file + split_df = pd.read_csv(split_file) + + # Match embeddings + split_df = match_embeddings_to_glosses(emb_dir, split_df) + + # Filter out rows without embeddings + valid_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) + embeddings = valid_df["embedding"].tolist() + + # Initialize metric + metric = SignCLIPEmbeddingDistanceMetric(kind=kind) + + # Compute all pairwise scores + print(f"Computing {kind} distances for {len(embeddings)} embeddings...") + + scores = metric.score_all(embeddings, embeddings) + + # Save scores to a CSV file + output_file = emb_dir / "signclip_scores.csv" + results = [] + for i, hyp_row in valid_df.iterrows(): + for j, ref_row in valid_df.iterrows(): + results.append({ + "hyp": hyp_row["Video file"], + "ref": ref_row["Video file"], + "score": scores[i, j] + }) + results_df = pd.DataFrame(results) + results_df.to_csv(output_file, index=False) + print(f"Scores saved to {output_file}") + +def main(): + parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") + parser.add_argument( + "emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files" + ) + parser.add_argument( + "--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)" + ) + parser.add_argument( + "--kind", type=str, choices=["cosine", "l2"], default="cosine", + help="Type of distance metric to use (default: cosine)" + ) + args = parser.parse_args() + + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split, kind=args.kind) + +if __name__ == "__main__": + main() + print(f"THIS SCRIPT NEEDS TESTING") From 41f75ed1a52af3887cb3d126f6ab766dc3baadc3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:41:33 -0500 Subject: [PATCH 03/27] initial attempt at pytest for signclip metric --- .../metrics/test_signclip_distance_metric.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 pose_evaluation/metrics/test_signclip_distance_metric.py diff --git a/pose_evaluation/metrics/test_signclip_distance_metric.py b/pose_evaluation/metrics/test_signclip_distance_metric.py new file mode 100644 index 0000000..54f62cf --- /dev/null +++ b/pose_evaluation/metrics/test_signclip_distance_metric.py @@ -0,0 +1,65 @@ +import pytest +import numpy as np +from pose_format import Pose +from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric + +# Mock a simple Pose object for compatibility (if not already available) +class MockPose: + def __init__(self, data): + self.data = data + +@pytest.fixture +def metric(): + """Fixture to create a SignCLIPEmbeddingDistanceMetric instance.""" + return SignCLIPEmbeddingDistanceMetric(kind="cosine") + +@pytest.fixture +def embeddings(): + """Fixture to create dummy embeddings for testing.""" + # Generate 5 random 768-dimensional embeddings + return [np.random.rand(768) for _ in range(5)] + +def test_score_symmetric(metric): + """Test that the metric is symmetric for cosine distance.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + score1 = metric.score(emb1, emb2) + score2 = metric.score(emb2, emb1) + + assert pytest.approx(score1) == score2, "Score should be symmetric." 
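+    # Cosine distance is symmetric by construction (the dot product commutes), so any
+    # asymmetry observed here would point to a preprocessing bug rather than to the metric itself.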
+ +def test_score_with_path(metric, tmp_path): + """Test that score works with embeddings loaded from paths.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + # Save embeddings to temporary files + file1 = tmp_path / "emb1.npy" + file2 = tmp_path / "emb2.npy" + np.save(file1, emb1) + np.save(file2, emb2) + + score = metric.score(file1, file2) + expected_score = metric.score(emb1, emb2) + + assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." + +def test_score_all(metric, embeddings): + """Test the score_all function.""" + scores = metric.score_all(embeddings, embeddings) + assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + assert np.allclose(scores.diagonal(), 0), "Self-comparison scores should be zero for cosine distance." + +def test_score_all_with_different_sizes(metric): + """Test score_all with different sizes for hypotheses and references.""" + hyps = [np.random.rand(768) for _ in range(3)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = metric.score_all(hyps, refs) + assert scores.shape == (len(hyps), len(refs)), "Output shape mismatch for score_all with different sizes." + +def test_score_all_edge_case(metric): + """Test score_all with empty inputs.""" + scores = metric.score_all([], []) + assert scores.size == 0, "Score_all should return an empty array for empty inputs." From da881d552a0d654d7667cb5a9ea02a79f1169c7e Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:42:55 -0500 Subject: [PATCH 04/27] SignClip distances are just embedding distances. Make a base embedding metric --- .../metrics/base_embedding_metric.py | 23 ++++ .../metrics/signclip_distance_metric.py | 51 +++++++++ pose_evaluation/metrics/signclip_metric.py | 102 ------------------ 3 files changed, 74 insertions(+), 102 deletions(-) create mode 100644 pose_evaluation/metrics/base_embedding_metric.py create mode 100644 pose_evaluation/metrics/signclip_distance_metric.py delete mode 100644 pose_evaluation/metrics/signclip_metric.py diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py new file mode 100644 index 0000000..355fc2a --- /dev/null +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -0,0 +1,23 @@ +from numpy import ndarray +import torch +import torch.nn.functional as F +from pose_evaluation.metrics.base import BaseMetric + +class NumpyArrayEmbeddingMetric(BaseMetric[ndarray]): + def __init__(self, name: str, higher_is_better: bool = True, kind: str = "cosine", device: torch.device | str = None): + # Call the base class __init__ to initialize 'name' and 'higher_is_better' + super().__init__(name, higher_is_better) + + self.kind = kind + + if device is None: + if torch.cuda.is_available(): + self.cuda() + else: + self.device = torch.device(device) if isinstance(device, str) else device + + def score(self, hypothesis: ndarray, reference: ndarray) -> float: + if self.kind == "cosine": + return F.cosine_similarity(hypothesis, reference) + elif self.kind == "l2": + return F.pairwise_distance(hypothesis, reference, p=2) diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py new file mode 100644 index 0000000..3d0fd31 --- /dev/null +++ b/pose_evaluation/metrics/signclip_distance_metric.py @@ -0,0 +1,51 @@ +from pose_evaluation.metrics.base_embedding_metric import NumpyArrayEmbeddingMetric +from typing 
import Literal +import numpy as np +from tqdm import tqdm +from scipy.spatial.distance import cosine +import torch +import torch.nn.functional as F + +class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): + def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): + """ + Initializes the metric with the specified distance type and device. + + Args: + kind (str): The type of distance metric, either 'cosine' or 'l2'. + device (torch.device | str): The device to use ('cuda' or 'cpu'). + """ + self.kind = kind + self.device = torch.device(device) if isinstance(device, str) else device + + + + def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: + """ + Computes the pairwise distance matrix for the provided embeddings. + + Args: + embeddings (torch.Tensor): A 2D tensor of shape (N, D), where N is the number + of embeddings and D is the feature dimension. + + Returns: + torch.Tensor: A 2D tensor of shape (N, N) containing pairwise distances. + """ + # Move embeddings to the specified device + embeddings = embeddings.to(self.device) + + if self.kind == "cosine": + # Normalize embeddings to unit norm + embeddings = F.normalize(embeddings, p=2, dim=1) + # Compute pairwise cosine similarity + similarity_matrix = torch.matmul(embeddings, embeddings.T) # Shape: (N, N) + distance_matrix = 1 - similarity_matrix # Cosine distance = 1 - cosine similarity + elif self.kind == "l2": + # Compute pairwise L2 distance using broadcasting + diff = embeddings[:, None, :] - embeddings[None, :, :] # Shape: (N, N, D) + distance_matrix = torch.norm(diff, dim=2) # Shape: (N, N) + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + return distance_matrix + diff --git a/pose_evaluation/metrics/signclip_metric.py b/pose_evaluation/metrics/signclip_metric.py deleted file mode 100644 index a82d23d..0000000 --- a/pose_evaluation/metrics/signclip_metric.py +++ /dev/null @@ -1,102 +0,0 @@ -from pose_evaluation.metrics.base_pose_metric import PoseMetric -from pose_format import Pose -from pathlib import Path -from typing import Literal -import numpy as np -import itertools -from tqdm import tqdm -from scipy.spatial.distance import cosine -import math -import pandas as pd - -class SignCLIPEmbeddingDistanceMetric(PoseMetric): - def __init__(self, - model_id="baseline_temporal", - kind: Literal["cosine", "l2"] = "cosine", - higher_is_better: bool = False): - super().__init__(name=f"SignCLIPDistanceMetric {kind}", higher_is_better=False) - - self.kind = kind - - def load_precalculated_embedding(self, saved_embedding_path:Path) -> np.ndarray: - - embedding = np.load(saved_embedding_path) # typically (1, 768) - if embedding.ndim == 2 and embedding.shape[0] == 1: - embedding = embedding[0] # new shape:(768, ) - return embedding - - def embed_pose(self, pose:Pose)->np.ndarray: - # blocked by the fact that embedding with SignCLIP is nontrivial. 
- # See https://github.com/sign-language-processing/pose-evaluation/issues/1 - raise NotImplementedError - - def get_embedding(self, input: Path|np.ndarray|Pose)->np.ndarray: - if isinstance(input, np.ndarray): - # often (1, 768) - if input.ndim == 2 and input.shape[0] == 1: - input = input[0] # new shape:(768, ) - elif isinstance(input, Path): - input = self.load_precalculated_embedding(input) - elif isinstance(input, Pose): - input = self.embed_pose(pose=input) - - return input - - - def score(self, hypothesis: Path|np.ndarray|Pose, reference: Path|np.ndarray|Pose) -> float: - hypothesis = self.get_embedding(hypothesis) - reference = self.get_embedding(reference) - - return cosine(hypothesis, reference) - - - - - - -if __name__ =="__main__": - metric = SignCLIPEmbeddingDistanceMetric() - - # embeddings_path = Path.cwd()/"ASL_Citizen_curated_sample_with_embeddings_from_all_models"/"embeddings" - embeddings_path = Path("/media/aqsa/Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex") - embeddings_files = list(embeddings_path.glob("*.npy")) - # embeddings= [metric.load_precalculated_embedding(npy_file) for npy_file in embeddings_path.glob("*.npy")] - - print(f"Found {len(embeddings_files)} embeddings") - - - # loaded = metric.load_precalculated_embedding('pose_evaluation/metrics/test_poses/241481900450897-HOUSE-using-model-sem-lex.npy') - - # print(f"That makes for {len(combinations)} combinations") - i = 0 - entries =[] - out_file = Path.cwd()/"signclip_scores.csv" - pd.DataFrame(columns=["hyp","ref","score"]).to_csv(out_file, index=False) - for embedding, other_embedding in tqdm(itertools.combinations(embeddings_files, 2), - total=math.comb(len(embeddings_files), 2), - desc=f"Calculating scores, writing to {out_file}"): - score = metric.score(embedding, other_embedding) - entry = { - "hyp":embedding.stem.split("-")[0], # e.g. 0031311305138936874-FATHER-using-model-sem-lex.npy becomes 0031311305138936874 - "ref":other_embedding.stem.split("-")[0], - "score":score - } - entries.append(entry) - i = i+1 - if i%1000 == 0: - # print(f"Collected {len(entries)} scores. Writing to {out_file} resetting") - df = pd.DataFrame.from_dict(entries) - df.to_csv(out_file, mode="a", index=False, header=False) - entries = [] - - - # i = i+1 - # if i % 1000 == 0: - # print(i) - # exit() - # print(f"Score between {embedding.stem} and {other_embedding.stem}: {score}") - - - - - From a89aab883bfba18566993dc5e0efb88257f83ebd Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 21 Nov 2024 17:05:24 -0500 Subject: [PATCH 05/27] CDL: Got some pytest tests running! 
--- pose_evaluation/metrics/.gitignore | 1 + .../metrics/base_embedding_metric.py | 25 +- pose_evaluation/metrics/conftest.py | 35 ++ .../metrics/embedding_distance_metric.py | 104 ++++++ .../metrics/test_embedding_distance_metric.py | 313 ++++++++++++++++++ .../metrics/test_signclip_distance_metric.py | 65 ---- 6 files changed, 458 insertions(+), 85 deletions(-) create mode 100644 pose_evaluation/metrics/.gitignore create mode 100644 pose_evaluation/metrics/conftest.py create mode 100644 pose_evaluation/metrics/embedding_distance_metric.py create mode 100644 pose_evaluation/metrics/test_embedding_distance_metric.py delete mode 100644 pose_evaluation/metrics/test_signclip_distance_metric.py diff --git a/pose_evaluation/metrics/.gitignore b/pose_evaluation/metrics/.gitignore new file mode 100644 index 0000000..3d0dbe4 --- /dev/null +++ b/pose_evaluation/metrics/.gitignore @@ -0,0 +1 @@ +tests/ \ No newline at end of file diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 355fc2a..83ad763 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -1,23 +1,8 @@ -from numpy import ndarray -import torch -import torch.nn.functional as F +from typing import TypeVar from pose_evaluation.metrics.base import BaseMetric +import torch -class NumpyArrayEmbeddingMetric(BaseMetric[ndarray]): - def __init__(self, name: str, higher_is_better: bool = True, kind: str = "cosine", device: torch.device | str = None): - # Call the base class __init__ to initialize 'name' and 'higher_is_better' - super().__init__(name, higher_is_better) - - self.kind = kind - - if device is None: - if torch.cuda.is_available(): - self.cuda() - else: - self.device = torch.device(device) if isinstance(device, str) else device +# Define a type alias for embeddings (e.g., torch.Tensor) +Embedding = TypeVar("Embedding", bound=torch.Tensor) - def score(self, hypothesis: ndarray, reference: ndarray) -> float: - if self.kind == "cosine": - return F.cosine_similarity(hypothesis, reference) - elif self.kind == "l2": - return F.pairwise_distance(hypothesis, reference, p=2) +EmbeddingMetric = BaseMetric[Embedding] \ No newline at end of file diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py new file mode 100644 index 0000000..4b1129c --- /dev/null +++ b/pose_evaluation/metrics/conftest.py @@ -0,0 +1,35 @@ +# conftest.py +import pytest +import shutil +from pathlib import Path +from typing import Callable, Union +import torch +import numpy as np + +@pytest.fixture(scope="session", autouse=True) +def clean_test_artifacts(): + """Fixture to clean up test artifacts before each test session.""" + test_artifacts_dir = Path(__file__).parent / "tests" # Using Path + if test_artifacts_dir.exists(): + shutil.rmtree(test_artifacts_dir) # shutil.rmtree still works with Path + test_artifacts_dir.mkdir(parents=True, exist_ok=True) # Using Path.mkdir + yield # This allows the test session to run + # (Optional) You can add cleanup logic here to run after the session if needed + + +# conftest.py +from typing import Callable, Union +import torch +import numpy as np + +@pytest.fixture +def distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: + def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: + max_distance = distances.max().item() + min_distance = distances.min().item() + + # Use np.isclose for 
comparisons with tolerance + assert np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val, f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, {max_val}]" + assert np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val, f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" + + return _check_range \ No newline at end of file diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py new file mode 100644 index 0000000..84d6df1 --- /dev/null +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -0,0 +1,104 @@ +from typing import Literal, Union, List +import torch +import torch.nn.functional as F +import numpy as np + +from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric + + +class EmbeddingDistanceMetric(EmbeddingMetric): + def __init__(self, kind: Literal["cosine", "l2"] = "cosine", device: Union[torch.device, str] = None): + """ + Initialize the embedding distance metric. + + Args: + kind (Literal["cosine", "l2"]): The type of distance metric. + device (torch.device | str): The device to use for computation. If None, automatically detects. + """ + super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) + self.kind = kind + if device is None: + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + else: + self.device = torch.device(device) if isinstance(device, str) else device + + def _to_tensor(self, data: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: + """ + Convert input to a PyTorch tensor if it is a NumPy array. + + Args: + data (np.ndarray | torch.Tensor): Input data. + + Returns: + torch.Tensor: Tensor on the correct device. + """ + if isinstance(data, np.ndarray): + data = torch.tensor(data, dtype=torch.float32) + return data.to(self.device) + + def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np.ndarray, torch.Tensor]) -> float: + """ + Compute the distance between two embeddings. + + Args: + hypothesis (np.ndarray | torch.Tensor): A single embedding vector. + reference (np.ndarray | torch.Tensor): Another single embedding vector. + + Returns: + float: The calculated distance. + """ + hypothesis = self._to_tensor(hypothesis) + reference = self._to_tensor(reference) + + if self.kind == "cosine": + # Normalize both embeddings to unit length + hypothesis = F.normalize(hypothesis, p=2, dim=0) + reference = F.normalize(reference, p=2, dim=0) + # Cosine similarity, converted to distance + similarity = torch.dot(hypothesis, reference).item() + return 1 - similarity + elif self.kind == "l2": + # L2 distance + return torch.norm(hypothesis - reference).item() + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + def score_all( + self, + hypotheses: List[Union[np.ndarray, torch.Tensor]], + references: List[Union[np.ndarray, torch.Tensor]], + progress_bar: bool = True, + ) -> torch.Tensor: + """ + Compute the pairwise distance between all hypotheses and references. Expects 2D inputs. + + Args: + hypotheses (list[np.ndarray | torch.Tensor]): List of hypothesis embeddings. + references (list[np.ndarray | torch.Tensor]): List of reference embeddings. + progress_bar (bool): Whether to display a progress bar. + + Returns: + torch.Tensor, distance matrix. 
Row i is the distances of hypotheses[i] to all rows of references + """ + # Convert inputs to tensors and stack + hypotheses = torch.stack([self._to_tensor(h) for h in hypotheses]) + references = torch.stack([self._to_tensor(r) for r in references]) + + if self.kind == "cosine": + # Normalize the tensors along the feature dimension (dim=1) + normalized_hypotheses = F.normalize(hypotheses, dim=1) + normalized_references = F.normalize(references, dim=1) + + # Calculate cosine similarity between all hypothesis-reference pairs + cosine_similarities = torch.matmul(normalized_hypotheses, normalized_references.T) + + # Convert cosine similarities to cosine distances + distance_matrix = 1 - cosine_similarities + elif self.kind == "l2": + # Use broadcasting to calculate pairwise L2 distances + diff = hypotheses[:, None, :] - references[None, :, :] + distance_matrix = torch.norm(diff, dim=2) + else: + raise ValueError(f"Unsupported distance metric: {self.kind}") + + return distance_matrix.cpu() diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py new file mode 100644 index 0000000..c352386 --- /dev/null +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -0,0 +1,313 @@ +import pytest +import numpy as np +import torch +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric +from pose_evaluation.metrics.conftest import distance_range_checker +import matplotlib.pyplot as plt +import logging +from typing import List +from pathlib import Path + +# TODO: many fixes. Including the fact that we test cosine but not Euclidean, + + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Device configuration for PyTorch +DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +@pytest.fixture +def cosine_metric(): + """Fixture to create an EmbeddingDistanceMetric instance.""" + return EmbeddingDistanceMetric(kind="cosine") + + +@pytest.fixture +def embeddings() -> List[torch.Tensor]: + """Fixture to create dummy embeddings for testing.""" + return [random_tensor(768) for _ in range(5)] + + +def save_and_plot_distances(distances, matrix_name, num_points, dim): + """Helper function to save distance matrix and plot distances.""" + test_artifacts_dir = Path(__file__).parent / "tests" + output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" + np.savetxt(output_path, distances.numpy(), delimiter=",", fmt="%.4f") + print(f"Distance matrix saved to {output_path}") + + # Generate plot + plt.figure(figsize=(10, 6)) + for i, row in enumerate(distances.numpy()): + plt.plot(row, label=f"Point {i}") + plt.title(f"Distance Matrix Rows ({matrix_name})") + plt.xlabel("Point Index") + plt.ylabel("Distance") + plt.legend() + plot_path = output_path.with_suffix(".png") + plt.savefig(plot_path) + print(f"Distances plot saved to {plot_path}") + plt.close() + +def random_tensor(size: int) -> torch.Tensor: + """Generate a random tensor on the appropriate device.""" + return torch.rand(size, dtype=torch.float32, device=DEVICE) + +def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: + angles = torch.linspace(0, 2 * np.pi, num_points + 1)[:-1] + x_coords = torch.cos(angles) + y_coords = torch.sin(angles) + points = torch.stack([x_coords, y_coords], dim=1) + if dim > 2: + padding = torch.zeros((num_points, dim - 2)) + points = torch.cat([points, padding], dim=1) + return points + +def 
generate_orthogonal_rows_with_repeats(num_rows: int, dim: int) -> torch.Tensor: + orthogonal_rows = torch.empty(0, dim) + for _ in range(min(num_rows, dim)): + random_vector = torch.randn(1, dim) + if orthogonal_rows.shape[0] > 0: + random_vector -= ( + torch.matmul(random_vector, orthogonal_rows.T) @ orthogonal_rows + / torch.norm(orthogonal_rows, dim=1, keepdim=True) ** 2 + ) + orthogonal_rows = torch.cat( + [orthogonal_rows, random_vector / torch.norm(random_vector)] + ) + if num_rows > dim: + orthogonal_rows = orthogonal_rows.repeat(num_rows // dim + 1, 1)[:num_rows] + return orthogonal_rows + + +def generate_orthogonal_rows_in_pairs(num_pairs: int, dim: int) -> torch.Tensor: + """ + Generates a tensor with orthogonal rows in pairs. + The first row of each pair is orthogonal to the second row of the same pair. + + Args: + num_pairs: The number of orthogonal pairs to generate. + dim: The dimensionality of the vectors. + + Returns: + A PyTorch tensor with orthogonal rows in pairs. + """ + + orthogonal_rows = torch.empty(0, dim) + for _ in range(num_pairs): + # Generate the first vector of the pair + first_vector = torch.randn(1, dim) + first_vector = first_vector / torch.norm(first_vector) # Normalize + + # Generate the second vector orthogonal to the first + second_vector = torch.randn(1, dim) + second_vector = second_vector - (second_vector @ first_vector.T) * first_vector + second_vector = second_vector / torch.norm(second_vector) # Normalize + + # Concatenate the pair to the result + orthogonal_rows = torch.cat([orthogonal_rows, first_vector, second_vector], dim=0) + + return orthogonal_rows + +def generate_ones_tensor(rows: int, dims: int) -> torch.Tensor: + """Generates a tensor with all elements equal to 1.0 (float).""" + return torch.ones(rows, dims, dtype=torch.float32) + +def generate_identity_matrix_rows(rows, cols): + """ + Returns an identity matrix with the specified number of rows and columns. + """ + identity = torch.eye(max(rows, cols)) + return identity[:rows, :cols] + +def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: + """ + Creates a tensor where every row has identical values all the way across, + but increasing row by row. + + Args: + num_rows: The number of rows in the tensor. + num_cols: The number of columns in the tensor. + + Returns: + A PyTorch tensor with the specified properties. + """ + + tensor = torch.arange(1.0, num_rows + 1).unsqueeze(1).repeat(1, num_cols) + return tensor + + + + +def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: + """Test that the metric is symmetric for cosine distance.""" + emb1 = random_tensor(768) + emb2 = random_tensor(768) + + score1 = cosine_metric.score(emb1, emb2) + score2 = cosine_metric.score(emb2, emb1) + + logger.info(f"Score 1: {score1}, Score 2: {score2}") + assert pytest.approx(score1) == score2, "Score should be symmetric." 
+ + +def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: + """Test that score works with embeddings loaded from file paths.""" + emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage + emb2 = random_tensor(768).cpu().numpy() + + # Save embeddings to temporary files + file1 = tmp_path / "emb1.npy" + file2 = tmp_path / "emb2.npy" + np.save(file1, emb1) + np.save(file2, emb2) + + # Load files as PyTorch tensors + emb1_loaded = torch.tensor(np.load(file1), dtype=torch.float32, device=DEVICE) + emb2_loaded = torch.tensor(np.load(file2), dtype=torch.float32, device=DEVICE) + + score = cosine_metric.score(emb1_loaded, emb2_loaded) + expected_score = cosine_metric.score(torch.tensor(emb1, device=DEVICE), torch.tensor(emb2, device=DEVICE)) + + logger.info(f"Score from file: {score}, Direct score: {expected_score}") + assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." + + +def test_score_all_against_self(cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker) -> None: + """Test the score_all function.""" + scores = cosine_metric.score_all(embeddings, embeddings) + assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + assert torch.allclose(torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6), ( + "Self-comparison scores should be zero for cosine distance." + ) + distance_range_checker(scores, min_val=0, max_val=2) + logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") + +def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): + """Test score_all with different sizes for hypotheses and references.""" + hyps = [np.random.rand(768) for _ in range(3)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert scores.shape == (len(hyps), len(refs)), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " + distance_range_checker(scores, min_val=0, max_val=2) + + +# def test_score_all_with_empty_inputs(metric): +# """Test score_all with empty inputs.""" +# scores = metric.score_all([], []) +# assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" + +def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: + """Test the metric with invalid inputs.""" + emb1 = random_tensor(768) + invalid_inputs = ["invalid_input", None, -1, 1] + + for invalid_input in invalid_inputs: + with pytest.raises((TypeError, AttributeError)): + cosine_metric.score(emb1, invalid_input) + + logger.info("Invalid input test passed.") + +def test_score_tensor_input(cosine_metric): + """Test score function with torch.Tensor inputs.""" + emb1 = torch.rand(768) + emb2 = torch.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." + + +def test_score_ndarray_input(cosine_metric): + """Test score function with np.ndarray inputs.""" + emb1 = np.random.rand(768) + emb2 = np.random.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." 
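+    # np.random.rand yields float64 arrays; the metric's _to_tensor casts them to float32
+    # tensors, and the .item() call inside score() returns a plain Python float.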
+ + +def test_score_all_tensor_input(cosine_metric): + """Test score_all function with torch.Tensor inputs.""" + hyps = [torch.rand(768) for _ in range(5)] + refs = [torch.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + + +def test_device_handling(cosine_metric): + """Test device handling for the metric.""" + assert cosine_metric.device.type in ["cuda", "cpu"], "Device should be either 'cuda' or 'cpu'." + if torch.cuda.is_available(): + assert cosine_metric.device.type == "cuda", "Should use 'cuda' when available." + else: + assert cosine_metric.device.type == "cpu", "Should use 'cpu' when CUDA is unavailable." + + +def test_mixed_input(cosine_metric): + """Test score function with mixed input types.""" + emb1 = np.random.rand(768) + emb2 = torch.rand(768) + + score = cosine_metric.score(emb1, emb2) + assert isinstance(score, float), "Output should be a float." + +@pytest.mark.parametrize("num_points, dim", [(16, 2)]) +def test_unit_circle_points(cosine_metric, num_points, dim): + embeddings = generate_unit_circle_points(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances=distances, matrix_name="Unit Circle", num_points=num_points, dim=dim) + + +@pytest.mark.parametrize("num_points, dim", [(20, 2)]) +def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): + embeddings = generate_orthogonal_rows_with_repeats(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim) + + # Create expected pattern directly within the test function + expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) + for i in range(num_points): + for j in range(num_points): + if (i + j) % 2 != 0: + expected_pattern[i, j] = 1 + + # We expect 0 1 0 across and down + assert torch.allclose(distances, expected_pattern, atol=1e-6), "Output does not match the expected alternating pattern" + + +@pytest.mark.parametrize("num_points, dim", [(20, 2)]) +def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_orthogonal_rows_in_pairs(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) + distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + +@pytest.mark.parametrize("num_points, dim", [(10, 5)]) +def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_ones_tensor(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "ones_tensor", num_points, dim) + distance_range_checker(distances, min_val=0, max_val=0) # Expect all distances to be 0 + + +@pytest.mark.parametrize("num_points, dim", [(15, 15)]) # dim should be equal to num_points for identity matrix +def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker): + embeddings = generate_identity_matrix_rows(num_points, dim) + distances = cosine_metric.score_all(embeddings, embeddings) + save_and_plot_distances(distances, "identity_matrix_rows", num_points, dim) + 
distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + + +# def test_progress_bar(cosine_metric): +# """Test score_all with progress_bar argument.""" +# hyps = [np.random.rand(768) for _ in range(5)] +# refs = [np.random.rand(768) for _ in range(5)] + +# # Disable progress bar +# scores = cosine_metric.score_all(hyps, refs, progress_bar=False) +# assert len(scores) == len(hyps), "Output row count mismatch with progress_bar=False." +# assert len(scores[0]) == len(refs), "Output column count mismatch with progress_bar=False." diff --git a/pose_evaluation/metrics/test_signclip_distance_metric.py b/pose_evaluation/metrics/test_signclip_distance_metric.py deleted file mode 100644 index 54f62cf..0000000 --- a/pose_evaluation/metrics/test_signclip_distance_metric.py +++ /dev/null @@ -1,65 +0,0 @@ -import pytest -import numpy as np -from pose_format import Pose -from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric - -# Mock a simple Pose object for compatibility (if not already available) -class MockPose: - def __init__(self, data): - self.data = data - -@pytest.fixture -def metric(): - """Fixture to create a SignCLIPEmbeddingDistanceMetric instance.""" - return SignCLIPEmbeddingDistanceMetric(kind="cosine") - -@pytest.fixture -def embeddings(): - """Fixture to create dummy embeddings for testing.""" - # Generate 5 random 768-dimensional embeddings - return [np.random.rand(768) for _ in range(5)] - -def test_score_symmetric(metric): - """Test that the metric is symmetric for cosine distance.""" - emb1 = np.random.rand(768) - emb2 = np.random.rand(768) - - score1 = metric.score(emb1, emb2) - score2 = metric.score(emb2, emb1) - - assert pytest.approx(score1) == score2, "Score should be symmetric." - -def test_score_with_path(metric, tmp_path): - """Test that score works with embeddings loaded from paths.""" - emb1 = np.random.rand(768) - emb2 = np.random.rand(768) - - # Save embeddings to temporary files - file1 = tmp_path / "emb1.npy" - file2 = tmp_path / "emb2.npy" - np.save(file1, emb1) - np.save(file2, emb2) - - score = metric.score(file1, file2) - expected_score = metric.score(emb1, emb2) - - assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." - -def test_score_all(metric, embeddings): - """Test the score_all function.""" - scores = metric.score_all(embeddings, embeddings) - assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." - assert np.allclose(scores.diagonal(), 0), "Self-comparison scores should be zero for cosine distance." - -def test_score_all_with_different_sizes(metric): - """Test score_all with different sizes for hypotheses and references.""" - hyps = [np.random.rand(768) for _ in range(3)] - refs = [np.random.rand(768) for _ in range(5)] - - scores = metric.score_all(hyps, refs) - assert scores.shape == (len(hyps), len(refs)), "Output shape mismatch for score_all with different sizes." - -def test_score_all_edge_case(metric): - """Test score_all with empty inputs.""" - scores = metric.score_all([], []) - assert scores.size == 0, "Score_all should return an empty array for empty inputs." 
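The EmbeddingDistanceMetric introduced above computes a full pairwise distance matrix between hypothesis and reference embeddings. As a rough standalone sketch of that math (assuming only torch, and mirroring the cosine/l2 branches of score_all rather than reproducing the class itself):

import torch
import torch.nn.functional as F

def pairwise_distances(hyps: torch.Tensor, refs: torch.Tensor, kind: str = "cosine") -> torch.Tensor:
    # Returns an (N, M) matrix: row i holds the distances from hyps[i] to every row of refs.
    if kind == "cosine":
        # Row-normalise, then a matrix product gives cosine similarity; 1 - similarity lies in [0, 2].
        return 1 - F.normalize(hyps, p=2, dim=1) @ F.normalize(refs, p=2, dim=1).T
    if kind == "l2":
        # Broadcast the difference and take norms along the feature axis for pairwise Euclidean distances.
        return torch.norm(hyps[:, None, :] - refs[None, :, :], dim=2)
    raise ValueError(f"Unsupported distance metric: {kind}")

hyps = torch.rand(3, 768)
refs = torch.rand(5, 768)
print(pairwise_distances(hyps, refs).shape)  # torch.Size([3, 5])

For the l2 branch, torch.cdist(hyps, refs, p=2) would be an equivalent and more memory-friendly alternative to the broadcasted difference.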
From 83f9153abfef99c5910d66d340571e69ccce41a8 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 22 Nov 2024 15:10:50 -0500 Subject: [PATCH 06/27] some updates to evaluate signclip script --- .../evaluation/evaluate_signclip.py | 86 +++++++++++++++++-- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index a67b684..60befb8 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -2,9 +2,12 @@ from pathlib import Path import pandas as pd import numpy as np -from pose_evaluation.metrics.signclip_distance_metric import SignCLIPEmbeddingDistanceMetric +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric from tqdm import tqdm +import time +# python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv +# (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: """ Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. @@ -72,26 +75,95 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): embeddings = valid_df["embedding"].tolist() # Initialize metric - metric = SignCLIPEmbeddingDistanceMetric(kind=kind) + metric = EmbeddingDistanceMetric(kind=kind, device="cpu") # Compute all pairwise scores print(f"Computing {kind} distances for {len(embeddings)} embeddings...") + start_time = time.perf_counter() scores = metric.score_all(embeddings, embeddings) + score_duration = time.perf_counter() - start_time + print(f"Score_all took {score_duration:.3f} seconds") + + + + # Extract the "Video file" column + files = valid_df["Video file"].tolist() + + # Create output file path + output_file = Path("signclip_scores.csv") + + # Start timer + start_time = time.perf_counter() + + # Create the Cartesian product of `files` with itself + n = len(files) + data = { + "hyp": [files[i] for i in range(n) for j in range(n)], + "ref": [files[j] for i in range(n) for j in range(n)], + "score": scores.flatten() # Flatten the 2D score matrix into a 1D array + } + + + # Construct the DataFrame + results_df = pd.DataFrame(data) + + # Save to CSV + results_df.to_csv(output_file, index=False) + + # End timer + end_time = time.perf_counter() + print(f"Saving DataFrame and writing to CSV took {end_time - start_time:.2f} seconds") + + # Save scores to a CSV file - output_file = emb_dir / "signclip_scores.csv" + output_file = Path("signclip_scores.csv") results = [] - for i, hyp_row in valid_df.iterrows(): + for i, hyp_row in tqdm(valid_df.iterrows(), total=valid_df.shape[0]): for j, ref_row in valid_df.iterrows(): results.append({ "hyp": hyp_row["Video file"], "ref": ref_row["Video file"], - "score": scores[i, j] + "score": scores[i, j].item() }) + + df_start = time.perf_counter() results_df = pd.DataFrame(results) + df_end = time.perf_counter() + df_duration = df_end - df_start + print(f"df took {df_duration}") + + + + + + + csv_start = time.perf_counter() results_df.to_csv(output_file, 
index=False) - print(f"Scores saved to {output_file}") + csv_end = time.perf_counter() + csv_duration = csv_end - csv_start + print(f"CSV took {csv_duration}") + + json_start = time.perf_counter() + results_df.to_json(output_file.with_suffix(".json"), index=False) + json_end = time.perf_counter() + json_duration = json_end - json_start + print(f"JSON took {json_duration}") + + np_start = time.perf_counter() + np.save(output_file.with_suffix(".npy"), scores) + np_end = time.perf_counter() + np_duration = np_end-np_start + print(f"np took {np_duration}") + + + + + print(f"Scores of shape {scores.shape} saved to {output_file}") + read_back_in = np.load(output_file.with_suffix(".npy")) + if np.allclose(read_back_in, scores): + print("yay! All the same!") def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") @@ -107,7 +179,7 @@ def main(): ) args = parser.parse_args() - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split, kind=args.kind) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind) if __name__ == "__main__": main() From 8680048e63248ee3d25676b07e50047c51188446 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 22 Nov 2024 17:11:35 -0500 Subject: [PATCH 07/27] CDL: messing around with in-class and out-of-class means --- .../evaluation/evaluate_signclip.py | 247 ++++++++++-------- 1 file changed, 144 insertions(+), 103 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 60befb8..76a2134 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -34,28 +34,35 @@ def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.Dat Returns: pd.DataFrame: Updated DataFrame with an additional column for embeddings. """ - # Map video file IDs to embeddings - embeddings_map = {} - for npy_file in emb_dir.glob("*.npy"): - numerical_id = npy_file.stem.split("-")[0] - embeddings_map[numerical_id] = npy_file - - # Match embeddings to glosses - embeddings = [] - for _, row in split_df.iterrows(): - video_file = row["Video file"] + import time + + # Step 1: Create a mapping of numerical IDs to .npy files + map_start = time.perf_counter() + embeddings_map = { + npy_file.stem.split("-")[0]: npy_file + for npy_file in emb_dir.glob("*.npy") + } + map_end = time.perf_counter() + print(f"Creating embeddings map took {map_end - map_start:.4f} seconds") + + # Step 2: Vectorized matching of embeddings + match_start = time.perf_counter() + + def get_embedding(video_file): numerical_id = video_file.split("-")[0] npy_file = embeddings_map.get(numerical_id) - if npy_file is not None: - embeddings.append(load_embedding(npy_file)) - else: - embeddings.append(None) # Placeholder if no matching file + return load_embedding(npy_file) + return None + + split_df["embedding"] = split_df["Video file"].apply(get_embedding) + match_end = time.perf_counter() + print(f"Matching embeddings to glosses took {match_end - match_start:.4f} seconds") - split_df["embedding"] = embeddings return split_df -def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): + +def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out_path=None): """ Evaluate SignCLIP embeddings using score_all. 
@@ -64,106 +71,136 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine"): split_file (Path): Path to the split CSV file. kind (str): Metric type ("cosine" or "l2"). Default is "cosine". """ - # Load split file + overall_start = time.perf_counter() # Start overall benchmarking + + # Step 1: Load split file + split_load_start = time.perf_counter() split_df = pd.read_csv(split_file) - - # Match embeddings - split_df = match_embeddings_to_glosses(emb_dir, split_df) - - # Filter out rows without embeddings - valid_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) - embeddings = valid_df["embedding"].tolist() + split_load_end = time.perf_counter() + print(f"Loading split file took {split_load_end - split_load_start:.4f} seconds") + # print(f"{split_df.info()}") - # Initialize metric + # Step 2: Match embeddings to glosses + match_start = time.perf_counter() + split_df = match_embeddings_to_glosses(emb_dir, split_df) + match_end = time.perf_counter() + print(f"Matching embeddings to glosses took {match_end - match_start:.4f} seconds") + # print(split_df.info()) + + # Step 3: Filter out rows without embeddings + filter_start = time.perf_counter() + items_with_embeddings_df = split_df.dropna(subset=["embedding"]).reset_index(drop=True) + embeddings = items_with_embeddings_df["embedding"].tolist() + filter_end = time.perf_counter() + print(f"Filtering embeddings took {filter_end - filter_start:.4f} seconds") + print(items_with_embeddings_df.info()) + + # Step 4: Initialize the distance metric + metric_start = time.perf_counter() metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + metric_end = time.perf_counter() + print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") - # Compute all pairwise scores + # Step 5: Compute all pairwise scores + score_start = time.perf_counter() print(f"Computing {kind} distances for {len(embeddings)} embeddings...") - - start_time = time.perf_counter() scores = metric.score_all(embeddings, embeddings) - score_duration = time.perf_counter() - start_time - print(f"Score_all took {score_duration:.3f} seconds") - - - - # Extract the "Video file" column - files = valid_df["Video file"].tolist() - - # Create output file path - output_file = Path("signclip_scores.csv") - - # Start timer - start_time = time.perf_counter() - - # Create the Cartesian product of `files` with itself - n = len(files) - data = { - "hyp": [files[i] for i in range(n) for j in range(n)], - "ref": [files[j] for i in range(n) for j in range(n)], - "score": scores.flatten() # Flatten the 2D score matrix into a 1D array - } - - - # Construct the DataFrame - results_df = pd.DataFrame(data) - - # Save to CSV - results_df.to_csv(output_file, index=False) - - # End timer - end_time = time.perf_counter() - print(f"Saving DataFrame and writing to CSV took {end_time - start_time:.2f} seconds") - + score_end = time.perf_counter() + print(f"Score_all took {score_end - score_start:.3f} seconds") + + # Step 6: Create output file path + output_file = out_path + if out_path is None: + output_file = Path(f"signclip_scores_{split_file.name}").with_suffix(".npz") + + if not output_file.suffix == ".npz": + output_file = Path(f"{output_file}.npz") + + + print(f"Scores will be saved to {output_file}") + + + + # Step 7: Extract file list from DataFrame + files_start = time.perf_counter() + files = items_with_embeddings_df["Video file"].tolist() + files_end = time.perf_counter() + print(f"Extracting file list took {files_end - files_start:.4f} seconds") + + + 
analysis_start = time.perf_counter() + index_to_check = 0 + number_to_check = 10 + print(f"The first {number_to_check} scores for {files[index_to_check]} to...") + for ref, score in list(zip(files, scores[index_to_check]))[:number_to_check]: + print("\t*------------->", f"{ref}".ljust(35), "\t", score.item()) + + unique_glosses = items_with_embeddings_df['Gloss'].unique() + print(f"We have a vocabulary of {len(unique_glosses)} glosses") + gloss_indices = {} + for gloss in items_with_embeddings_df['Gloss'].unique(): + gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() + + for gloss, indices in gloss_indices.items(): + print(f"Here are the {len(indices)} indices for {gloss}:{indices}") + + # Assuming 'scores' is your distance matrix and 'gloss_indices' is your dictionary of gloss indices + find_class_distances_start = time.perf_counter() + all_within_class_distances = np.array([]) # Initialize as empty NumPy array + all_between_class_distances = np.array([]) # Initialize as empty NumPy array + + for gloss, indices in tqdm(gloss_indices.items()): + # Within-class distances + within_class_distances = scores[np.ix_(indices, indices)] + within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] + all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) + + # Between-class distances + other_indices = np.setdiff1d(np.arange(len(scores)), indices) + between_class_distances = scores[np.ix_(indices, other_indices)] + all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) + find_class_distances_end = time.perf_counter() + print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") + - # Save scores to a CSV file - output_file = Path("signclip_scores.csv") - results = [] - for i, hyp_row in tqdm(valid_df.iterrows(), total=valid_df.shape[0]): - for j, ref_row in valid_df.iterrows(): - results.append({ - "hyp": hyp_row["Video file"], - "ref": ref_row["Video file"], - "score": scores[i, j].item() - }) + print(f"Mean within classes: {np.mean(all_within_class_distances)}") + print(f"Mean between classes: {np.mean(all_between_class_distances)}") - df_start = time.perf_counter() - results_df = pd.DataFrame(results) - df_end = time.perf_counter() - df_duration = df_end - df_start - print(f"df took {df_duration}") + + analysis_end = time.perf_counter() + analysis_duration = analysis_end - analysis_start + print(f"Analysis took {analysis_duration} seconds") + + + # Step 8: Save the scores and files to a compressed file + save_start = time.perf_counter() + np.savez(output_file, scores=scores, files=files) + save_end = time.perf_counter() + print(f"Saving scores and files took {save_end - save_start:.4f} seconds") + print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {output_file}") + # Step 9: Read back the saved scores + read_start = time.perf_counter() + read_back_in = np.load(f"{output_file}") + read_end = time.perf_counter() + print(f"Reading back the file took {read_end - read_start:.4f} seconds") - + # Step 10: Verify if the read data matches the original scores + verify_start = time.perf_counter() + if np.allclose(read_back_in["scores"], scores): + print("Yay! 
All the same!") + else: + print("Mismatch found!") + verify_end = time.perf_counter() + print(f"Verification step took {verify_end - verify_start:.4f} seconds") + # Overall time + overall_end = time.perf_counter() + print(f"Total script runtime: {overall_end - overall_start:.4f} seconds") - csv_start = time.perf_counter() - results_df.to_csv(output_file, index=False) - csv_end = time.perf_counter() - csv_duration = csv_end - csv_start - print(f"CSV took {csv_duration}") - json_start = time.perf_counter() - results_df.to_json(output_file.with_suffix(".json"), index=False) - json_end = time.perf_counter() - json_duration = json_end - json_start - print(f"JSON took {json_duration}") - - np_start = time.perf_counter() - np.save(output_file.with_suffix(".npy"), scores) - np_end = time.perf_counter() - np_duration = np_end-np_start - print(f"np took {np_duration}") - - - - - print(f"Scores of shape {scores.shape} saved to {output_file}") - read_back_in = np.load(output_file.with_suffix(".npy")) - if np.allclose(read_back_in, scores): - print("yay! All the same!") def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") @@ -177,10 +214,14 @@ def main(): "--kind", type=str, choices=["cosine", "l2"], default="cosine", help="Type of distance metric to use (default: cosine)" ) + + parser.add_argument("--out_path", + type=Path, + help="Where to save output distance npz matrix+file list") + args = parser.parse_args() - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind, out_path=args.out_path) if __name__ == "__main__": main() - print(f"THIS SCRIPT NEEDS TESTING") From a6b22c3ed85eb9f78370fd2e1d8170cc82b533c8 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:23:57 -0500 Subject: [PATCH 08/27] CDL: testing out in/out of class mean distance --- pose_evaluation/evaluation/evaluate_signclip.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 76a2134..031716b 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -5,7 +5,7 @@ from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric from tqdm import tqdm import time - +import torch # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: @@ -149,9 +149,12 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out all_within_class_distances = np.array([]) # Initialize as empty NumPy array all_between_class_distances = np.array([]) # Initialize as empty NumPy array - for gloss, indices in tqdm(gloss_indices.items()): + within_class_means_by_gloss = {} + for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): # Within-class distances 
within_class_distances = scores[np.ix_(indices, indices)] + within_class_mean = torch.mean(within_class_distances) + within_class_means_by_gloss[gloss] = within_class_mean within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) @@ -160,9 +163,12 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out between_class_distances = scores[np.ix_(indices, other_indices)] all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) find_class_distances_end = time.perf_counter() + + print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - + for gloss, mean in within_class_means_by_gloss.items(): + print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") print(f"Mean within classes: {np.mean(all_within_class_distances)}") print(f"Mean between classes: {np.mean(all_between_class_distances)}") From b89349642db0e6872ba836793a4c4c89492e0cbb Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:59:54 -0500 Subject: [PATCH 09/27] CDL: trying to batch-process calculation of means --- .../evaluation/evaluate_signclip.py | 161 +++++++++++++----- 1 file changed, 122 insertions(+), 39 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 031716b..41bcab6 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -6,6 +6,7 @@ from tqdm import tqdm import time import torch +from typing import List, Tuple # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv def load_embedding(file_path: Path) -> np.ndarray: @@ -62,7 +63,105 @@ def get_embedding(video_file): return split_df -def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out_path=None): +def calculate_mean_distances( + distance_matrix: torch.Tensor, + indices_a: torch.Tensor, + indices_b: torch.Tensor, + exclude_self: bool = False +) -> float: + """ + Calculate the mean of distances between two sets of indices in a 2D distance matrix. + + Args: + distance_matrix (torch.Tensor): A 2D tensor representing pairwise distances. + indices_a (torch.Tensor): A tensor of row indices. + indices_b (torch.Tensor): A tensor of column indices. + exclude_self (bool): Whether to exclude distances where indices_a == indices_b. + + Returns: + float: The mean distance between all pairs of (indices_a, indices_b). 
+ """ + # Create all pair combinations + row_indices, col_indices = torch.meshgrid(indices_a, indices_b, indexing="ij") + + if exclude_self: + # Apply a mask to remove self-distances + mask = row_indices != col_indices + row_indices = row_indices[mask] + col_indices = col_indices[mask] + + # Gather distances + selected_distances = distance_matrix[row_indices.flatten(), col_indices.flatten()] + + # Return the mean + return selected_distances.mean().item() + +def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): + import random + torch.manual_seed(42) + random.seed(42) + # distance_matrix = torch.rand((num_items, num_items)) * 100 + distance_matrix = torch.full((num_items, num_items), 10.0) + distance_matrix.fill_diagonal_(0) + indices = list(range(num_items)) + random.shuffle(indices) + + classes = {f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes)} + # Assign intra-class distances + mean_values_by_class ={} + for i, class_name in enumerate(classes.keys()): + mean_value = i+1 + mean_values_by_class[class_name] = mean_value + for class_name, indices in classes.items(): + mean_value = mean_values_by_class[class_name] + for i in indices: + for j in indices: + if i != j: # Exclude self-distances + distance_matrix[i, j] = mean_value + return classes, distance_matrix + +def calculate_class_means(gloss_indices, scores): + class_means_by_gloss = {} + all_indices = torch.arange(scores.size(0), dtype=int) + + for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): + indices = torch.LongTensor(indices) + class_means_by_gloss[gloss] ={} + within_class_mean = calculate_mean_distances(scores, indices, indices, exclude_self=True) + + class_means_by_gloss[gloss]["in_class"] = within_class_mean + + complement_indices = all_indices[~torch.isin(all_indices, indices)] + without_class_mean = calculate_mean_distances(scores, indices, complement_indices) + class_means_by_gloss[gloss]["out_of_class"]=without_class_mean + + return class_means_by_gloss + +#def calculate_class_means(gloss_indices, scores): +# all_within_class_distances = np.array([]) # Initialize as empty NumPy array +# all_between_class_distances = np.array([]) # Initialize as empty NumPy array +# within_class_means_by_gloss = {} +# for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): +# # Within-class distances +# within_class_distances = scores[np.ix_(indices, indices)] +# within_class_mean = torch.mean(within_class_distances) +# within_class_means_by_gloss[gloss] = within_class_mean +# within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] +# all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) +# +# # Between-class distances +# other_indices = np.setdiff1d(np.arange(len(scores)), indices) +# between_class_distances = scores[np.ix_(indices, other_indices)] +# all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) +# +# for gloss, mean in within_class_means_by_gloss.items(): +# print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") +# +# print(f"Mean within classes: {np.mean(all_within_class_distances)}") +# print(f"Mean between classes: {np.mean(all_between_class_distances)}") +# return within_class_means_by_gloss + +def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = "cosine"): """ Evaluate SignCLIP embeddings using score_all. 
@@ -108,18 +207,6 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out score_end = time.perf_counter() print(f"Score_all took {score_end - score_start:.3f} seconds") - # Step 6: Create output file path - output_file = out_path - if out_path is None: - output_file = Path(f"signclip_scores_{split_file.name}").with_suffix(".npz") - - if not output_file.suffix == ".npz": - output_file = Path(f"{output_file}.npz") - - - print(f"Scores will be saved to {output_file}") - - # Step 7: Extract file list from DataFrame files_start = time.perf_counter() @@ -144,34 +231,16 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out for gloss, indices in gloss_indices.items(): print(f"Here are the {len(indices)} indices for {gloss}:{indices}") - # Assuming 'scores' is your distance matrix and 'gloss_indices' is your dictionary of gloss indices find_class_distances_start = time.perf_counter() - all_within_class_distances = np.array([]) # Initialize as empty NumPy array - all_between_class_distances = np.array([]) # Initialize as empty NumPy array - within_class_means_by_gloss = {} - for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): - # Within-class distances - within_class_distances = scores[np.ix_(indices, indices)] - within_class_mean = torch.mean(within_class_distances) - within_class_means_by_gloss[gloss] = within_class_mean - within_class_distances = within_class_distances[np.triu_indices(len(indices), k=1)] - all_within_class_distances = np.concatenate([all_within_class_distances, within_class_distances.ravel()]) - - # Between-class distances - other_indices = np.setdiff1d(np.arange(len(scores)), indices) - between_class_distances = scores[np.ix_(indices, other_indices)] - all_between_class_distances = np.concatenate([all_between_class_distances, between_class_distances.ravel()]) - find_class_distances_end = time.perf_counter() + #synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) + #class_means = calculate_class_means(synthetic_classes, synthetic_distances) + class_means = calculate_class_means(gloss_indices, scores) + find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - for gloss, mean in within_class_means_by_gloss.items(): - print(f"Within {gloss}: {within_class_means_by_gloss[gloss]}") - - print(f"Mean within classes: {np.mean(all_within_class_distances)}") - print(f"Mean between classes: {np.mean(all_between_class_distances)}") analysis_end = time.perf_counter() @@ -179,17 +248,19 @@ def evaluate_signclip(emb_dir: Path, split_file: Path, kind: str = "cosine", out print(f"Analysis took {analysis_duration} seconds") + for gloss, means in class_means.items(): + print(gloss, means) # Step 8: Save the scores and files to a compressed file save_start = time.perf_counter() - np.savez(output_file, scores=scores, files=files) + np.savez(out_path, scores=scores, files=files) save_end = time.perf_counter() print(f"Saving scores and files took {save_end - save_start:.4f} seconds") - print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {output_file}") + print(f"Scores of shape {scores.shape} with files list of length {len(files)} saved to {out_path}") # Step 9: Read back the saved scores read_start = time.perf_counter() - read_back_in = np.load(f"{output_file}") + read_back_in = np.load(f"{out_path}") read_end = time.perf_counter() print(f"Reading back the file 
took {read_end - read_start:.4f} seconds") @@ -225,9 +296,21 @@ def main(): type=Path, help="Where to save output distance npz matrix+file list") + + args = parser.parse_args() + + + output_file = args.out_path + if output_file is None: + output_file = Path(f"signclip_scores_{args.split_file.name}").with_suffix(".npz") + + if not output_file.suffix == ".npz": + output_file = Path(f"{output_file}.npz") + + print(f"Scores will be saved to {output_file}") - evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, kind=args.kind, out_path=args.out_path) + evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, out_path=output_file, kind=args.kind) if __name__ == "__main__": main() From 20bcba2bcf7ec20deabfd6bea7c703f12ac8c28f Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 15:51:41 -0500 Subject: [PATCH 10/27] CDL: saving off the class means --- .../evaluation/evaluate_signclip.py | 25 ++++++++++++++++--- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 41bcab6..67d6be4 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -6,6 +6,7 @@ from tqdm import tqdm import time import torch +import json from typing import List, Tuple # python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv # (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv @@ -196,7 +197,8 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = # Step 4: Initialize the distance metric metric_start = time.perf_counter() - metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + #metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + metric = EmbeddingDistanceMetric(kind=kind) metric_end = time.perf_counter() print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") @@ -228,7 +230,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = for gloss in items_with_embeddings_df['Gloss'].unique(): gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() - for gloss, indices in gloss_indices.items(): + for gloss, indices in list(gloss_indices.items())[:10]: print(f"Here are the {len(indices)} indices for {gloss}:{indices}") find_class_distances_start = time.perf_counter() @@ -237,6 +239,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = #class_means = calculate_class_means(synthetic_classes, synthetic_distances) class_means = calculate_class_means(gloss_indices, scores) + find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") @@ -245,14 +248,28 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = analysis_end = time.perf_counter() analysis_duration = analysis_end - analysis_start + + in_class_means = [mean_dict["in_class"] for mean_dict in class_means.values()] + out_class_means = 
[mean_dict["out_of_class"] for mean_dict in class_means.values()] + + + for gloss, means in list(class_means.items())[:10]: + print(gloss, means) + + print(f"Mean of in-class means: {np.mean(in_class_means)}") + print(f"Mean of out-of-class means: {np.mean(out_class_means)}") + print(f"Analysis took {analysis_duration} seconds") - for gloss, means in class_means.items(): - print(gloss, means) # Step 8: Save the scores and files to a compressed file + save_start = time.perf_counter() + class_means_json = out_path.with_name(f"{out_path.stem}_class_means").with_suffix(".json") + with open(class_means_json, "w") as f: + print(f"Writing class means to {f}") + json.dump(class_means, f) np.savez(out_path, scores=scores, files=files) save_end = time.perf_counter() print(f"Saving scores and files took {save_end - save_start:.4f} seconds") From d15b92367e2939fdf9f6059ad9d13246ea96db06 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:05:25 -0500 Subject: [PATCH 11/27] a bit of code cleanup --- .../evaluation/evaluate_signclip.py | 117 ++++++++---------- 1 file changed, 51 insertions(+), 66 deletions(-) diff --git a/pose_evaluation/evaluation/evaluate_signclip.py b/pose_evaluation/evaluation/evaluate_signclip.py index 67d6be4..d293c05 100644 --- a/pose_evaluation/evaluation/evaluate_signclip.py +++ b/pose_evaluation/evaluation/evaluate_signclip.py @@ -1,22 +1,21 @@ import argparse from pathlib import Path +import time +import json +import random import pandas as pd import numpy as np -from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -from tqdm import tqdm -import time import torch -import json -from typing import List, Tuple -# python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/400_words_10_examples_each.csv -# (pose_evaluation) (base) vlab@vlab-desktop:~/projects/sign_language_processing/pose-evaluation/pose_evaluation$ python evaluation/evaluate_signclip.py /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/embeddings/sem-lex/ --split_file /media/vlab/Aqsa-Deep-Storage/colin/ASL_Citizen/splits/20x5_curated_sample.csv +from tqdm import tqdm +from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric + def load_embedding(file_path: Path) -> np.ndarray: """ Load a SignCLIP embedding from a .npy file, ensuring it has the correct shape. - + Args: file_path (Path): Path to the .npy file. - + Returns: np.ndarray: The embedding with shape (768,). """ @@ -25,25 +24,22 @@ def load_embedding(file_path: Path) -> np.ndarray: embedding = embedding[0] # Reduce shape from (1, 768) to (768,) return embedding + def match_embeddings_to_glosses(emb_dir: Path, split_df: pd.DataFrame) -> pd.DataFrame: """ Match .npy embeddings to the corresponding glosses based on the numerical ID. - + Args: emb_dir (Path): Directory containing the .npy files. split_df (pd.DataFrame): DataFrame containing the split file with the "Video file" column. - + Returns: pd.DataFrame: Updated DataFrame with an additional column for embeddings. 
""" - import time # Step 1: Create a mapping of numerical IDs to .npy files map_start = time.perf_counter() - embeddings_map = { - npy_file.stem.split("-")[0]: npy_file - for npy_file in emb_dir.glob("*.npy") - } + embeddings_map = {npy_file.stem.split("-")[0]: npy_file for npy_file in emb_dir.glob("*.npy")} map_end = time.perf_counter() print(f"Creating embeddings map took {map_end - map_start:.4f} seconds") @@ -65,40 +61,38 @@ def get_embedding(video_file): def calculate_mean_distances( - distance_matrix: torch.Tensor, - indices_a: torch.Tensor, - indices_b: torch.Tensor, - exclude_self: bool = False + distance_matrix: torch.Tensor, indices_a: torch.Tensor, indices_b: torch.Tensor, exclude_self: bool = False ) -> float: """ Calculate the mean of distances between two sets of indices in a 2D distance matrix. - + Args: distance_matrix (torch.Tensor): A 2D tensor representing pairwise distances. indices_a (torch.Tensor): A tensor of row indices. indices_b (torch.Tensor): A tensor of column indices. exclude_self (bool): Whether to exclude distances where indices_a == indices_b. - + Returns: float: The mean distance between all pairs of (indices_a, indices_b). """ # Create all pair combinations row_indices, col_indices = torch.meshgrid(indices_a, indices_b, indexing="ij") - + if exclude_self: # Apply a mask to remove self-distances mask = row_indices != col_indices row_indices = row_indices[mask] col_indices = col_indices[mask] - + # Gather distances selected_distances = distance_matrix[row_indices.flatten(), col_indices.flatten()] - + # Return the mean return selected_distances.mean().item() + def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): - import random + torch.manual_seed(42) random.seed(42) # distance_matrix = torch.rand((num_items, num_items)) * 100 @@ -107,11 +101,13 @@ def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): indices = list(range(num_items)) random.shuffle(indices) - classes = {f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes)} + classes = { + f"CLASS_{i}": torch.tensor([indices.pop() for _ in range(num_items_per_class)]) for i in range(num_classes) + } # Assign intra-class distances - mean_values_by_class ={} + mean_values_by_class = {} for i, class_name in enumerate(classes.keys()): - mean_value = i+1 + mean_value = i + 1 mean_values_by_class[class_name] = mean_value for class_name, indices in classes.items(): mean_value = mean_values_by_class[class_name] @@ -121,24 +117,26 @@ def generate_synthetic_data(num_items, num_classes, num_items_per_class=4): distance_matrix[i, j] = mean_value return classes, distance_matrix + def calculate_class_means(gloss_indices, scores): class_means_by_gloss = {} all_indices = torch.arange(scores.size(0), dtype=int) for gloss, indices in tqdm(gloss_indices.items(), desc="Finding mean values by gloss"): indices = torch.LongTensor(indices) - class_means_by_gloss[gloss] ={} + class_means_by_gloss[gloss] = {} within_class_mean = calculate_mean_distances(scores, indices, indices, exclude_self=True) class_means_by_gloss[gloss]["in_class"] = within_class_mean complement_indices = all_indices[~torch.isin(all_indices, indices)] without_class_mean = calculate_mean_distances(scores, indices, complement_indices) - class_means_by_gloss[gloss]["out_of_class"]=without_class_mean + class_means_by_gloss[gloss]["out_of_class"] = without_class_mean return class_means_by_gloss -#def calculate_class_means(gloss_indices, scores): + +# def 
calculate_class_means(gloss_indices, scores): # all_within_class_distances = np.array([]) # Initialize as empty NumPy array # all_between_class_distances = np.array([]) # Initialize as empty NumPy array # within_class_means_by_gloss = {} @@ -162,10 +160,11 @@ def calculate_class_means(gloss_indices, scores): # print(f"Mean between classes: {np.mean(all_between_class_distances)}") # return within_class_means_by_gloss -def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = "cosine"): + +def evaluate_signclip(emb_dir: Path, split_file: Path, out_path: Path, kind: str = "cosine"): """ Evaluate SignCLIP embeddings using score_all. - + Args: emb_dir (Path): Directory containing .npy embeddings. split_file (Path): Path to the split CSV file. @@ -197,7 +196,7 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = # Step 4: Initialize the distance metric metric_start = time.perf_counter() - #metric = EmbeddingDistanceMetric(kind=kind, device="cpu") + # metric = EmbeddingDistanceMetric(kind=kind, device="cpu") metric = EmbeddingDistanceMetric(kind=kind) metric_end = time.perf_counter() print(f"Initializing metric took {metric_end - metric_start:.4f} seconds") @@ -209,59 +208,51 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = score_end = time.perf_counter() print(f"Score_all took {score_end - score_start:.3f} seconds") - # Step 7: Extract file list from DataFrame files_start = time.perf_counter() files = items_with_embeddings_df["Video file"].tolist() files_end = time.perf_counter() print(f"Extracting file list took {files_end - files_start:.4f} seconds") - - analysis_start = time.perf_counter() + analysis_start = time.perf_counter() index_to_check = 0 number_to_check = 10 print(f"The first {number_to_check} scores for {files[index_to_check]} to...") for ref, score in list(zip(files, scores[index_to_check]))[:number_to_check]: print("\t*------------->", f"{ref}".ljust(35), "\t", score.item()) - unique_glosses = items_with_embeddings_df['Gloss'].unique() + unique_glosses = items_with_embeddings_df["Gloss"].unique() print(f"We have a vocabulary of {len(unique_glosses)} glosses") gloss_indices = {} - for gloss in items_with_embeddings_df['Gloss'].unique(): - gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df['Gloss'] == gloss].tolist() + for gloss in items_with_embeddings_df["Gloss"].unique(): + gloss_indices[gloss] = items_with_embeddings_df.index[items_with_embeddings_df["Gloss"] == gloss].tolist() for gloss, indices in list(gloss_indices.items())[:10]: print(f"Here are the {len(indices)} indices for {gloss}:{indices}") find_class_distances_start = time.perf_counter() - #synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) - #class_means = calculate_class_means(synthetic_classes, synthetic_distances) + # synthetic_classes, synthetic_distances = generate_synthetic_data(30000, 2700, 8) + # class_means = calculate_class_means(synthetic_classes, synthetic_distances) class_means = calculate_class_means(gloss_indices, scores) - find_class_distances_end = time.perf_counter() print(f"Finding within and without took {find_class_distances_end-find_class_distances_start}") - - analysis_end = time.perf_counter() analysis_duration = analysis_end - analysis_start in_class_means = [mean_dict["in_class"] for mean_dict in class_means.values()] out_class_means = [mean_dict["out_of_class"] for mean_dict in class_means.values()] - for gloss, means in 
list(class_means.items())[:10]: print(gloss, means) print(f"Mean of in-class means: {np.mean(in_class_means)}") print(f"Mean of out-of-class means: {np.mean(out_class_means)}") - + print(f"Analysis took {analysis_duration} seconds") - - # Step 8: Save the scores and files to a compressed file @@ -295,39 +286,33 @@ def evaluate_signclip(emb_dir: Path, split_file:Path, out_path:Path, kind: str = print(f"Total script runtime: {overall_end - overall_start:.4f} seconds") - def main(): parser = argparse.ArgumentParser(description="Evaluate SignCLIP embeddings with score_all.") + parser.add_argument("emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files") + parser.add_argument("--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)") parser.add_argument( - "emb_dir", type=Path, help="Path to the directory containing SignCLIP .npy files" - ) - parser.add_argument( - "--split_file", type=Path, required=True, help="Path to the split CSV file (e.g., test.csv)" - ) - parser.add_argument( - "--kind", type=str, choices=["cosine", "l2"], default="cosine", - help="Type of distance metric to use (default: cosine)" + "--kind", + type=str, + choices=["cosine", "l2"], + default="cosine", + help="Type of distance metric to use (default: cosine)", ) - parser.add_argument("--out_path", - type=Path, - help="Where to save output distance npz matrix+file list") + parser.add_argument("--out_path", type=Path, help="Where to save output distance npz matrix+file list") - - args = parser.parse_args() - output_file = args.out_path if output_file is None: output_file = Path(f"signclip_scores_{args.split_file.name}").with_suffix(".npz") - if not output_file.suffix == ".npz": + if output_file.suffix != ".npz": output_file = Path(f"{output_file}.npz") print(f"Scores will be saved to {output_file}") evaluate_signclip(emb_dir=args.emb_dir, split_file=args.split_file, out_path=output_file, kind=args.kind) + if __name__ == "__main__": main() From 5f3b1ba402f857a00702faa8ffd8f2af33324e5a Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:16:02 -0500 Subject: [PATCH 12/27] some code cleanup --- .../metrics/base_embedding_metric.py | 2 +- .../metrics/embedding_distance_metric.py | 6 +- .../metrics/signclip_distance_metric.py | 4 +- .../metrics/test_embedding_distance_metric.py | 64 ++++++++++++------- 4 files changed, 46 insertions(+), 30 deletions(-) diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 83ad763..78aeb0e 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -5,4 +5,4 @@ # Define a type alias for embeddings (e.g., torch.Tensor) Embedding = TypeVar("Embedding", bound=torch.Tensor) -EmbeddingMetric = BaseMetric[Embedding] \ No newline at end of file +EmbeddingMetric = BaseMetric[Embedding] diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 84d6df1..b362404 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -57,11 +57,11 @@ def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np # Cosine similarity, converted to distance similarity = torch.dot(hypothesis, reference).item() return 1 - similarity - elif self.kind == "l2": + if self.kind == "l2": # L2 distance return torch.norm(hypothesis - 
reference).item() - else: - raise ValueError(f"Unsupported distance metric: {self.kind}") + + raise ValueError(f"Unsupported distance metric: {self.kind}") def score_all( self, diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py index 3d0fd31..382be52 100644 --- a/pose_evaluation/metrics/signclip_distance_metric.py +++ b/pose_evaluation/metrics/signclip_distance_metric.py @@ -6,6 +6,7 @@ import torch import torch.nn.functional as F + class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): """ @@ -18,8 +19,6 @@ def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): self.kind = kind self.device = torch.device(device) if isinstance(device, str) else device - - def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: """ Computes the pairwise distance matrix for the provided embeddings. @@ -48,4 +47,3 @@ def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: raise ValueError(f"Unsupported distance metric: {self.kind}") return distance_matrix - diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index c352386..4727029 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -8,7 +8,7 @@ from typing import List from pathlib import Path -# TODO: many fixes. Including the fact that we test cosine but not Euclidean, +# TODO: many fixes. Including the fact that we test cosine but not Euclidean, # Configure logging @@ -18,6 +18,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") + @pytest.fixture def cosine_metric(): """Fixture to create an EmbeddingDistanceMetric instance.""" @@ -50,10 +51,12 @@ def save_and_plot_distances(distances, matrix_name, num_points, dim): print(f"Distances plot saved to {plot_path}") plt.close() + def random_tensor(size: int) -> torch.Tensor: """Generate a random tensor on the appropriate device.""" return torch.rand(size, dtype=torch.float32, device=DEVICE) + def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: angles = torch.linspace(0, 2 * np.pi, num_points + 1)[:-1] x_coords = torch.cos(angles) @@ -64,18 +67,18 @@ def generate_unit_circle_points(num_points: int, dim: int = 2) -> torch.Tensor: points = torch.cat([points, padding], dim=1) return points + def generate_orthogonal_rows_with_repeats(num_rows: int, dim: int) -> torch.Tensor: orthogonal_rows = torch.empty(0, dim) for _ in range(min(num_rows, dim)): random_vector = torch.randn(1, dim) if orthogonal_rows.shape[0] > 0: random_vector -= ( - torch.matmul(random_vector, orthogonal_rows.T) @ orthogonal_rows + torch.matmul(random_vector, orthogonal_rows.T) + @ orthogonal_rows / torch.norm(orthogonal_rows, dim=1, keepdim=True) ** 2 ) - orthogonal_rows = torch.cat( - [orthogonal_rows, random_vector / torch.norm(random_vector)] - ) + orthogonal_rows = torch.cat([orthogonal_rows, random_vector / torch.norm(random_vector)]) if num_rows > dim: orthogonal_rows = orthogonal_rows.repeat(num_rows // dim + 1, 1)[:num_rows] return orthogonal_rows @@ -110,16 +113,19 @@ def generate_orthogonal_rows_in_pairs(num_pairs: int, dim: int) -> torch.Tensor: return orthogonal_rows + def generate_ones_tensor(rows: int, dims: int) -> torch.Tensor: """Generates a tensor with all elements equal to 1.0 (float).""" 
return torch.ones(rows, dims, dtype=torch.float32) + def generate_identity_matrix_rows(rows, cols): - """ - Returns an identity matrix with the specified number of rows and columns. - """ - identity = torch.eye(max(rows, cols)) - return identity[:rows, :cols] + """ + Returns an identity matrix with the specified number of rows and columns. + """ + identity = torch.eye(max(rows, cols)) + return identity[:rows, :cols] + def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: """ @@ -138,8 +144,6 @@ def create_increasing_rows_tensor(num_rows: int, num_cols: int) -> torch.Tensor: return tensor - - def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: """Test that the metric is symmetric for cosine distance.""" emb1 = random_tensor(768) @@ -150,9 +154,9 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: logger.info(f"Score 1: {score1}, Score 2: {score2}") assert pytest.approx(score1) == score2, "Score should be symmetric." - -def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: + +def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: """Test that score works with embeddings loaded from file paths.""" emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage emb2 = random_tensor(768).cpu().numpy() @@ -174,24 +178,30 @@ def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) assert pytest.approx(score) == expected_score, "Score with paths should match direct computation." -def test_score_all_against_self(cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker) -> None: +def test_score_all_against_self( + metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker +) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." - assert torch.allclose(torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6), ( - "Self-comparison scores should be zero for cosine distance." - ) - distance_range_checker(scores, min_val=0, max_val=2) + assert torch.allclose( + torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6 + ), "Self-comparison scores should be zero for cosine distance." + distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] refs = [np.random.rand(768) for _ in range(5)] scores = cosine_metric.score_all(hyps, refs) - assert scores.shape == (len(hyps), len(refs)), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " - distance_range_checker(scores, min_val=0, max_val=2) + assert scores.shape == ( + len(hyps), + len(refs), + ), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. 
" + distance_range_checker(scores, min_val=0, max_val=2) # def test_score_all_with_empty_inputs(metric): @@ -199,6 +209,7 @@ def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): # scores = metric.score_all([], []) # assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" + def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) @@ -210,6 +221,7 @@ def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: logger.info("Invalid input test passed.") + def test_score_tensor_input(cosine_metric): """Test score function with torch.Tensor inputs.""" emb1 = torch.rand(768) @@ -255,6 +267,7 @@ def test_mixed_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." + @pytest.mark.parametrize("num_points, dim", [(16, 2)]) def test_unit_circle_points(cosine_metric, num_points, dim): embeddings = generate_unit_circle_points(num_points, dim) @@ -266,7 +279,9 @@ def test_unit_circle_points(cosine_metric, num_points, dim): def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): embeddings = generate_orthogonal_rows_with_repeats(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) - save_and_plot_distances(distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim) + save_and_plot_distances( + distances=distances, matrix_name="Orthogonal Rows (with repeats)", num_points=num_points, dim=dim + ) # Create expected pattern directly within the test function expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) @@ -276,7 +291,9 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): expected_pattern[i, j] = 1 # We expect 0 1 0 across and down - assert torch.allclose(distances, expected_pattern, atol=1e-6), "Output does not match the expected alternating pattern" + assert torch.allclose( + distances, expected_pattern, atol=1e-6 + ), "Output does not match the expected alternating pattern" @pytest.mark.parametrize("num_points, dim", [(20, 2)]) @@ -286,6 +303,7 @@ def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + @pytest.mark.parametrize("num_points, dim", [(10, 5)]) def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): embeddings = generate_ones_tensor(num_points, dim) From 00ec4b8d8e6ab1e41b6d1dda6be09ca814960c4d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:22:28 -0500 Subject: [PATCH 13/27] Fixed a few pytests --- pose_evaluation/metrics/test_embedding_distance_metric.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 4727029..147b98e 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -156,7 +156,7 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: assert pytest.approx(score1) == score2, "Score should be symmetric." 
-def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: +def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) -> None: """Test that score works with embeddings loaded from file paths.""" emb1 = random_tensor(768).cpu().numpy() # Save as NumPy for file storage emb2 = random_tensor(768).cpu().numpy() @@ -179,13 +179,13 @@ def test_score_with_path(metric: EmbeddingDistanceMetric, tmp_path: Path) -> Non def test_score_all_against_self( - metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker + cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker ) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings), device=DEVICE), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings)), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") From 03066be9bd9ad76f4ebcd9ccb52d452a62afe2ca Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 27 Nov 2024 10:16:00 -0500 Subject: [PATCH 14/27] Remove unneeded SignCLIP file --- .../metrics/signclip_distance_metric.py | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 pose_evaluation/metrics/signclip_distance_metric.py diff --git a/pose_evaluation/metrics/signclip_distance_metric.py b/pose_evaluation/metrics/signclip_distance_metric.py deleted file mode 100644 index 382be52..0000000 --- a/pose_evaluation/metrics/signclip_distance_metric.py +++ /dev/null @@ -1,49 +0,0 @@ -from pose_evaluation.metrics.base_embedding_metric import NumpyArrayEmbeddingMetric -from typing import Literal -import numpy as np -from tqdm import tqdm -from scipy.spatial.distance import cosine -import torch -import torch.nn.functional as F - - -class SignCLIPEmbeddingDistanceMetric(NumpyArrayEmbeddingMetric): - def __init__(self, kind: str = "cosine", device: torch.device | str = "cuda"): - """ - Initializes the metric with the specified distance type and device. - - Args: - kind (str): The type of distance metric, either 'cosine' or 'l2'. - device (torch.device | str): The device to use ('cuda' or 'cpu'). - """ - self.kind = kind - self.device = torch.device(device) if isinstance(device, str) else device - - def score_all(self, embeddings: torch.Tensor) -> torch.Tensor: - """ - Computes the pairwise distance matrix for the provided embeddings. - - Args: - embeddings (torch.Tensor): A 2D tensor of shape (N, D), where N is the number - of embeddings and D is the feature dimension. - - Returns: - torch.Tensor: A 2D tensor of shape (N, N) containing pairwise distances. 
- """ - # Move embeddings to the specified device - embeddings = embeddings.to(self.device) - - if self.kind == "cosine": - # Normalize embeddings to unit norm - embeddings = F.normalize(embeddings, p=2, dim=1) - # Compute pairwise cosine similarity - similarity_matrix = torch.matmul(embeddings, embeddings.T) # Shape: (N, N) - distance_matrix = 1 - similarity_matrix # Cosine distance = 1 - cosine similarity - elif self.kind == "l2": - # Compute pairwise L2 distance using broadcasting - diff = embeddings[:, None, :] - embeddings[None, :, :] # Shape: (N, N, D) - distance_matrix = torch.norm(diff, dim=2) # Shape: (N, N) - else: - raise ValueError(f"Unsupported distance metric: {self.kind}") - - return distance_matrix From 4a1b9f22aef55838b9846f74e32891e14f3a2537 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:49:47 -0500 Subject: [PATCH 15/27] Use sentence-transformers utils for embedding distances --- .../metrics/embedding_distance_metric.py | 184 ++++++++++++------ pyproject.toml | 8 +- 2 files changed, 132 insertions(+), 60 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index b362404..fdfa712 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -1,104 +1,170 @@ -from typing import Literal, Union, List +from typing import Literal, List import torch -import torch.nn.functional as F +from torch import Tensor import numpy as np - +from sentence_transformers import util as st_util from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric +# Useful reference: https://github.com/UKPLab/sentence-transformers/blob/master/sentence_transformers/util.py#L31 +# * Helper functions such as batch_to_device, _convert_to_tensor, _convert_to_batch, _convert_to_batch_tensor +# * a whole semantic search function, with chunking and top_k + +# See also pgvector's C implementation: https://github.com/pgvector/pgvector/blob/master/src/vector.c +# * cosine_distance: https://github.com/pgvector/pgvector/blob/master/src/vector.c#L658 +# * l2_distance https://github.com/pgvector/pgvector/blob/master/src/vector.c#L566 + + class EmbeddingDistanceMetric(EmbeddingMetric): - def __init__(self, kind: Literal["cosine", "l2"] = "cosine", device: Union[torch.device, str] = None): + def __init__( + self, + kind: Literal["cosine", "euclidean", "dot"] = "cosine", + device: torch.device | str = None, + dtype=torch.float64, + ): """ Initialize the embedding distance metric. Args: - kind (Literal["cosine", "l2"]): The type of distance metric. + kind (Literal["cosine", "euclidean"]): The type of distance metric. device (torch.device | str): The device to use for computation. If None, automatically detects. """ super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) self.kind = kind if device is None: - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.device = torch.device(st_util.get_device_name()) else: self.device = torch.device(device) if isinstance(device, str) else device - def _to_tensor(self, data: Union[np.ndarray, torch.Tensor]) -> torch.Tensor: - """ - Convert input to a PyTorch tensor if it is a NumPy array. + self.dtype = dtype - Args: - data (np.ndarray | torch.Tensor): Input data. 
+ def _to_device_tensor(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + if dtype is None: + dtype = self.dtype + return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - Returns: - torch.Tensor: Tensor on the correct device. - """ - if isinstance(data, np.ndarray): - data = torch.tensor(data, dtype=torch.float32) - return data.to(self.device) + def _to_batch_tensor_on_device(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + if dtype is None: + dtype = self.dtype + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) - def score(self, hypothesis: Union[np.ndarray, torch.Tensor], reference: Union[np.ndarray, torch.Tensor]) -> float: + def score( + self, + hypothesis: list | np.ndarray | Tensor, + reference: list | np.ndarray | Tensor, + ) -> float: """ Compute the distance between two embeddings. Args: - hypothesis (np.ndarray | torch.Tensor): A single embedding vector. - reference (np.ndarray | torch.Tensor): Another single embedding vector. + hypothesis (list| np.ndarray | Tensor): A single embedding vector. + reference (list| np.ndarray | Tensor): Another single embedding vector. Returns: float: The calculated distance. """ - hypothesis = self._to_tensor(hypothesis) - reference = self._to_tensor(reference) - - if self.kind == "cosine": - # Normalize both embeddings to unit length - hypothesis = F.normalize(hypothesis, p=2, dim=0) - reference = F.normalize(reference, p=2, dim=0) - # Cosine similarity, converted to distance - similarity = torch.dot(hypothesis, reference).item() - return 1 - similarity - if self.kind == "l2": - # L2 distance - return torch.norm(hypothesis - reference).item() + if hypothesis is None or reference is None: + raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") - raise ValueError(f"Unsupported distance metric: {self.kind}") + try: + hypothesis = self._to_batch_tensor_on_device(hypothesis) + reference = self._to_batch_tensor_on_device(reference) + except RuntimeError as e: + raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e + return self.score_all(hypothesis, reference).item() def score_all( self, - hypotheses: List[Union[np.ndarray, torch.Tensor]], - references: List[Union[np.ndarray, torch.Tensor]], + hypotheses: List[list | np.ndarray | Tensor], + references: List[list | np.ndarray | Tensor], progress_bar: bool = True, - ) -> torch.Tensor: + ) -> Tensor: """ - Compute the pairwise distance between all hypotheses and references. Expects 2D inputs. + Compute the pairwise distance between all hypotheses and references. + Expects 2D inputs, where each element in the second dimension is one embedding Args: - hypotheses (list[np.ndarray | torch.Tensor]): List of hypothesis embeddings. - references (list[np.ndarray | torch.Tensor]): List of reference embeddings. + hypotheses (list[list| np.ndarray | Tensor]): List of hypothesis embeddings. + references (list[list| np.ndarray | Tensor]): List of reference embeddings. progress_bar (bool): Whether to display a progress bar. Returns: - torch.Tensor, distance matrix. Row i is the distances of hypotheses[i] to all rows of references + Tensor, distance matrix. 
Row i is the distances of hypotheses[i] to all rows of references """ # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_tensor(h) for h in hypotheses]) - references = torch.stack([self._to_tensor(r) for r in references]) - - if self.kind == "cosine": - # Normalize the tensors along the feature dimension (dim=1) - normalized_hypotheses = F.normalize(hypotheses, dim=1) - normalized_references = F.normalize(references, dim=1) - - # Calculate cosine similarity between all hypothesis-reference pairs - cosine_similarities = torch.matmul(normalized_hypotheses, normalized_references.T) - - # Convert cosine similarities to cosine distances - distance_matrix = 1 - cosine_similarities - elif self.kind == "l2": - # Use broadcasting to calculate pairwise L2 distances - diff = hypotheses[:, None, :] - references[None, :, :] - distance_matrix = torch.norm(diff, dim=2) + hypotheses = torch.stack([self._to_device_tensor(h) for h in hypotheses]) + references = torch.stack([self._to_device_tensor(r) for r in references]) + + if self.kind == "dot": + distance_matrix = self.dot_product(hypotheses, references) + + elif self.kind == "cosine": + distance_matrix = self.cosine_distances(hypotheses, references) + + elif self.kind == "euclidean": + distance_matrix = self.euclidean_distances(hypotheses, references) + + elif self.kind == "manhattan": + distance_matrix = self.manhattan_distances(hypotheses, references) + else: raise ValueError(f"Unsupported distance metric: {self.kind}") - return distance_matrix.cpu() + return distance_matrix + + def dot_product(self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor) -> Tensor: + # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: + # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul + return st_util.dot_score(hypotheses, references) + + def euclidean_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Returns the negative L2 norm/euclidean distances, which is what sentence-transformers uses for similarities. + """ + return st_util.euclidean_sim(hypotheses, references) + + def euclidean_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Seeing as how sentence-transformers just negates the distances to get "similarities", + We can re-negate to get them positive again. + """ + return -self.euclidean_similarities(hypotheses, references) + + def cosine_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Calculates cosine similarities, which can be thought of as the angle between two embeddings. + The min value is -1 (least similar/pointing directly away), and the max is 1 (exactly the same angle). + """ + return st_util.cos_sim(hypotheses, references) + + def cosine_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Converts cosine similarities to distances by simply subtracting from 1. + Max distance is 2, min distance is 0. + """ + return 1 - self.cosine_similarities(hypotheses, references) + + def manhattan_similarities( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Get the L1/Manhattan similarities, aka negative distances. 
+ """ + return st_util.manhattan_sim(hypotheses, references) + + def manhattan_distances( + self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor + ) -> Tensor: + """ + Sentence transformers defines similarity as negative distances. + We can re-negate to recover the distances. + """ + return -self.manhattan_similarities(hypotheses, references) diff --git a/pyproject.toml b/pyproject.toml index b38c04e..2629a9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,8 +11,14 @@ readme = "README.md" dependencies = [ "pose-format", "scipy", + "torch", + "numpy", # possibly could replace all with torch + # for various vector/tensor similarities and distances in torch + "sentence-transformers", + # For reading .csv files, etc + "pandas", # For segment similarity - "sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" + #"sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" ] [project.optional-dependencies] From 12f612c768163934d0aff2e6456a10e3ca6f807a Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 13:54:26 -0500 Subject: [PATCH 16/27] CDL: updating the tests a bit --- pose_evaluation/metrics/test_embedding_distance_metric.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 147b98e..24991c6 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -185,7 +185,7 @@ def test_score_all_against_self( scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings)), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings),dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") @@ -216,10 +216,10 @@ def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: invalid_inputs = ["invalid_input", None, -1, 1] for invalid_input in invalid_inputs: - with pytest.raises((TypeError, AttributeError)): + with pytest.raises((TypeError, AttributeError, ValueError)): cosine_metric.score(emb1, invalid_input) - logger.info("Invalid input test passed.") + logger.info("Invalid input successfully crashed as expected.") def test_score_tensor_input(cosine_metric): @@ -284,7 +284,7 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): ) # Create expected pattern directly within the test function - expected_pattern = torch.zeros(num_points, num_points, dtype=torch.float32) + expected_pattern = torch.zeros(num_points, num_points, dtype=distances.dtype) for i in range(num_points): for j in range(num_points): if (i + j) % 2 != 0: From 3ca874ec668336700704f2159c5505b288f90bb5 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:16:54 -0500 Subject: [PATCH 17/27] Various pylint changes --- .../metrics/base_embedding_metric.py | 3 ++- pose_evaluation/metrics/conftest.py | 25 +++++++++---------- .../metrics/test_embedding_distance_metric.py | 25 +++++++++++-------- pyproject.toml | 3 ++- 4 files changed, 31 insertions(+), 25 deletions(-) diff --git a/pose_evaluation/metrics/base_embedding_metric.py b/pose_evaluation/metrics/base_embedding_metric.py index 78aeb0e..2fb61c8 100644 --- a/pose_evaluation/metrics/base_embedding_metric.py +++ b/pose_evaluation/metrics/base_embedding_metric.py @@ -1,6 +1,7 @@ from typing import TypeVar -from pose_evaluation.metrics.base import BaseMetric import torch +from pose_evaluation.metrics.base import BaseMetric + # Define a type alias for embeddings (e.g., torch.Tensor) Embedding = TypeVar("Embedding", bound=torch.Tensor) diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py index 4b1129c..c0f44f7 100644 --- a/pose_evaluation/metrics/conftest.py +++ b/pose_evaluation/metrics/conftest.py @@ -1,12 +1,12 @@ -# conftest.py -import pytest import shutil from pathlib import Path from typing import Callable, Union import torch import numpy as np +import pytest -@pytest.fixture(scope="session", autouse=True) + +@pytest.fixture(scope="session", autouse=True) def clean_test_artifacts(): """Fixture to clean up test artifacts before each test session.""" test_artifacts_dir = Path(__file__).parent / "tests" # Using Path @@ -17,19 +17,18 @@ def clean_test_artifacts(): # (Optional) You can add cleanup logic here to run after the session if needed -# conftest.py -from typing import Callable, Union -import torch -import numpy as np - -@pytest.fixture -def distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: +@pytest.fixture(name="distance_range_checker") +def fixture_distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: max_distance = distances.max().item() min_distance = distances.min().item() # Use np.isclose for comparisons with tolerance - assert np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val, f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, 
{max_val}]" - assert np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val, f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" + assert ( + np.isclose(min_distance, min_val, atol=1e-6) or min_val <= min_distance <= max_val + ), f"Minimum distance ({min_distance}) is outside the expected range [{min_val}, {max_val}]" + assert ( + np.isclose(max_distance, max_val, atol=1e-6) or min_val <= max_distance <= max_val + ), f"Maximum distance ({max_distance}) is outside the expected range [{min_val}, {max_val}]" - return _check_range \ No newline at end of file + return _check_range diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 24991c6..0f43b63 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,12 +1,15 @@ +from pathlib import Path +from typing import List +import logging import pytest import numpy as np +import matplotlib.pyplot as plt import torch from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -from pose_evaluation.metrics.conftest import distance_range_checker -import matplotlib.pyplot as plt -import logging -from typing import List -from pathlib import Path + +# no need to import. https://github.com/pylint-dev/pylint/issues/3493#issuecomment-616761997 +# from pose_evaluation.metrics.conftest import distance_range_checker + # TODO: many fixes. Including the fact that we test cosine but not Euclidean, @@ -19,14 +22,16 @@ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -@pytest.fixture -def cosine_metric(): +# named the fixture this way to solve many pylint W0621 +# https://stackoverflow.com/questions/46089480/pytest-fixtures-redefining-name-from-outer-scope-pylint +@pytest.fixture(name="cosine_metric") +def fixture_cosine_metric(): """Fixture to create an EmbeddingDistanceMetric instance.""" return EmbeddingDistanceMetric(kind="cosine") -@pytest.fixture -def embeddings() -> List[torch.Tensor]: +@pytest.fixture(name="embeddings") +def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] @@ -185,7 +190,7 @@ def test_score_all_against_self( scores = cosine_metric.score_all(embeddings, embeddings) assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." assert torch.allclose( - torch.diagonal(scores), torch.zeros(len(embeddings),dtype=scores.dtype), atol=1e-6 + torch.diagonal(scores), torch.zeros(len(embeddings), dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
distance_range_checker(scores, min_val=0, max_val=2) logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") diff --git a/pyproject.toml b/pyproject.toml index 2629a9d..893fa3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ dependencies = [ # For reading .csv files, etc "pandas", # For segment similarity - #"sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" + "sign_language_segmentation @ git+https://github.com/sign-language-processing/segmentation" ] [project.optional-dependencies] @@ -40,6 +40,7 @@ disable = [ "C0115", # Missing class docstring "C0116", # Missing function or method docstring "W0511", # TODO + "W1203", # use lazy % formatting in logging functions ] [tool.setuptools] From d7fb10e53f0758b34b8b3e31ed14291ccb746577 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:44:00 -0500 Subject: [PATCH 18/27] Various stylistic and commenting changes --- .../metrics/embedding_distance_metric.py | 164 ++++++++++++------ 1 file changed, 108 insertions(+), 56 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index fdfa712..875f7f1 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -1,8 +1,12 @@ -from typing import Literal, List +from typing import Literal, List, Union +import logging + import torch from torch import Tensor +from torch.types import Number import numpy as np from sentence_transformers import util as st_util + from pose_evaluation.metrics.base_embedding_metric import EmbeddingMetric @@ -14,20 +18,29 @@ # * cosine_distance: https://github.com/pgvector/pgvector/blob/master/src/vector.c#L658 # * l2_distance https://github.com/pgvector/pgvector/blob/master/src/vector.c#L566 +logger = logging.getLogger(__name__) +logger.setLevel(logging.INFO) + +ValidDistanceKinds = Literal["cosine", "euclidean", "manhattan", "dot"] +TensorConvertableType = Union[List, np.ndarray, Tensor] + class EmbeddingDistanceMetric(EmbeddingMetric): def __init__( self, - kind: Literal["cosine", "euclidean", "dot"] = "cosine", - device: torch.device | str = None, - dtype=torch.float64, + kind: ValidDistanceKinds = "cosine", + device: Union[torch.device, str] = None, + dtype=torch.float32, ): """ Initialize the embedding distance metric. Args: - kind (Literal["cosine", "euclidean"]): The type of distance metric. - device (torch.device | str): The device to use for computation. If None, automatically detects. + kind (ValidDistanceKinds): The type of distance metric. + device (Union[torch.device, str]): The device to use for computation. + If None, automatically detects. + dtype (torch.dtype): The data type to use for tensors. 
+ If None, uses torch.get_default_dtype() """ super().__init__(f"EmbeddingDistanceMetric {kind}", higher_is_better=False) self.kind = kind @@ -36,32 +49,77 @@ def __init__( else: self.device = torch.device(device) if isinstance(device, str) else device + if dtype is None: + dtype = torch.get_default_dtype() + + # Dispatch table for metric computations + self._metric_dispatch = { + "cosine": self.cosine_distances, + "euclidean": self.euclidean_distances, + "dot": self.dot_product, + "manhattan": self.manhattan_distances, + } + self.dtype = dtype - def _to_device_tensor(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + def set_device(self, device: Union[torch.device, str]) -> None: + """ + Explicitly set the device used for tensors. + + Args: + device (Union[torch.device, str]): The device to use for computation. + """ + self.device = torch.device(device) + logger.info(f"Device set to: {self.device}") + + def _to_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + """ + Convert input data to a tensor on the specified device. + + Args: + data (TensorConvertableType: The input data to convert. + dtype (torch.dtype): The data type for the tensor. + + Returns: + Tensor: Tensor representation of the data on the specified device. + """ if dtype is None: dtype = self.dtype return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - def _to_batch_tensor_on_device(self, data: list | np.ndarray | Tensor, dtype=None) -> Tensor: + def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + """ + Convert input data to a batch tensor on the specified device. + + Args: + data (TensorConvertableType): The input data to convert. + dtype (torch.dtype): The data type for the tensor. + + Returns: + Tensor: Batch tensor representation of the data on the specified device. + """ if dtype is None: dtype = self.dtype return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) def score( self, - hypothesis: list | np.ndarray | Tensor, - reference: list | np.ndarray | Tensor, - ) -> float: + hypothesis: TensorConvertableType, + reference: TensorConvertableType, + ) -> Number: """ Compute the distance between two embeddings. Args: - hypothesis (list| np.ndarray | Tensor): A single embedding vector. - reference (list| np.ndarray | Tensor): Another single embedding vector. + hypothesis (TensorConvertableType): A single embedding vector. + reference (TensorConvertableType): Another single embedding vector. Returns: - float: The calculated distance. + Number: The calculated distance. + + Raises: + ValueError: If either input is None. + TypeError: If inputs cannot be converted to tensors. """ if hypothesis is None or reference is None: raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") @@ -75,95 +133,89 @@ def score( def score_all( self, - hypotheses: List[list | np.ndarray | Tensor], - references: List[list | np.ndarray | Tensor], + hypotheses: Union[List[TensorConvertableType], Tensor], + references: Union[List[TensorConvertableType], Tensor], progress_bar: bool = True, ) -> Tensor: """ - Compute the pairwise distance between all hypotheses and references. - Expects 2D inputs, where each element in the second dimension is one embedding + Compute the distance between all hypotheses and all references. + + Expects 2D inputs. If not already Tensors, will attempt to convert them. Args: - hypotheses (list[list| np.ndarray | Tensor]): List of hypothesis embeddings. 
- references (list[list| np.ndarray | Tensor]): List of reference embeddings. - progress_bar (bool): Whether to display a progress bar. + hypotheses (Union[List[TensorConvertableType], Tensor]): + List of hypothesis embeddings or a single tensor. + references (Union[List[TensorConvertableType], Tensor]): + List of reference embeddings or a single tensor. + progress_bar (bool): Whether to display a progress bar. (not implemented yet) Returns: - Tensor, distance matrix. Row i is the distances of hypotheses[i] to all rows of references + Tensor: Distance matrix. Row `i` is the distances of `hypotheses[i]` to all rows of `references`. + Shape is be NxM, where N is the number of hypotheses, and M is the number of references + + Raises: + ValueError: If the specified metric is unsupported. """ # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_device_tensor(h) for h in hypotheses]) - references = torch.stack([self._to_device_tensor(r) for r in references]) - - if self.kind == "dot": - distance_matrix = self.dot_product(hypotheses, references) - - elif self.kind == "cosine": - distance_matrix = self.cosine_distances(hypotheses, references) - - elif self.kind == "euclidean": - distance_matrix = self.euclidean_distances(hypotheses, references) + hypotheses = torch.stack([self._to_tensor_on_device(h) for h in hypotheses]) + references = torch.stack([self._to_tensor_on_device(r) for r in references]) - elif self.kind == "manhattan": - distance_matrix = self.manhattan_distances(hypotheses, references) - - else: + if self.kind not in self._metric_dispatch: + logger.error(f"Unsupported distance metric: {self.kind}") raise ValueError(f"Unsupported distance metric: {self.kind}") + distance_matrix = self._metric_dispatch[self.kind](hypotheses, references) return distance_matrix - def dot_product(self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor) -> Tensor: + def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: + """ + Compute the dot product between embeddings. + Uses sentence_transformers.util.dot_score + """ # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul return st_util.dot_score(hypotheses, references) - def euclidean_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def euclidean_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Returns the negative L2 norm/euclidean distances, which is what sentence-transformers uses for similarities. + Uses sentence_transformers.util.euclidean_sim """ return st_util.euclidean_sim(hypotheses, references) - def euclidean_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def euclidean_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Seeing as how sentence-transformers just negates the distances to get "similarities", We can re-negate to get them positive again. 
+ Uses sentence_transformers.util.euclidean_similarities """ return -self.euclidean_similarities(hypotheses, references) - def cosine_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def cosine_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Calculates cosine similarities, which can be thought of as the angle between two embeddings. The min value is -1 (least similar/pointing directly away), and the max is 1 (exactly the same angle). + Uses sentence_transformers.util.cos_sim """ return st_util.cos_sim(hypotheses, references) - def cosine_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def cosine_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Converts cosine similarities to distances by simply subtracting from 1. Max distance is 2, min distance is 0. """ return 1 - self.cosine_similarities(hypotheses, references) - def manhattan_similarities( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def manhattan_similarities(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ Get the L1/Manhattan similarities, aka negative distances. + Uses sentence_transformers.util.manhattan_sim """ return st_util.manhattan_sim(hypotheses, references) - def manhattan_distances( - self, hypotheses: list | np.ndarray | Tensor, references: list | np.ndarray | Tensor - ) -> Tensor: + def manhattan_distances(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ + Convert Manhattan similarities to distances. Sentence transformers defines similarity as negative distances. We can re-negate to recover the distances. """ From 884deb9108215375e9acc41dab783b03c21adb37 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:19:33 -0500 Subject: [PATCH 19/27] Better handling of List to tensor conversions --- .../metrics/embedding_distance_metric.py | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 875f7f1..29793c6 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -100,6 +100,16 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> """ if dtype is None: dtype = self.dtype + + # better performance this way, see https://github.com/pytorch/pytorch/issues/13918 + if isinstance(data, list) and all(isinstance(x, np.ndarray) for x in data): + data = np.asanyarray(data) + + if isinstance(data, list) and all(isinstance(x, torch.Tensor) for x in data): + # prevents ValueError: only one element tensors can be converted to Python scalars + # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch + data = torch.stack(data) + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) def score( @@ -117,18 +127,8 @@ def score( Returns: Number: The calculated distance. - Raises: - ValueError: If either input is None. - TypeError: If inputs cannot be converted to tensors. 
""" - if hypothesis is None or reference is None: - raise ValueError("Neither 'hypothesis' nor 'reference' can be None.") - try: - hypothesis = self._to_batch_tensor_on_device(hypothesis) - reference = self._to_batch_tensor_on_device(reference) - except RuntimeError as e: - raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e return self.score_all(hypothesis, reference).item() def score_all( @@ -154,11 +154,14 @@ def score_all( Shape is be NxM, where N is the number of hypotheses, and M is the number of references Raises: + TypeError: If either hypotheses or references cannot be converted to a batch tensor ValueError: If the specified metric is unsupported. """ - # Convert inputs to tensors and stack - hypotheses = torch.stack([self._to_tensor_on_device(h) for h in hypotheses]) - references = torch.stack([self._to_tensor_on_device(r) for r in references]) + try: + hypotheses = self._to_batch_tensor_on_device(hypotheses) + references = self._to_batch_tensor_on_device(references) + except RuntimeError as e: + raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e if self.kind not in self._metric_dispatch: logger.error(f"Unsupported distance metric: {self.kind}") @@ -172,7 +175,6 @@ def dot_product(self, hypotheses: TensorConvertableType, references: TensorConve Compute the dot product between embeddings. Uses sentence_transformers.util.dot_score """ - # TODO: test if this gives the same thing as previous matmul implementation, see stack overflow link below: # https://stackoverflow.com/questions/73924697/whats-the-difference-between-torch-mm-torch-matmul-and-torch-mul return st_util.dot_score(hypotheses, references) From 4934c5d06f796a3e0c102919aa620d31d0ee255d Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:21:13 -0500 Subject: [PATCH 20/27] Adding some tests, including for List handling --- .../metrics/test_embedding_distance_metric.py | 93 ++++++++++++++++--- 1 file changed, 80 insertions(+), 13 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 0f43b63..7124151 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List +from typing import List, Callable, Tuple import logging import pytest import numpy as np @@ -20,6 +20,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") +torch.set_default_device(DEVICE) # so that we get arrays on the same device # named the fixture this way to solve many pylint W0621 @@ -35,9 +36,16 @@ def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] +def call_and_call_with_inputs_swapped(hyp:torch.Tensor, ref:torch.Tensor, scoring_function:Callable[[torch.Tensor, torch.Tensor], torch.Tensor])->Tuple[torch.Tensor, torch.Tensor]: + score1 = scoring_function(hyp, ref) + score2 = scoring_function(ref, hyp) + return score1, score2 + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" + + distances = distances.cpu() test_artifacts_dir = Path(__file__).parent / "tests" output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" np.savetxt(output_path, 
distances.numpy(), delimiter=",", fmt="%.4f") @@ -154,8 +162,9 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: emb1 = random_tensor(768) emb2 = random_tensor(768) - score1 = cosine_metric.score(emb1, emb2) - score2 = cosine_metric.score(emb2, emb1) + # score1 = cosine_metric.score(emb1, emb2) + # score2 = cosine_metric.score(emb2, emb1) + score1, score2 = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) logger.info(f"Score 1: {score1}, Score 2: {score2}") assert pytest.approx(score1) == score2, "Score should be symmetric." @@ -196,6 +205,18 @@ def test_score_all_against_self( logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") +def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): + hyps = [np.random.rand(768) for _ in range(3)] + refs = np.random.rand(768) + + # scores = cosine_metric.score_all(hyps, refs) + scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) + + + assert scores.shape == (len(hyps), 1) + assert scores2.shape == (1, len(hyps)) + distance_range_checker(scores, min_val=0, max_val=2) + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] @@ -209,20 +230,50 @@ def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): distance_range_checker(scores, min_val=0, max_val=2) -# def test_score_all_with_empty_inputs(metric): -# """Test score_all with empty inputs.""" -# scores = metric.score_all([], []) -# assert scores.shape == (0,), f"Score_all should return an empty array for empty inputs. Output: {scores.shape}" +def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + ref = random_tensor(769) + + with pytest.raises(RuntimeError): + # gives RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x768 and 769x1 + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) +def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + for ref in range (-2, 2): + with pytest.raises(IndexError): + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + + logger.info("Invalid input successfully crashed as expected.") -def test_invalid_input(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = random_tensor(768) + invalid_inputs = [ + "invalid_input", + True + ] + for ref in invalid_inputs: + with pytest.raises((TypeError, IndexError)): + # TypeError: new(): invalid data type 'str' + # but True gives IndexError + # TODO: better TypeError, more descriptive + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + +def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) - invalid_inputs = ["invalid_input", None, -1, 1] + invalid_inputs = ["", list(), dict(), tuple(), set()] for invalid_input in invalid_inputs: - with pytest.raises((TypeError, AttributeError, ValueError)): - 
cosine_metric.score(emb1, invalid_input) + with pytest.raises((RuntimeError, TypeError, IndexError)): + # gives RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x768 and 0x1) + # "" gives TypeError: new(): invalid data type 'str' + # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) + # TODO: we should probably raise a more descriptive/helpful error/ ValueError + call_and_call_with_inputs_swapped(emb1, invalid_input, cosine_metric.score) logger.info("Invalid input successfully crashed as expected.") @@ -244,9 +295,16 @@ def test_score_ndarray_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." +def test_score_all_list_of_lists_of_floats(cosine_metric): + """Does a 2D list of floats work? """ + hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] + refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" -def test_score_all_tensor_input(cosine_metric): - """Test score_all function with torch.Tensor inputs.""" +def test_score_all_list_of_tensor_input(cosine_metric): + """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] refs = [torch.rand(768) for _ in range(5)] @@ -254,6 +312,15 @@ def test_score_all_tensor_input(cosine_metric): assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" +def test_score_all_list_of_ndarray_input(cosine_metric): + """Test score_all function with List of np.ndarray inputs.""" + hyps = [np.random.rand(768) for _ in range(5)] + refs = [np.random.rand(768) for _ in range(5)] + + scores = cosine_metric.score_all(hyps, refs) + assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. 
Shape:{scores.shape}" + def test_device_handling(cosine_metric): """Test device handling for the metric.""" From a495c67866097e026d14b2bfd4e00f6dd7cb9e47 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:59:44 -0500 Subject: [PATCH 21/27] CDL: a few pylint changes --- .../metrics/test_embedding_distance_metric.py | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 7124151..479e4b2 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -20,7 +20,7 @@ # Device configuration for PyTorch DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") -torch.set_default_device(DEVICE) # so that we get arrays on the same device +torch.set_default_device(DEVICE) # so that we get arrays on the same device # named the fixture this way to solve many pylint W0621 @@ -36,11 +36,14 @@ def fixture_embeddings() -> List[torch.Tensor]: """Fixture to create dummy embeddings for testing.""" return [random_tensor(768) for _ in range(5)] -def call_and_call_with_inputs_swapped(hyp:torch.Tensor, ref:torch.Tensor, scoring_function:Callable[[torch.Tensor, torch.Tensor], torch.Tensor])->Tuple[torch.Tensor, torch.Tensor]: + +def call_and_call_with_inputs_swapped( + hyp: torch.Tensor, ref: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] +) -> Tuple[torch.Tensor, torch.Tensor]: score1 = scoring_function(hyp, ref) score2 = scoring_function(ref, hyp) return score1, score2 - + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" @@ -211,12 +214,12 @@ def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): # scores = cosine_metric.score_all(hyps, refs) scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) - assert scores.shape == (len(hyps), 1) assert scores2.shape == (1, len(hyps)) distance_range_checker(scores, min_val=0, max_val=2) + def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] @@ -239,9 +242,10 @@ def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistan # TODO: we should probably raise a more descriptive/helpful error/ ValueError call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - for ref in range (-2, 2): + for ref in range(-2, 2): with pytest.raises(IndexError): # TODO: we should probably raise a more descriptive/helpful error/ ValueError # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) @@ -249,12 +253,10 @@ def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> logger.info("Invalid input successfully crashed as expected.") + def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - invalid_inputs = [ - "invalid_input", - True - ] + invalid_inputs = ["invalid_input", True] for ref in invalid_inputs: with pytest.raises((TypeError, IndexError)): # TypeError: new(): invalid data type 'str' @@ -262,10 +264,11 
@@ def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDista # TODO: better TypeError, more descriptive call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) - invalid_inputs = ["", list(), dict(), tuple(), set()] + invalid_inputs = ["", [], {}, tuple(), set()] for invalid_input in invalid_inputs: with pytest.raises((RuntimeError, TypeError, IndexError)): @@ -295,14 +298,16 @@ def test_score_ndarray_input(cosine_metric): score = cosine_metric.score(emb1, emb2) assert isinstance(score, float), "Output should be a float." + def test_score_all_list_of_lists_of_floats(cosine_metric): - """Does a 2D list of floats work? """ + """Does a 2D list of floats work?""" hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] scores = cosine_metric.score_all(hyps, refs) assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + def test_score_all_list_of_tensor_input(cosine_metric): """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] @@ -312,6 +317,7 @@ def test_score_all_list_of_tensor_input(cosine_metric): assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + def test_score_all_list_of_ndarray_input(cosine_metric): """Test score_all function with List of np.ndarray inputs.""" hyps = [np.random.rand(768) for _ in range(5)] @@ -319,7 +325,7 @@ def test_score_all_list_of_ndarray_input(cosine_metric): scores = cosine_metric.score_all(hyps, refs) assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" def test_device_handling(cosine_metric): From 0e54bf99df1b5b2d832b7a46504c9b02bbdb5a63 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:52:25 -0500 Subject: [PATCH 22/27] CDL: some requested changes. Remove redundant variable, remove unused dtype arg, rename set_device, etc --- .../metrics/embedding_distance_metric.py | 43 ++++--------------- 1 file changed, 8 insertions(+), 35 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 29793c6..b1ca669 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -30,13 +30,11 @@ def __init__( self, kind: ValidDistanceKinds = "cosine", device: Union[torch.device, str] = None, - dtype=torch.float32, + dtype=None, ): """ - Initialize the embedding distance metric. - Args: - kind (ValidDistanceKinds): The type of distance metric. + kind (ValidDistanceKinds): The type of distance metric, e.g. "cosine", or "euclidean". device (Union[torch.device, str]): The device to use for computation. If None, automatically detects. 
dtype (torch.dtype): The data type to use for tensors. @@ -52,6 +50,8 @@ def __init__( if dtype is None: dtype = torch.get_default_dtype() + self.dtype = dtype + # Dispatch table for metric computations self._metric_dispatch = { "cosine": self.cosine_distances, @@ -60,9 +60,7 @@ def __init__( "manhattan": self.manhattan_distances, } - self.dtype = dtype - - def set_device(self, device: Union[torch.device, str]) -> None: + def to(self, device: Union[torch.device, str]) -> None: """ Explicitly set the device used for tensors. @@ -72,35 +70,16 @@ def set_device(self, device: Union[torch.device, str]) -> None: self.device = torch.device(device) logger.info(f"Device set to: {self.device}") - def _to_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: - """ - Convert input data to a tensor on the specified device. - - Args: - data (TensorConvertableType: The input data to convert. - dtype (torch.dtype): The data type for the tensor. - - Returns: - Tensor: Tensor representation of the data on the specified device. - """ - if dtype is None: - dtype = self.dtype - return st_util._convert_to_tensor(data).to(device=self.device, dtype=dtype) - - def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> Tensor: + def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: """ Convert input data to a batch tensor on the specified device. Args: data (TensorConvertableType): The input data to convert. - dtype (torch.dtype): The data type for the tensor. Returns: Tensor: Batch tensor representation of the data on the specified device. """ - if dtype is None: - dtype = self.dtype - # better performance this way, see https://github.com/pytorch/pytorch/issues/13918 if isinstance(data, list) and all(isinstance(x, np.ndarray) for x in data): data = np.asanyarray(data) @@ -110,7 +89,7 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType, dtype=None) -> # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch data = torch.stack(data) - return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=dtype) + return st_util._convert_to_batch_tensor(data).to(device=self.device) def score( self, @@ -128,7 +107,6 @@ def score( Number: The calculated distance. 
""" - return self.score_all(hypothesis, reference).item() def score_all( @@ -163,12 +141,7 @@ def score_all( except RuntimeError as e: raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e - if self.kind not in self._metric_dispatch: - logger.error(f"Unsupported distance metric: {self.kind}") - raise ValueError(f"Unsupported distance metric: {self.kind}") - - distance_matrix = self._metric_dispatch[self.kind](hypotheses, references) - return distance_matrix + return self._metric_dispatch[self.kind](hypotheses, references) def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: """ From cb45301b1b8d8cbea654e6c90b197eca9a973ecb Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 11:51:44 -0500 Subject: [PATCH 23/27] Add distance_matrix shape checker fixture --- pose_evaluation/metrics/conftest.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/conftest.py b/pose_evaluation/metrics/conftest.py index c0f44f7..c04f587 100644 --- a/pose_evaluation/metrics/conftest.py +++ b/pose_evaluation/metrics/conftest.py @@ -17,9 +17,25 @@ def clean_test_artifacts(): # (Optional) You can add cleanup logic here to run after the session if needed +@pytest.fixture(name="distance_matrix_shape_checker") +def fixture_distance_matrix_shape_checker() -> Callable[[torch.Tensor, torch.Tensor], None]: + def _check_shape(hyp_count: int, ref_count: int, distance_matrix: torch.Tensor): + + expected_shape = torch.Size([hyp_count, ref_count]) + assert ( + distance_matrix.shape == expected_shape + ), f"For M={hyp_count} hypotheses, N={ref_count} references, Distance Matrix should be MxN={expected_shape}. Instead, received {distance_matrix.shape}" + + return _check_shape + + @pytest.fixture(name="distance_range_checker") def fixture_distance_range_checker() -> Callable[[Union[torch.Tensor, np.ndarray], float, float], None]: - def _check_range(distances: Union[torch.Tensor, np.ndarray], min_val: float = 0, max_val: float = 2) -> None: + def _check_range( + distances: Union[torch.Tensor, np.ndarray], + min_val: float = 0, + max_val: float = 2, + ) -> None: max_distance = distances.max().item() min_distance = distances.min().item() From 000e3468e5c3d33bdf5168627b4068d6ddf576f3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:25:54 -0500 Subject: [PATCH 24/27] Various pull request changes including an ndim assertion, use of distance_matrix_shape_checker, removing redundant args documentation, --- .../metrics/embedding_distance_metric.py | 21 +- .../metrics/test_embedding_distance_metric.py | 214 ++++++++++++------ 2 files changed, 155 insertions(+), 80 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index b1ca669..2c165b0 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -69,14 +69,12 @@ def to(self, device: Union[torch.device, str]) -> None: """ self.device = torch.device(device) logger.info(f"Device set to: {self.device}") + return self def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: """ Convert input data to a batch tensor on the specified device. - Args: - data (TensorConvertableType): The input data to convert. 
- Returns: Tensor: Batch tensor representation of the data on the specified device. """ @@ -89,7 +87,7 @@ def _to_batch_tensor_on_device(self, data: TensorConvertableType) -> Tensor: # https://stackoverflow.com/questions/55050717/converting-list-of-tensors-to-tensors-pytorch data = torch.stack(data) - return st_util._convert_to_batch_tensor(data).to(device=self.device) + return st_util._convert_to_batch_tensor(data).to(device=self.device, dtype=self.dtype) def score( self, @@ -99,10 +97,6 @@ def score( """ Compute the distance between two embeddings. - Args: - hypothesis (TensorConvertableType): A single embedding vector. - reference (TensorConvertableType): Another single embedding vector. - Returns: Number: The calculated distance. @@ -120,13 +114,6 @@ def score_all( Expects 2D inputs. If not already Tensors, will attempt to convert them. - Args: - hypotheses (Union[List[TensorConvertableType], Tensor]): - List of hypothesis embeddings or a single tensor. - references (Union[List[TensorConvertableType], Tensor]): - List of reference embeddings or a single tensor. - progress_bar (bool): Whether to display a progress bar. (not implemented yet) - Returns: Tensor: Distance matrix. Row `i` is the distances of `hypotheses[i]` to all rows of `references`. Shape is be NxM, where N is the number of hypotheses, and M is the number of references @@ -141,6 +128,10 @@ def score_all( except RuntimeError as e: raise TypeError(f"Inputs must support conversion to device tensors: {e}") from e + assert ( + hypotheses.ndim == 2 and references.ndim == 2 + ), f"score_all received non-2D input: hypotheses: {hypotheses.shape}, references: {references.shape}" + return self._metric_dispatch[self.kind](hypotheses, references) def dot_product(self, hypotheses: TensorConvertableType, references: TensorConvertableType) -> Tensor: diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 479e4b2..0f08bb9 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -1,3 +1,4 @@ +import itertools from pathlib import Path from typing import List, Callable, Tuple import logging @@ -7,9 +8,6 @@ import torch from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric -# no need to import. https://github.com/pylint-dev/pylint/issues/3493#issuecomment-616761997 -# from pose_evaluation.metrics.conftest import distance_range_checker - # TODO: many fixes. Including the fact that we test cosine but not Euclidean, @@ -37,14 +35,54 @@ def fixture_embeddings() -> List[torch.Tensor]: return [random_tensor(768) for _ in range(5)] +def test_shape_checker(distance_matrix_shape_checker): + emb_len = 768 + hyps = torch.rand((3, emb_len)) + refs = torch.rand((4, emb_len)) + + m = hyps.shape[0] + n = refs.shape[0] + + wrong_shapes = [1, m, n, emb_len] + wrong_shapes.extend(list(itertools.permutations(wrong_shapes, r=2))) + for wrong_shape in wrong_shapes: + if wrong_shape != (m, n): + distances_with_wrong_shape = torch.rand(wrong_shape) + with pytest.raises(AssertionError, match="Distance Matrix should be MxN"): + # This SHOULD happen. If this doesn't happen then the checker itself is not working. 
+ distance_matrix_shape_checker(m, n, distances_with_wrong_shape) + + def call_and_call_with_inputs_swapped( - hyp: torch.Tensor, ref: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] + hyps: torch.Tensor, refs: torch.Tensor, scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] ) -> Tuple[torch.Tensor, torch.Tensor]: - score1 = scoring_function(hyp, ref) - score2 = scoring_function(ref, hyp) + score1 = scoring_function(hyps, refs) + score2 = scoring_function(refs, hyps) return score1, score2 +def call_with_both_input_orders_and_do_standard_checks( + hyps: torch.Tensor, + refs: torch.Tensor, + scoring_function: Callable[[torch.Tensor, torch.Tensor], torch.Tensor], + distance_range_checker, + distance_matrix_shape_checker, + expected_shape: Tuple = None, +): + scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, scoring_function) + if expected_shape is not None: + m, n = expected_shape + else: + m = hyps.shape[0] + n = refs.shape[0] + distance_range_checker(scores, min_val=0, max_val=2) + distance_range_checker(scores2, min_val=0, max_val=2) + distance_matrix_shape_checker(m, n, scores) + distance_matrix_shape_checker(n, m, scores2) + + return scores, scores2 + + def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" @@ -165,8 +203,6 @@ def test_score_symmetric(cosine_metric: EmbeddingDistanceMetric) -> None: emb1 = random_tensor(768) emb2 = random_tensor(768) - # score1 = cosine_metric.score(emb1, emb2) - # score2 = cosine_metric.score(emb2, emb1) score1, score2 = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) logger.info(f"Score 1: {score1}, Score 2: {score2}") @@ -196,44 +232,46 @@ def test_score_with_path(cosine_metric: EmbeddingDistanceMetric, tmp_path: Path) def test_score_all_against_self( - cosine_metric: EmbeddingDistanceMetric, embeddings: List[torch.Tensor], distance_range_checker + cosine_metric: EmbeddingDistanceMetric, + embeddings: List[torch.Tensor], + distance_range_checker, + distance_matrix_shape_checker, ) -> None: """Test the score_all function.""" scores = cosine_metric.score_all(embeddings, embeddings) - assert scores.shape == (len(embeddings), len(embeddings)), "Output shape mismatch for score_all." + distance_matrix_shape_checker(len(embeddings), len(embeddings), scores) + distance_range_checker(scores, min_val=0, max_val=2) + assert torch.allclose( torch.diagonal(scores), torch.zeros(len(embeddings), dtype=scores.dtype), atol=1e-6 ), "Self-comparison scores should be zero for cosine distance." 
- distance_range_checker(scores, min_val=0, max_val=2) + logger.info(f"Score matrix shape: {scores.shape}, Diagonal values: {torch.diagonal(scores)}") -def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker): +def test_score_all_with_one_vs_batch(cosine_metric, distance_range_checker, distance_matrix_shape_checker): hyps = [np.random.rand(768) for _ in range(3)] refs = np.random.rand(768) - # scores = cosine_metric.score_all(hyps, refs) - scores, scores2 = call_and_call_with_inputs_swapped(hyps, refs, cosine_metric.score_all) + expected_shape = (len(hyps), 1) - assert scores.shape == (len(hyps), 1) - assert scores2.shape == (1, len(hyps)) - distance_range_checker(scores, min_val=0, max_val=2) + call_with_both_input_orders_and_do_standard_checks( + hyps, refs, cosine_metric.score_all, distance_range_checker, distance_matrix_shape_checker, expected_shape + ) -def test_score_all_with_different_sizes(cosine_metric, distance_range_checker): +def test_score_all_with_different_sizes(cosine_metric, distance_range_checker, distance_matrix_shape_checker): """Test score_all with different sizes for hypotheses and references.""" hyps = [np.random.rand(768) for _ in range(3)] refs = [np.random.rand(768) for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert scores.shape == ( - len(hyps), - len(refs), - ), f"Output shape mismatch ({scores.shape}) vs {(len(hyps), len(refs))} for score_all with different sizes. " - distance_range_checker(scores, min_val=0, max_val=2) + expected_shape = (len(hyps), len(refs)) + call_with_both_input_orders_and_do_standard_checks( + hyps, refs, cosine_metric.score_all, distance_range_checker, distance_matrix_shape_checker, expected_shape + ) -def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) ref = random_tensor(769) @@ -243,10 +281,10 @@ def test_invalid_input_mismatched_embedding_sizes(cosine_metric: EmbeddingDistan call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) -def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) for ref in range(-2, 2): - with pytest.raises(IndexError): + with pytest.raises(AssertionError, match="score_all received non-2D input"): # TODO: we should probably raise a more descriptive/helpful error/ ValueError # IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1) call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) @@ -254,18 +292,23 @@ def test_invalid_input_single_number(cosine_metric: EmbeddingDistanceMetric) -> logger.info("Invalid input successfully crashed as expected.") -def test_invalid_input_noncontainernonnumber_types(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_string(cosine_metric: EmbeddingDistanceMetric) -> None: + hyp = "invalid input" + ref = random_tensor(768) + with pytest.raises(TypeError, match="invalid data type 'str'"): + call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + + +def test_score_with_invalid_input_bool(cosine_metric: EmbeddingDistanceMetric) -> None: hyp = random_tensor(768) - invalid_inputs = ["invalid_input", True] + invalid_inputs = [True, False] for ref in invalid_inputs: - with pytest.raises((TypeError, IndexError)): - # TypeError: 
new(): invalid data type 'str' - # but True gives IndexError - # TODO: better TypeError, more descriptive + with pytest.raises(AssertionError, match="score_all received non-2D input"): call_and_call_with_inputs_swapped(hyp, ref, cosine_metric.score) + # TODO: why does a bool make it all the way there? -def test_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: +def test_score_with_invalid_input_empty_containers(cosine_metric: EmbeddingDistanceMetric) -> None: """Test the metric with invalid inputs.""" emb1 = random_tensor(768) invalid_inputs = ["", [], {}, tuple(), set()] @@ -299,33 +342,61 @@ def test_score_ndarray_input(cosine_metric): assert isinstance(score, float), "Output should be a float." -def test_score_all_list_of_lists_of_floats(cosine_metric): +def test_score_all_list_of_lists_of_floats( + cosine_metric, + distance_range_checker, + distance_matrix_shape_checker, +): """Does a 2D list of floats work?""" hyps = [[np.random.rand() for _ in range(768)] for _ in range(5)] refs = [[np.random.rand() for _ in range(768)] for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) -def test_score_all_list_of_tensor_input(cosine_metric): +def test_score_all_list_of_tensor_input(cosine_metric, distance_range_checker, distance_matrix_shape_checker): """Test score_all function with List of torch.Tensor inputs.""" hyps = [torch.rand(768) for _ in range(5)] refs = [torch.rand(768) for _ in range(5)] - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) -def test_score_all_list_of_ndarray_input(cosine_metric): + +def test_score_all_list_of_ndarray_input( + cosine_metric, + distance_range_checker, + distance_matrix_shape_checker, +): """Test score_all function with List of np.ndarray inputs.""" hyps = [np.random.rand(768) for _ in range(5)] refs = [np.random.rand(768) for _ in range(5)] - - scores = cosine_metric.score_all(hyps, refs) - assert len(scores) == len(hyps), f"Output row count mismatch for torch.Tensor input. Shape:{scores.shape}" - assert len(scores[0]) == len(refs), f"Output column count mismatch for torch.Tensor input. Shape:{scores.shape}" + expected_shape = (len(hyps), len(refs)) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) def test_device_handling(cosine_metric): @@ -337,20 +408,39 @@ def test_device_handling(cosine_metric): assert cosine_metric.device.type == "cpu", "Should use 'cpu' when CUDA is unavailable." 
-def test_mixed_input(cosine_metric): +def test_score_mixed_input_types(cosine_metric): """Test score function with mixed input types.""" emb1 = np.random.rand(768) emb2 = torch.rand(768) - score = cosine_metric.score(emb1, emb2) - assert isinstance(score, float), "Output should be a float." + all_scores = call_and_call_with_inputs_swapped(emb1, emb2, cosine_metric.score) + assert all([isinstance(score, float) for score in all_scores]), "Output should be a float." + + +def test_score_all_mixed_input_types(cosine_metric, distance_range_checker, distance_matrix_shape_checker): + """Test score function with mixed input types.""" + hyps = np.random.rand(5, 768) + refs = torch.rand(3, 768) + + expected_shape = (5, 3) + + call_with_both_input_orders_and_do_standard_checks( + hyps, + refs, + cosine_metric.score_all, + distance_range_checker, + distance_matrix_shape_checker, + expected_shape=expected_shape, + ) @pytest.mark.parametrize("num_points, dim", [(16, 2)]) -def test_unit_circle_points(cosine_metric, num_points, dim): +def test_unit_circle_points(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_unit_circle_points(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances=distances, matrix_name="Unit Circle", num_points=num_points, dim=dim) + distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(20, 2)]) @@ -375,35 +465,29 @@ def test_orthogonal_rows_with_repeats_2d(cosine_metric, num_points, dim): @pytest.mark.parametrize("num_points, dim", [(20, 2)]) -def test_orthogonal_rows_in_pairs(cosine_metric, num_points, dim, distance_range_checker): +def test_orthogonal_rows_in_pairs( + cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker +): embeddings = generate_orthogonal_rows_in_pairs(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "orthogonal_rows_in_pairs", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(10, 5)]) -def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker): +def test_ones_tensor(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_ones_tensor(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "ones_tensor", num_points, dim) distance_range_checker(distances, min_val=0, max_val=0) # Expect all distances to be 0 + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) @pytest.mark.parametrize("num_points, dim", [(15, 15)]) # dim should be equal to num_points for identity matrix -def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker): +def test_identity_matrix_rows(cosine_metric, num_points, dim, distance_range_checker, distance_matrix_shape_checker): embeddings = generate_identity_matrix_rows(num_points, dim) distances = cosine_metric.score_all(embeddings, embeddings) save_and_plot_distances(distances, "identity_matrix_rows", num_points, dim) distance_range_checker(distances, min_val=0, max_val=2) # Check distance range - - -# def 
test_progress_bar(cosine_metric): -# """Test score_all with progress_bar argument.""" -# hyps = [np.random.rand(768) for _ in range(5)] -# refs = [np.random.rand(768) for _ in range(5)] - -# # Disable progress bar -# scores = cosine_metric.score_all(hyps, refs, progress_bar=False) -# assert len(scores) == len(hyps), "Output row count mismatch with progress_bar=False." -# assert len(scores[0]) == len(refs), "Output column count mismatch with progress_bar=False." + distance_matrix_shape_checker(embeddings.shape[0], embeddings.shape[0], distances) From dca700b2de54d9e7b086e92940b0ff33ae02aef7 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:27:10 -0500 Subject: [PATCH 25/27] CDL: change test_artifacts_dir name to 'temp' --- pose_evaluation/metrics/test_embedding_distance_metric.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/test_embedding_distance_metric.py b/pose_evaluation/metrics/test_embedding_distance_metric.py index 0f08bb9..ab275c6 100644 --- a/pose_evaluation/metrics/test_embedding_distance_metric.py +++ b/pose_evaluation/metrics/test_embedding_distance_metric.py @@ -87,7 +87,7 @@ def save_and_plot_distances(distances, matrix_name, num_points, dim): """Helper function to save distance matrix and plot distances.""" distances = distances.cpu() - test_artifacts_dir = Path(__file__).parent / "tests" + test_artifacts_dir = Path(__file__).parent / "temp" output_path = test_artifacts_dir / f"distance_matrix_{matrix_name}_{num_points}_{dim}D.csv" np.savetxt(output_path, distances.numpy(), delimiter=",", fmt="%.4f") print(f"Distance matrix saved to {output_path}") From d9eb0b2cb9127ec1833c27651aaaa8cb1840a7d3 Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:28:03 -0500 Subject: [PATCH 26/27] Update gitignore --- pose_evaluation/metrics/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pose_evaluation/metrics/.gitignore b/pose_evaluation/metrics/.gitignore index 3d0dbe4..cd78447 100644 --- a/pose_evaluation/metrics/.gitignore +++ b/pose_evaluation/metrics/.gitignore @@ -1 +1 @@ -tests/ \ No newline at end of file +temp/ \ No newline at end of file From 73ebd75a7566c737b02ec7a36e27e09ec0a18c5b Mon Sep 17 00:00:00 2001 From: Colin Leong <122366389+cleong110@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:31:33 -0500 Subject: [PATCH 27/27] Took out one more redundant 'args' comment --- pose_evaluation/metrics/embedding_distance_metric.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pose_evaluation/metrics/embedding_distance_metric.py b/pose_evaluation/metrics/embedding_distance_metric.py index 2c165b0..6044faa 100644 --- a/pose_evaluation/metrics/embedding_distance_metric.py +++ b/pose_evaluation/metrics/embedding_distance_metric.py @@ -63,9 +63,6 @@ def __init__( def to(self, device: Union[torch.device, str]) -> None: """ Explicitly set the device used for tensors. - - Args: - device (Union[torch.device, str]): The device to use for computation. """ self.device = torch.device(device) logger.info(f"Device set to: {self.device}")
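
For reference, a minimal usage sketch of the EmbeddingDistanceMetric API as it stands after PATCH 27/27. This is an illustration only, not part of the patches: it assumes pose_evaluation is installed with the embedding_distance_metric module shown above, and the embedding size (768) and the (len(hyps), len(refs)) distance-matrix shape follow the tests in test_embedding_distance_metric.py.

import numpy as np

from pose_evaluation.metrics.embedding_distance_metric import EmbeddingDistanceMetric

# kind can be "cosine", "euclidean", "manhattan", or "dot"; the device is auto-detected,
# or can be set explicitly with metric.to("cpu") / metric.to("cuda").
metric = EmbeddingDistanceMetric(kind="cosine")

# Dummy 768-dim embeddings (e.g. precomputed SignCLIP vectors loaded from .npy files).
hyps = [np.random.rand(768) for _ in range(3)]
refs = [np.random.rand(768) for _ in range(5)]

# Pairwise distances: rows are hypotheses, columns are references.
distances = metric.score_all(hyps, refs)
print(distances.shape)  # torch.Size([3, 5]); cosine distances fall in [0, 2]

# Single pair: returns a plain Python float.
print(metric.score(hyps[0], refs[0]))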