Bug - test adjustment, hub cleanup #13

Merged: 26 commits, Jul 9, 2024
Commits
8120f70
adjusting tests
rhysdg Jul 8, 2024
678a1ed
adjusting class reference
rhysdg Jul 8, 2024
049054d
adjusting readme example
rhysdg Jul 8, 2024
8f30807
expanding dims on the fly
rhysdg Jul 9, 2024
06e0e83
int64 tpe qucik fix - before tracking back through siglip tokenizer
rhysdg Jul 9, 2024
5ccd221
reinstating collections
rhysdg Jul 9, 2024
2b2ab91
reinstating collections
rhysdg Jul 9, 2024
0cc4dce
cleaning up hub artifacts, reinstating itertools repeat
rhysdg Jul 9, 2024
ee7eb7d
further cleanup
rhysdg Jul 9, 2024
1d2abdc
further stripping out unused functionality
rhysdg Jul 9, 2024
2ef9a66
dropping chat templates
rhysdg Jul 9, 2024
a96e24f
dropping perf utils and reinstating vocab files empty dict
rhysdg Jul 9, 2024
6643a59
dropping env error raise at resolving hub nased gguf
rhysdg Jul 9, 2024
08ac29c
adding missing sentencepiece dependency
rhysdg Jul 9, 2024
ee121a9
moving sentencepiece module to utils
rhysdg Jul 9, 2024
f583c2f
updating to torch 2.10
rhysdg Jul 9, 2024
25d45ba
bumping torch
rhysdg Jul 9, 2024
d2f8144
dropping cuda execution provider form cpu mode
rhysdg Jul 9, 2024
fb465b0
dropping get_available_providers
rhysdg Jul 9, 2024
17a06aa
falling back to python 3.10 workflows until Added Token resolved
rhysdg Jul 9, 2024
5c57af9
dropping setter interaction and casting to string
rhysdg Jul 9, 2024
ff55839
Merge branch 'bug-test-adjustment' of https://github.com/rhysdg/sam-a…
rhysdg Jul 9, 2024
1cfb9a2
bumping back to 3.11
rhysdg Jul 9, 2024
78ebfa7
adding resolution at assertion
rhysdg Jul 9, 2024
3ccca8f
Merge branch 'bug-test-adjustment' of https://github.com/rhysdg/sam-a…
rhysdg Jul 9, 2024
8f24138
setting type to clip at insttantiation test
rhysdg Jul 9, 2024
4 changes: 2 additions & 2 deletions README.md
@@ -74,7 +74,7 @@ Last of all the aim here is to keep up with the latest optimised foundation mode
- SigLIP is available and recommended by default given the innovation made with its loss function, leading to better inference. Model types, however, can be changed at instantiation with:

```python
onnx_model = OnnxClip(batch_size=16, type='siglip_full')
onnx_model = OnnxLip(batch_size=16, type='siglip_full')
```

- Notice also that cosine similarity at `get_similarity_scores` is adjusted to handle multiple contexts - in other words, a handful of text embeddings can be sent as 'contexts' and evaluated against a single image or a batch of images.
@@ -143,7 +143,7 @@ Last of all the aim here is to keep up with the latest optimised foundation mode

for k,v in contexts.items():
print(f'\ncontext: {k}\n')
for text, p in zip(texts[k], probs[k][0]):
for text, p in zip(texts[k], probs[k]):
print(f"Probability that the image is '{text}': {p:.3f}")
```
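For orientation, a hypothetical end-to-end sketch of the multi-context flow this bullet describes. The names `OnnxLip`, `get_image_embeddings`, `get_text_embeddings`, and `get_probabilities` come from this diff, but how they compose below (in particular how `probs` is produced) is an assumption rather than the package's documented API:

```python
import numpy as np
from PIL import Image
# Names below come from this PR's diff; the exact call pattern is assumed.
from clip.model import OnnxLip, get_probabilities

onnx_model = OnnxLip(batch_size=16, type='siglip_full')

# One image embedding, several named groups ("contexts") of text embeddings.
image_embedding = onnx_model.get_image_embeddings([Image.open('example.jpg')])
texts = {
    'animal': ['a photo of a cat', 'a photo of a dog'],
    'weather': ['a sunny day', 'a rainy day'],
}
contexts = {k: onnx_model.get_text_embeddings(v) for k, v in texts.items()}

# Assumed: probabilities come back keyed by context name.
probs = get_probabilities(image_embedding, contexts)

for k, v in contexts.items():
    print(f'\ncontext: {k}\n')
    for text, p in zip(texts[k], probs[k]):
        print(f"Probability that the image is '{text}': {p:.3f}")
```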

32 changes: 21 additions & 11 deletions clip/model.py
@@ -88,14 +88,14 @@ def get_probabilities(image_embedding: list,
if image_embedding.ndim == 1:
# Convert to 2-D array using x[np.newaxis, :]
# and remove the extra dimension at the end.
res_dict[key] = softmax(get_similarity_scores(
res_dict[key] = softmax(get_probabilities(
image_embedding[np.newaxis, :], query
)[0])

if query.ndim == 1:
# Convert to 2-D array using x[np.newaxis, :]
# and remove the extra dimension at the end.
res_dict[key] = softmax(get_similarity_scores(
res_dict[key] = softmax(get_probabilities(
image_embedding, query[np.newaxis, :]
)[:, 0])
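For context, a minimal standalone sketch (assumed code, not this repo's) of the pattern the hunk above relies on: promote 1-D embeddings to 2-D with `np.newaxis` so a single similarity-plus-softmax path covers both single vectors and batches.

```python
import numpy as np

def softmax(x: np.ndarray) -> np.ndarray:
    # Numerically stable softmax over the last axis.
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

def scores_to_probs(image_embedding: np.ndarray, text_embeddings: np.ndarray) -> np.ndarray:
    # Promote single vectors to 2-D so one matrix path handles both cases.
    if image_embedding.ndim == 1:
        image_embedding = image_embedding[np.newaxis, :]
    if text_embeddings.ndim == 1:
        text_embeddings = text_embeddings[np.newaxis, :]
    # Cosine similarity: L2-normalise rows, then take the dot product.
    img = image_embedding / np.linalg.norm(image_embedding, axis=-1, keepdims=True)
    txt = text_embeddings / np.linalg.norm(text_embeddings, axis=-1, keepdims=True)
    return softmax(img @ txt.T)
```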

@@ -136,11 +136,15 @@ def __init__(
passing large amounts of data (perhaps ~100 or more).

"""
assert device in ['cpu', 'cuda'], 'please use either cuda or cpu!'

self.providers = [
'CUDAExecutionProvider',
'CPUExecutionProvider'
]

if device == 'cuda':
self.providers.insert(0, 'CUDAExecutionProvider')

if trt:
self.providers.insert(0, 'TensorrtExecutionProvider')

@@ -167,6 +171,7 @@

self.image_model, self.text_model = self._load_models(model)


if 'siglip' in type:
#currently only supporting 384
assert size in [384, 224], 'please choose either a 384, or 224 input size for SigLIP!'
@@ -246,7 +251,7 @@ def _load_model(self, path: str):

# `providers` need to be set explicitly since ORT 1.9
return ort.InferenceSession(
path, providers=ort.get_available_providers()
path, providers=self.providers
)
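Read together with the `__init__` hunk above, the intent (inferred from the "dropping cuda execution provider form cpu mode" commit; this is a simplified sketch, not the file's exact code) is a device-aware provider list passed explicitly to `ort.InferenceSession` rather than `ort.get_available_providers()`:

```python
import onnxruntime as ort

def build_session(path: str, device: str = 'cpu', trt: bool = False) -> ort.InferenceSession:
    # Start from the CPU provider and only prepend accelerators that were
    # explicitly requested, so CPU mode never attempts to load CUDA.
    providers = ['CPUExecutionProvider']
    if device == 'cuda':
        providers.insert(0, 'CUDAExecutionProvider')
    if trt:
        providers.insert(0, 'TensorrtExecutionProvider')
    # `providers` needs to be set explicitly since ORT 1.9.
    return ort.InferenceSession(path, providers=providers)
```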

def get_image_embeddings(
@@ -268,9 +273,15 @@
"""
if not with_batching or self._batch_size is None:
# Preprocess images
images = [
self._preprocessor.encode_image(image) for image in images
]
if 'siglip' in self.type:
images = [
np.expand_dims(self._siglip_preprocessor(image).numpy(), 0) for image in images
]
else:
images = [
self._preprocessor.encode_image(image) for image in images
]



if not images:
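A small illustrative helper (assumed, not part of the diff) showing the kind of on-the-fly dimension expansion the SigLIP branch above performs, so per-image CHW outputs can be stacked into an NCHW batch for the ONNX image encoder:

```python
import numpy as np

def to_onnx_batch(preprocessed_images) -> np.ndarray:
    # Each preprocessed image is a CHW array (or tensor convertible via
    # np.asarray); add a leading batch axis, then concatenate into NCHW.
    arrays = [
        np.expand_dims(np.asarray(img, dtype=np.float32), 0)
        for img in preprocessed_images
    ]
    return np.concatenate(arrays, axis=0)
```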
@@ -320,17 +331,16 @@ def get_text_embeddings(

if self.type == 'siglip':

text = self._siglip_tokenizer(incoming,
text = self._siglip_tokenizer(texts,
return_tensors='np',
padding="max_length",
truncation=True
)
if len(text) == 0:
return self._get_empty_embedding()

incoming = {"input_ids": text}

hidden, pooled = self.text_model.run(None, incoming)
#text already carries an input_ids key/value pair here
hidden, pooled = self.text_model.run(None, {'input_ids': text['input_ids'].astype(np.int64)})

#needs adjusting to a list followed by np.concatenate
self.hidden_text = hidden
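For reference, a condensed sketch of the tokenize-and-cast step the hunk above settles on. The free function below is hypothetical, but the tokenizer call and the int64 cast mirror the diff, and the two-output unpacking assumes the exported SigLIP text encoder returns hidden states plus a pooled embedding, as above:

```python
import numpy as np

def run_text_model(session, tokenizer, texts):
    # Tokenize to fixed-length numpy arrays, then cast input_ids to int64,
    # the integer type the exported ONNX text encoder expects.
    encoded = tokenizer(texts, return_tensors='np', padding='max_length', truncation=True)
    input_ids = encoded['input_ids'].astype(np.int64)
    hidden, pooled = session.run(None, {'input_ids': input_ids})
    return hidden, pooled
```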
4 changes: 3 additions & 1 deletion clip/siglip_image_processor.py
@@ -1,6 +1,8 @@
import numbers
import random
import warnings
import collections
from itertools import repeat
from dataclasses import dataclass, asdict
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union

@@ -415,4 +417,4 @@ def image_transform(
ToTensor(),
normalize,
])
return Compose(transforms)
return Compose(transforms)
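The reinstated `collections` and `itertools.repeat` imports are the kind typically consumed by an n-tuple helper in torchvision-style image processors; the sketch below is an assumption about their role here, not code quoted from this file:

```python
import collections.abc
from itertools import repeat

def _ntuple(n):
    # Turn a scalar into an n-tuple (e.g. 384 -> (384, 384)) and pass
    # through values that are already iterable (but not strings).
    def parse(x):
        if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
            return tuple(x)
        return tuple(repeat(x, n))
    return parse

to_2tuple = _ntuple(2)  # to_2tuple(384) == (384, 384)
```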
4 changes: 1 addition & 3 deletions clip/siglip_tokenizer.py
@@ -25,7 +25,7 @@

from .utils.tokenization_utils import PreTrainedTokenizer
from .utils.tokenization_utils_base import AddedToken
from transformers.utils import sentencepiece_model_pb2_new as sentencepiece_model_pb2
from .utils import sentencepiece_model_pb2_new as sentencepiece_model_pb2

if TYPE_CHECKING:
from ...tokenization_utils_base import TextInput
@@ -40,8 +40,6 @@
SPIECE_UNDERLINE = "▁"




class SiglipTokenizer(PreTrainedTokenizer):
"""
Construct a Siglip tokenizer. Based on [SentencePiece](https://github.com/google/sentencepiece).
48 changes: 48 additions & 0 deletions clip/utils/sentencepiece_model_pb2_new.py

Some generated files are not rendered by default.

16 changes: 12 additions & 4 deletions clip/utils/tokenization_utils.py
@@ -433,8 +433,10 @@ def __init__(self, **kwargs):

# 4. If some of the special tokens are not part of the vocab, we add them, at the end.
# the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following `tokenizers`
#Adding str(token) to resolve AddedToken unhashable type

self._add_tokens(
[token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
[token for token in self.all_special_tokens_extended if str(token) not in self._added_tokens_encoder],
special_tokens=True,
)

@@ -552,8 +554,13 @@ def _add_tokens(self, new_tokens: Union[List[str], List[AddedToken]], special_to
elif special_tokens:
# doing token.special=True changes the normalization! will fix in rust
# this is important and the only reason why the AddedTokens in each class are normalized by default
token.__setstate__({"special": True, "normalized": token.normalized})
if token in self._added_tokens_decoder:
#token.__setstate__({"special": True, "normalized": token.normalized})
#token.__setstate__({"special": True, "normalized": token.normalized})
token.special = True
token.normalized = token.normalized

#resolving unhashable type AddedToken with str(token)
if str(token) in self._added_tokens_decoder:
continue
if not token.special and token.normalized and getattr(self, "do_lower_case", False):
# Normalize if requested
@@ -576,9 +583,10 @@ def _add_tokens(self, new_tokens: Union[List[str], List[AddedToken]], special_to
self._update_trie()
return added_tokens

#Adding str(token) to resolve AddedToken unhashable type
def _update_trie(self, unique_no_split_tokens: Optional[str] = []):
for token in self._added_tokens_decoder.values():
if token not in self.tokens_trie._tokens:
if str(token) not in self.tokens_trie._tokens:
self.tokens_trie.add(token.content)
for token in unique_no_split_tokens:
if token not in self.tokens_trie._tokens:
Expand Down
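To make the `str(token)` workaround described in the comments above concrete, here is a small self-contained illustration (with a hypothetical stand-in class, not transformers' `AddedToken`) of why an unhashable token object breaks dict membership checks and how stringifying the key avoids it:

```python
class FakeAddedToken:
    # Stand-in for an AddedToken-like object. Defining __eq__ without also
    # defining __hash__ sets __hash__ to None, so instances are unhashable
    # and cannot be used directly as dict keys or in `in` checks on dicts.
    def __init__(self, content: str):
        self.content = content

    def __eq__(self, other):
        return str(other) == str(self)

    def __str__(self):
        return self.content


added_tokens_encoder = {'</s>': 1}
token = FakeAddedToken('</s>')

# token in added_tokens_encoder       -> TypeError: unhashable type: 'FakeAddedToken'
print(str(token) in added_tokens_encoder)  # True: membership by string content works
```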