Merge pull request #79 from msaroufim/msaroufim/imagebindlib

ImageBind as a Library
facebookresearch · Jul 14, 2023 · 95d27c7 · 95d27c7
2 parents f6238aa + 810a4f2
commit 95d27c7
Show file tree

Hide file tree

Showing 10 changed files with 34 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -58,7 +58,7 @@ Install pytorch 1.13+ and other 3rd party dependencies.
 conda create --name imagebind python=3.8 -y
 conda activate imagebind
 
-pip install -r requirements.txt
+pip install .
 ```
 
 For Windows users, you might need to install `soundfile` for reading/writing audio files. (Thanks @congyue1977)
@@ -71,10 +71,10 @@ pip install soundfile
 Extract and compare features across modalities (e.g. Image, Text and Audio).
 
 ```python
-import data
+from imagebind import data
 import torch
-from models import imagebind_model
-from models.imagebind_model import ModalityType
+from imagebind.models import imagebind_model
+from imagebind.models.imagebind_model import ModalityType
 
 text_list=["A dog.", "A car", "A bird"]
 image_paths=[".assets/dog_image.jpg", ".assets/car_image.jpg", ".assets/bird_image.jpg"]

diff --git a/imagebind/__init__.py b/imagebind/__init__.py
@@ -0,0 +1,3 @@
+from imagebind import data
+from imagebind.models import imagebind_model
+from imagebind.models.imagebind_model import ModalityType
diff --git a/data.py → imagebind/data.py b/data.py → imagebind/data.py
@@ -18,7 +18,7 @@
 from torchvision import transforms
 from torchvision.transforms._transforms_video import NormalizeVideo
 
-from models.multimodal_preprocessors import SimpleTokenizer
+from imagebind.models.multimodal_preprocessors import SimpleTokenizer
 
 DEFAULT_AUDIO_FRAME_SHIFT_MS = 10  # in milliseconds
 

diff --git a/models/__init__.py → imagebind/models/__init__.py b/models/__init__.py → imagebind/models/__init__.py
diff --git a/models/helpers.py → imagebind/models/helpers.py b/models/helpers.py → imagebind/models/helpers.py
diff --git a/models/imagebind_model.py → imagebind/models/imagebind_model.py b/models/imagebind_model.py → imagebind/models/imagebind_model.py
@@ -13,16 +13,16 @@
 import torch
 import torch.nn as nn
 
-from models.helpers import (EinOpsRearrange, LearnableLogitScaling, Normalize,
+from imagebind.models.helpers import (EinOpsRearrange, LearnableLogitScaling, Normalize,
                             SelectElement, SelectEOSAndProject)
-from models.multimodal_preprocessors import (AudioPreprocessor,
+from imagebind.models.multimodal_preprocessors import (AudioPreprocessor,
                                              IMUPreprocessor, PadIm2Video,
                                              PatchEmbedGeneric,
                                              RGBDTPreprocessor,
                                              SpatioTemporalPosEmbeddingHelper,
                                              TextPreprocessor,
                                              ThermalPreprocessor)
-from models.transformer import MultiheadAttention, SimpleTransformer
+from imagebind.models.transformer import MultiheadAttention, SimpleTransformer
 
 ModalityType = SimpleNamespace(
     VISION="vision",

diff --git a/models/multimodal_preprocessors.py → imagebind/models/multimodal_preprocessors.py b/models/multimodal_preprocessors.py → imagebind/models/multimodal_preprocessors.py
@@ -20,7 +20,7 @@
 from iopath.common.file_io import g_pathmgr
 from timm.models.layers import trunc_normal_
 
-from models.helpers import VerboseNNModule, cast_if_src_dtype
+from imagebind.models.helpers import VerboseNNModule, cast_if_src_dtype
 
 
 def get_sinusoid_encoding_table(n_position, d_hid):

diff --git a/models/transformer.py → imagebind/models/transformer.py b/models/transformer.py → imagebind/models/transformer.py
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,3 @@
---extra-index-url https://download.pytorch.org/whl/cu113
 torch==1.13.0
 torchvision==0.14.0
 torchaudio==0.13.0
@@ -8,9 +7,9 @@ ftfy
 regex
 einops
 fvcore
-decord==0.6.0
+eva-decord==0.6.1
 iopath
-numpy
+numpy>=1.19
 matplotlib
 types-regex
 mayavi

diff --git a/setup.py b/setup.py
@@ -0,0 +1,20 @@
+"""Packaging script that makes ImageBind installable with ``pip install .``."""
+from pathlib import Path
+
+from setuptools import setup, find_packages
+
+# Resolve data files relative to this script so the build does not depend on
+# the directory the installer happens to be invoked from.
+HERE = Path(__file__).resolve().parent
+
+
+def _read_requirements(path=HERE / 'requirements.txt'):
+    """Return the requirement specifiers listed in *path*.
+
+    Blank lines, ``#`` comment lines and pip option lines (anything starting
+    with ``-``, such as ``--extra-index-url``) are skipped because they are
+    not valid ``install_requires`` entries.
+    """
+    with open(path, encoding='utf-8') as f:
+        stripped = (line.strip() for line in f)
+        return [line for line in stripped if line and not line.startswith(('#', '-'))]
+
+
+def _read_text(path):
+    """Return the full contents of *path*, closing the file handle afterwards."""
+    with open(path, encoding='utf-8') as f:
+        return f.read()
+
+
+setup(
+    name='imagebind',
+    version='0.1.0',
+    packages=find_packages(),
+    # TODO: replace this placeholder with a real one-line summary of the package.
+    description='A brief description of the package',
+    long_description=_read_text(HERE / 'README.md'),
+    long_description_content_type="text/markdown",
+    url='https://github.com/facebookresearch/ImageBind',
+    classifiers=[
+        'Programming Language :: Python :: 3',
+        # NOTE(review): this string does not look like a registered trove
+        # classifier -- confirm against the PyPI classifier list before publishing.
+        'License :: Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International',
+    ],
+    install_requires=_read_requirements(),
+    # NOTE(review): recent pip releases are understood to ignore
+    # dependency_links; kept so older tooling behaves as before. Users who need
+    # the CUDA 11.3 wheels may have to pass --extra-index-url to pip themselves.
+    dependency_links=['https://download.pytorch.org/whl/cu113'],
+)