-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimagecaptioningmodel1.py
82 lines (61 loc) · 2.59 KB
/
imagecaptioningmodel1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Mount Google Drive inside the Colab runtime so files under MyDrive are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

import kagglehub

# Download the latest version of the dataset and report where it was stored.
DATASET_HANDLE = "dataclusterlabs/vehicle-image-captioning-dataset"
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)
import os
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
# Folder in Google Drive that holds the dataset images.
image_dir = '/content/drive/MyDrive/vehicle-image-captioning-dataset/indian_vehicle_images/indian_vehicle_images'  # Adjust as necessary
# Maps image file name -> generated caption; display_image_caption() reads it.
captions = {}

# Load the pretrained BLIP captioning model and its matching processor.
BLIP_CHECKPOINT = 'Salesforce/blip-image-captioning-base'
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')
CAPTIONS_FILE = '/content/drive/MyDrive/captions.txt'

# Caption every image in the folder. os.listdir returns entries in arbitrary
# order, so sort them to make the printed and saved output reproducible.
for image_name in sorted(os.listdir(image_dir)):
    # Compare case-insensitively so files such as "IMG_01.JPG" are not skipped.
    if not image_name.lower().endswith(IMAGE_EXTENSIONS):
        continue
    image_path = os.path.join(image_dir, image_name)
    try:
        # The context manager closes the file handle; convert('RGB') gives the
        # processor a 3-channel image even for grayscale, RGBA or palette files.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
    except OSError as err:
        # One unreadable file should not discard the captions generated so far.
        print(f'Skipping {image_name}: {err}')
        continue
    # Prepare the image tensors, generate token ids, and decode them to text.
    inputs = processor(images=image, return_tensors='pt')
    out = model.generate(**inputs)
    captions[image_name] = processor.decode(out[0], skip_special_tokens=True)

# Display captions
for img, cap in captions.items():
    print(f'{img}: {cap}')

# Save captions to a text file in Google Drive, one "<file name>: <caption>"
# entry per line. The encoding is explicit so the result does not depend on
# the runtime's locale.
with open(CAPTIONS_FILE, 'w', encoding='utf-8') as f:
    f.writelines(f'{img}: {cap}\n' for img, cap in captions.items())
print(f'Captions saved to {CAPTIONS_FILE}')
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
# Load the captions CSV file
#captions_df = pd.read_csv('/content/drive/My Drive/vehicle-image-captioning-dataset/captions.csv')
# NOTE: the line above is commented out because it is not necessary; if the code below does not work, uncomment it.
# Function to display image and caption
def display_image_caption(image_id=None):
    """Display one dataset image with its generated caption as the plot title.

    Args:
        image_id: File name of the image inside the dataset folder. When
            omitted, the first entry of the folder listing that has an entry
            in the module-level ``captions`` dict is used.

    Raises:
        LookupError: If ``image_id`` is omitted and no file in the folder has
            a caption.
        KeyError: If an explicit ``image_id`` has no entry in ``captions``.
    """
    image_path = '/content/drive/MyDrive/vehicle-image-captioning-dataset/indian_vehicle_images/indian_vehicle_images'
    if image_id is None:
        # The folder may contain files that were never captioned (non-image
        # files, unreadable images), so blindly taking the first listing
        # entry can fail the caption lookup below.
        image_id = next(
            (name for name in os.listdir(image_path) if name in captions),
            None,
        )
        if image_id is None:
            raise LookupError(f'No captioned image found in {image_path}')
    print(image_id)
    caption = captions[image_id]
    img = mpimg.imread(os.path.join(image_path, image_id))
    plt.imshow(img)
    plt.axis('off')  # Turn off axis numbers and ticks
    plt.title(caption)
    plt.show()
# Example usage: show one image from the dataset folder together with its caption.
display_image_caption()
# NOTE: a leftover DataFrame-style lookup that followed this call was removed.
# `captions` is a plain dict (file name -> caption) and `image_id` is not
# defined at module level, so that statement could only raise an exception.