Skip to content

Commit

Permalink
av-asr: move video loading outside detector (#3498)
Browse files Browse the repository at this point in the history
Summary:
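This PR moves video loading out of the landmark detectors and into the data module's pre-processing step: the data module now loads the video itself and passes the frames to the detector, so the detectors receive video frames instead of a filename.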

Pull Request resolved: #3498

Reviewed By: mthrok

Differential Revision: D47811044

Pulled By: mpc001

fbshipit-source-id: f17839b695b13d3cf2d9db343d7e9a0202eea7d5
  • Loading branch information
Pingchuan Ma authored and facebook-github-bot committed Jul 26, 2023
1 parent da21202 commit c977afe
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 7 deletions.
2 changes: 1 addition & 1 deletion examples/avsr/data_prep/data/data_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ def load_data(self, data_filename, transform=True):
audio = self.audio_process(audio, sample_rate)
return audio
if self.modality == "video":
landmarks = self.landmarks_detector(data_filename)
video = self.load_video(data_filename)
landmarks = self.landmarks_detector(video)
video = self.video_process(video, landmarks)
video = torch.tensor(video)
return video
Expand Down
4 changes: 1 addition & 3 deletions examples/avsr/data_prep/detectors/mediapipe/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import mediapipe as mp

import numpy as np
import torchvision

warnings.filterwarnings("ignore")

Expand All @@ -29,8 +28,7 @@ def __call__(self, video_frames):
assert any(l is not None for l in landmarks), "Cannot detect any frames in the video"
return landmarks

def detect(self, filename, detector):
video_frames = torchvision.io.read_video(filename, pts_unit="sec")[0].numpy()
def detect(self, video_frames, detector):
landmarks = []
for frame in video_frames:
results = detector.process(frame)
Expand Down
4 changes: 1 addition & 3 deletions examples/avsr/data_prep/detectors/retinaface/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import warnings

import numpy as np
import torchvision
from ibug.face_detection import RetinaFacePredictor

warnings.filterwarnings("ignore")
Expand All @@ -19,8 +18,7 @@ def __init__(self, device="cuda:0", model_name="resnet50"):
device=device, threshold=0.8, model=RetinaFacePredictor.get_model(model_name)
)

def __call__(self, filename):
video_frames = torchvision.io.read_video(filename, pts_unit="sec")[0].numpy()
def __call__(self, video_frames):
landmarks = []
for frame in video_frames:
detected_faces = self.face_detector(frame, rgb=False)
Expand Down

0 comments on commit c977afe

Please sign in to comment.