diff --git a/backend/AnimatedDrawings/Dockerfile b/backend/AnimatedDrawings/Dockerfile index 61cd8c0a1..ca67ddc79 100644 --- a/backend/AnimatedDrawings/Dockerfile +++ b/backend/AnimatedDrawings/Dockerfile @@ -1,4 +1,4 @@ -FROM continuumio/miniconda3 +FROM continuumio/miniconda3:24.1.2-0 ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 @@ -7,16 +7,19 @@ COPY . /app RUN mkdir -p /app/application/capsuleSkin WORKDIR /app -RUN conda create --name animated_drawings python=3.8.13 - -SHELL ["conda", "run", "-n", "animated_drawings", "/bin/bash", "-c"] +RUN conda install python=3.8.13 -y # Install gcc and python3-dev -RUN apt-get update && apt-get install -y gcc python3-dev supervisor - -RUN apt-get install -y libosmesa6-dev freeglut3-dev && \ - apt-get install -y libglfw3-dev libgles2-mesa-dev && \ - apt-get install -y libosmesa6 +RUN apt-get clean +RUN apt-get update && \ + apt-get install --no-install-recommends -y \ + supervisor gcc build-essential \ + libosmesa6-dev freeglut3-dev \ + libglfw3-dev libgles2-mesa-dev \ + libosmesa6 \ + libglib2.0-0 libsm6 libxrender1 libxext6 \ + ffmpeg libavcodec-extra \ + && rm -rf /var/lib/apt/lists/* COPY supervisord.conf /etc/supervisor/supervisord.conf diff --git a/backend/AnimatedDrawings/application/animation_queue.py b/backend/AnimatedDrawings/application/animation_queue.py index b685d2159..7b5809df4 100644 --- a/backend/AnimatedDrawings/application/animation_queue.py +++ b/backend/AnimatedDrawings/application/animation_queue.py @@ -10,7 +10,7 @@ from application.model.retarget import Retarget from application.task.tasks import create_animation, save_capsule_skin, \ send_notification -from kombu_connection_pool import connection, connections +from application.kombu_connection_pool import connection, connections class AnimationQueueController: @@ -57,7 +57,7 @@ def callback( :return: """ try: - self.logger.debug('메시지 수신 완료, 콜백 동작') + self.logger.info('메시지 수신 완료, 콜백 동작') parsed_data = self.parse_body(body) filename = f"capsuleSkin/{parsed_data['memberId']}/{uuid.uuid4()}.gif" @@ -77,9 +77,10 @@ def callback( ).apply_async( ignore_result=True ) + self.logger.info("celery 작업 전달 완료") message.ack() - self.logger.debug('celery에 작업 전달 완료') + self.logger.info("메시지 큐 ack 전달 완료") except Exception as e: self.logger.exception('작업 큐 메시지 처리 오류 %r', e) message.reject() diff --git a/backend/AnimatedDrawings/application/kombu_connection_pool.py b/backend/AnimatedDrawings/application/kombu_connection_pool.py index e0ce840c5..4ce3c061b 100644 --- a/backend/AnimatedDrawings/application/kombu_connection_pool.py +++ b/backend/AnimatedDrawings/application/kombu_connection_pool.py @@ -20,6 +20,7 @@ def errback(exc, interval): if QueueConfig.PROTOCOL == 'amqps': import ssl + ssl_option = { 'cert_reqs': ssl.CERT_REQUIRED } @@ -44,6 +45,18 @@ def errback(exc, interval): routing_key=QueueConfig.CAPSULE_SKIN_REQUEST_QUEUE_NAME) capsule_skin_queue.declare(channel=channel) + notification_dlx_name = f'fail.{QueueConfig.NOTIFICATION_EXCHANGE_NAME}' + notification_dlq_name = f'fail.{QueueConfig.NOTIFICATION_QUEUE_NAME}' + notification_dlx = Exchange( + name=notification_dlx_name, + type='fanout', + durable=True) + notification_dlq = Queue( + name=notification_dlq_name, + exchange=notification_dlx, + routing_key=notification_dlx_name) + notification_dlq.declare(channel=channel) + notification_exchange = Exchange( name=QueueConfig.NOTIFICATION_EXCHANGE_NAME, type='direct', @@ -51,6 +64,9 @@ def errback(exc, interval): notification_queue = Queue(name=QueueConfig.NOTIFICATION_QUEUE_NAME, exchange=notification_exchange, - routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME) + routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME, + queue_arguments={ + 'x-dead-letter-exchange': notification_dlx_name, + }) notification_queue.declare(channel=channel) logger.info('레빗 엠큐 큐, 익스체인지 설정 완료') diff --git a/backend/AnimatedDrawings/application/task/base_task.py b/backend/AnimatedDrawings/application/task/base_task.py index ab9f5487c..eedcb74b0 100644 --- a/backend/AnimatedDrawings/application/task/base_task.py +++ b/backend/AnimatedDrawings/application/task/base_task.py @@ -27,10 +27,10 @@ def on_after_setup_logger(logger, **kwargs): def before_start(self, task_id, args, kwargs): self.task_logger.debug(kwargs) - self.task_logger.debug('태스크 처리 시작 %s', task_id) + self.task_logger.info('task_id: %s - 태스크 처리 시작', task_id) def on_failure(self, exc, task_id, args, kwargs, einfo): - self.task_logger.exception('태스크 처리 실패 %s', task_id, exc_info=einfo) + self.task_logger.exception('task_id: %s - 태스크 처리 실패', task_id, exc_info=einfo) request_data = json.dumps({ 'memberId': kwargs['input_data']['memberId'], 'skinName': kwargs['input_data']['skinName'], @@ -40,27 +40,27 @@ def on_failure(self, exc, task_id, args, kwargs, einfo): 'status': NotificationStatus.FAIL.value }, ensure_ascii=False) with producers[connection].acquire(block=True) as producer: - exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME, + notification_exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME, type='direct', durable=True) - queue = Queue(name=QueueConfig.NOTIFICATION_QUEUE_NAME, - exchange=exchange, + notification_queue = Queue(name=QueueConfig.NOTIFICATION_QUEUE_NAME, + exchange=notification_exchange, routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME) producer.publish( request_data, - declare=[queue], - exchange=exchange, + declare=[notification_queue], + exchange=notification_exchange, content_type='application/json', routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME, ) def on_retry(self, exc, task_id, args, kwargs, einfo): self.task_logger.debug(kwargs) - self.task_logger.exception('태스크 재시도 %s', task_id, exc_info=einfo) + self.task_logger.exception('task_id: %s - 태스크 재시도', task_id, exc_info=einfo) def on_success(self, retval, task_id, args, kwargs): self.task_logger.debug(args) - self.task_logger.debug('태스크 처리 성공 %s', task_id) + self.task_logger.info('task_id: %s - 태스크 처리 성공', task_id) diff --git a/backend/AnimatedDrawings/application/task/tasks.py b/backend/AnimatedDrawings/application/task/tasks.py index 67737b214..3c6cc79f3 100644 --- a/backend/AnimatedDrawings/application/task/tasks.py +++ b/backend/AnimatedDrawings/application/task/tasks.py @@ -5,6 +5,7 @@ from pathlib import Path import requests +from celery.utils.log import get_task_logger from kombu import Exchange, Queue from sqlalchemy import create_engine from sqlalchemy.orm import Session @@ -25,42 +26,55 @@ engine = create_engine(DatabaseConfig.get_database_url()) s3_bucket_name = S3Config.S3_BUCKET_NAME +logger = get_task_logger(__name__) -@celery.task(base=LogErrorsTask) -def create_animation(input_data: dict, filename: str): +@celery.task(base=LogErrorsTask, bind=True) +def create_animation(self, input_data: dict, filename: str): """ 애니메이션 생성 task + :param self: 현재 태스크 정보 :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif :return: """ + logger.info("task_id: %s - S3 이미지 get 시작", self.request.id) img_bytes = requests.get(input_data['imageUrl']).content + logger.info("task_id: %s - S3 이미지 get 완료", self.request.id) temporary_directory = f'capsuleSkin/{uuid.uuid4()}' result = Path(temporary_directory) result.mkdir(exist_ok=True) + logger.info("task_id: %s - 이미지에서 경계 상자와 관절 추출 시작", self.request.id) image_to_annotations(img_bytes, result) + logger.info("task_id: %s - 이미지에서 경계 상자와 관절 성공", self.request.id) + + logger.info("task_id: %s - 추출된 정보로 애니메이션 생성 시작", self.request.id) annotations_to_animation(temporary_directory, input_data['motionName'], input_data['retarget']) + logger.info("task_id: %s - 추출된 정보로 애니메이션 생성 완료", self.request.id) + logger.info("task_id: %s - GIF 로딩", self.request.id) with open(f'{temporary_directory}/video.gif', 'rb') as image: gif_bytes = bytearray(image.read()) + logger.info("task_id: %s - GIF 로딩 완료", self.request.id) output_wrapper = get_object_wrapper(s3_bucket_name, filename) output_wrapper.put(gif_bytes) + logger.info("task_id: %s - S3 업로드 완료", self.request.id) if os.path.exists(temporary_directory): shutil.rmtree(temporary_directory) -@celery.task(base=LogErrorsTask) -def save_capsule_skin(_, input_data: dict, filename: str): +@celery.task(base=LogErrorsTask, bind=True) +def save_capsule_skin(self, _, input_data: dict, filename: str): """ 캡슐 스킨 생성 정보 DB 저장 태스크 + :param self: 현재 태스크 정보 :param _: 이전 task 결과 :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif @@ -73,15 +87,18 @@ def save_capsule_skin(_, input_data: dict, filename: str): retarget=Retarget(input_data['retarget']).name, member_id=input_data['memberId']) + logger.info("task_id: %s - 캡슐 스킨 DB 저장 시작", self.request.id) with Session(engine) as session: session.add(capsule_skin) session.commit() + logger.info("task_id: %s - 캡슐 스킨 DB 저장 완료", self.request.id) -@celery.task(base=LogErrorsTask) -def send_notification(_, input_data: dict, filename: str): +@celery.task(base=LogErrorsTask, bind=True) +def send_notification(self, _, input_data: dict, filename: str): """ 캡슐 스킨 생성 완료 알림 전송 태스크 + :param self: 현재 태스크 정보 :param _: 이전 task 결과 :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif @@ -96,6 +113,7 @@ def send_notification(_, input_data: dict, filename: str): 'status': NotificationStatus.SUCCESS.value }, ensure_ascii=False) + logger.info("task_id: %s - 캡슐 스킨 생성 완료 알림 전송 시작", self.request.id) with producers[connection].acquire(block=True) as producer: exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME, type='direct', @@ -112,3 +130,4 @@ def send_notification(_, input_data: dict, filename: str): content_type='application/json', routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME, ) + logger.info("task_id: %s - 캡슐 스킨 생성 완료 알림 전송 완료", self.request.id) diff --git a/backend/AnimatedDrawings/supervisord.conf b/backend/AnimatedDrawings/supervisord.conf index 9d2598896..b5e26671a 100644 --- a/backend/AnimatedDrawings/supervisord.conf +++ b/backend/AnimatedDrawings/supervisord.conf @@ -10,7 +10,7 @@ supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface [program:flower] directory = /app/application -command = /opt/conda/envs/animated_drawings/bin/celery -A celery_app flower --conf="/app/application/config/flowerconfig.py" +command = /opt/conda/bin/celery -A celery_app flower --conf="/app/application/config/flowerconfig.py" priority = 100 loglevel=info stdout_logfile = /app/application/log/flower.log @@ -22,7 +22,7 @@ stderr_logfile_backups = 30 [program:application] directory = /app/application -command = /opt/conda/envs/animated_drawings/bin/python3 -u animation_queue.py +command = /opt/conda/bin/python3 -u animation_queue.py loglevel=info priority = 100 stdout_logfile = /app/application/log/application.log @@ -35,7 +35,7 @@ environment=ENVIRONMENT=%(ENV_ENVIRONMENT)s,PYOPENGL_PLATFORM="osmesa" [program:celery] directory = /app/application -command = /opt/conda/envs/animated_drawings/bin/celery -A celery_app worker --without-heartbeat --without-gossip --without-mingle +command = /opt/conda/bin/celery -A celery_app worker --loglevel INFO priority = 200 loglevel=info stdout_logfile = /app/application/log/celeryd.log diff --git a/backend/AnimatedDrawings/torchserve/Dockerfile b/backend/AnimatedDrawings/torchserve/Dockerfile index 4f0f3ee85..21f62413b 100644 --- a/backend/AnimatedDrawings/torchserve/Dockerfile +++ b/backend/AnimatedDrawings/torchserve/Dockerfile @@ -1,6 +1,6 @@ # syntax = docker/dockerfile:1.2 -FROM continuumio/miniconda3 +FROM continuumio/miniconda3:24.1.2-0 # install os dependencies RUN mkdir -p /usr/share/man/man1 @@ -8,7 +8,6 @@ RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ ca-certificates \ curl \ - python3-pip \ vim \ sudo \ default-jre \ @@ -17,26 +16,37 @@ RUN apt-get update && \ build-essential \ && rm -rf /var/lib/apt/lists/* +RUN conda install python=3.8.13 -y + # install python dependencies -RUN pip install openmim==0.3.9 -RUN pip install torch==2.1.2 -RUN pip install mmcv-full==1.7.0 +RUN pip install openmim +RUN pip install torch==2.0.0 +RUN mim install mmcv-full==1.7.0 RUN pip install mmdet==2.27.0 -RUN pip install torchserve==0.9.0 +RUN pip install torchserve # bugfix for xtcocoapi, an mmpose dependency -RUN git clone https://github.com/hjessmith/xtcocoapi-bugfix.git -WORKDIR xtcocoapi-bugfix +RUN git clone https://github.com/jin-s13/xtcocoapi.git +WORKDIR xtcocoapi RUN pip install -r requirements.txt RUN python setup.py install WORKDIR / RUN pip install mmpose==0.29.0 +RUN pip install torchvision==0.15.1 # solve torch version problem +RUN pip install numpy==1.24.4 #solve numpy version problem +# prep torchserve # prep torchserve RUN mkdir -p /home/torchserve/model-store RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_detector.mar -P /home/torchserve/model-store/ RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_pose_estimator.mar -P /home/torchserve/model-store/ -COPY config.properties /home/torchserve/config.properties +COPY config.properties.template /home/torchserve/config.properties.template +COPY log4j2.xml /home/torchserve/log4j2.xml + +ENV MAX_HEAP_SIZE=8g +ENV INIT_HEAP_SIZE=4g -# starting command -CMD /opt/conda/bin/torchserve --start --ts-config /home/torchserve/config.properties && sleep infinity +CMD sed "s/{{MAX_HEAP_SIZE}}/$MAX_HEAP_SIZE/g; s/{{INIT_HEAP_SIZE}}/$INIT_HEAP_SIZE/g" \ + /home/torchserve/config.properties.template > /home/torchserve/config.properties && \ + /opt/conda/bin/torchserve --start --ts-config /home/torchserve/config.properties --disable-token-auth && \ + sleep infinity diff --git a/backend/AnimatedDrawings/torchserve/config.properties b/backend/AnimatedDrawings/torchserve/config.properties deleted file mode 100644 index 64d8507f3..000000000 --- a/backend/AnimatedDrawings/torchserve/config.properties +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -inference_address=http://0.0.0.0:8080 -management_address=http://0.0.0.0:8081 -metrics_address=http://0.0.0.0:8082 -model_store=/home/torchserve/model-store -load_models=all -vmargs=-Xmx8g -Xms4g -XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -default_workers_per_model=8 -job_queue_size=1000 -netty_client_threads=8 -number_of_netty_threads=8 diff --git a/backend/AnimatedDrawings/torchserve/config.properties.template b/backend/AnimatedDrawings/torchserve/config.properties.template new file mode 100644 index 000000000..9ce095c8e --- /dev/null +++ b/backend/AnimatedDrawings/torchserve/config.properties.template @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +inference_address=http://0.0.0.0:8080 +management_address=http://0.0.0.0:8081 +metrics_address=http://0.0.0.0:8082 +model_store=/home/torchserve/model-store +load_models=all + +vmargs=-Xmx{{MAX_HEAP_SIZE}} -Xms{{INIT_HEAP_SIZE}} -XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -Dlog4j.configurationFile=file:///home/torchserve/log4j2.xml + +ipex_enable=true +cpu_launcher_enable=true +cpu_launcher_args=--use_logical_core + +async_logging=true + +batch_size = 1 +batch_delay=100 +concurrency=1 +default_workers_per_model=1 +job_queue_size=1000 \ No newline at end of file diff --git a/backend/AnimatedDrawings/torchserve/log4j2.xml b/backend/AnimatedDrawings/torchserve/log4j2.xml new file mode 100644 index 000000000..0e3e459b3 --- /dev/null +++ b/backend/AnimatedDrawings/torchserve/log4j2.xml @@ -0,0 +1,85 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file