tukcomCD2024 · seokho-1116 · Aug 11, 2024 · Aug 6, 2024 · Aug 6, 2024 · Aug 6, 2024
diff --git a/backend/AnimatedDrawings/Dockerfile b/backend/AnimatedDrawings/Dockerfile
@@ -1,4 +1,4 @@
-FROM continuumio/miniconda3
+FROM continuumio/miniconda3:24.1.2-0
 
 ENV PYTHONDONTWRITEBYTECODE 1
 ENV PYTHONUNBUFFERED 1
@@ -7,16 +7,19 @@ COPY . /app
 RUN mkdir -p /app/application/capsuleSkin
 WORKDIR /app
 
-RUN conda create --name animated_drawings python=3.8.13
-
-SHELL ["conda", "run", "-n", "animated_drawings", "/bin/bash", "-c"]
+RUN conda install python=3.8.13 -y
 
-# Install gcc and python3-dev
+# Install supervisor, the C build toolchain, OpenGL/OSMesa and X11 runtime libraries, and ffmpeg
-RUN apt-get update && apt-get install -y gcc python3-dev supervisor
-
-RUN apt-get install -y libosmesa6-dev freeglut3-dev &&  \
-    apt-get install -y libglfw3-dev libgles2-mesa-dev &&  \
-    apt-get install -y libosmesa6
+RUN apt-get clean
+RUN apt-get update && \
+    apt-get install --no-install-recommends -y \
+    supervisor gcc build-essential \
+    libosmesa6-dev freeglut3-dev  \
+    libglfw3-dev libgles2-mesa-dev \
+    libosmesa6 \
+    libglib2.0-0 libsm6 libxrender1 libxext6 \
+    ffmpeg libavcodec-extra \
+    && rm -rf /var/lib/apt/lists/*
 
 COPY supervisord.conf /etc/supervisor/supervisord.conf
 

diff --git a/backend/AnimatedDrawings/application/animation_queue.py b/backend/AnimatedDrawings/application/animation_queue.py
@@ -10,7 +10,7 @@
 from application.model.retarget import Retarget
 from application.task.tasks import create_animation, save_capsule_skin, \
     send_notification
-from kombu_connection_pool import connection, connections
+from application.kombu_connection_pool import connection, connections
 
 
 class AnimationQueueController:
@@ -57,7 +57,7 @@ def callback(
         :return:
         """
         try:
-            self.logger.debug('메시지 수신 완료, 콜백 동작')
+            self.logger.info('메시지 수신 완료, 콜백 동작')
             parsed_data = self.parse_body(body)
 
             filename = f"capsuleSkin/{parsed_data['memberId']}/{uuid.uuid4()}.gif"
@@ -77,9 +77,10 @@ def callback(
             ).apply_async(
                 ignore_result=True
             )
+            self.logger.info("celery 작업 전달 완료")
 
             message.ack()
-            self.logger.debug('celery에 작업 전달 완료')
+            self.logger.info("메시지 큐 ack 전달 완료")
         except Exception as e:
             self.logger.exception('작업 큐 메시지 처리 오류 %r', e)
             message.reject()

diff --git a/backend/AnimatedDrawings/application/task/base_task.py b/backend/AnimatedDrawings/application/task/base_task.py
@@ -27,10 +27,10 @@ def on_after_setup_logger(logger, **kwargs):
 
     def before_start(self, task_id, args, kwargs):
         self.task_logger.debug(kwargs)
-        self.task_logger.debug('태스크 처리 시작 %s', task_id)
+        self.task_logger.info('task_id: %s - 태스크 처리 시작', task_id)
 
     def on_failure(self, exc, task_id, args, kwargs, einfo):
-        self.task_logger.exception('태스크 처리 실패 %s', task_id, exc_info=einfo)
+        self.task_logger.exception('task_id: %s - 태스크 처리 실패', task_id, exc_info=einfo)
         request_data = json.dumps({
             'memberId': kwargs['input_data']['memberId'],
             'skinName': kwargs['input_data']['skinName'],
@@ -40,27 +40,27 @@ def on_failure(self, exc, task_id, args, kwargs, einfo):
             'status': NotificationStatus.FAIL.value
         }, ensure_ascii=False)
         with producers[connection].acquire(block=True) as producer:
-            exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME,
+            notification_exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME,
                                 type='direct',
                                 durable=True)
 
-            queue = Queue(name=QueueConfig.NOTIFICATION_QUEUE_NAME,
-                          exchange=exchange,
+            notification_queue = Queue(name=QueueConfig.NOTIFICATION_QUEUE_NAME,
+                          exchange=notification_exchange,
                           routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME)
 
             producer.publish(
                 request_data,
-                declare=[queue],
-                exchange=exchange,
+                declare=[notification_queue],
+                exchange=notification_exchange,
                 content_type='application/json',
                 routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME,
             )
 
     def on_retry(self, exc, task_id, args, kwargs, einfo):
         self.task_logger.debug(kwargs)
-        self.task_logger.exception('태스크 재시도 %s', task_id, exc_info=einfo)
+        self.task_logger.exception('task_id: %s - 태스크 재시도', task_id, exc_info=einfo)
 
     def on_success(self, retval, task_id, args, kwargs):
         self.task_logger.debug(args)
 
-        self.task_logger.debug('태스크 처리 성공 %s', task_id)
+        self.task_logger.info('task_id: %s - 태스크 처리 성공', task_id)
diff --git a/backend/AnimatedDrawings/application/task/tasks.py b/backend/AnimatedDrawings/application/task/tasks.py
@@ -5,6 +5,7 @@
 from pathlib import Path
 
 import requests
+from celery.utils.log import get_task_logger
 from kombu import Exchange, Queue
 from sqlalchemy import create_engine
 from sqlalchemy.orm import Session
@@ -25,42 +26,55 @@
 
 engine = create_engine(DatabaseConfig.get_database_url())
 s3_bucket_name = S3Config.S3_BUCKET_NAME
+logger = get_task_logger(__name__)
 
 
-@celery.task(base=LogErrorsTask)
-def create_animation(input_data: dict, filename: str):
+@celery.task(base=LogErrorsTask, bind=True)
+def create_animation(self, input_data: dict, filename: str):
     """
     애니메이션 생성 task
+    :param self: 현재 태스크 정보
     :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName
     :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif
     :return:
     """
+    logger.info("task_id: %s - S3 이미지 get 시작", self.request.id)
     img_bytes = requests.get(input_data['imageUrl']).content
+    logger.info("task_id: %s - S3 이미지 get 완료", self.request.id)
 
     temporary_directory = f'capsuleSkin/{uuid.uuid4()}'
     result = Path(temporary_directory)
     result.mkdir(exist_ok=True)
 
+    logger.info("task_id: %s - 이미지에서 경계 상자와 관절 추출 시작", self.request.id)
     image_to_annotations(img_bytes, result)
+    logger.info("task_id: %s - 이미지에서 경계 상자와 관절 성공", self.request.id)
+
+    logger.info("task_id: %s - 추출된 정보로 애니메이션 생성 시작", self.request.id)
     annotations_to_animation(temporary_directory,
                              input_data['motionName'],
                              input_data['retarget'])
+    logger.info("task_id: %s - 추출된 정보로 애니메이션 생성 완료", self.request.id)
 
+    logger.info("task_id: %s - GIF 로딩", self.request.id)
     with open(f'{temporary_directory}/video.gif', 'rb') as image:
         gif_bytes = bytearray(image.read())
+    logger.info("task_id: %s - GIF 로딩 완료", self.request.id)
 
     output_wrapper = get_object_wrapper(s3_bucket_name, filename)
 
     output_wrapper.put(gif_bytes)
+    logger.info("task_id: %s - S3 업로드 완료", self.request.id)
 
     if os.path.exists(temporary_directory):
         shutil.rmtree(temporary_directory)
 
 
-@celery.task(base=LogErrorsTask)
-def save_capsule_skin(_, input_data: dict, filename: str):
+@celery.task(base=LogErrorsTask, bind=True)
+def save_capsule_skin(self, _, input_data: dict, filename: str):
     """
     캡슐 스킨 생성 정보 DB 저장 태스크
+    :param self: 현재 태스크 정보
     :param _: 이전 task 결과
     :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName
     :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif
@@ -73,15 +87,18 @@ def save_capsule_skin(_, input_data: dict, filename: str):
                                retarget=Retarget(input_data['retarget']).name,
                                member_id=input_data['memberId'])
 
+    logger.info("task_id: %s - 캡슐 스킨 DB 저장 시작", self.request.id)
     with Session(engine) as session:
         session.add(capsule_skin)
         session.commit()
+    logger.info("task_id: %s - 캡슐 스킨 DB 저장 완료", self.request.id)
 
 
-@celery.task(base=LogErrorsTask)
-def send_notification(_, input_data: dict, filename: str):
+@celery.task(base=LogErrorsTask, bind=True)
+def send_notification(self, _, input_data: dict, filename: str):
     """
     캡슐 스킨 생성 완료 알림 전송 태스크
+    :param self: 현재 태스크 정보
     :param _: 이전 task 결과
     :param input_data: 입력 데이터(dict) - imageUrl, motionName, retarget, skinName, memberId, memberName
     :param filename: 원격지에 저장될 파일 이름 ex) capsuleSkin/2/1234.gif
@@ -96,6 +113,7 @@ def send_notification(_, input_data: dict, filename: str):
         'status': NotificationStatus.SUCCESS.value
     }, ensure_ascii=False)
 
+    logger.info("task_id: %s - 캡슐 스킨 생성 완료 알림 전송 시작", self.request.id)
     with producers[connection].acquire(block=True) as producer:
         exchange = Exchange(name=QueueConfig.NOTIFICATION_EXCHANGE_NAME,
                             type='direct',
@@ -112,3 +130,4 @@ def send_notification(_, input_data: dict, filename: str):
             content_type='application/json',
             routing_key=QueueConfig.NOTIFICATION_QUEUE_NAME,
         )
+    logger.info("task_id: %s - 캡슐 스킨 생성 완료 알림 전송 완료", self.request.id)
diff --git a/backend/AnimatedDrawings/supervisord.conf b/backend/AnimatedDrawings/supervisord.conf
@@ -10,7 +10,7 @@ supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
 
 [program:flower]
 directory = /app/application
-command = /opt/conda/envs/animated_drawings/bin/celery -A celery_app flower --conf="/app/application/config/flowerconfig.py"
+command = /opt/conda/bin/celery -A celery_app flower --conf="/app/application/config/flowerconfig.py"
 priority = 100
 loglevel=info
 stdout_logfile = /app/application/log/flower.log
@@ -22,7 +22,7 @@ stderr_logfile_backups = 30
 
 [program:application]
 directory = /app/application
-command = /opt/conda/envs/animated_drawings/bin/python3 -u animation_queue.py
+command = /opt/conda/bin/python3 -u animation_queue.py
 loglevel=info
 priority = 100
 stdout_logfile = /app/application/log/application.log
@@ -35,7 +35,7 @@ environment=ENVIRONMENT=%(ENV_ENVIRONMENT)s,PYOPENGL_PLATFORM="osmesa"
 
 [program:celery]
 directory = /app/application
-command = /opt/conda/envs/animated_drawings/bin/celery -A celery_app worker --without-heartbeat --without-gossip --without-mingle
+command = /opt/conda/bin/celery -A celery_app worker --loglevel INFO
 priority = 200
 loglevel=info
 stdout_logfile = /app/application/log/celeryd.log

diff --git a/backend/AnimatedDrawings/torchserve/Dockerfile b/backend/AnimatedDrawings/torchserve/Dockerfile
@@ -1,14 +1,13 @@
 # syntax = docker/dockerfile:1.2
 
-FROM continuumio/miniconda3
+FROM continuumio/miniconda3:24.1.2-0
 
 # install os dependencies
 RUN mkdir -p /usr/share/man/man1
 RUN apt-get update && \
     DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
     ca-certificates \
     curl \
-    python3-pip \
     vim \
     sudo \
     default-jre \
@@ -17,26 +16,37 @@ RUN apt-get update && \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
 
+RUN conda install python=3.8.13 -y
+
 # install python dependencies
-RUN pip install openmim==0.3.9
-RUN pip install torch==2.1.2
-RUN pip install mmcv-full==1.7.0
+RUN pip install openmim
+RUN pip install torch==2.0.0
+RUN mim install mmcv-full==1.7.0
 RUN pip install mmdet==2.27.0
-RUN pip install torchserve==0.9.0
+RUN pip install torchserve
 
 # bugfix for xtcocoapi, an mmpose dependency
-RUN git clone https://github.com/hjessmith/xtcocoapi-bugfix.git
-WORKDIR xtcocoapi-bugfix
+RUN git clone https://github.com/jin-s13/xtcocoapi.git
+WORKDIR xtcocoapi
 RUN pip install -r requirements.txt
 RUN python setup.py install
 WORKDIR /
 RUN pip install mmpose==0.29.0
+RUN pip install torchvision==0.15.1  # pin torchvision to the release paired with torch 2.0.0
+RUN pip install numpy==1.24.4  # pin numpy; 1.24.x is the last series that supports Python 3.8
 
 # prep torchserve
+# (model store, pretrained .mar archives, config template, log4j2 settings)
 RUN mkdir -p /home/torchserve/model-store
 RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_detector.mar -P /home/torchserve/model-store/
 RUN wget https://github.com/facebookresearch/AnimatedDrawings/releases/download/v0.0.1/drawn_humanoid_pose_estimator.mar -P /home/torchserve/model-store/
-COPY config.properties /home/torchserve/config.properties
+COPY config.properties.template /home/torchserve/config.properties.template
+COPY log4j2.xml /home/torchserve/log4j2.xml
+
+ENV MAX_HEAP_SIZE=8g
+ENV INIT_HEAP_SIZE=4g
 
-# starting command
-CMD /opt/conda/bin/torchserve --start --ts-config /home/torchserve/config.properties && sleep infinity
+CMD sed "s/{{MAX_HEAP_SIZE}}/$MAX_HEAP_SIZE/g; s/{{INIT_HEAP_SIZE}}/$INIT_HEAP_SIZE/g" \
+    /home/torchserve/config.properties.template > /home/torchserve/config.properties && \
+    /opt/conda/bin/torchserve --start --ts-config /home/torchserve/config.properties --disable-token-auth && \
+    sleep infinity
diff --git a/backend/AnimatedDrawings/torchserve/config.properties b/backend/AnimatedDrawings/torchserve/config.properties
diff --git a/backend/AnimatedDrawings/torchserve/config.properties.template b/backend/AnimatedDrawings/torchserve/config.properties.template
@@ -0,0 +1,23 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+
+inference_address=http://0.0.0.0:8080
+management_address=http://0.0.0.0:8081
+metrics_address=http://0.0.0.0:8082
+model_store=/home/torchserve/model-store
+load_models=all
+
+vmargs=-Xmx{{MAX_HEAP_SIZE}} -Xms{{INIT_HEAP_SIZE}} -XX:+ExitOnOutOfMemoryError -XX:+HeapDumpOnOutOfMemoryError -Dlog4j.configurationFile=file:///home/torchserve/log4j2.xml
+
+ipex_enable=true
+cpu_launcher_enable=true
+cpu_launcher_args=--use_logical_core
+
+async_logging=true
+
+batch_size = 1
+batch_delay=100
+concurrency=1
+default_workers_per_model=1
+job_queue_size=1000