-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from linto-ai/next
merge next -> master
- Loading branch information
Showing
17 changed files
with
636 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# SERVING PARAMETERS | ||
SERVICE_MODE=task | ||
|
||
# SERVICE PARAMETERS | ||
SERVICES_BROKER=redis://172.17.0.1:6379 | ||
BROKER_PASS= | ||
|
||
# SERVICE DISCOVERY | ||
SERVICE_NAME=linto-punctuation | ||
LANGUAGE=fr-FR | ||
# QUEUE_NAME=(Optional) | ||
# MODEL_INFO=This model does something | ||
|
||
# CONCURRENCY | ||
CONCURRENCY=2 |
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,13 @@ | ||
FROM python:3.8 | ||
LABEL maintainer="[email protected]" | ||
ENV PYTHONUNBUFFERED TRUE | ||
ENV IMAGE_NAME linto-platform-diarization | ||
FROM python:3.9 | ||
LABEL maintainer="[email protected]" | ||
|
||
RUN apt-get update \ | ||
&& apt-get install --no-install-recommends -y \ | ||
ca-certificates \ | ||
g++ \ | ||
openjdk-11-jre-headless \ | ||
curl \ | ||
wget | ||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
g++ \ | ||
curl \ | ||
libtinfo5 \ | ||
wget | ||
|
||
# Rust compiler for tokenizers | ||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y | ||
|
@@ -18,8 +16,8 @@ ENV PATH="/root/.cargo/bin:${PATH}" | |
WORKDIR /usr/src/app | ||
|
||
# Python dependencies | ||
COPY requirements.txt ./ | ||
RUN pip install --no-cache-dir -r requirements.txt | ||
COPY requirements.txt . | ||
RUN pip3 install --no-cache-dir -r requirements.txt -f https://download.pytorch.org/whl/torch_stable.html | ||
|
||
# Supervisor | ||
COPY celery_app /usr/src/app/celery_app | ||
|
@@ -28,13 +26,8 @@ COPY document /usr/src/app/document | |
COPY punctuation /usr/src/app/punctuation | ||
RUN mkdir /usr/src/app/model-store | ||
RUN mkdir -p /usr/src/app/tmp | ||
COPY config.properties /usr/src/app/config.properties | ||
COPY RELEASE.md ./ | ||
COPY docker-entrypoint.sh wait-for-it.sh healthcheck.sh ./ | ||
|
||
# Grep CURRENT VERSION | ||
RUN export VERSION=$(awk -v RS='' '/#/ {print; exit}' RELEASE.md | head -1 | sed 's/#//' | sed 's/ //') | ||
|
||
ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/punctuation" | ||
HEALTHCHECK CMD ./healthcheck.sh | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
FROM python:3.9 | ||
LABEL maintainer="[email protected]" | ||
|
||
RUN apt-get update && \ | ||
apt-get install -y --no-install-recommends \ | ||
ca-certificates \ | ||
g++ \ | ||
openjdk-11-jre-headless \ | ||
curl \ | ||
wget | ||
|
||
# Rust compiler for tokenizers | ||
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y | ||
ENV PATH="/root/.cargo/bin:${PATH}" | ||
|
||
WORKDIR /usr/src/app | ||
|
||
# Python dependencies | ||
COPY requirements.cpu.txt . | ||
RUN pip3 install --no-cache-dir -r requirements.cpu.txt -f https://download.pytorch.org/whl/torch_stable.html | ||
|
||
# Supervisor | ||
COPY celery_app /usr/src/app/celery_app | ||
COPY http_server /usr/src/app/http_server | ||
COPY document /usr/src/app/document | ||
COPY punctuation /usr/src/app/punctuation | ||
RUN mkdir /usr/src/app/model-store | ||
RUN mkdir -p /usr/src/app/tmp | ||
COPY docker-entrypoint.sh wait-for-it.sh healthcheck.sh ./ | ||
|
||
ENV PYTHONPATH="${PYTHONPATH}:/usr/src/app/punctuation" | ||
HEALTHCHECK CMD ./healthcheck.sh | ||
|
||
ENV TEMP=/usr/src/app/tmp | ||
ENTRYPOINT ["./docker-entrypoint.sh"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
# 2.0.0 | ||
- Integration of recasepunc | ||
|
||
# 1.1.1 | ||
- Fix error on empty sentences | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,53 +1,26 @@ | ||
import json | ||
from typing import Union | ||
|
||
import requests | ||
|
||
from celery_app.celeryapp import celery | ||
|
||
from punctuation.recasepunc import load_model, generate_predictions | ||
|
||
MODEL = load_model() | ||
|
||
@celery.task(name="punctuation_task", bind=True) | ||
def punctuation_task(self, text: Union[str, list]): | ||
"""punctuation_task do a synchronous call to the punctuation serving API""" | ||
self.update_state(state="STARTED") | ||
# Fetch model name | ||
try: | ||
result = requests.get( | ||
"http://localhost:8081/models", | ||
headers={ | ||
"accept": "application/json", | ||
}, | ||
) | ||
models = json.loads(result.text) | ||
model_name = models["models"][0]["modelName"] | ||
except Exception as error: | ||
raise Exception("Failed to fetch model name") from error | ||
|
||
unique = isinstance(text, str) | ||
|
||
if isinstance(text, str): | ||
if unique: | ||
sentences = [text] | ||
else: | ||
sentences = text | ||
punctuated_sentences = [] | ||
for i, sentence in enumerate(sentences): | ||
self.update_state(state="STARTED", meta={"current": i, "total": len(sentences)}) | ||
|
||
result = requests.post( | ||
f"http://localhost:8080/predictions/{model_name}", | ||
headers={"content-type": "application/octet-stream"}, | ||
data=sentence.strip().encode("utf-8"), | ||
) | ||
if result.status_code == 200: | ||
punctuated_sentence = result.text | ||
else: | ||
print("Failed to predict punctuation on sentence: >{sentence}<") | ||
punctuated_sentence = sentence | ||
# First letter in capital | ||
if len(punctuated_sentence): | ||
punctuated_sentence = punctuated_sentence[0].upper() + punctuated_sentence[1:] | ||
punctuated_sentences.append(punctuated_sentence) | ||
punctuated_sentences = generate_predictions(MODEL, sentences) | ||
|
||
return ( | ||
punctuated_sentences[0] | ||
if len(punctuated_sentences) == 1 | ||
if unique | ||
else punctuated_sentences | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.