Skip to content

Commit

Permalink
Refactor Dockerfile to update Python version to 3.11.9, fix `relik se…
Browse files Browse the repository at this point in the history
…rve`, black everything, update version to 1.0.5
  • Loading branch information
Riccorl committed Aug 6, 2024
1 parent 5902ece commit 6e1f3f7
Show file tree
Hide file tree
Showing 12 changed files with 90 additions and 77 deletions.
7 changes: 6 additions & 1 deletion dockerfiles/fastapi/Dockerfile.cpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10.13-slim-bullseye
FROM python:3.11.9-slim-bullseye

ARG DEBIAN_FRONTEND=noninteractive

Expand All @@ -10,6 +10,11 @@ ENV PATH=${PATH}:/home/relik-user/.local/bin
COPY --chown=relik-user:relik-user . /home/relik-user/relik
WORKDIR /home/relik-user/relik

# create the Hugging Face cache dir so a cache volume can be mounted there
RUN mkdir -p /home/relik-user/.cache/huggingface
# ENV HF_HOME=/home/relik-user/.cache/huggingface
# mount huggingface

RUN pip install --upgrade --no-cache-dir .[serve] -c constraints.cpu.txt \
&& chmod +x scripts/docker/start-gunic.sh

Expand Down
7 changes: 5 additions & 2 deletions dockerfiles/fastapi/Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
FROM nvidia/cuda:12.1.0-base-ubuntu22.04
FROM nvidia/cuda:12.0.0-base-ubuntu22.04

ARG DEBIAN_FRONTEND=noninteractive

RUN adduser --disabled-password --gecos '' relik-user \
&& apt-get update \
&& apt-get install -y --no-install-recommends \
curl wget python3.10 python3-distutils python3-pip \
curl wget python3.11 python3-distutils python3-pip \
&& rm -rf /var/lib/apt/lists/*

USER relik-user
Expand All @@ -15,6 +15,9 @@ ENV PATH=${PATH}:/home/relik-user/.local/bin
COPY --chown=relik-user:relik-user . /home/relik-user/relik
WORKDIR /home/relik-user/relik

RUN mkdir -p /home/relik-user/.cache/huggingface


RUN pip install --upgrade --no-cache-dir .[serve] \
&& chmod +x scripts/docker/start-gunic.sh

Expand Down
22 changes: 22 additions & 0 deletions examples/cie.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from relik import Relik

# Sample passage fed to the model: six paragraphs separated by newlines.
text = "\n".join(
    (
        "When Noah Lyles put his spike into Stade de France’s purple track for his first stride Sunday night of the Paris Olympics 100-meter final, he was already behind. In an event in which margin for error is slimmest, his reaction time to the starting gun was the slowest.",
        "Halfway through, Lyles, 27, of the U.S., was still in seventh place in an eight-man field, trying to chase down Jamaica’s Kishane Thompson, who owned not only this season’s fastest time but also the fastest time in the semifinal round contested earlier Sunday.",
        "By the final steps Lyles had caught up so much to Thompson, American Fred Kerley and South Africa’s Akani Simbine that he did something he rarely practices — dipping his shoulder at the finish.",
        "Even then, Lyles was unconvinced he had won the gold medal he had so boldly predicted, and so badly wanted, for three years. The scoreboard offered no indication of who had won gold, silver or bronze as it processed a photo finish, a sold-out, raucous stadium sharing in the uncertainty.",
        "“I think you got that one, big dog,” Lyles told Thompson.",
        "“I’m not even sure,” Thompson replied. “It was that close.”",
    )
)

# Load the "relik-ie/relik-cie-small" checkpoint onto the CUDA device.
relik = Relik.from_pretrained(
    "relik-ie/relik-cie-small",
    device="cuda",
)

# Run the loaded pipeline on the passage.
output = relik(text)

# Entities
print(output.spans)
# Relations
print(output.triplets)
21 changes: 21 additions & 0 deletions examples/langchain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from langchain_experimental.graph_transformers import RelikGraphTransformer
from langchain_core.documents import Document

# Sample passage to convert: six paragraphs separated by newlines.
text = "\n".join(
    (
        "When Noah Lyles put his spike into Stade de France’s purple track for his first stride Sunday night of the Paris Olympics 100-meter final, he was already behind. In an event in which margin for error is slimmest, his reaction time to the starting gun was the slowest.",
        "Halfway through, Lyles, 27, of the U.S., was still in seventh place in an eight-man field, trying to chase down Jamaica’s Kishane Thompson, who owned not only this season’s fastest time but also the fastest time in the semifinal round contested earlier Sunday.",
        "By the final steps Lyles had caught up so much to Thompson, American Fred Kerley and South Africa’s Akani Simbine that he did something he rarely practices — dipping his shoulder at the finish.",
        "Even then, Lyles was unconvinced he had won the gold medal he had so boldly predicted, and so badly wanted, for three years. The scoreboard offered no indication of who had won gold, silver or bronze as it processed a photo finish, a sold-out, raucous stadium sharing in the uncertainty.",
        "“I think you got that one, big dog,” Lyles told Thompson.",
        "“I’m not even sure,” Thompson replied. “It was that close.”",
    )
)

# Build the graph transformer from the named pretrained checkpoint.
relik = RelikGraphTransformer(
    "relik-ie/relik-relation-extraction-small-wikipedia"
)

# Wrap the passage in a single Document and convert it to graph documents.
documents = [Document(page_content=text)]
output = relik.convert_to_graph_documents(documents)

# triplets
print(output)
69 changes: 13 additions & 56 deletions relik/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,61 +144,19 @@ def inference(

@app.command(context_settings=dict(ignore_unknown_options=True, allow_extra_args=True))
def serve(
relik_pretrained: str,
device: Annotated[
relik_pretrained: Annotated[
str,
typer.Argument(
help="The device to use for relik (e.g., 'cuda', 'cpu').",
),
] = None,
retriever_device: Annotated[
Union[str, None],
typer.Argument(
help="The device to use for the retriever (e.g., 'cuda', 'cpu').",
),
] = None,
document_index_device: Annotated[
Union[str, None],
typer.Argument(
help="The device to use for the index (e.g., 'cuda', 'cpu').",
),
] = None,
reader_device: Annotated[
Union[str, None],
typer.Argument(
help="The device to use for the reader (e.g., 'cuda', 'cpu').",
),
] = None,
precision: Annotated[
Union[str, None],
typer.Argument(
help="The precision to use for relik (e.g., '32', '16').",
),
] = "32",
retriever_precision: Annotated[
Union[str, None],
typer.Argument(
help="The precision to use for the retriever (e.g., '32', '16').",
),
] = None,
document_index_precision: Annotated[
Union[str, None],
typer.Argument(
help="The precision to use for the index (e.g., '32', '16').",
),
] = None,
reader_precision: Annotated[
Union[str, None],
typer.Argument(
help="The precision to use for the reader (e.g., '32', '16').",
),
] = None,
annotation_type: Annotated[
str,
typer.Argument(
help="The type of annotation to use (e.g., 'CHAR', 'WORD').",
),
] = "char",
typer.Argument(help="The device to use for relik (e.g., 'cuda', 'cpu')."),
],
device: str = "cpu",
retriever_device: str = None,
document_index_device: str = None,
reader_device: str = None,
precision: int = 32,
retriever_precision: int = None,
document_index_precision: int = None,
reader_precision: int = None,
annotation_type: str = "char",
host: str = "0.0.0.0",
port: int = 8000,
frontend: bool = False,
Expand All @@ -216,10 +174,9 @@ def serve(
annotation_type=annotation_type,
host=host,
port=port,
frontend=frontend
frontend=frontend,
)



if __name__ == "__main__":
app()
2 changes: 1 addition & 1 deletion relik/inference/data/window/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ def _merge_window_pair(

# merge text, take into account overlapping chars
if isinstance(self.splitter, SpacySentenceSplitter):
m_text = window1.text[: window2.offset] + " " + window2.text
m_text = window1.text[: window2.offset] + " " + window2.text
else:
m_text = window1.text[: window2.offset] + window2.text

Expand Down
4 changes: 3 additions & 1 deletion relik/inference/serve/frontend/gradio_fe.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,9 @@ def text_analysis(Text, Relation_Threshold, Window_Size, Window_Stride):
# window_stride=Window_Stride,
# )
print(f"Using ReLiK at {relik}")
print(f"Querying ReLiK with ?text={Text}&relation_threshold={Relation_Threshold}&window_size={Window_Size}&window_stride={Window_Stride}&annotation_type=word&remove_nmes=False")
print(
f"Querying ReLiK with ?text={Text}&relation_threshold={Relation_Threshold}&window_size={Window_Size}&window_stride={Window_Stride}&annotation_type=word&remove_nmes=False"
)
response = requests.get(
f"http://{relik}/?text={Text}&relation_threshold={Relation_Threshold}&window_size={Window_Size}&window_stride={Window_Stride}&annotation_type=word&remove_nmes=False",
)
Expand Down
3 changes: 2 additions & 1 deletion relik/reader/data/relik_reader_re_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,8 @@ def flip_cands(flip_candidates, candidates):
"end_labels": end_labels,
"disambiguation_labels": disambiguation_labels,
"relation_labels": relation_labels,
"predictable_candidates": candidates_symbols + candidates_entities_symbols,
"predictable_candidates": candidates_symbols
+ candidates_entities_symbols,
}
if self.materialize_samples:
self.samples = data_acc
Expand Down
24 changes: 14 additions & 10 deletions relik/retriever/pytorch_modules/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,15 +648,18 @@ def to_config(cls, *args, **kwargs):
}
return config


class GoldenSillyRetriever(GoldenRetriever):
def __init__(self, documents: List[str], *args, **kwargs):
self.documents = DocumentStore([Document(doc) for doc in documents])
self.document_index = BaseDocumentIndex(self.documents)
def retrieve(self,
text: Optional[Union[str, List[str]]] = None,
k: int = 100,
*args,
**kwargs,

def retrieve(
self,
text: Optional[Union[str, List[str]]] = None,
k: int = 100,
*args,
**kwargs,
) -> List[List[RetrievedSample]]:
if isinstance(text, str):
text = [text]
Expand All @@ -666,11 +669,12 @@ def retrieve(self,
[RetrievedSample(score=1.0, document=doc) for doc in self.documents[:k]]
for _ in text
]

def index(self):
pass
def eval(self):
pass
raise NotImplementedError

def save_pretrained(self):
pass
raise NotImplementedError

def to(self, device):
pass
raise NotImplementedError
2 changes: 1 addition & 1 deletion relik/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
_MINOR = "0"
# On main and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "4"
_PATCH = "5"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = os.environ.get("RELIK_VERSION_SUFFIX", "")
Expand Down
2 changes: 1 addition & 1 deletion scripts/build_all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ docker build -f dockerfiles/fastapi/Dockerfile.cpu -t relik:$LATEST_VERSION-cpu-

echo "==== Building GPU images ===="
# docker build -f dockerfiles/ray/Dockerfile.cuda -t relik:$VERSION-cuda-ray .
docker build -f dockerfiles/fastapi/Dockerfile.cuda -t relik:$LATEST_VERSION-cuda-fastapi .
# docker build -f dockerfiles/fastapi/Dockerfile.cuda -t relik:$LATEST_VERSION-cuda-fastapi .
4 changes: 1 addition & 3 deletions scripts/docker/start-gunic.sh
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,7 @@ if [ -z "$GUNICORN_CONF" ]; then
fi

# Start the ReLiK server through the `relik serve` CLI
# exec gunicorn -k uvicorn.workers.UvicornWorker -c "$GUNICORN_CONF" "$APP_MODULE" -b 0.0.0.0:8000
exec relik serve $RELIK_PRETRAINED \
--device $DEVICE \
exec relik serve $RELIK_PRETRAINED --device $DEVICE \
--retriever-device $RETRIEVER_DEVICE \
--index-device $INDEX_DEVICE \
--reader-device $READER_DEVICE \
Expand Down

0 comments on commit 6e1f3f7

Please sign in to comment.