Skip to content

Commit

Permalink
feat:update dockerfile
Browse files Browse the repository at this point in the history
  • Loading branch information
christinestraub committed Jan 3, 2025
1 parent 9c42660 commit efe1167
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ permissions:
contents: read

env:
NLTK_DATA: /home/notebook-user/nltk_data
NLTK_DATA: ${{ github.workspace }}/nltk_data

jobs:
setup:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ingest-test-fixtures-update-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
setup:
runs-on: ubuntu-latest-m
env:
NLTK_DATA: /home/notebook-user/nltk_data
NLTK_DATA: ${{ github.workspace }}/nltk_data
steps:
- uses: actions/checkout@v3
- uses: ./.github/actions/base-cache
Expand All @@ -24,7 +24,7 @@ jobs:
setup_ingest:
runs-on: ubuntu-latest
env:
NLTK_DATA: /home/notebook-user/nltk_data
NLTK_DATA: ${{ github.workspace }}/nltk_data
needs: [setup]
steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 1 addition & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ COPY unstructured unstructured
COPY test_unstructured test_unstructured
COPY example-docs example-docs

# Copy the downloaded NLTK data folder to your local environment.
COPY ./nltk_data /home/notebook-user/nltk_data

RUN chown -R notebook-user:notebook-user /app && \
apk add font-ubuntu git && \
Expand All @@ -22,7 +20,7 @@ USER notebook-user
RUN find requirements/ -type f -name "*.txt" -exec pip3.11 install --no-cache-dir --user -r '{}' ';'

# Command to check if NLTK data has been copied correctly
RUN python3.11 -c "import nltk; print(nltk.data.find('tokenizers/punkt_tab'))"
RUN python3.11 -c "from unstructured.nlp.tokenize import copy_nltk_packages; copy_nltk_packages()"

RUN python3.11 -c "from unstructured.partition.model_init import initialize; initialize()" && \
python3.11 -c "from unstructured_inference.models.tables import UnstructuredTableTransformerModel; model = UnstructuredTableTransformerModel(); model.initialize('microsoft/table-transformer-structure-recognition')"
Expand Down

0 comments on commit efe1167

Please sign in to comment.