From e5d84390eec743e7f538f6a7ac7212fd920de3e7 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 14 Feb 2024 17:30:50 -0600 Subject: [PATCH 1/4] docs: Add suggestions for installing PyICU on macOS --- backend/README.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/backend/README.md b/backend/README.md index 89fcb127..a46ee067 100644 --- a/backend/README.md +++ b/backend/README.md @@ -366,4 +366,29 @@ pytest Transliteration requires installing the [PyICU](https://pypi.org/project/PyICU/) bindings. -See the documentation ["Installing PyICU"](https://gitlab.pyicu.org/main/pyicu#installing-pyicu) for additional instructions for your operating system. \ No newline at end of file +See the documentation ["Installing PyICU"](https://gitlab.pyicu.org/main/pyicu#installing-pyicu) for additional instructions for your operating system. + +For macOS, you can try the following: + +Install `icu4c`: +``` +brew install icu4c +``` + +Determine the installed version: +``` +ls $HOMEBREW_CELLAR/icu4c +``` + +In my case, it was 73.2 + +Set up required environment variables for the PyICU build process. + +``` +export ICU_VERSION=73 # required because PyICU detects this using + # pkg-config, but icu4c does not register + # itself with pkg-config in recent versions +export PYICU_INCLUDES=$HOMEBREW_CELLAR/icu4c/73.2/include +export PYICU_LFLAGS=-L$HOMEBREW_CELLAR/icu4c/73.2/lib +pip install "PyICU>=2.9,<3" +``` From 58359cd9f9a95698ee3e1be597266e0732d9ab03 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 14 Feb 2024 17:34:50 -0600 Subject: [PATCH 2/4] Bump Python, ATLAS and Pandas deps Making it easier to work with more recent macOS installations --- backend/backend-dev.dockerfile | 4 ++-- backend/requirements.txt | 4 ++-- heroku.dockerfile | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/backend-dev.dockerfile b/backend/backend-dev.dockerfile index e4137de5..73d3f64e 100644 --- a/backend/backend-dev.dockerfile +++ b/backend/backend-dev.dockerfile @@ -1,4 +1,4 @@ -FROM python:3.9 AS build +FROM python:3.12 AS build WORKDIR /opt/scaife-stack/src/ RUN pip install --disable-pip-version-check --upgrade pip setuptools wheel virtualenv ENV PATH="/opt/scaife-stack/bin:${PATH}" VIRTUAL_ENV="/opt/scaife-stack" @@ -7,7 +7,7 @@ RUN set -x \ && virtualenv /opt/scaife-stack \ && pip install -r requirements-dev.txt -FROM python:3.9 +FROM python:3.12 ENV PYTHONUNBUFFERED=1 \ PYTHONDONTWRITEBYTECODE=1 \ PYTHONPATH=/opt/scaife-stack/src/ \ diff --git a/backend/requirements.txt b/backend/requirements.txt index 20418921..cf8e6324 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -10,10 +10,10 @@ gunicorn==20.0.0 # TODO: Package in scaife-viewer-atlas jsonlines==2.0.0 more_itertools==8.12.0 -pandas==1.5.2 +pandas==2.2.0 pygtrie==2.5.0 PyICU>=2.9,<3 requests>=2.0.6 -scaife-viewer-atlas @https://github.com/scaife-viewer/backend/archive/17bb7d538e1d89a7c81574f435d292d4c1a98134.zip#subdirectory=atlas +scaife-viewer-atlas @https://github.com/scaife-viewer/backend/archive/89bb569d97e629b7e1f46f0948a321d1a42d0b59.zip#subdirectory=atlas thefuzz==0.19.0 whitenoise==4.1.4 diff --git a/heroku.dockerfile b/heroku.dockerfile index 4154ca80..461f1162 100644 --- a/heroku.dockerfile +++ b/heroku.dockerfile @@ -17,7 +17,7 @@ RUN yarn build # # # # # # # # # # # # # # # # # # # # # # # # # backend # # # # # # # # # # # # # # # # # # # # # # # # -FROM python:3.9 AS backend-build +FROM python:3.12 AS backend-build WORKDIR /opt/scaife-stack/src/ RUN pip install --disable-pip-version-check --upgrade pip setuptools wheel virtualenv ENV PATH="/opt/scaife-stack/bin:${PATH}" VIRTUAL_ENV="/opt/scaife-stack" From a5dff1cfcb099ebfe8332b002e2dfcfe84793a84 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 14 Feb 2024 17:35:53 -0600 Subject: [PATCH 3/4] Run Python 3.12 in GitHub actions --- .github/workflows/ci-backend.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-backend.yml b/.github/workflows/ci-backend.yml index 3eb69b44..cef52231 100644 --- a/.github/workflows/ci-backend.yml +++ b/.github/workflows/ci-backend.yml @@ -17,10 +17,10 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.12 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.12 - name: Configure cache id: python-cache uses: actions/cache@v2 From d4efc2aae249edd8f56f222b0ff694104cf209e8 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 14 Feb 2024 18:09:02 -0600 Subject: [PATCH 4/4] Downgrade Pandas due to BI change in Pandas 2 There is some kind of regression between Pandas 1 and 2 in `tokenize_text_parts_parallel`; fails on some CSVs. I tried experimenting with TSVs and delimiter changes, but no dice. --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index cf8e6324..4a7b01b2 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -10,7 +10,7 @@ gunicorn==20.0.0 # TODO: Package in scaife-viewer-atlas jsonlines==2.0.0 more_itertools==8.12.0 -pandas==2.2.0 +pandas<2 pygtrie==2.5.0 PyICU>=2.9,<3 requests>=2.0.6