forked from LogicalSpark/docker-tikaserver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockerfile
21 lines (18 loc) · 1.08 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Base image. Pinned to an explicit release instead of the moving `latest`
# tag so that rebuilding this file always starts from the same distribution.
# NOTE(review): 16.04 was chosen because the install step below expects
# `openjdk-8-jre-headless` to be installable and `gpg` to be available;
# it is past standard support, so move to a newer LTS (and ideally pin by
# digest) once the build has been validated there.
FROM ubuntu:16.04
# Image author metadata. The MAINTAINER instruction is deprecated; the
# OCI-standard authors label carries the same information.
# NOTE(review): the address below looks like a redacted placeholder —
# restore the real contact address before publishing.
LABEL org.opencontainers.image.authors="[email protected]"
# Tika release to install. Kept as ENV (not ARG) because the ENTRYPOINT
# expands TIKA_VERSION at container start to locate the jar.
ENV TIKA_VERSION=1.16
# Canonical download location of the server jar; the build also derives the
# signature URL (this value + ".asc") and the mirror lookup path from it.
# Declared in a separate ENV instruction so $TIKA_VERSION above is already
# defined when this line is expanded.
ENV TIKA_SERVER_URL="https://www.apache.org/dist/tika/tika-server-${TIKA_VERSION}.jar"
# Install the Java runtime and the native tools Tika shells out to (GDAL,
# Tesseract with five language packs), then fetch the Tika server jar from
# the nearest Apache mirror and verify it against its detached signature.
#
# Notes on the non-obvious parts:
#  * ca-certificates and gnupg are listed explicitly because
#    --no-install-recommends would otherwise not guarantee them, and both
#    are needed by the HTTPS downloads and the signature check.
#  * GNUPGHOME points into /tmp so the imported keyring is removed by the
#    final cleanup instead of being left in the image.
#  * The signature (.asc) is always fetched from the canonical
#    TIKA_SERVER_URL, never from the mirror; only the jar comes from the
#    mirror, and `gpg --verify` fails the build if the two do not match.
#  * curl runs with -f so an HTTP error aborts the build rather than
#    saving an error page as the jar or signature.
#  * The mirror lookup is a pipeline whose exit status only reflects `sed`,
#    so the resulting URL is validated explicitly before it is used.
#  * Everything happens in one layer so the apt lists and temporary files
#    removed at the end never persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        gdal-bin \
        gnupg \
        openjdk-8-jre-headless \
        tesseract-ocr \
        tesseract-ocr-deu \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
        tesseract-ocr-ita \
        tesseract-ocr-spa \
    && export GNUPGHOME=/tmp/gnupg \
    && mkdir -m 700 "$GNUPGHOME" \
    && curl -fsSL https://people.apache.org/keys/group/tika.asc -o /tmp/tika.asc \
    && gpg --batch --import /tmp/tika.asc \
    && curl -fsSL "$TIKA_SERVER_URL.asc" -o "/tmp/tika-server-${TIKA_VERSION}.jar.asc" \
    && NEAREST_TIKA_SERVER_URL=$(curl -fsSL "https://www.apache.org/dyn/closer.cgi/${TIKA_SERVER_URL#https://www.apache.org/dist/}?asjson=1" \
        | awk '/"path_info": / { pi=$2; }; /"preferred":/ { pref=$2; }; END { print pref " " pi; };' \
        | sed -r -e 's/^"//; s/",$//; s/" "//') \
    && case "$NEAREST_TIKA_SERVER_URL" in \
         http://*|https://*) echo "Nearest mirror: $NEAREST_TIKA_SERVER_URL" ;; \
         *) echo "Could not determine a Tika mirror URL (got: '$NEAREST_TIKA_SERVER_URL')" >&2; exit 1 ;; \
       esac \
    && curl -fsSL "$NEAREST_TIKA_SERVER_URL" -o "/tika-server-${TIKA_VERSION}.jar" \
    && gpg --batch --verify "/tmp/tika-server-${TIKA_VERSION}.jar.asc" "/tika-server-${TIKA_VERSION}.jar" \
    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Documents the TCP port the container is expected to serve on; EXPOSE does
# not publish it — map it at run time (e.g. `docker run -p 9998:9998 ...`).
# NOTE(review): no port option is passed to the server, so 9998 is presumably
# the server's default — confirm against the Tika server documentation.
EXPOSE 9998/tcp
# Start the Tika server listening on all interfaces (-h 0.0.0.0) so it is
# reachable from outside the container.
#
# Exec (JSON) form is used with an explicit `sh -c` wrapper because
# ${TIKA_VERSION} must be expanded at container start. `exec` replaces the
# shell with the java process so it receives the stop signal directly, and
# "$@" forwards any arguments given to `docker run <image> ...` to the
# server (the trailing "--" only fills $0 of the wrapper shell).
# NOTE(review): no USER instruction is set, so the server runs as root;
# consider creating and switching to a dedicated unprivileged user.
ENTRYPOINT ["/bin/sh", "-c", "exec java -jar \"/tika-server-${TIKA_VERSION}.jar\" -h 0.0.0.0 \"$@\"", "--"]