Skip to content

Commit

Permalink
Docker rewrite
Browse files Browse the repository at this point in the history
  • Loading branch information
jri-sp committed Dec 7, 2017
1 parent 5e58ed6 commit b44a899
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 108 deletions.
4 changes: 1 addition & 3 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
_deprecated
doc
hyphe_frontend/test
hyphe_www_client
bin/hyphe-phantomjs-2.0.0
47 changes: 21 additions & 26 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,43 +1,38 @@
FROM python:2.7
FROM python:2.7-alpine

WORKDIR /app

# Install system dependencies

RUN apt-get update && apt-get install openjdk-7-jdk -y --no-install-recommends


# App python dependencies
ENV PYTHONPATH $PYTHONPATH:/app

COPY requirements.txt /app/requirements.txt

ENV WORKON_HOME /opt/virtualenvs
RUN apk --update add gcc git musl-dev libxml2-dev libxslt-dev libffi-dev openssl-dev \
&& pip install --no-cache-dir --requirement /app/requirements.txt \
&& pip install --no-cache-dir Scrapy==0.24.6 \
&& apk del gcc git musl-dev \
&& rm /var/cache/apk/*

RUN mkdir -p ${WORKON_HOME} \
&& pip install virtualenv \
&& pip install virtualenvwrapper \
&& /bin/bash -c "source $(which virtualenvwrapper.sh) \
&& mkvirtualenv --no-site-packages hyphe \
&& workon hyphe \
&& add2virtualenv $(pwd) \
&& pip install -r /app/requirements.txt" \
&& pip install Scrapy==0.24 \
&& echo 'source $(which virtualenvwrapper.sh) && workon hyphe' | tee /root/.bashrc


# Install app


COPY ./bin /app/bin

COPY ./config /app/config

COPY ./hyphe_backend /app/hyphe_backend

RUN sed "s|##HYPHEPATH##|"`pwd`"|" /app/config/config.json.example | sed 's|"OPEN_CORS_API": false,|"OPEN_CORS_API": true,|' > /app/config/config.json \
&& mkdir -p /app/hyphe_backend/crawler/config \
&& cp /app/config/config.json /app/hyphe_backend/crawler/config/config.json
COPY ./docker-entrypoint.py /app/docker-entrypoint.py

RUN cp /app/config/config.json.example /app/config/config.json

RUN chmod +x /app/docker-entrypoint.py

# Start hyphe
RUN chmod +x /app/hyphe_backend/core.tac

EXPOSE 6978

CMD /bin/bash -c "source $(which virtualenvwrapper.sh) && workon hyphe && twistd -y /app/hyphe_backend/core.tac --nodaemon --pidfile="
VOLUME ["/app/config"]

VOLUME ["/app/traph-data"]

ENTRYPOINT ["/app/docker-entrypoint.py"]

37 changes: 26 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,33 +96,48 @@ Please read the dedicated [WebService documentation](doc/serve.md) to do so.
Docker enables isolated install and execution of software stacks, which can be an easy way to install Hyphe locally on an individual computer, including on unsupported distributions like MacOS.
Follow [Docker install instructions](https://docs.docker.com/installation/) to install Docker on your machine.

[Install Docker Compose](https://docs.docker.com/compose/install/) to set up and orchestrate Hyphe services in a single line.
Once you have Docker installed and running, [install Docker Compose](https://docs.docker.com/compose/install/) to set up and orchestrate Hyphe services in a single line.

You now have two options to get the Hyphe Docker images:

### Pull official image from Docker Store (recommended way)

```bash
docker-compose up
docker-compose pull
```

When using [boot2docker](http://boot2docker.io/) for instance on MacOS, you might need beforehand to run the following:
### Or build your own images from the source code

```bash
boot2docker up
# and copy paste the 3 lines starting with export to set the environment variables
docker-compose build
```

It will take a couple of minutes to spin everything up for the first time.
Once the services are ready, you can access the frontend interface by connecting on its IP address:
It will take a couple of minutes to download or build everything.

### Create and run containers

Once done, you can run Hyphe containers with this command:

```bash
open http://$(docker inspect -f '{{.NetworkSettings.IPAddress}}' hyphe_frontend_1):8000
docker-compose up
```

Or, if you use boot2docker:
You can use `-d` option to run containers in the background.

Once the services are ready, you can access the frontend interface by connecting on `localhost` or the Docker host IP address:

```bash
open http://$(boot2docker ip):8000
open http://localhost
```

**Notice**: this is not a production setup. Get some inspiration from the `docker-compose.yml` to understand how to distribute the application on one or many machines.

It can be useful to inspect the containers' logs; you can do so with:

```bash
docker-compose logs
```

Use the `-f` option to follow the log output.

## Advanced developers features & contributing

Expand Down
23 changes: 0 additions & 23 deletions Vagrantfile

This file was deleted.

69 changes: 37 additions & 32 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,32 +1,37 @@
backend:
build: .
links:
- "mongo:mongo"
- "crawler:crawler"
volumes_from:
- memorystructure
environment:
- HYPHE_MONGODB_HOST=mongo
- HYPHE_MONGODB_PORT=27017
- HYPHE_CRAWLER_HOST=crawler
- HYPHE_CRAWLER_PORT=6800
ports:
- "6978:6978"

frontend:
build: ./hyphe_frontend
ports:
- "8000:8000"

memorystructure:
build: ./memory_structure

crawler:
build: ./hyphe_backend/crawler
links:
- "mongo:mongo"
volumes:
- ./config/scrapyd.config:/etc/scrapyd/conf.d/100-hyphe

mongo:
image: mongo:3.0
version: "2"
volumes:
traph-data:
mongo-data:
services:
backend:
image: scpomedialab/hyphe_backend:latest
build: .
links:
- "mongo:mongo"
- "crawler:crawler"
environment:
- HYPHE_MONGODB_HOST=mongo
- HYPHE_MONGODB_PORT=27017
- HYPHE_CRAWLER_HOST=crawler
- HYPHE_CRAWLER_PORT=6800
- HYPHE_TRAPH_DATAPATH=/app/traph-data
volumes:
- traph-data:/app/traph-data
frontend:
image: scpomedialab/hyphe_frontend:latest
build: ./hyphe_frontend
ports:
- "80:80"
links:
- "backend:backend"
crawler:
image: scpomedialab/hyphe_crawler:latest
build:
context: .
dockerfile: ./hyphe_backend/crawler/Dockerfile
links:
- "mongo:mongo"
mongo:
image: mongo:3.0
volumes:
- mongo-data:/data/db
54 changes: 54 additions & 0 deletions docker-entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python
import json
from os import environ,system

def loadConfig(filename):
    """Parse the JSON file at `filename` and return the decoded data.

    The file is opened read-only: nothing is written here, so requiring
    write permission (as "r+" did) would needlessly fail on a readable but
    non-writable file.

    Raises IOError/OSError if the file cannot be opened and ValueError if
    its content is not valid JSON.
    """
    with open(filename, "r") as filecontent:
        return json.load(filecontent)

def setConfig(setting, value, configdata, section=None):
    """Store `value` under the key `setting` in `configdata`, in place.

    When `section` is given, the key is written into the nested mapping
    `configdata[section]` (a KeyError is raised if that section does not
    exist); otherwise it is written at the top level of `configdata`.
    Returns None.
    """
    target = configdata if section is None else configdata[section]
    target[setting] = value

def writeConfig(filename,configdata):
    """Overwrite `filename` with `configdata` serialized as JSON (2-space indent).

    The file is opened (and therefore truncated) before serialization, and
    is always closed afterwards, even if serialization or writing fails.
    """
    handle = open(filename, 'w')
    try:
        handle.write(json.dumps(configdata, indent=2))
    finally:
        handle.close()

def strToBool(string):
    """Interpret an environment-style string as a boolean.

    Returns True when `string`, once surrounding whitespace is stripped and
    case is ignored, is one of "true", "yes", "y", "1" or "on"; returns
    False for anything else (including the empty string).

    Every value accepted before ("true", "True", "yes", "y") is still
    accepted; the comparison is merely no longer case- or
    whitespace-sensitive, so "TRUE" or " Yes " are not silently read as
    False.
    """
    return str(string).strip().lower() in ("true", "yes", "y", "1", "on")

# Configuration file that is loaded, patched from the environment and
# rewritten in place before the backend is started.
configfile = "/app/config/config.json"

configdata = loadConfig(configfile)

# One row per supported override:
#   (environment variable, config key, converter, config section or None)
# Rows are applied in order; a variable that is not set leaves the value
# from the config file untouched. A value that its converter rejects
# (e.g. a non-numeric port) raises ValueError and aborts startup.
#
# NOTE(review): docker-compose.yml sets HYPHE_CRAWLER_HOST, but no row here
# reads it -- confirm which config key (if any) it should map to.
# NOTE(review): HYPHE_DEBUG is stored as the raw string, unlike the other
# numeric/boolean settings -- confirm the type the backend expects.
_ENV_OVERRIDES = [
    ("HYPHE_MONGODB_HOST", "host", str, "mongo-scrapy"),
    ("HYPHE_MONGODB_PORT", "mongo_port", int, "mongo-scrapy"),
    ("HYPHE_MONGODB_DBNAME", "db_name", str, "mongo-scrapy"),
    ("HYPHE_CRAWLER_PORT", "scrapy_port", int, "mongo-scrapy"),
    ("HYPHE_PROXY_HOST", "proxy_host", str, "mongo-scrapy"),
    ("HYPHE_PROXY_PORT", "proxy_port", int, "mongo-scrapy"),
    ("HYPHE_MAXDEPTH", "maxdepth", int, "mongo-scrapy"),
    ("HYPHE_DOWNLOAD_DELAY", "download_delay", int, "mongo-scrapy"),
    ("HYPHE_MAX_SIM_REQ", "max_simul_requests", int, "mongo-scrapy"),
    ("HYPHE_HOST_MAX_SIM_REQ", "max_simul_requests_per_host", int, "mongo-scrapy"),

    ("HYPHE_TRAPH_KEEPALIVE", "keepalive", int, "traph"),
    ("HYPHE_TRAPH_DATAPATH", "data_path", str, "traph"),
    ("HYPHE_TRAPH_MAX_SIM_PAGES", "max_simul_pages_indexing", int, "traph"),

    ("HYPHE_OPEN_CORS_API", "OPEN_CORS_API", strToBool, None),
    ("HYPHE_BACKEND_PORT", "twisted.port", int, None),
    ("HYPHE_PRECISION_LIMIT", "precisionLimit", int, None),
    ("HYPHE_MULTICORPUS", "MULTICORPUS", strToBool, None),
    ("HYPHE_ADMIN_PASSWORD", "ADMIN_PASSWORD", str, None),
    ("HYPHE_DEBUG", "DEBUG", str, None),
]

for _envvar, _setting, _convert, _section in _ENV_OVERRIDES:
    if _envvar in environ:
        setConfig(_setting, _convert(environ[_envvar]), configdata, _section)

writeConfig(configfile, configdata)

# Start the backend and wait for it to finish. system() returns the
# wait()-encoded status; decode it and exit with the same code so that a
# failing backend is not reported as a successful (0) container exit.
_status = system("/app/hyphe_backend/core.tac")
if _status & 0xff:
    # Low byte set: the child was terminated by a signal.
    _exit_code = 128 + (_status & 0x7f)
else:
    _exit_code = _status >> 8
raise SystemExit(_exit_code)
26 changes: 13 additions & 13 deletions hyphe_backend/crawler/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
FROM buildpack-deps:wheezy
FROM python:2.7-alpine

ENV LANG C.UTF-8
COPY ./hyphe_backend/crawler/requirements-global-scrapyd.txt /requirements-global-scrapyd.txt

RUN apt-get update \
&& apt-get install python2.7-dev python-pip python-support --no-install-recommends -y \
&& wget -q -P ./ https://github.com/medialab/scrapyd/raw/medialab-debian/debs/scrapyd_1.0~r0_all.deb \
&& pip install service_identity \
&& pip install pymongo==2.7 \
&& pip install txmongo==0.6 \
&& pip install selenium==2.42.1 \
&& pip install Scrapy==0.18 \
&& dpkg -i scrapyd_1.0~r0_all.deb
RUN apk --update add gcc musl-dev libffi-dev openssl-dev libxml2-dev libxslt-dev \
&& pip install --no-cache-dir -r requirements-global-scrapyd.txt \
&& pip --no-cache-dir install Scrapy==0.24.6 \
&& pip --no-cache-dir install scrapyd==1.0.1 \
&& apk del gcc musl-dev \
&& rm /var/cache/apk/*

COPY ./config/scrapyd.config /etc/scrapyd/conf.d/100-hyphe

EXPOSE 6800

VOLUME ["/var/lib/scrapyd"]

VOLUME ["/var/log/scrapyd"]

# Set scrapyd as run entrypoint
CMD ["/usr/bin/scrapyd"]
CMD ["scrapyd"]

23 changes: 23 additions & 0 deletions hyphe_frontend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Image serving the pre-built Hyphe frontend with nginx.
# NOTE(review): the base tag is floating (nginx:alpine); consider pinning a
# specific version so rebuilds are reproducible.
FROM nginx:alpine

COPY . /frontend/

WORKDIR /frontend

# Build the static frontend, then drop the toolchain and node_modules in the
# same layer so they never end up in the image.
#  - `apk add --no-cache` avoids writing the package index to
#    /var/cache/apk, so no manual cleanup is needed.
#  - the build tools are grouped in a virtual package and removed together.
#  - `npm cache clean` needs `--force` on npm >= 5 (it aborts the build
#    otherwise); older npm versions accept the flag as well.
RUN apk add --no-cache --virtual .build-deps git nodejs \
    && npm install \
    && npm run build \
    && npm cache clean --force \
    && apk del .build-deps \
    && rm -fr node_modules

COPY ./docker-nginx-vhost.conf /etc/nginx/conf.d/default.conf

COPY docker-entrypoint.sh /

RUN chmod +x /docker-entrypoint.sh

# The entrypoint script writes the frontend configuration, then execs CMD.
ENTRYPOINT ["/docker-entrypoint.sh"]

CMD ["nginx", "-g", "daemon off;"]
16 changes: 16 additions & 0 deletions hyphe_frontend/docker-entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/sh
# Container entrypoint for the frontend image: regenerate conf.js from the
# default configuration, apply optional overrides taken from environment
# variables, fix ownership/permissions, then exec the container command.

CONFIGFILE=/frontend/app/conf/conf.js

# Print $1 with the characters that are special in the replacement part of a
# `s|...|...|` sed command (backslash, `&`, and the `|` delimiter) escaped, so
# the value is inserted literally.
escape_sed_replacement() {
    printf '%s' "$1" | sed 's/[\\&|]/\\&/g'
}

# Always start from a fresh copy of the default configuration.
/bin/cp /frontend/app/conf/conf_default.js "$CONFIGFILE"

# Point the frontend at the /api/ path of whatever host and port served the page.
sed --in-place "s|'serverURL'\s*,.*|'serverURL', '//' + window.location.hostname + ':' + window.location.port + '/api/')|" "$CONFIGFILE"

# Optional overrides. POSIX `[ -n "..." ]` with quoted expansions is used
# instead of `[[ ... ]]`: the script runs under /bin/sh, and an unquoted value
# containing spaces may be word-split by shells whose `[[` is only
# test-compatible.
if [ -n "${HYPHE_GOOGLE_ANALYTICS_ID}" ]; then
    ga_id=$(escape_sed_replacement "${HYPHE_GOOGLE_ANALYTICS_ID}")
    sed --in-place "s|'googleAnalyticsId'\s*,.*|'googleAnalyticsId', '${ga_id}')|" "$CONFIGFILE"
fi

if [ -n "${HYPHE_DISCLAIMER}" ]; then
    disclaimer=$(escape_sed_replacement "${HYPHE_DISCLAIMER}")
    sed --in-place "s|'disclaimer'\s*,.*|'disclaimer', '${disclaimer}')|" "$CONFIGFILE"
fi

chmod -R 550 /frontend/app && chown -R nginx:nginx /frontend/app

# Replace this shell with the container command so it receives signals directly.
exec "$@"
19 changes: 19 additions & 0 deletions hyphe_frontend/docker-nginx-vhost.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Virtual host for the Hyphe frontend container: serves the static frontend
# files and forwards /api/ requests to the backend service.

# Backend API server, addressed by the `backend` host name on port 6978.
upstream api {
server backend:6978;
}
server {
listen 80;
server_name localhost;

# Static frontend files.
location / {
root /frontend/app;
index index.html index.htm;
}
# Requests under /api/ are proxied to the `api` upstream defined above.
location /api/ {
proxy_pass http://api;

# Allow up to 900s for sending to / reading from the backend, and 30s to
# establish the connection.
proxy_connect_timeout 30s;
proxy_send_timeout 900s;
proxy_read_timeout 900s;
}
}

0 comments on commit b44a899

Please sign in to comment.