diff --git a/.streamlit/config.toml b/.streamlit/config.toml new file mode 100644 index 0000000..2a6bffe --- /dev/null +++ b/.streamlit/config.toml @@ -0,0 +1,5 @@ +[theme] +base="light" +primaryColor="#B1063A" +backgroundColor="#F1F1F1" +secondaryBackgroundColor="#D4DADE" \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..a37c029 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,22 @@ +# Builder image +FROM continuumio/miniconda3 as builder + +RUN apt-get update -y && apt-get install default-jre make wget curl unzip build-essential -y +COPY . /app/unCover + +WORKDIR /app/unCover +RUN conda update conda --yes +RUN conda env create -f environment.yml +RUN cp -n .env.example .env +RUN make -C tem/topic-evolution-model/ +RUN conda run -n unCover ./corenlp --no-run && conda run -n unCover ./prepare_models + +# Final image +FROM continuumio/miniconda3 +LABEL authors="lucasliebe" + +COPY --from=builder /app /app +COPY --from=builder /opt/conda /opt/conda + +WORKDIR /app/unCover +CMD conda run -n unCover streamlit run main.py \ No newline at end of file diff --git a/README.md b/README.md index 8690699..37356f3 100644 --- a/README.md +++ b/README.md @@ -2,19 +2,29 @@ Detailed information about unCover can be found in the following publication: -> Liebe L, Baum J, Schutze T, Cech T, Scheibel W, and Dollner J (2023). UNCOVER: -> Identifying AI Generated News Articles by Linguistic Analysis and -> Visualization +> Liebe, L.; Baum, J.; Schütze, T.; Cech, T.; Scheibel, W. and Döllner, J. (2023). +> UNCOVER: Identifying AI Generated News Articles by Linguistic Analysis and Visualization. +> In Proceedings of the 15th International Joint Conference on Knowledge Discovery, +> Knowledge Engineering and Knowledge Management - Volume 1: KDIR, ISBN +> 978-989-758-671-2, ISSN 2184-3228, pages 39-50. 
-![Teaser](https://drive.google.com/uc?export=download&id=1i49F16U7TiHCS8-17lBv8ofPsnvd-RE0) +![Teaser](https://drive.google.com/uc?export=download&id=1DU9HwazIUGxoFdI5cJ-liW3Q_-a-QV6G) An interactive example deployment of unCover can be found at -[uncover.streamlit.app](https://uncover.streamlit.app). +[uncover.lucasliebe.de](https://uncover.lucasliebe.de). Our datasets and pre-trained models can be found in [Google Drive](https://drive.google.com/drive/folders/1fMZgGC2Bnp5K-ZoANXB_S0AI02akye_c?usp=drive_link). Please note that for copyright reasons we removed the plain text of the scraped news articles and only left the metadata and the generated texts in the dataset files. +## Prerequisites + +Before you can use the installation script as described below, please make sure +you have the following packages installed and working on your machine: +- Anaconda or Miniconda +- Java (Runtime Environment is sufficient) +- Make and g++ + ## Setup To set up this project to run on your own machine, run the following command @@ -33,11 +43,15 @@ activate the environment with conda activate unCover ``` -To take full advantage of all capabilities in this repository you should update -all the information in `.env`. OpenAI-credentials are +To take full advantage of all capabilities in this repository you should fill out +all the information in `.env.example` and save it as `.env`. OpenAI-credentials are only required for generation, however, fine-tuning the confidence thresholds to the used models will greatly benefit performance and is required to achieve good results. +Alternatively, if you are only interested in running the web interface, you can use +the provided docker container for a quick deployment. It can be built yourself using +the Dockerfile or pulled from Docker Hub: `docker pull lucasliebe/uncover:latest`. + ## Usage There are multiple ways to use unCover, depending on your use case. 
diff --git a/definitions.py b/definitions.py index 794401a..c9538c6 100644 --- a/definitions.py +++ b/definitions.py @@ -25,7 +25,7 @@ GPT_KEY = os.getenv("GPT_KEY", "") OPENAI_ORGA = os.getenv("OPENAI_ORGA", "") -CHAR_MACHINE_CONFIDENCE = float(c) if (c := os.getenv("CHAR_MACHINE_CONFIDENCE")) else st.secrets["CHAR_MACHINE_CONFIDENCE"] -CHAR_HUMAN_CONFIDENCE = float(c) if (c := os.getenv("CHAR_HUMAN_CONFIDENCE")) else st.secrets["CHAR_HUMAN_CONFIDENCE"] -SEM_MACHINE_CONFIDENCE = float(c) if (c := os.getenv("SEM_MACHINE_CONFIDENCE")) else st.secrets["SEM_MACHINE_CONFIDENCE"] -SEM_HUMAN_CONFIDENCE = float(c) if (c := os.getenv("SEM_HUMAN_CONFIDENCE")) else st.secrets["SEM_HUMAN_CONFIDENCE"] +CHAR_MACHINE_CONFIDENCE = float(os.getenv("CHAR_MACHINE_CONFIDENCE", "")) +CHAR_HUMAN_CONFIDENCE = float(os.getenv("CHAR_HUMAN_CONFIDENCE", "")) +SEM_MACHINE_CONFIDENCE = float(os.getenv("SEM_MACHINE_CONFIDENCE", "")) +SEM_HUMAN_CONFIDENCE = float(os.getenv("SEM_HUMAN_CONFIDENCE", "")) diff --git a/install.sh b/install.sh index ae739e9..03b38b3 100644 --- a/install.sh +++ b/install.sh @@ -4,16 +4,17 @@ print_bold() { printf "\\033[1m$1\\033[0m\n" } -if ! which java 1>/dev/null 2>/dev/null; then - print_bold "Please ensure Java is installed and on your PATH." - exit 1 -fi if ! which conda 1>/dev/null 2>/dev/null; then print_bold "Please ensure Anaconda is installed and on your PATH." exit 1 fi +if ! which java 1>/dev/null 2>/dev/null; then + print_bold "Please ensure Java is installed and on your PATH." + exit 1 +fi + if ! which make 1>/dev/null 2>/dev/null; then print_bold "Please ensure Make is installed and on your PATH." 
exit 1 @@ -23,18 +24,16 @@ print_bold "Cloning repository" git clone --recurse-submodules https://github.com/hpicgs/unCover.git cd unCover -print_bold "Compiling TEM" -make -C tem/topic-evolution-model/ - print_bold "Creating Anaconda environment" conda env create -f environment.yml -conda activate unCover + +print_bold "Compiling TEM" +make -C tem/topic-evolution-model/ print_bold "Installing CoreNLP" -./corenlp --no-run +conda run -n unCover ./corenlp --no-run print_bold "Downloading Models" -./prepare_models -cp ./.env.example ./.env +conda run -n unCover ./prepare_models print_bold "Done!" diff --git a/main.py b/main.py index 11e5964..b97ed5f 100644 --- a/main.py +++ b/main.py @@ -124,9 +124,10 @@ def get_prediction(style_prediction, te_prediction): col1.title("Welcome at unCover") col2.image(Image.open("unCover.png"), width=100) st.write( - " \nHere you can analyze a news article on topics and writing style to get further insights on whether this text " - "might have been written by an AI. This system was developed at Hasso-Plattner-Institute. To start, please choose " - "the type of input and enter the url/text in the field below.") + " \nHere you can analyze a news article on topics and writing style to get further insights on whether this " + "text might have been written by an AI. This system was developed at Hasso-Plattner-Institute. For more " + "information and the associated paper visit https://github.com/hpicgs/unCover.") + st.write("To start, please choose the type of input and enter the url/text in the field below.") col3, col4 = st.columns(2) input_type = col3.selectbox("type of input", ('URL', 'Text'), label_visibility="collapsed") text = ""