diff --git a/.github/workflows/crawl_downloads.yml b/.github/workflows/crawl_downloads.yml
new file mode 100644
index 0000000..0f72dec
--- /dev/null
+++ b/.github/workflows/crawl_downloads.yml
@@ -0,0 +1,51 @@
+# Fetches the Zenodo record page, runs crawl.py, and publishes the resulting
+# *.json files on the `zenodo` branch, then purges the jsDelivr cache.
+name: crawl-zenodo-downloads
+
+on:
+  push:
+    branches:
+      - master
+      - zenodo
+  schedule:
+    # Every day at 04:00 UTC.
+    - cron: '0 4 * * *'
+
+# The job pushes to the `zenodo` branch, so GITHUB_TOKEN needs write access.
+permissions:
+  contents: write
+
+# Every run amends and force-pushes the branch tip; never let two runs overlap.
+concurrency:
+  group: crawl-zenodo-downloads
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      # checkout leaves the GITHUB_TOKEN credentials configured for `origin`,
+      # so the push below needs no token embedded in a remote URL.
+      - uses: actions/checkout@v4
+        with:
+          ref: zenodo
+      - name: Install dependencies
+        run: pip3 install -r requirements.txt --quiet
+      - name: Crawl Zenodo
+        run: |
+          # --fail makes an HTTP error fail the step instead of saving the
+          # error page; --location follows redirects.
+          curl --fail --silent --show-error --location \
+            --output zenodo_page.txt https://zenodo.org/records/8196385
+          python3 crawl.py
+      - name: Publish results
+        run: |
+          git config --local user.name "${GITHUB_ACTOR}"
+          git config --local user.email "${GITHUB_ACTOR}@users.noreply.github.com"
+          git add -- *.json
+          # Amending rewrites the branch tip instead of adding a commit,
+          # which is why the push has to be forced.
+          git commit -m "Update downloads" --amend --reset-author
+          git push --force origin zenodo
+          curl --fail --silent --show-error \
+            https://purge.jsdelivr.net/gh/logpai/loghub@zenodo/downloads.json
diff --git a/README.md b/README.md index 0b16062..bcdcab8 100644 --- a/README.md +++ b/README.md @@ -37,11 +37,12 @@ Loghub maintains a collection of system logs, which are freely accessible for AI ### Datasets download We host only a small sample (2k lines) of each log dataset on Github. If you are interested in these raw datasets, please download them [via Zenodo](https://doi.org/10.5281/zenodo.1144100). -:bell: We proudly announce that the loghub datasets have attained [**90000+ total downloads**](https://doi.org/10.5281/zenodo.1144100) by more than [**450 organizations**](https://github.com/logpai/loghub/wiki/Loghub-download-list) from both industry and academia. +:bell: We proudly announce that the loghub datasets have attained total by more than [**450 organizations**](https://github.com/logpai/loghub/wiki/Loghub-download-list) from both industry and academia. +### 🌈 Citation -### Citation Please cite the following paper if you use the loghub datasets for research. 
+ + Jieming Zhu, Shilin He, Pinjia He, Jinyang Liu, Michael R. Lyu. [Loghub: A Large Collection of System Log Datasets for AI-driven Log Analytics](https://arxiv.org/abs/2008.06448). IEEE International Symposium on Software Reliability Engineering (ISSRE), 2023.