Skip to content

Commit

Permalink
added scraper setup
Browse files Browse the repository at this point in the history
  • Loading branch information
hanshack committed Nov 27, 2023
1 parent 5cc8a4a commit 07ec342
Show file tree
Hide file tree
Showing 3 changed files with 339 additions and 0 deletions.
28 changes: 28 additions & 0 deletions scraper/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
const axios = require("axios");
const cheerio = require("cheerio");
const fs = require("fs");
const async = require("async");
const path = require("path");

// Landing page of the Berlin Umweltatlas; the "more" links on it are collected below.
const UMWELTATLAS_URL = "https://www.berlin.de/umweltatlas/";

/**
 * Downloads the Umweltatlas landing page and collects the `href` attribute of
 * every element matching `article .inner .more`.
 *
 * @returns {Promise<string[]>} The hrefs in document order, exactly as they
 *   appear in the markup (they are not resolved against the page URL).
 */
async function fetchSachenGruppen() {
  const response = await axios.get(UMWELTATLAS_URL);
  const $ = cheerio.load(response.data);

  // cheerio's .map() skips null/undefined return values, so matched elements
  // without an href are left out; .get() unwraps the cheerio collection into
  // a plain array.
  return $("article .inner .more")
    .map((_, el) => $(el).attr("href"))
    .get();
}

fetchSachenGruppen()
  .then((sachenGruppen) => {
    // TODO: loop through the collected links, make a request for each one,
    // then get the data from each link and save it to a file.
    console.log("sachenGruppen", sachenGruppen);
  })
  .catch((error) => {
    // Without this handler a failed request or parse would surface as an
    // unhandled promise rejection.
    console.error(`Failed to scrape ${UMWELTATLAS_URL}:`, error.message);
    process.exitCode = 1;
  });

294 changes: 294 additions & 0 deletions scraper/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 17 additions & 0 deletions scraper/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"name": "berlin_umweltatlas_scraper",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"dependencies": {
"async": "^3.2.4",
"axios": "^1.2.2",
"cheerio": "^1.0.0-rc.12"
}
}

0 comments on commit 07ec342

Please sign in to comment.