-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexamples.sql
100 lines (80 loc) · 3.49 KB
/
examples.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
/* Export as CSV */
\COPY (SELECT * FROM results r LEFT JOIN scrapers s ON(r.scraperid = s.id) WHERE s.name='Soundcloud') TO '/home/robimalco/output.csv' WITH csv header
SELECT
s.name AS company_name,
TO_CHAR(r.createdat, 'YYYY-MM-DD')AS job_created_at,
COUNT(r.id) AS new_jobs
FROM results r
LEFT JOIN scrapers s ON(r.scraperid = s.id)
WHERE s.name IN('Twitter', 'Google', 'Zalando')
GROUP BY 1, 2
ORDER BY 1, 2 DESC;
/* HERE A LIST OF EXAMPLE QUERIES */
/* Make a table empty and reset id */
TRUNCATE scrapers RESTART IDENTITY;
TRUNCATE userstargetskeywords, keywords RESTART IDENTITY;
/* Useful drop constraint and create index*/
ALTER TABLE results DROP CONSTRAINT resultspkey;
CREATE INDEX idxresultsurl ON results(url);
/* Add constraint */
ALTER TABLE targets ADD UNIQUE (name);
ALTER TABLE results ADD CONSTRAINT id_unique UNIQUE (id);
ALTER TABLE userstargetskeywords ADD UNIQUE (userid, targetid, keywordid);
/* Rename table's name */
ALTER TABLE userstargets RENAME TO userstargets;
/* Rename table's column */
ALTER TABLE users RENAME COLUMN name TO username;
/* Delete a whole table */
DROP TABLE author; --
/* Delete row from table */
DELETE FROM scrapers WHERE version = 2;
/* Delete column from table */
ALTER TABLE userstargets DROP COLUMN uuid;
/* Add column to table */
ALTER TABLE results ADD COLUMN updatedat timestamp NOT NULL DEFAULT current_timestamp;
ALTER TABLE userstargetskeywords ADD COLUMN updatedat timestamp NOT NULL DEFAULT current_timestamp;
ALTER TABLE jobs ADD COLUMN scrapingid integer references scraping(id);
ALTER TABLE targets ALTER COLUMN name SET NOT NULL;
/* Update value in column based on condition */
UPDATE targets SET name = 'Kununu' WHERE id = 45;
UPDATE targets SET url = replace(url, 'https://', '')
/* New scraper process */
INSERT INTO targets (url, host, createdat, name) VALUES('https://www.babelforce.com/jobs/', 'https://www.babelforce.com', currenttimestamp, 'Babelforce');
SELECT id FROM targets WHERE name = 'Babelforce';
INSERT INTO scrapers (name, version, targetid, createdat) VALUES('Babelforce', 1, 86, current_timestamp);
/* Get the latest two extractions */
SELECT s.id FROM scraping s LEFT JOIN scrapers ss ON(s.scraperid = ss.id) WHERE name = 'Zalando' ORDER BY s.id DESC LIMIT 2;
/* OLD DATA */
SELECT id, createdat, updatedat, url, title FROM results WHERE scrapingid = 99;
/* NEW DATA */
SELECT id, createdat, updatedat, url, title FROM results WHERE scrapingid = 114 AND DATE(createdat) = DATE(updatedat);
/* Example of query with Microsoft JSON */
SELECT
r.id,
r.title,
r.data#>>'{category}' AS category,
r.data#>>'{country}' AS country
FROM results r
LEFT JOIN scrapers s ON(r.scraperid = s.id)
WHERE s.name = 'Microsoft'
ORDER BY r.updatedat DESC
LIMIT 10;
/* Update targets CASCADE */
INSERT INTO targets (url, host, createdat, name) VALUES('https://boards.greenhouse.io/urbansportsclub/', 'https://urbansportsclub.com', current_timestamp, 'Urbansport');
UPDATE targets SET id = 7 WHERE url = 'https://boards.greenhouse.io/urbansportsclub/';
UPDATE scrapers SET targetid = 7 WHERE targetid = 241;
DELETE FROM targets WHERE id = 241;
/* Check if column is unique */
SELECT CASE WHEN count(distinct id)= count(id)
THEN 'column values are unique' ELSE 'column values are NOT unique' END
FROM results;
/* Massive insert into */
INSERT INTO userskeywords(userid, keywordid, createdat)
SELECT DISTINCT
utk.userid,
utk.keywordid,
MAX(utk.createdat)
FROM userstargetskeywords utk
WHERE utk.deletedat IS NULL
GROUP BY 1, 2
ORDER BY 1, 2, 3;