-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcovid19japan_summary.js
296 lines (288 loc) · 8.98 KB
/
covid19japan_summary.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
const fs = require('fs')
const fetch = require('node-fetch')
const cheerio = require('cheerio')
const util = require('./util.js')
const jimp = require("jimp")
const img2text = require('./img2text.js')
const pdf2text = require('./pdf2text.js')
// Directory prefix under which fetched pages, downloaded images/PDFs and
// their parsed ".json" results are stored.
const PATH = 'data/covid19japan/'
// Page on mhlw.go.jp that is scraped for the summary image/PDF link.
// NOTE: inside this module the name URL refers to this string, which shadows
// the global URL class.
const URL = 'https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/0000164708_00001.html'
// Prefix added to links found on the page that contain no ":" (i.e. no scheme).
const BASEURL = 'https://www.mhlw.go.jp'
// Period of the background refresh started by startUpdate, in milliseconds;
// the same value is passed on as the cache time.
const CACHE_TIME = 10 * 60 * 1000 // 10min
/**
 * Load the source page through the caching helper.
 *
 * @param {*} cachetime - forwarded unchanged to util.getWebWithCache
 * @returns {Promise<*>} whatever util.getWebWithCache(URL, PATH, cachetime) resolves to
 */
const getCovid19Data = async function(cachetime) {
  const page = await util.getWebWithCache(URL, PATH, cachetime)
  return page
}
/**
 * Ask the cache helper when the cached copy of the source page was last updated.
 *
 * @param {*} fn - accepted but not used by this function
 * @returns {*} the value returned by util.getLastUpdateOfCache(URL, PATH)
 */
const getLastUpdate = function(fn) {
  const stamp = util.getLastUpdateOfCache(URL, PATH)
  return stamp
}
/**
 * Return the current summary record.
 *
 * Emergency mode: the scraping pipeline is bypassed and a fixed snapshot is
 * returned instead. A fresh object is built on every call.
 *
 * The disabled implementation, kept for reference, was:
 *
 *   const data = await util.getCache(async function() {
 *     return await fetchCovid19DataJSON(cachetime)
 *   }, 'data/covid19japan/', '-summary.json', cachetime)
 *   return JSON.parse(data)
 *
 * @param {*} cachetime - currently unused (only the disabled implementation reads it)
 * @returns {Promise<Object>} summary with string-valued counters and source URLs
 */
const getCovid19DataJSON = async function(cachetime) {
  const srcurl_img = 'https://www.mhlw.go.jp/content/10900000/000610718.jpg'
  const srcurl_web = 'https://www.mhlw.go.jp/stf/seisakunitsuite/bunya/0000164708_00001.html'
  return {
    lastUpdate: '2020-03-20T18:00',
    npatients: '996',
    nexits: '232',
    ndeaths: '35',
    ncurrentpatients: '729',
    nlighters: '18',
    srcurl_img,
    srcurl_web,
  }
}
/**
 * Start the periodic background refresh.
 *
 * Every CACHE_TIME milliseconds getCovid19DataJSON(CACHE_TIME) is invoked.
 * A failing refresh is logged and the loop keeps running: a rejection that
 * escaped the async timer callback would otherwise surface as an unhandled
 * promise rejection.
 *
 * @returns {Object} the handle returned by setInterval, so the caller can
 *   stop the loop with clearInterval (callers that ignore it are unaffected)
 */
const startUpdate = function() {
  return setInterval(async function() {
    try {
      await getCovid19DataJSON(CACHE_TIME)
    } catch (e) {
      console.error('covid19japan_summary: periodic update failed', e)
    }
  }, CACHE_TIME)
}
/**
 * Scrape the source page for the summary image or PDF and convert it to JSON text.
 *
 * The direct children of every '.m-grid__col1' element are scanned in order:
 *   state 0 - look for a text node whose trimmed content equals the marker
 *             sentence (roughly "The admission/discharge status is as follows.");
 *   state 1 - take the first <img src> or the first <a href> ending in ".pdf";
 *   state 2 - a link was found, nothing more to do.
 * The state is shared across all matched elements, so the link may sit in a
 * later element than the marker. Only direct <img>/<a> children are handled;
 * an <img> nested inside an <a> is not.
 *
 * @param {*} cachetime - forwarded to getCovid19Data
 * @returns {Promise<string|null>} JSON.stringify of the parsed summary, or null
 *   when no link is found or the image/PDF could not be parsed
 */
const fetchCovid19DataJSON = async function(cachetime) {
  const data = await getCovid19Data(cachetime)
  const dom = cheerio.load(data)
  const marker = '入退院の状況は以下の通りです。'
  let state = 0
  let url = null
  dom('.m-grid__col1').each((idx, ele) => {
    for (const child of ele.children) {
      if (state === 2) {
        break
      }
      if (state === 0) {
        const text = child.data
        if (text && text.trim() === marker) {
          state = 1
        }
      } else if (child.name === 'img') {
        const src = dom(child).attr('src')
        // An <img> without a src cannot be downloaded; keep looking.
        if (src) {
          url = src
          state = 2
        }
      } else if (child.name === 'a') {
        const href = dom(child).attr('href')
        // Anchors without href (e.g. named anchors) are skipped instead of crashing.
        if (href && href.endsWith('.pdf')) {
          url = href
          state = 2
        }
      }
    }
  })
  if (!url) {
    return null // err
  }
  // Links without a scheme (no ":") are site-relative; "data:" and "https:" pass through.
  if (!url.includes(":")) {
    url = BASEURL + url
  }
  const json = url.endsWith(".pdf") ? await getJSONbyPDF(url) : await getJSONbyImage(url)
  if (!json) {
    return null
  }
  return JSON.stringify(json)
}
/**
 * OCR the summary image and extract the individual figures.
 *
 * The image is cut into fixed rectangles (calibrated for a 744px-wide image
 * and scaled by the actual width), each rectangle is passed to
 * img2text.img2text, and the recognised text is normalised.
 *
 * Crop table of the earlier 2339px-wide layout, kept for reference:
 *   lastUpdate        1573, 102, 543, 73
 *   npatients           60, 400, 500, 90
 *   nexits             620, 400, 530, 90
 *   ndeaths           1180, 400, 464, 90
 *   ncurrentpatients  1686, 400, 584, 90
 *   nlighters         1686, 562, 584, 60
 *
 * @param {string} fn - image file passed to jimp.read
 * @returns {Promise<Object|null>} object with lastUpdate ("YYYY-MM-DDTHH:00",
 *   or "--" if unreadable) and string counters ("-" if unreadable); null when
 *   ncurrentpatients is empty
 */
const getCurrentPatients = async function(fn) {
  const jpg = await jimp.read(fn)
  const orgwidth = 744
  // [ field name, x, y, width, height ] in pixels of the 744px-wide layout
  const crops = [
    [ 'lastUpdate', 504, 38, 158, 22 ],
    [ 'npatients', 58, 139, 116, 28 ],
    [ 'nexits', 231, 126, 113, 30 ],
    [ 'ndeaths', 568, 138, 137, 32 ],
    [ 'ncurrentpatients', 390, 126, 136, 32 ],
    [ 'nlighters', 430, 156, 39, 19 ],
  ]
  const ratio = jpg.bitmap.width / orgwidth
  const fix0 = (n) => n < 10 ? "0" + n : "" + n
  // Turns OCR output such as "3A12H (WN) 18" into "YYYY-03-12T18:00".
  const reformatdate = function(s) {
    const num = s.replace(/ /g, "").match(/(\d+)A(\d+)([^\d]*)\(.+\)(\d+)/)
    if (!num) {
      return "--"
    }
    const m = parseInt(num[1], 10)
    // When no non-digit separates the day from "(", the last digit of the
    // day group is dropped (integer division by 10).
    const dayGroup = parseInt(num[2], 10)
    const d = num[3].length === 0 ? Math.floor(dayGroup / 10) : dayGroup
    const h = parseInt(num[4], 10)
    const now = new Date()
    // Must be `let`: a December date read in January belongs to the previous year.
    let y = now.getFullYear()
    if (m === 12 && now.getMonth() === 0) {
      y--
    }
    return y + "-" + fix0(m) + "-" + fix0(d) + "T" + fix0(h) + ":00"
  }
  // Returns the first run of digits. The pattern must not require a trailing
  // character, otherwise a bare number loses its last digit to backtracking.
  const reformatnum = function(s) {
    const num = s.replace(/ /g, "").match(/\d+/)
    return num ? num[0] : "-"
  }
  const res = {}
  for (const [name, x, y, w, h] of crops) {
    const imgc = jpg.clone()
    imgc.crop(x * ratio, y * ratio, w * ratio, h * ratio)
    const text = await img2text.img2text(imgc, DEBUG)
    console.log(text)
    res[name] = name === 'lastUpdate' ? reformatdate(text) : reformatnum(text)
  }
  if (!res.ncurrentpatients) {
    return null
  }
  return res
}
// Passed as the second argument to img2text.img2text by getCurrentPatients.
const DEBUG = false
/**
 * Produce the summary JSON for an image, using an on-disk cache.
 *
 * The image is stored under PATH and its parsed result next to it as
 * "<image file>.json"; when that JSON file is readable it is returned without
 * touching the image again.
 *
 * @param {string} url - either a "data:" URL (base64 payload) or a fetchable URL
 * @returns {Promise<Object|null>} parsed summary with srcurl_web (and
 *   srcurl_img for non-data URLs) added, or null when OCR produced no result
 */
const getJSONbyImage = async function(url) {
  const isDataUrl = url.startsWith("data:")
  let fn = null
  if (isDataUrl) {
    // Inline images have no file name; derive one from the page's cache time.
    fn = PATH + getLastUpdate().replace(/:/g, '_') + "_smr.png" // "2020-03-16.png"
    console.log(fn)
  } else {
    fn = PATH + url.substring(url.lastIndexOf('/') + 1)
  }
  try {
    return JSON.parse(fs.readFileSync(fn + ".json"))
  } catch (e) {
    // Deliberate best effort: a missing or unreadable cache entry just means
    // the image has to be processed again.
  }
  if (isDataUrl) {
    // The payload starts after the first comma, whatever the media type is.
    const payload = url.substring(url.indexOf(',') + 1)
    fs.writeFileSync(fn, Buffer.from(payload, 'base64'))
  } else {
    const img = await (await fetch(url)).arrayBuffer()
    fs.writeFileSync(fn, Buffer.from(img))
  }
  const json = await getCurrentPatients(fn)
  if (!json) {
    // Same contract as getJSONbyPDF: nothing is cached for a failed parse.
    return null
  }
  if (!isDataUrl) {
    json.srcurl_img = url
  }
  json.srcurl_web = URL
  fs.writeFileSync(fn + ".json", JSON.stringify(json))
  return json
}
// for pdf
/**
 * Convert the PDF's timestamp line into "YYYY-MM-DDTHH:00".
 *
 * The line carries no year, so the year of `now` is used; a December date
 * read in January is assigned to the previous year.
 *
 * @param {string} s - e.g. '3月16日(月)18時時点' (passed through util.toHalf first)
 * @param {Date} [now] - reference date for the year; defaults to the current time
 * @returns {string} timestamp with zero-padded month, day and hour
 * @throws {Error} when the line does not match the expected pattern
 */
const parseDate = function(s, now = new Date()) { // '3月16日(月)18時時点'
  const half = util.toHalf(s)
  const num = half.match(/(\d+)月(\d+)日\(.\)(\d+)時時点/)
  if (!num) {
    throw new Error('parseDate: unexpected date format: ' + half)
  }
  // Parse to numbers so that padding does not depend on string/number coercion.
  const m = parseInt(num[1], 10)
  const d = parseInt(num[2], 10)
  const h = parseInt(num[3], 10)
  // Must be `let`: decrementing a const throws a TypeError.
  let y = now.getFullYear()
  if (m === 12 && now.getMonth() === 0) { // year-end handling
    y--
  }
  const fix0 = (n) => n < 10 ? "0" + n : "" + n
  return y + "-" + fix0(m) + "-" + fix0(d) + "T" + fix0(h) + ":00"
}
/**
 * Extract the domestic-case figures from the summary PDF.
 *
 * The text returned by pdf2text.pdf2text is split into lines and read by
 * fixed line index. Lines used, with the sample values of the 3/16 PDF:
 *
 *   [2]  '824(+15)'                       -> npatients
 *   [3]  '171(+7) 625(+4)'                -> nexits, ncurrentpatients
 *   [4]  '28(+4)'                         -> ndeaths
 *   [10] '3月16日(月)18時時点'             -> lastUpdate
 *   [23] '重症→軽~中等症になった者 13'      -> nlighters (number after '者')
 *
 * In that sample, lines 11-22 repeat the table for the cruise-ship cases and
 * the grand total, and lines 24-25 are further "became mild" lines.
 *
 * @param {string} fn - PDF file passed to pdf2text.pdf2text
 * @returns {Promise<Object|null>} object with lastUpdate (string) and numeric
 *   npatients, nexits, ndeaths, ncurrentpatients, nlighters; null when the
 *   text is too short for this layout or ncurrentpatients cannot be read
 * @throws whatever parseDate throws for an unexpected timestamp line
 */
const getCurrentPatientsByPDF = async function(fn) {
  const text = await pdf2text.pdf2text(fn)
  const ss = text.split('\n')
  //console.log(ss)
  const LAST_LINE_USED = 23
  if (ss.length <= LAST_LINE_USED) {
    // Not the expected layout; report failure the same way as an unreadable count.
    return null
  }
  const parseNum = (s) => parseInt(util.toHalf(s), 10)
  // All integers of a line that are not inside a "(...)" change annotation;
  // both ASCII and full-width parentheses are recognised.
  const headNumbers = function(s) {
    const plain = util.toHalf(s).replace(/[((][^))]*[))]/g, ' ')
    return (plain.match(/\d+/g) || []).map((t) => parseInt(t, 10))
  }
  const res = {}
  res.lastUpdate = parseDate(ss[10])
  res.npatients = parseNum(ss[2])
  res.nexits = parseNum(ss[3])
  res.ndeaths = parseNum(ss[4])
  // Second figure of the "discharged / still hospitalised" line, independent
  // of whether the first figure carries a "(+n)" suffix.
  res.ncurrentpatients = headNumbers(ss[3])[1]
  res.nlighters = parseNum(ss[23].substring(ss[23].indexOf('者') + 1))
  if (!res.ncurrentpatients) {
    return null
  }
  return res
}
/**
 * Produce the summary JSON for a PDF, using an on-disk cache.
 *
 * The PDF is stored under PATH by its file name and the parsed result next to
 * it as "<pdf file>.json"; a readable JSON file short-circuits everything else.
 *
 * @param {string} url - URL of the PDF
 * @returns {Promise<Object|null>} parsed summary with srcurl_pdf and
 *   srcurl_web added, or null when the PDF could not be parsed
 */
const getJSONbyPDF = async function(url) {
  const basename = url.substring(url.lastIndexOf('/') + 1)
  const fn = PATH + basename
  const cachefn = fn + ".json"
  try {
    const cached = fs.readFileSync(cachefn)
    return JSON.parse(cached)
  } catch (e) {
    // no usable cache entry: download and parse below
  }
  const response = await fetch(url)
  const body = await response.arrayBuffer()
  fs.writeFileSync(fn, Buffer.from(body))
  const json = await getCurrentPatientsByPDF(fn)
  if (json) {
    json.srcurl_pdf = url
    json.srcurl_web = URL
    fs.writeFileSync(cachefn, JSON.stringify(json))
    return json
  }
  return null
}
/**
 * Return the summary converted to text by util.simplejson2txt.
 *
 * getCovid19DataJSON is called without a cache time.
 *
 * @returns {Promise<*>} the value returned by util.simplejson2txt
 */
const getCovid19DataSummaryForIchigoJam = async function() {
  return util.simplejson2txt(await getCovid19DataJSON())
}
/**
 * Command-line entry point: fetch the summary with a one-minute cache time
 * and print it.
 *
 * @returns {Promise<void>}
 */
const main = async function() {
  const oneMinute = 1000 * 60
  console.log(await getCovid19DataJSON(oneMinute))
  // To print the text form instead:
  //   console.log(await getCovid19DataSummaryForIchigoJam())
}
// Executed directly (node covid19japan_summary.js): print the summary once.
// Loaded through require(): start the periodic background refresh instead.
if (require.main !== module) {
  startUpdate()
} else {
  main()
}

// Public API of this module.
Object.assign(exports, {
  getCovid19DataJSON,
  getCovid19DataSummaryForIchigoJam,
})