Skip to content

Commit

Permalink
Code logic fix; bug on cmcontinue; timeout retry
Browse files Browse the repository at this point in the history
  • Loading branch information
brynne8 authored Aug 16, 2020
1 parent 8eb81e7 commit 5893dac
Showing 1 changed file with 27 additions and 7 deletions.
34 changes: 27 additions & 7 deletions science_data.lua
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ local limit = require("copas.limit")
local MediaWikiApi = require('mwtest/mwapi')
local Utils = require('mwtest/utils')
local json = require('cjson')
local ltn12 = require('ltn12')

chttp.TIMEOUT = 20

local science_data = {
last_date = 0,
Expand Down Expand Up @@ -80,6 +83,8 @@ function getSummary(titles)
v.extract = stripHtmlTags(v.extract)
end
return pages
else
return getSummary(titles)
end
end

Expand All @@ -94,22 +99,24 @@ local sci_cats = {

function getScienceArt()
print('Start fetching science articles')
local getCatMembers = function (cat, cmcontinue)
local res, code = chttpsget('https://zh.wikipedia.org/w/api.php?action=query&format=json&list=categorymembers' ..
'&cmlimit=500&cmtitle=Category:' .. cat .. (cmcontinue and ('&cmcontinue=' .. cmcontinue) or ''))
local function getCatMembers(cat, cmcontinue)
local uri = 'https://zh.wikipedia.org/w/api.php?action=query&format=json&list=categorymembers' ..
'&cmlimit=max&cmtitle=Category:' .. Utils.urlEncode(cat) .. (cmcontinue and ('&cmcontinue=' .. cmcontinue) or '')
local res, code = chttpsget(uri)
if code ~= 200 then
MediaWikiApi.trace('Failed to get science art')
return
return getCatMembers(cat, cmcontinue)
end

local raw_catmem = json.decode(res).query.categorymembers
res = json.decode(res)
local raw_catmem = res.query.categorymembers
for _, v in ipairs(raw_catmem) do
local art_name = v.title:match('Talk:(.-)$')
if art_name then science_dict[art_name:gsub(' ', '_')] = true end
end

if res.continue then
getCatMembers(cat, cmcontinue)
return getCatMembers(cat, res.continue.cmcontinue)
end
end

Expand Down Expand Up @@ -142,6 +149,19 @@ for art_name in pairs(science_dict) do
end
end

-- Handle whatever is still accumulated in `titles` once the loop above is done.
if titles ~= '' then
  -- sub(2) drops the first character of the accumulated string
  -- (presumably a leading separator -- confirm against the code that builds `titles`).
  local pending_titles = titles:sub(2)

  -- Fetches the summaries for the pending titles and records each page in
  -- science_dict, keyed by the page title with spaces replaced by underscores.
  local function store_summaries()
    local summaries = getSummary(pending_titles)
    for _, page in ipairs(summaries) do
      local key = page.title:gsub(' ', '_')

      -- An empty extract is replaced by a fixed placeholder string.
      local summary_text = page.extract
      if summary_text == '' then
        summary_text = '无摘要'
      end

      science_dict[key] = {
        disp_name = page.varianttitles['zh-cn'],
        extract = summary_text
      }
    end
  end

  -- Register the work with the task set; it is not called directly here.
  taskset:addthread(store_summaries)
end

copas.loop()

id = 0
Expand All @@ -160,4 +180,4 @@ science_data.list = science_data.new_list
science_data.new_list = {}
local f = io.open("mwtest/sci.txt", "wb")
f:write(json.encode(science_data))
f:close()
f:close()

0 comments on commit 5893dac

Please sign in to comment.