-
Notifications
You must be signed in to change notification settings - Fork 0
/
legacy_software_carpentry_scraper.rb
82 lines (69 loc) · 2.61 KB
/
legacy_software_carpentry_scraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env ruby
require 'tess_uploader'
# --- Script state -----------------------------------------------------------
# Lessons found by parse_data, keyed by lesson URL; each value holds the
# lesson's 'title' and 'tags'.
$lessons = {}
# Debug switch (off by default).
$debug = false

# --- Where the lessons live -------------------------------------------------
# Path to a checkout under the user's home directory. Nothing in the visible
# part of this script reads it.
$git_dir = "#{ENV['HOME']}/Work/Web/bc/"
# Remote that is cloned when no local checkout exists yet.
$git_repo_remote = 'https://github.com/swcarpentry/bc.git'
# Local checkout that parse_data scans, inside the current working directory.
$git_repo = "#{Dir.pwd}/bc/"
# Prefix of the public URL recorded for every lesson.
$git_url = 'https://github.com/swcarpentry/bc/tree/gh-pages/'

# --- Catalogue --------------------------------------------------------------
# Name under which the organisation and all of its lessons are registered.
$owner_org = 'software-carpentry'
# Skill level => topic directories to scan beneath "<checkout>/<level>/".
$skill_levels = {
  'novice' => %w[extras git hg matlab python r ref shell sql teaching],
  'intermediate' => %w[doit git make python r regex shell sql webdata]
}
# Make sure a local checkout of the lessons repository exists and is current.
# The upstream repository is frozen, so the pull is normally a no-op, but it
# is cheap and keeps the checkout honest.
#
# Notes:
# * File.exist? is used because File.exists? was removed in Ruby 3.2.
# * The pull runs inside the checkout (chdir:) and fetches from the checkout's
#   configured remote, and it finishes before the script carries on.
# * git's standard output is discarded; its error output stays visible.
# * A failed git command is reported but does not stop the script.
if File.exist?($git_repo)
  pulled = system('git', 'pull', chdir: $git_repo, out: File::NULL)
  warn "git pull failed in #{$git_repo}; continuing with the existing checkout." unless pulled
else
  cloned = system('git', 'clone', $git_repo_remote, $git_repo, out: File::NULL)
  warn "git clone of #{$git_repo_remote} failed; no lessons will be found." unless cloned
end
# Returns the title declared in the first five lines of the file at +path+,
# or nil when none of those lines starts with "title:" followed by a value.
#
# The match is anchored to the start of the line so that keys such as
# "subtitle:" are not mistaken for the title, and only the leading "title:"
# key is removed, so a title that itself contains "title: " is kept intact.
# Surrounding whitespace (including a trailing carriage return) is stripped.
def lesson_title(path)
  File.foreach(path).first(5).each do |line|
    title = line[/\Atitle:\s*(\S.*)/, 1]
    return title.strip if title
  end
  nil
end

# Scans "<$git_repo><level>/<topic>/*.md" for every level/topic pair in
# $skill_levels and records each titled lesson in $lessons.
#
# README.md and index.md are skipped, as is any file without a title in its
# first five lines. Each recorded entry is keyed by the lesson's public URL
# (built from $git_url) and holds:
#   'title' - the title text
#   'tags'  - two tag hashes, the capitalised level and the capitalised topic
#
# The list of candidate files is printed only when $debug is set.
# Returns $skill_levels (the result of each_pair).
def parse_data
  $skill_levels.each_pair do |level, topics|
    topics.each do |topic|
      files = Dir["#{$git_repo}#{level}/#{topic}/*.md"]
      puts files if $debug
      files.each do |file|
        basename = File.basename(file)
        next if basename == 'README.md' || basename == 'index.md'

        title = lesson_title(file)
        next unless title

        url = "#{$git_url}#{level}/#{topic}/#{basename}"
        $lessons[url] = {
          'tags' => [{ 'name' => level.capitalize }, { 'name' => topic.capitalize }],
          'title' => title
        }
      end
    end
  end
end
# Scan the local checkout and fill $lessons before anything is uploaded.
parse_data
# Register the owning organisation. Organisation and Uploader are not defined
# in this script (presumably they come from tess_uploader); the constructor
# takes its values positionally, so each argument is labelled below.
organisation = Organisation.new(
  'Software Carpentry', # title
  $owner_org,           # name
  'The Software Carpentry Foundation is a non-profit organization whose members teach researchers basic software skills.', # description
  'http://software-carpentry.org/img/software-carpentry-banner.png', # image URL
  'http://software-carpentry.org/', # homepage
  ''                    # node id (left blank)
)
Uploader.check_create_organisation(organisation)
# Push every parsed lesson to the catalogue as a tutorial owned by $owner_org.
$lessons.each do |url, lesson|
  title = lesson['title']

  tutorial = Tuition::Tutorial.new
  tutorial.url = url
  tutorial.owner_org = $owner_org
  tutorial.title = title
  tutorial.notes = "#{title} from #{url}, added automatically."
  tutorial.set_name($owner_org, title)
  tutorial.tags = lesson['tags']
  tutorial.format = 'html'

  # The resource may already exist; create_or_update is expected (going by
  # its name - it is defined outside this script) to update it in that case
  # instead of creating a duplicate.
  Uploader.create_or_update(tutorial)
end