-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmusikarchiv-analyses.R
339 lines (284 loc) · 14.9 KB
/
musikarchiv-analyses.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
library("SPARQL")
library("ggplot2")
library("ggrepel")
library("tidyr")
library("magrittr")
library("forcats")
library("dplyr" )
library("lubridate")
library("jsonlite")
library("stringr")
# SPARQL endpoint of the local Blazegraph "prosit" namespace used by the
# Musikverein archive queries below.
endpoint <- "http://localhost:9999/blazegraph/namespace/prosit/sparql"

# Query: the title of every ny:ProgrammeItem.
worksPerformedQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
SELECT ?title where {
?s a ny:ProgrammeItem ;
dcterm:title ?title
}
"

# Reshape the query result to long format: every value ends up in `title`,
# while the former column names land in a column literally named "NA".
worksPerformed <- SPARQL(endpoint, worksPerformedQ)$results %>%
  pivot_longer(everything(), names_to = "NA", values_to = "title")

# Titles are categorical. The original script converted the column to a factor
# twice; converting once is sufficient.
worksPerformed$title <- factor(worksPerformed$title)

# Number of occurrences per title (auto-printed at top level).
worksPerformed %>%
  count(title)
# Query: number of programme items per title, keeping only titles that occur
# more than 20 times, most frequent first.
timesWorksPerformedQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
SELECT ?title (count(?s) as ?c) where {
?s a ny:ProgrammeItem ;
dcterm:title ?title
} group by ?title
having (?c > 20)
order by desc(?c) ?title
"
timesWorksPerformed <- SPARQL(endpoint, timesWorksPerformedQ)$results
timesWorksPerformed$title <- factor(timesWorksPerformed$title)

# Bar chart of performance counts, bars ordered from most to least performed.
# The title now states the threshold the query actually applies (?c > 20);
# it previously claimed "more than 10 times".
ggplot(timesWorksPerformed, aes(reorder(title, -c), c)) +
  geom_bar(stat = "identity") +
  theme_bw() +
  scale_y_continuous(breaks = seq(0, 150, 10)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    x = "Work",
    y = "Times performed",
    title = "Pieces from Musikverein Archive performed more than 20 times in 'Silvester' and 'Neujahrs' concerts"
  )
# Query: for every (composer, year) pair, how many programme items by that
# composer belong to a performance dated in that year. The year is cut from
# the tail of the performance's date string.
workComposerYearQ<- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
select ?composer ?year (count(?composer) as ?c) where {
?s a ny:ProgrammeItem ;
dcterm:creator ?composer ;
dcterm:isPartOf ?performance .
?performance dcterm:date ?date .
BIND(SUBSTR(?date, STRLEN(?date)-4) as ?year) .
} GROUP BY ?composer ?year
"

# Composer is categorical, year is numeric.
workComposerYear <- SPARQL(endpoint, workComposerYearQ)$results %>%
  mutate(
    composer = factor(composer),
    year = as.numeric(year)
  )

# Bubble chart: one open circle per composer/year, sized by number of pieces.
ggplot(workComposerYear, aes(composer, year, size = c)) +
  geom_point(shape = 21, fill = "white", color = "black") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    x = "Composer",
    y = "Year",
    title = "Num. pieces performed by composer by year. n.b., conflates Silvester and Neujahr of the same year!",
    size = "Num. pieces"
  )
# Query: number of programme items per (conductor, composer) pair.
conductorComposerQ<- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
select ?conductor ?composer (count(?composer) as ?c) where {
?s a ny:ProgrammeItem ;
dcterm:creator ?composer ;
dcterm:isPartOf ?performance .
?performance ny:Dirigent ?conductor .
} GROUP BY ?conductor ?composer
"
conductorComposer <- SPARQL(endpoint, conductorComposerQ)$results
conductorComposer$conductor <- factor(conductorComposer$conductor)
conductorComposer$composer <- factor(conductorComposer$composer)

# Query: total number of programme items per conductor (the denominator used
# for normalisation below).
conductorPiecesQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
select ?conductor (count(?s) as ?numPieces) where {
?s a ny:ProgrammeItem ;
dcterm:isPartOf ?performance .
?performance ny:Dirigent ?conductor .
} GROUP BY ?conductor
"
conductorPieces <- SPARQL(endpoint, conductorPiecesQ)$results
conductorPieces$conductor <- factor(conductorPieces$conductor)

# Attach each conductor's total and express the per-composer count as a share
# of everything that conductor performed. The join key is named explicitly
# (`conductor` is the only column the two tables share) instead of relying on
# dplyr's natural-join guess.
conductorComposer <- conductorComposer %>%
  inner_join(conductorPieces, by = "conductor") %>%
  mutate(c_normalised = c / numPieces)

# Bubble chart: share of each composer within a conductor's repertoire.
ggplot(conductorComposer, aes(composer, conductor, size = c_normalised)) +
  geom_point(shape = 21, fill = "white", color = "black") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    x = "Composer",
    y = "Conductor",
    title = "Works by composer as proportion of pieces conducted.\n n.b., conflates Silvester and Neujahr of the same year!",
    size = "Proportion"
  )

# Same chart, sized by the absolute number of pieces.
ggplot(conductorComposer, aes(composer, conductor, size = c)) +
  geom_point(shape = 21, fill = "white", color = "black") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(
    x = "Composer",
    y = "Conductor",
    title = "Works by composer as number of pieces conducted.\n n.b., conflates Silvester and Neujahr of the same year!",
    size = "Num. pieces"
  )
#====================SPOTIFY DATA================================#
# From here on `endpoint` points at the "spotify" namespace.
endpoint <- "http://localhost:9999/blazegraph/namespace/spotify/sparql"

# NOTE: the three queries below use the foaf: and xsd: prefixes. They are now
# declared explicitly (standard namespace IRIs) so the queries do not depend on
# the triple store supplying default prefix declarations.

# Query: album, year and audio analysis/features payloads of every track whose
# name contains "Donau", minus the two unrelated works filtered out below.
track_analyses_features_donauwalzerQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX mo: <http://purl.org/ontology/mo/>
PREFIX dcterm: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?album ?albName ?year ?track ?trackNum ?trackName ?analyses ?features where {
?album foaf:name ?albName ;
mo:track ?track .
?track foaf:name ?trackName ;
mo:track_number ?trackNum ;
ny:audio_analysis ?analyses ;
ny:audio_features ?features .
FILTER(REGEX(?trackName, '.*Donau.*')) . # An der schönen blauen ...
FILTER(!(REGEX(?trackName, '.*weibchen.*'))) . # exclude Donauweibchen
FILTER(!(REGEX(?trackName, '.*strande.*'))) . # exclude Vom Donaustrande
BIND(SUBSTR(?albName, STRLEN(?albName)-3) as ?year) . # year number is always last 4 diigts
FILTER(ISNUMERIC(xsd:integer(?year))) . # but some (e.g. compilations) don't have year; exclude them
}
"

# Query: same columns for every track whose name contains "Radetzky".
track_analyses_features_radetzkyQ<- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX mo: <http://purl.org/ontology/mo/>
PREFIX dcterm: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?album ?albName ?year ?track ?trackNum ?trackName ?analyses ?features where {
?album foaf:name ?albName ;
mo:track ?track .
?track foaf:name ?trackName ;
mo:track_number ?trackNum ;
ny:audio_analysis ?analyses ;
ny:audio_features ?features .
FILTER(REGEX(?trackName, '.*Radetzky.*')) .
BIND(SUBSTR(?albName, STRLEN(?albName)-3) as ?year) . # year number is always last 4 diigts
FILTER(ISNUMERIC(xsd:integer(?year))) . # but some (e.g. compilations) don't have year; exclude them
}
"

# Query: publisher (label) of every album whose name ends in a year.
album_publisherQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX mo: <http://purl.org/ontology/mo/>
PREFIX dcterm: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?album ?albName ?year ?label WHERE {
?album foaf:name ?albName ;
mo:publisher ?label .
BIND(SUBSTR(?albName, STRLEN(?albName)-3) as ?year) . # year number is always last 4 diigts
FILTER(ISNUMERIC(xsd:integer(?year))) . # but some (e.g. compilations) don't have year; exclude them
}
"
#_dw below: Donauwalzer
#_rm below: Radetzky Marsch
track_analyses_features_rm <- SPARQL(endpoint, track_analyses_features_radetzkyQ)$results
track_analyses_features_dw <- SPARQL(endpoint, track_analyses_features_donauwalzerQ)$results

# Turn every single quote into a double quote so the stored payloads can be
# handed to a JSON parser (the payloads are presumably single-quoted,
# dict-style text; verify against the data loader).
#
# The original code first ran str_replace_all('"', '\"'). In R the literal
# '\"' is just a double quote, so that step replaced `"` with `"` and changed
# nothing; it has been dropped.
# NOTE(review): if escaping embedded double quotes was intended, that needs a
# real escape sequence and a check that the payloads contain such quotes.
single_to_double_quotes <- function(x) {
  str_replace_all(x, "'", "\"")
}

track_analyses_features_dw$analyses <-
  single_to_double_quotes(track_analyses_features_dw$analyses)
track_analyses_features_dw$features <-
  single_to_double_quotes(track_analyses_features_dw$features)
track_analyses_features_rm$analyses <-
  single_to_double_quotes(track_analyses_features_rm$analyses)
track_analyses_features_rm$features <-
  single_to_double_quotes(track_analyses_features_rm$features)
# Column-bind a track's `track` value with the `bars` element parsed from its
# JSON `analyses` payload.
#
# track: an object exposing `$track` and `$analyses` (a JSON string).
# Returns the result of cbind() on those two pieces.
unpacked_analyses_bars <- function(track) {
  analysis <- parse_json(track$analyses, simplifyVector = TRUE)
  cbind(track$track, analysis$bars)
}
# Collect the `bars` table of every track in `tracks` into one long table.
#
# tracks: query result with a `year` column and an `analyses` column holding
#         JSON text whose `bars` element parses to a table with three columns
#         (renamed here to Sec, duration, confidence, as in the original code).
# Returns a data frame with columns year, Sec, duration, confidence and
# year_n (the year as a number).
#
# Uses seq_len() so an empty input does not iterate over c(1, 0), and binds
# all per-track tables once instead of growing the result inside a loop.
collect_bars_by_year <- function(tracks) {
  if (nrow(tracks) == 0) {
    stop("no tracks to unpack", call. = FALSE)
  }
  per_track <- lapply(seq_len(nrow(tracks)), function(i) {
    bars <- parse_json(tracks[i, "analyses"], simplifyVector = TRUE)$bars
    cbind(year = tracks[i, "year"], bars)
  })
  all_bars <- do.call(rbind, per_track)
  names(all_bars) <- c("year", "Sec", "duration", "confidence")
  all_bars$year_n <- as.numeric(as.character(all_bars$year))
  all_bars
}

# Draw one vertical line per bar (confidence > 0.3 only), coloured by
# confidence, with one facet row per year.
#
# labs() takes aesthetic names, so the y-axis title is given as `y`; the
# original `ylab = "Year"` did not match any aesthetic and was ignored.
plot_bar_positions <- function(bars, title) {
  ggplot(bars %>% filter(confidence > 0.3)) +
    geom_vline(aes(xintercept = Sec, color = confidence)) +
    facet_wrap(~year_n, ncol = 1, strip.position = "left") +
    theme_bw() +
    scale_x_continuous(breaks = seq(0, 800, 20)) +
    labs(y = "Year", title = title)
}

donauwalzers <- collect_bars_by_year(track_analyses_features_dw)
plot_bar_positions(
  donauwalzers,
  "An der schönen blauen Donau - spotify bar positions"
)

radetzkys <- collect_bars_by_year(track_analyses_features_rm)
plot_bar_positions(
  radetzkys,
  "Radetzky Marsch - spotify bar positions"
)
# Publisher per album year: query results plus manually researched rows.
#
# The query derives ?year with SUBSTR, so it presumably arrives as text, while
# the manual rows below use numeric years. `year` is converted to numeric
# first so add_row() combines like with like (recent tibble versions refuse to
# combine a character column with numeric values).
album_publishers <- SPARQL(endpoint, album_publisherQ)$results %>%
  mutate(year = as.numeric(as.character(year))) %>%
  # following info taken from manual inspection of WPhil and Amazon websites
  add_row(year = 2015, label = "Sony Classical") %>%
  add_row(year = 2013, label = "Sony Classical") %>%
  add_row(year = 2012, label = "Sony Classical") %>%
  add_row(year = 2011, label = "Decca Records") %>%
  add_row(year = 2010, label = "Decca Records") %>%
  add_row(year = 2009, label = "Decca Records") %>%
  add_row(year = 2008, label = "Decca Records") %>%
  # 2007 as Sony Classical on WPhil but also exists separately as Deutsche Grammophon (Universal Music) on Amazon!
  add_row(year = 2006, label = "Sony Classical / Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 2003, label = "Sony Classical / Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 2002, label = "Philips (Universal Music)") %>%
  add_row(year = 1998, label = "RCA Red Seal (Sony Music)") %>%
  add_row(year = 1997, label = "EMI Classics") %>%
  add_row(year = 1993, label = "Philips (Universal Music)") %>%
  add_row(year = 1992, label = "Sony Classical") %>%
  add_row(year = 1991, label = "Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 1988, label = "Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 1987, label = "Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 1983, label = "Deutsche Grammophon (Universal Music)") %>%
  add_row(year = 1980, label = "Deutsche Grammophon (Universal Music)") %>%
  # 1980 - 1983 compilation available from Universal Japan
  # (label below: stray second closing parenthesis removed)
  add_row(year = 1979, label = "Decca (Universal Music)") %>%
  # 1978 - 1979 compilation available on Vinyl from Decca DMR
  add_row(year = 1972, label = "Decca (Universal Music)") %>%
  # 1972 Vinyl only
  add_row(year = 1967, label = "Teldec") %>%
  # 1969 Vinyl only
  add_row(year = 1964, label = "Teldec") %>%
  # 1964 Vinyl only
  # 1963 - 1979 compilation available from Deutsche Grammophon
  add_row(year = 1954, label = "Telefunken") %>%
  # 1954 - vinyl - as "Klemens Krauss dirigiert sein letztes Neujahrskonzert"
  # 1951-1954 (CD) compilation as "Clemens Krauss: The New Year Concerts 1951-54"
  add_row(year = 1941, label = "TON 4 Records")
# 1986 not available on Amazon
# Which piece was performed in which year (the plots further down order the
# pieces by how often they were performed overall).
# Back to the "prosit" namespace for the archive data.
endpoint <- "http://localhost:9999/blazegraph/namespace/prosit/sparql"

# Query: title, year and date of every programme item, plus a flag telling
# whether the performance date contains 'Dezember'.
worksByYearQ <- "
PREFIX ny: <http://localhost:9999/vocab/>
PREFIX dcterm: <http://purl.org/dc/terms/>
SELECT ?title ?year ?date ?is_silvester where {
?s a ny:ProgrammeItem ;
dcterm:title ?title ;
dcterm:isPartOf ?performance .
?performance dcterm:date ?date .
BIND(CONTAINS(?date, 'Dezember') as ?is_silvester) .
BIND(SUBSTR(?date, STRLEN(?date)-4) as ?year) .
}
order by ?title"

worksByYear <- SPARQL(endpoint, worksByYearQ)$results %>%
  # performances flagged as Silvester are counted towards the following year
  mutate(
    year = ifelse(is_silvester, as.numeric(year) + 1, as.numeric(year))
  ) %>%
  select(title, year) %>%
  # manually add Donauwalzer and Radetzky for recent years (not listed on
  # Musikverein site): all eight Donauwalzer rows first, then the eight
  # Radetzky rows, each covering 2013 to 2020
  add_row(
    title = rep(
      c(
        "An der schönen blauen Donau. Walzer, op. 314",
        "Radetzky-Marsch, op. 228"
      ),
      each = 8
    ),
    year = rep(as.numeric(2013:2020), times = 2)
  )
# Number of rows (performances) per title.
worksTimesPerformed <- worksByYear %>%
  count(title)

# Attach the per-title count to every performance row. The join key is named
# explicitly (`title` is the only shared column) rather than left to dplyr's
# natural-join guess.
worksByYear_TimesPerformed <- worksByYear %>%
  inner_join(worksTimesPerformed, by = "title") %>%
  mutate(
    label = paste0(title, "\n(n=", n, ")"),
    label = fct_reorder(label, n) # order by number of performances
  ) %>%
  filter(n >= 40) %>% # at least 40 performances!
  distinct() # throw out duplicates (where there is more than 1 concert per year)

# One tick mark per year in which each of the selected works was performed.
ggplot(worksByYear_TimesPerformed, aes(year, label)) +
  geom_point(shape = "|", size = 5, alpha = 1) +
  theme_bw() +
  scale_x_continuous(
    breaks = seq(0, 2020, 10),
    minor_breaks = seq(0, 2020, 1)
  ) +
  xlab("Year") +
  ylab("Work\n(n = number of times performed in series)")
# Number of distinct years in which each title was performed (duplicate
# title/year rows are collapsed before counting).
worksYearsPerformed <- worksByYear %>%
  distinct() %>%
  count(title)

# Attach the per-title year count to every performance row. The join key is
# named explicitly (`title` is the only shared column) rather than left to
# dplyr's natural-join guess.
worksByYear_YearsPerformed <- worksByYear %>%
  inner_join(worksYearsPerformed, by = "title") %>%
  mutate(
    label = paste0(title, "\n(performed in ", n, " years)"),
    label = fct_reorder(label, n) # order by number of years performed
  ) %>%
  filter(n >= 17) # at least 17 years!

# One tick mark per year in which each of the selected pieces was performed.
ggplot(worksByYear_YearsPerformed, aes(year, label)) +
  geom_point(shape = "|", size = 5, alpha = 1) +
  theme_bw() +
  scale_x_continuous(
    breaks = seq(0, 2020, 10),
    minor_breaks = seq(0, 2020, 1)
  ) +
  xlab("Year") +
  ylab("Piece") +
  ggtitle(
    "Most frequently performed pieces",
    subtitle = "Vienna Philharmonic New Years' Concert series"
  )