Skip to content

Commit

Permalink
Merge pull request #73 from kaybenleroll/talk-idsc-2024
Browse files Browse the repository at this point in the history
talk idsc 2024
  • Loading branch information
kaybenleroll authored Jun 10, 2024
2 parents 20a8e58 + db8a609 commit 6938d31
Show file tree
Hide file tree
Showing 29 changed files with 8,528 additions and 0 deletions.
8 changes: 8 additions & 0 deletions talk_idsc_randport_202406/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Paths excluded from the Docker build context.

# Editor / IDE state and git metadata directories (contents only).
.Rproj.user/*
.vscode/*
.git/*

# Entries under geospatial_data/ whose names start with "FRA_".
geospatial_data/FRA_*

# R Markdown sources and HTML files.
*.Rmd
*.html
8 changes: 8 additions & 0 deletions talk_idsc_randport_202406/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Editor settings directory (contents only).
.vscode/*

# Contents of any *_cache/ and *_files/ directories
# (presumably render caches and figure output - confirm).
*_cache/*
*_files/*

# Anything whose name starts with "temp".
temp*

# Log files.
*.log
125 changes: 125 additions & 0 deletions talk_idsc_randport_202406/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Base image: rocker/tidyverse at tag 4.3.3.
FROM rocker/tidyverse:4.3.3

# Timezone used for /etc/localtime and /etc/timezone below.
ENV TZ=Europe/Dublin

# One layer that:
#   1. points the system timezone at $TZ,
#   2. upgrades and installs system packages, then clears the apt lists,
#   3. writes $HOME/.R/Makevars selecting clang/clang++ with -Os and a set of
#      warning suppressions for the default, C++11, C++14 and C++17 variables,
#   4. installs the listed R packages with install2.r.
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
  && echo $TZ > /etc/timezone \
  && apt-get update \
  && apt-get upgrade -y \
  && apt-get install -y --no-install-recommends \
    byobu \
    clang \
    ditaa \
    graphviz \
    htop \
    less \
    libclang-dev \
    libgdal-dev \
    libglpk-dev \
    libgsl-dev \
    libnlopt-dev \
    libproj-dev \
    libudunits2-dev \
    p7zip-full \
    pbzip2 \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/* \
  && mkdir -p $HOME/.R \
  && echo "" > $HOME/.R/Makevars \
  && for std in "" 11 14 17; do \
       printf '%s\n' \
         "CC${std}=clang" \
         "CXX${std}=clang++" \
         "CXX${std}FLAGS=-Os" \
         "CXX${std}FLAGS+= -Wno-unused-variable -Wno-unused-function" \
         "CXX${std}FLAGS+= -Wno-unknown-pragmas -Wno-macro-redefined" \
         "" \
         >> $HOME/.R/Makevars || exit 1; \
     done \
  && install2.r --error \
    anytime \
    bayesplot \
    brms \
    conflicted \
    cowplot \
    DataExplorer \
    directlabels \
    fs \
    ISLR \
    kableExtra \
    loo \
    markdown \
    parallelly \
    pryr \
    quarto \
    rsample \
    rstan \
    sessioninfo \
    sf \
    snakecase \
    tictoc \
    tidybayes \
    tidygraph \
    tidymodels \
    tidyquant \
    tmap \
    xts \
    zoo


WORKDIR /tmp

# Run the first R package install script before switching to the rstudio user.
COPY build/docker_install_sys_rpkgs.R /tmp/
RUN Rscript /tmp/docker_install_sys_rpkgs.R

# Build and install makefile2graph from source (cloned into /tmp/makefile2graph).
RUN git clone https://github.com/lindenb/makefile2graph.git /tmp/makefile2graph \
  && make -C /tmp/makefile2graph \
  && make -C /tmp/makefile2graph install

# Give the rstudio user its own copy of the compiler settings in .R/.
RUN cp -r $HOME/.R /home/rstudio \
  && chown -R rstudio:rstudio /home/rstudio/.R


WORKDIR /home/rstudio
USER rstudio

# Unpack the bundled configuration files into the rstudio home directory,
# then remove the extracted folder and create an empty bash history file.
COPY build/conffiles.7z /tmp/
RUN 7z x /tmp/conffiles.7z \
  && cp conffiles/.bash* conffiles/.gitconfig conffiles/.Renviron conffiles/.Rprofile . \
  && mkdir -p .config/rstudio \
  && cp conffiles/rstudio-prefs.json .config/rstudio/ \
  && rm -rfv conffiles/ \
  && touch /home/rstudio/.bash_eternal_history

# Run the second R package install script as the rstudio user.
COPY build/docker_install_user_rpkgs.R /tmp/
RUN Rscript /tmp/docker_install_user_rpkgs.R


USER root

# Normalise ownership and make the home directory readable/traversable by all.
RUN chown -R rstudio:rstudio /home/rstudio \
  && chmod ugo+rx /home/rstudio

# Supplied at build time via --build-arg BUILD_DATE=...
ARG BUILD_DATE

LABEL org.opencontainers.image.source="https://github.com/kaybenleroll/data_workshops/talk_idsc_randport_202406" \
      org.opencontainers.image.authors="Mick Cooney <[email protected]>" \
      org.label-schema.build-date=$BUILD_DATE
96 changes: 96 additions & 0 deletions talk_idsc_randport_202406/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
### Repo variables: together they form the image reference <user>/<name>:<tag>
PROJECT_USER = kaybenleroll
PROJECT_NAME = talk_idsc_randport_202406
PROJECT_TAG = latest

IMAGE_TAG = $(PROJECT_USER)/$(PROJECT_NAME):$(PROJECT_TAG)

### Container settings passed to `docker run` / `docker build`
# DOCKER_PASS is a placeholder; override it on the command line.
DOCKER_USER = rstudio
DOCKER_PASS = CHANGEME
# Recursive (=) on purpose: `id` only runs when a recipe references these.
DOCKER_UID = $(shell id -u)
DOCKER_GID = $(shell id -g)
DOCKER_BUILD_ARGS =

# Host port published for the container's port 8787.
RSTUDIO_PORT = 8787

# Name of the folder inside the container user's home where the
# working directory is mounted.
PROJECT_FOLDER = randport


### Set GITHUB_USER with 'gh config set gh_user <<user>>'
# Recursive (=) on purpose: `gh` only runs if GITHUB_USER is referenced.
GITHUB_USER = $(shell gh config get gh_user)

CONTAINER_NAME = idsc-2024-talk

### Project build targets

# These targets are commands, not files: declare them phony so a stray file
# called e.g. `depgraph` cannot make them look up to date.
.PHONY: all-html depgraph clean-html

# Remove a half-written target (e.g. a truncated full_deps.dot produced by the
# `>` redirect) when its recipe fails, so it is not mistaken for up to date.
.DELETE_ON_ERROR:

# Every *.qmd in this directory renders to an .html file of the same name.
QMD_FILES  := $(wildcard *.qmd)
HTML_FILES := $(patsubst %.qmd,%.html,$(QMD_FILES))

all-html: $(HTML_FILES)

# Render one Quarto document to reveal.js slides; progress and renderer
# output are appended to output.log.
%.html: %.qmd
	echo "TIMESTAMP:" `date` "- Rendering script $<" >> output.log 2>&1
	quarto render $< --to revealjs >> output.log 2>&1
	echo "TIMESTAMP:" `date` "- Finished $*.html" >> output.log 2>&1


# Convert a Graphviz .dot file to a PNG image.
%.png: %.dot
	dot -Tpng -o$@ $<

# Dependency graph of the all-html target in Graphviz format.
full_deps.dot:
	makefile2graph all-html > $@

depgraph: full_deps.png



# Delete the rendered slides (only files derived from existing *.qmd sources).
clean-html:
	rm -fv $(HTML_FILES)


### Docker targets

# None of these targets produce a file of the same name.
.PHONY: docker-build-image docker-run docker-bash create-slides-pdf \
        docker-stop docker-rm docker-start docker-stop-all docker-clean \
        docker-pull

# Build the image from ./Dockerfile, stamping it with the current UTC time.
# NOTE: output is piped through tee, so the recipe's exit status is tee's,
# not docker's; check docker_build.log if the image looks stale.
docker-build-image: Dockerfile
	docker build -t ${IMAGE_TAG} \
	  ${DOCKER_BUILD_ARGS} \
	  --build-arg BUILD_DATE="$$(date -u +'%Y-%m-%dT%H:%M:%SZ')" \
	  -f Dockerfile . 2>&1 | tee -a docker_build.log

# Start a detached, auto-removed container with the directory make is running
# in mounted read-write under the container user's home. $(CURDIR) is used
# instead of the inherited PWD so the mount is correct under `make -C <dir>`.
docker-run:
	docker run --rm -d \
	  -p ${RSTUDIO_PORT}:8787 \
	  -e USER=${DOCKER_USER} \
	  -e PASSWORD=${DOCKER_PASS} \
	  -e USERID=${DOCKER_UID} \
	  -e GROUPID=${DOCKER_GID} \
	  -v "$(CURDIR)":"/home/${DOCKER_USER}/${PROJECT_FOLDER}":rw \
	  --name ${CONTAINER_NAME} \
	  ${IMAGE_TAG}

# Open an interactive bash shell in the running container.
docker-bash:
	docker exec -it -u ${DOCKER_USER} ${CONTAINER_NAME} bash


# Export the rendered slide deck to PDF using the decktape image.
create-slides-pdf:
	docker run --rm -t \
	  -v "$(CURDIR)":/slides \
	  astefanutti/decktape:3.12.0 \
	  --size 1920x1080 \
	  /slides/idsc2024_mcooney_random_portfolios.html \
	  idsc2024_mcooney_random_portfolios.pdf


docker-stop:
	docker stop ${CONTAINER_NAME}

docker-rm:
	docker rm ${CONTAINER_NAME}

docker-start:
	docker start ${CONTAINER_NAME}

# Stop every running container on this Docker host (not just this project's).
# Does nothing when no container is running.
docker-stop-all:
	ids="$$(docker ps -q)"; \
	if [ -n "$$ids" ]; then docker stop $$ids; fi

# WARNING: removes every container on this Docker host, running or not.
# The id list is evaluated by the shell when the recipe runs, and the removal
# is skipped when the list is empty (a bare `docker rm` would fail).
docker-clean: docker-stop-all
	ids="$$(docker ps -q -a)"; \
	if [ -n "$$ids" ]; then docker rm $$ids; fi

docker-pull:
	docker pull ${IMAGE_TAG}
27 changes: 27 additions & 0 deletions talk_idsc_randport_202406/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
This folder contains the code and slides for the "Demystifying Data"
masterclass given by CIRDAS (part of SETU) from November

A previous version of the class is available on YouTube at the following link.


[Demystifying Data](https://www.youtube.com/watch?v=sueqCmOo84Y)



The slides for each of the talks are available at the following URLs:

[Talk 01 - Introduction](ws_01_intro.html)

[Talk 02 - The Virtues of Simplicity](ws_02_simplicity.html)

[Talk 03 - Risk, Uncertainty and Knowledge](ws_03_uncertainty.html)

[Talk 04 - The Power and Perils of Statistics](ws_04_statistics.html)

[Talk 05 - ML and Models and Data - Oh My!!!](ws_05_ml.html)

[Talk 06 - Data Visualisation](ws_06_dataviz.html)

[Talk 07 - So You Want to Data?](ws_07_started.html)

[Talk 08 - Recap](ws_08_recap.html)
Binary file added talk_idsc_randport_202406/build/conffiles.7z
Binary file not shown.
8 changes: 8 additions & 0 deletions talk_idsc_randport_202406/build/docker_install_sys_rpkgs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Install the CASdatasets package from the "pkg" subdirectory of its GitHub
# repository, pinned to a specific commit. Already-installed dependencies are
# not upgraded.
remotes::install_github(
  repo    = "dutangc/CASdatasets",
  subdir  = "pkg",
  ref     = "8c085e5f71e0a1f4099a69c95988b35828edd875",
  upgrade = "never"
)


38 changes: 38 additions & 0 deletions talk_idsc_randport_202406/build/docker_install_user_rpkgs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# GitHub-hosted packages, each pinned to a tag; existing dependencies are
# left at their installed versions.
remotes::install_github(repo = "stan-dev/cmdstanr",     ref = "v0.8.0", upgrade = "never")
remotes::install_github(repo = "rmcelreath/rethinking", ref = "v2.2.1", upgrade = "never")


library(cmdstanr)

# The same optimisation / warning flags are applied to every C++ standard.
clang_cxx_flags <- "-Os -Wno-unused-variable -Wno-unused-function -Wno-unknown-pragmas -Wno-macro-redefined"

# Compiler settings handed to CmdStan's build: clang/clang++ for the default,
# C++11, C++14 and C++17 variable sets.
cmdstan_flags <- list(
  CC         = "clang",
  CXX        = "clang++",
  CXXFLAGS   = clang_cxx_flags,
  CC11       = "clang",
  CXX11      = "clang++",
  CXX11FLAGS = clang_cxx_flags,
  CC14       = "clang",
  CXX14      = "clang++",
  CXX14FLAGS = clang_cxx_flags,
  CC17       = "clang",
  CXX17      = "clang++",
  CXX17FLAGS = clang_cxx_flags
)

# Download and build CmdStan 2.35.0 using all detected cores; an existing
# installation of that version is not overwritten.
install_cmdstan(
  version     = "2.35.0",
  cpp_options = cmdstan_flags,
  cores       = parallel::detectCores(),
  overwrite   = FALSE,
  quiet       = FALSE
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Load necessary libraries
# (tidyverse supplies tibble(), the dplyr verbs, unnest() and %>% used below;
#  lubridate supplies ymd())
library(tidyverse)
library(lubridate)

# Set the seed for reproducibility: every random draw below (sample(),
# rlnorm(), ...) depends on it
set.seed(123)

# Number of claims to simulate
num_claims <- 100

# Draw `num_movements` movement dates for a single claim.
#
# Dates are sampled with replacement from every calendar day between
# 2020-01-01 and 2023-01-01 (both inclusive) and returned in ascending order,
# so duplicates are possible.
generate_timestamps <- function(num_movements) {
  window_start   <- ymd("2020-01-01")
  window_end     <- ymd("2023-01-01")
  candidate_days <- seq.Date(from = window_start, to = window_end, by = "day")

  drawn_days <- sample(candidate_days, size = num_movements, replace = TRUE)
  sort(drawn_days)
}

# Function to generate payment amounts with a heavy tail
#
# Draws `num_movements` lognormal payments, then multiplies the payments that
# lie above the sample's 95th percentile by a Pareto-distributed factor so the
# upper tail is heavier than a plain lognormal.
#
# Args:
#   num_movements: number of payments to generate (0 is allowed).
# Returns:
#   A numeric vector of length `num_movements`.
generate_payments <- function(num_movements) {
  # Parameters for the lognormal distribution
  meanlog <- 10
  sdlog   <- 1

  # Parameters for the Pareto multiplier; with scale 1 every factor is > 1,
  # so a tail payment is only ever increased
  pareto_shape <- 2
  pareto_scale <- 1

  # Generate lognormal payments
  payments <- rlnorm(num_movements, meanlog = meanlog, sdlog = sdlog)

  # Apply a Pareto transformation to some of the payments to create a heavy tail
  pareto_threshold <- quantile(payments, 0.95)
  heavy_tail       <- payments > pareto_threshold
  n_tail           <- sum(heavy_tail)

  # rpareto() is not provided by base R or by the packages this script loads,
  # so the Pareto draws are produced by inverse-CDF sampling instead:
  # X = scale * U^(-1 / shape), with U ~ Uniform(0, 1).
  pareto_factor <- pareto_scale * runif(n_tail)^(-1 / pareto_shape)
  payments[heavy_tail] <- payments[heavy_tail] * pareto_factor

  return(payments)
}

# Function to generate claims data
#
# Builds one row per claim movement. Each claim gets an ID of the form
# "CL00001", between 5 and 15 movements, and for every movement a date
# (generate_timestamps) and a payment amount (generate_payments).
#
# Args:
#   num_claims: number of claims to simulate.
# Returns:
#   An ungrouped tibble with columns Claim_ID, Num_Movements, Timestamps,
#   Payments, Movement_Type, Incurred and Outstanding.
generate_claim_data <- function(num_claims) {
  claims <- tibble(
    # seq_len() rather than 1:num_claims so that num_claims = 0 yields no rows
    Claim_ID      = sprintf("CL%05d", seq_len(num_claims)),
    Num_Movements = sample(5:15, num_claims, replace = TRUE)
  )

  claims <- claims %>%
    # One list-column entry per claim, sized by that claim's Num_Movements
    rowwise() %>%
    mutate(
      Timestamps = list(generate_timestamps(Num_Movements)),
      Payments   = list(generate_payments(Num_Movements))
    ) %>%
    ungroup() %>%
    unnest(cols = c(Timestamps, Payments)) %>%
    # Running totals must restart for every claim; without this grouping the
    # cumulative sum is not computed within each claim
    group_by(Claim_ID) %>%
    mutate(
      Movement_Type = "Paid",
      Incurred      = cumsum(Payments),
      # As defined here: the claim's cumulative total before this movement
      Outstanding   = Incurred - Payments
    ) %>%
    ungroup()

  return(claims)
}

# Generate claims data for `num_claims` claims (one row per claim movement)
claims_data <- generate_claim_data(num_claims)

# View the first 10 rows of the data
print(head(claims_data, 10))
Loading

0 comments on commit 6938d31

Please sign in to comment.