Skip to content

Commit

Permalink
works mostly TM
Browse files Browse the repository at this point in the history
  • Loading branch information
MaxGaukler committed Aug 8, 2018
0 parents commit f39cb1a
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Directory that all-wikis-to-html.sh creates and fills with the exported wikis.
output/
8 changes: 8 additions & 0 deletions all-wikis-to-html.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/sh
# Export the wikis of several GitLab projects by calling gitlabwiki-to-html.sh
# once per project.
#
# Usage:
#   GITLAB_SESSION_COOKIE=a1337f0000 ./all-wikis-to-html.sh user1/repo1 user2/repo2 ...
#
# Environment:
#   GITLAB_SESSION_COOKIE  value of the _gitlab_session cookie that is handed to
#                          gitlabwiki-to-html.sh as its 4th argument.
#                          GITLAB_COOKIE is still honoured as a fallback, because
#                          that is the name this script used to read.
#
# Run it from the directory that contains gitlabwiki-to-html.sh. Results go to
# ./output/<repo>/; a pre-existing ./output/ is deleted first.
set -e

if [ "$#" -eq 0 ]; then
  echo "usage: GITLAB_SESSION_COOKIE=a1337f0000 $0 user1/repo1 user2/repo2 ..." >&2
  exit 1
fi

# Prefer the documented variable name, fall back to the legacy one.
cookie="${GITLAB_SESSION_COOKIE:-${GITLAB_COOKIE:-}}"

# Always start from an empty output directory.
rm -rf output/
mkdir output/

for repo in "$@"; do
  mkdir -p "output/$repo"
  ./gitlabwiki-to-html.sh https://gitlab.cs.fau.de/ "$repo" "output/$repo" "$cookie"
done
43 changes: 43 additions & 0 deletions gitlabwiki-to-html.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/bin/bash
# Mirror the wiki of one GitLab project as static HTML and render it to a PDF.
#
# Usage:
#   gitlabwiki-to-html.sh https://some-gitlab/ user/project local-output-dir/ [_gitlab_session cookie hex value]
#
# Arguments:
#   $1  base URL of the GitLab instance, including the trailing slash
#   $2  project path (user/project); one trailing slash is tolerated
#   $3  existing directory; receives html/, temp-for-pdf/ and all.pdf
#   $4  optional value of the _gitlab_session cookie
#
# External tools: wget, find, sed -i, rename (Perl-expression variant),
# chromium, pdfjoin, xdg-open.
set -e

scriptdir="$(dirname "$(readlink -f "$0")")"
baseurl="$1"
project="${2%/}" # a trailing slash would yield "//" in the URL and in wget's -I list
outputdir="$3"
cookie="$4"

if [[ -z "$baseurl" || -z "$project" || -z "$outputdir" || ! -d "$outputdir" ]]; then
  echo "usage: $0 https://some-gitlab/ user/project local-output-dir/ [_gitlab_session cookie hex value]" >&2
  exit 1
fi

cd "$outputdir"
mkdir -p html/

### 1. DOWNLOAD HTML FILES
cd html
# NOTE(review): wget reports a non-zero status when any single request gets an
# error response, which aborts the whole script under `set -e` -- confirm that
# this is the intended behaviour.
wget --rejected-log=rejected-urls.log --header "Cookie: _gitlab_session=$cookie" -r -k -p "${baseurl}${project}/wikis/" -I "/${project}/uploads,/${project}/wikis,/assets" -R '*?version_id=*,?view=create,edit,history,git_access'

# Append the overrides that hide GitLab's page chrome to every downloaded stylesheet.
for css in */assets/*.css; do
  [[ -e "$css" ]] || continue # glob matched nothing
  cat "$scriptdir/remove-dynamic-content.css" >> "$css"
done

# fixup lazy-loaded images (src="data:image-gif..." class="lazy" data-src="/user/project/uploads/.../source.png" for uploads, or data-src="./source.png" for images inside wiki-git)
# The project path is used inside a regex, so escape its metacharacters first.
project_re="$(printf '%s' "$project" | sed 's/[][\.*^$]/\\&/g')"
placeholder='src="data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=="'
fixup="s/${placeholder}//g;s| data-src=\"/${project_re}/| src=\"../../|g;s| data-src=\"| src=\"|g"
find . -type f \( -name '*.html' -o -name '*.1' -o -not -name '*.*' \) -exec sed -i -e "$fixup" '{}' +

# Showing the result is a convenience only; a missing viewer must not prevent
# the PDF from being generated.
xdg-open */"$project"/wikis/index.html || echo "warning: could not open the HTML index with xdg-open" >&2

### 2. PDF GENERATION
### 2.1 PREPROCESS HTML

cd ..
# Start from a clean copy; otherwise a second run would copy html/ *into* the
# stale temp-for-pdf/ directory.
rm -rf temp-for-pdf
cp -r html temp-for-pdf
cd temp-for-pdf

# work around a few bugs where gitlab doesn't enforce canonic URLs:
# force ending as .html, force filename as lowercase (except files already ending in .html), "-" is equal to " "
find . -type f \( -name '*.1' -o -not -name '*.*' \) -execdir cp '{}' '{}.copy.html' ';' -execdir rename -f 'y/A-Z\-/a-z /' '{}.copy.html' ';'

# deduplication: x.1 == x/index.html
while IFS= read -r -d '' dir; do
  page="$(basename "$dir")"
  page="${page,,}" # convert to lowercase
  if [[ -f "$dir/index.html" ]]; then
    # The rename above also turned "-" into " ", so the copy of a hyphenated
    # page carries spaces; remove both spellings.
    rm -f "$dir/../${page}.1.copy.html" "$dir/../${page//-/ }.1.copy.html"
  fi
done < <(find . -mindepth 1 -type d -print0)

### 2.2 GENERATE PDF
# chromium writes output.pdf into the current directory, hence -execdir and the
# per-file move right afterwards.
find . -name '*.html' -print -execdir chromium --headless --disable-gpu --print-to-pdf '{}' ';' -execdir mv 'output.pdf' '{}.pdf' ';'
find . -name '*.pdf' -exec pdfjoin --outfile all.pdf '{}' +
mv all.pdf ../
cd ..

xdg-open all.pdf || echo "warning: could not open all.pdf with xdg-open" >&2
15 changes: 15 additions & 0 deletions remove-dynamic-content.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/* Overrides appended to the downloaded GitLab stylesheets. */

/* Hide every element matched by one of these selectors. */
.title-container,
.header-content,
.navbar,
.nav-sidebar,
.alert-wrapper,
.nav-controls,
.wiki-sidebar-header {
  display: none !important;
}

/* Drop the left padding of the page container. */
.page-with-contextual-sidebar {
  padding-left: 0 !important;
}

/* Disabled print page setup, kept for reference:
@media print {
  @page {
    size: 297mm 210mm;
    margin: 25mm;
    margin-right: 45mm;
  }
}
*/

/* Keep images from being wider than their container (and thus the page). */
.content img {
  max-width: 100%;
}

0 comments on commit f39cb1a

Please sign in to comment.