Skip to content

Commit

Permalink
Update to new dataset version
Browse files Browse the repository at this point in the history
  • Loading branch information
sbaltes committed Nov 9, 2020
1 parent a3f12b1 commit e65953e
Show file tree
Hide file tree
Showing 35 changed files with 513 additions and 1,260 deletions.
23 changes: 0 additions & 23 deletions sotorrent/export/1_export-to-csv.sh

This file was deleted.

44 changes: 44 additions & 0 deletions sotorrent/export/1_export.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Dumps one group of tables of the SOTorrent MySQL database into SQL files,
# one file per table, named $data_path/<group>/<table>.sql.
#
# Usage: 1_export.sh so-dump|sotorrent|gh-references
#
# Progress and error messages are printed to stdout and appended to $log_file,
# which is recreated on every run. The exit status is non-zero when the
# argument is not a known group or when at least one mysqldump call failed.

# NOTE(review): the password is hard-coded and handed to mysqldump as a
# command-line argument; consider moving it out of the script.
sotorrent_password="4ar7JKS2mfgGHiDA"
log_file="sotorrent.log"
sotorrent_db="sotorrent20_06"

# absolute path of the export directory, without trailing slash
# (plain slashes: the value is only used to build the file names that are
# passed to mysqldump, it is not part of a sed expression in this script)
data_path="E:/Temp" # Cygwin
#data_path="/tmp" # Linux

# Prints a message to stdout and appends it to the log file.
log() {
  printf '%s\n' "$*" | tee -a "$log_file"
}

# Dumps each given table into its own SQL file.
# Arguments: $1 - group name, used as subdirectory of $data_path
#            $2... - names of the tables to dump
# Returns:   0 if every mysqldump call succeeded, 1 otherwise
export_tables() {
  local group=$1
  shift
  local table
  local status=0

  log "Exporting $group tables..."
  for table in "$@"; do
    # keep going after a failure so that one broken table does not prevent
    # the export of the remaining ones; the failure is reported at the end
    if ! mysqldump -usotorrent "-p$sotorrent_password" \
      --default-character-set=utf8mb4 "$sotorrent_db" "$table" \
      -r "$data_path/$group/$table.sql"; then
      log "Export of table $table failed."
      status=1
    fi
  done
  return "$status"
}

# Entry point: selects the tables that belong to the requested group.
# Arguments: $1 - "so-dump", "sotorrent", or "gh-references"
main() {
  local status=0

  rm -f -- "$log_file"

  case "$1" in
    so-dump)
      export_tables "$1" Users Badges PostLinks Tags Votes Comments Posts \
        PostHistory || status=1
      ;;
    sotorrent)
      export_tables "$1" PostBlockDiff PostVersion PostBlockVersion \
        PostVersionUrl CommentUrl TitleVersion StackSnippetVersion PostViews \
        PostTags || status=1
      ;;
    gh-references)
      export_tables "$1" PostReferenceGH GHMatches GHCommits || status=1
      ;;
    *)
      log 'The first argument must be "so-dump", "sotorrent", or "gh-references".'
      return 1
      ;;
  esac

  log "Finished."
  return "$status"
}

main "$@"
22 changes: 0 additions & 22 deletions sotorrent/export/2_compress-csv.sh

This file was deleted.

20 changes: 20 additions & 0 deletions sotorrent/export/2_compress.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Compresses the SQL files of one table group with 7z and removes each SQL
# file once its archive has been created.
#
# Usage: 2_compress.sh so-dump|sotorrent|gh-references
#
# The files are looked up in ./<group>/ relative to the current directory.
# The exit status is non-zero when the argument is not a known group or when
# compressing or removing at least one file failed.

# Compresses every *.sql file in ./<group>/ into <file>.sql.7z.
# Arguments: $1 - group name, i.e. the subdirectory holding the SQL files
# Returns:   0 if all files were compressed and removed, 1 otherwise
compress_group() {
  local group=$1
  local file
  local status=0

  echo "Compressing $group SQL files..."
  for file in "./$group"/*.sql; do
    # without any match the pattern stays literal; do not pass it to 7z
    [ -e "$file" ] || continue

    # only delete the SQL file after 7z reported success, otherwise a failed
    # compression would destroy the only copy of the data
    if 7z a "$file.7z" "$file" "-o./$group/"; then
      rm -- "$file" || status=1
    else
      echo "Compressing $file failed, keeping the SQL file."
      status=1
    fi
  done
  return "$status"
}

# Entry point: validates the group name and compresses that group.
# Arguments: $1 - "so-dump", "sotorrent", or "gh-references"
main() {
  case "$1" in
    so-dump | sotorrent | gh-references)
      compress_group "$1"
      ;;
    *)
      echo 'The first argument must be "so-dump", "sotorrent", or "gh-references".'
      return 1
      ;;
  esac
}

main "$@"
156 changes: 0 additions & 156 deletions sotorrent/export/sql/export_so-dump.sql

This file was deleted.

93 changes: 0 additions & 93 deletions sotorrent/export/sql/export_sotorrent.sql

This file was deleted.

Loading

0 comments on commit e65953e

Please sign in to comment.