forked from academicpages/academicpages.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Ability to generate publications from bibtex files.
Initial pass to generate publications from .bib files.
- Loading branch information
1 parent
072b90b
commit 642ab57
Showing
2 changed files
with
383 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Publications markdown generator for academicpages\n", | ||
"\n", | ||
"Takes a set of BibTeX publication files and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). \n", | ||
"\n", | ||
"The core python code is also in `pubsFromBibs.py`. \n", | ||
"Run either from the `markdown_generator` folder after updating the `publist` dictionary with:\n", | ||
"* bib file names\n", | ||
"* specific venue keys based on your bib file preferences\n", | ||
"* any specific pre-text for specific files\n", | ||
"* Collection Name (future feature)\n", | ||
"\n", | ||
"TODO: Make this work with other databases of citations, \n", | ||
"TODO: Merge this with the existing TSV parsing solution" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from pybtex.database.input import bibtex\n", | ||
"import pybtex.database.input.bibtex \n", | ||
"from time import strptime\n", | ||
"import string\n", | ||
"import html\n", | ||
"import os\n", | ||
"import re" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#todo: incorporate different collection types rather than a catch all publications, requires other changes to template\n", | ||
"publist = {\n", | ||
" \"proceeding\": {\n", | ||
" \"file\" : \"proceedings.bib\",\n", | ||
" \"venuekey\": \"booktitle\",\n", | ||
" \"venue-pretext\": \"In the proceedings of \",\n", | ||
" \"collection\" : {\"name\":\"publications\",\n", | ||
" \"permalink\":\"/publication/\"}\n", | ||
" \n", | ||
" },\n", | ||
" \"journal\":{\n", | ||
" \"file\": \"pubs.bib\",\n", | ||
" \"venuekey\" : \"journal\",\n", | ||
" \"venue-pretext\" : \"\",\n", | ||
" \"collection\" : {\"name\":\"publications\",\n", | ||
" \"permalink\":\"/publication/\"}\n", | ||
" } \n", | ||
"}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"html_escape_table = {\n", | ||
" \"&\": \"&amp;\",\n", | ||
" '\"': \"&quot;\",\n", | ||
" \"'\": \"&apos;\"\n", | ||
" }\n", | ||
"\n", | ||
"def html_escape(text):\n", | ||
" \"\"\"Produce entities within text.\"\"\"\n", | ||
" return \"\".join(html_escape_table.get(c,c) for c in text)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"scrolled": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"for pubsource in publist:\n", | ||
" parser = bibtex.Parser()\n", | ||
" bibdata = parser.parse_file(publist[pubsource][\"file\"])\n", | ||
"\n", | ||
" #loop through the individual references in a given bibtex file\n", | ||
" for bib_id in bibdata.entries:\n", | ||
" #reset default date\n", | ||
" pub_year = \"1900\"\n", | ||
" pub_month = \"01\"\n", | ||
" pub_day = \"01\"\n", | ||
" \n", | ||
" b = bibdata.entries[bib_id].fields\n", | ||
" \n", | ||
" try:\n", | ||
" pub_year = f'{b[\"year\"]}'\n", | ||
"\n", | ||
" #todo: this hack for month and day needs some cleanup\n", | ||
" if \"month\" in b.keys(): \n", | ||
" if(len(b[\"month\"])<3):\n", | ||
" pub_month = \"0\"+b[\"month\"]\n", | ||
" pub_month = pub_month[-2:]\n", | ||
" elif(b[\"month\"] not in range(12)):\n", | ||
" tmnth = strptime(b[\"month\"][:3],'%b').tm_mon \n", | ||
" pub_month = \"{:02d}\".format(tmnth) \n", | ||
" else:\n", | ||
" pub_month = str(b[\"month\"])\n", | ||
" if \"day\" in b.keys(): \n", | ||
" pub_day = str(b[\"day\"])\n", | ||
"\n", | ||
" \n", | ||
" pub_date = pub_year+\"-\"+pub_month+\"-\"+pub_day\n", | ||
" \n", | ||
" #strip out {} as needed (some bibtex entries that maintain formatting)\n", | ||
" clean_title = b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\").replace(\" \",\"-\") \n", | ||
"\n", | ||
" url_slug = re.sub(\"\\\\[.*\\\\]|[^a-zA-Z0-9_-]\", \"\", clean_title)\n", | ||
" url_slug = url_slug.replace(\"--\",\"-\")\n", | ||
"\n", | ||
" md_filename = (str(pub_date) + \"-\" + url_slug + \".md\").replace(\"--\",\"-\")\n", | ||
" html_filename = (str(pub_date) + \"-\" + url_slug).replace(\"--\",\"-\")\n", | ||
"\n", | ||
" #Build Citation from text\n", | ||
" citation = \"\"\n", | ||
"\n", | ||
" #citation authors - todo - add highlighting for primary author?\n", | ||
" for author in bibdata.entries[bib_id].persons[\"author\"]:\n", | ||
" citation = citation+\" \"+author.first_names[0]+\" \"+author.last_names[0]+\", \"\n", | ||
"\n", | ||
" #citation title\n", | ||
" citation = citation + \"\\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + \".\\\"\"\n", | ||
"\n", | ||
" #add venue logic depending on citation type\n", | ||
" venue = publist[pubsource][\"venue-pretext\"]+b[publist[pubsource][\"venuekey\"]].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")\n", | ||
"\n", | ||
" citation = citation + \" \" + html_escape(venue)\n", | ||
" citation = citation + \", \" + pub_year + \".\"\n", | ||
"\n", | ||
" \n", | ||
" ## YAML variables\n", | ||
" md = \"---\\ntitle: \\\"\" + html_escape(b[\"title\"].replace(\"{\", \"\").replace(\"}\",\"\").replace(\"\\\\\",\"\")) + '\"\\n'\n", | ||
" \n", | ||
" md += \"\"\"collection: \"\"\" + publist[pubsource][\"collection\"][\"name\"]\n", | ||
"\n", | ||
" md += \"\"\"\\npermalink: \"\"\" + publist[pubsource][\"collection\"][\"permalink\"] + html_filename\n", | ||
" \n", | ||
" note = False\n", | ||
" if \"note\" in b.keys():\n", | ||
" if len(str(b[\"note\"])) > 5:\n", | ||
" md += \"\\nexcerpt: '\" + html_escape(b[\"note\"]) + \"'\"\n", | ||
" note = True\n", | ||
"\n", | ||
" md += \"\\ndate: \" + str(pub_date) \n", | ||
"\n", | ||
" md += \"\\nvenue: '\" + html_escape(venue) + \"'\"\n", | ||
" \n", | ||
" url = False\n", | ||
" if \"url\" in b.keys():\n", | ||
" if len(str(b[\"url\"])) > 5:\n", | ||
" md += \"\\npaperurl: '\" + b[\"url\"] + \"'\"\n", | ||
" url = True\n", | ||
"\n", | ||
" md += \"\\ncitation: '\" + html_escape(citation) + \"'\"\n", | ||
"\n", | ||
" md += \"\\n---\"\n", | ||
"\n", | ||
" \n", | ||
" ## Markdown description for individual page\n", | ||
" if note:\n", | ||
" md += \"\\n\" + html_escape(b[\"note\"]) + \"\\n\"\n", | ||
"\n", | ||
" if url:\n", | ||
" md += \"\\n[Access paper here](\" + b[\"url\"] + \"){:target=\\\"_blank\\\"}\\n\" \n", | ||
" else:\n", | ||
" md += \"\\nUse [Google Scholar](https://scholar.google.com/scholar?q=\"+html.escape(clean_title.replace(\"-\",\"+\"))+\"){:target=\\\"_blank\\\"} for full citation\"\n", | ||
"\n", | ||
" md_filename = os.path.basename(md_filename)\n", | ||
"\n", | ||
" with open(\"../_publications/\" + md_filename, 'w') as f:\n", | ||
" f.write(md)\n", | ||
" print(f'SUCESSFULLY PARSED {bib_id}: \\\"', b[\"title\"][:60],\"...\"*(len(b['title'])>60),\"\\\"\")\n", | ||
" # field may not exist for a reference\n", | ||
" except KeyError as e:\n", | ||
" print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \\\"', b[\"title\"][:30],\"...\"*(len(b['title'])>30),\"\\\"\")\n", | ||
" continue\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.7.1" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# # Publications markdown generator for academicpages | ||
# | ||
# Takes a set of BibTeX publication files and converts them for use with [academicpages.github.io](academicpages.github.io). This is an interactive Jupyter notebook ([see more info here](http://jupyter-notebook-beginner-guide.readthedocs.io/en/latest/what_is_jupyter.html)). | ||
# | ||
# The core python code is also in `pubsFromBibs.py`. | ||
# Run either from the `markdown_generator` folder after updating the `publist` dictionary with: | ||
# * bib file names | ||
# * specific venue keys based on your bib file preferences | ||
# * any specific pre-text for specific files | ||
# * Collection Name (future feature) | ||
# | ||
# TODO: Make this work with other databases of citations, | ||
# TODO: Merge this with the existing TSV parsing solution | ||
|
||
|
||
from pybtex.database.input import bibtex | ||
import pybtex.database.input.bibtex | ||
from time import strptime | ||
import string | ||
import html | ||
import os | ||
import re | ||
|
||
# Configuration for every BibTeX source to convert, keyed by publication type.
# Each entry holds:
#   * "file":          the .bib file handed to the pybtex parser
#   * "venuekey":      the BibTeX field whose value is used as the venue
#   * "venue-pretext": text placed in front of the venue
#   * "collection":    the collection name and the permalink prefix written
#                      into the generated front matter
# TODO: incorporate different collection types rather than a catch-all
# "publications" collection; this requires other changes to the template.
publist = {
    "proceeding": {
        "file": "proceedings.bib",
        "venuekey": "booktitle",
        "venue-pretext": "In the proceedings of ",
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
    "journal": {
        "file": "pubs.bib",
        "venuekey": "journal",
        "venue-pretext": "",
        "collection": {
            "name": "publications",
            "permalink": "/publication/",
        },
    },
}
|
||
# Characters that are replaced by HTML entities before being written into the
# generated front matter. The quote characters matter because the values are
# emitted inside quoted YAML strings.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return *text* with ``&``, ``"`` and ``'`` replaced by HTML entities.

    Characters that are not keys of ``html_escape_table`` are copied through
    unchanged, so an empty string yields an empty string.
    """
    return "".join(html_escape_table.get(c, c) for c in text)
|
||
|
||
def _strip_bibtex_markup(text):
    """Return *text* with BibTeX braces and backslashes removed."""
    return text.replace("{", "").replace("}", "").replace("\\", "")


def _month_number(raw_month):
    """Return the zero-padded month number for a BibTeX ``month`` value.

    Accepts either a numeric month ("3", "03", "11") or a month name whose
    first three characters ``strptime`` can parse with ``%b`` ("mar", "March").

    Raises:
        ValueError: if the value is neither, or a numeric month outside 1-12.
    """
    month = str(raw_month).strip()
    if month.isdigit():
        number = int(month)
        if not 1 <= number <= 12:
            raise ValueError(f"month out of range: {raw_month!r}")
    else:
        number = strptime(month[:3], "%b").tm_mon
    return f"{number:02d}"


# Convert every entry of every configured BibTeX file into one markdown file
# (YAML front matter plus a short body) under ../_publications/.
for pubsource in publist:
    source = publist[pubsource]
    parser = bibtex.Parser()
    bibdata = parser.parse_file(source["file"])

    # loop through the individual references in a given bibtex file
    for bib_id in bibdata.entries:
        # reset default date
        pub_year = "1900"
        pub_month = "01"
        pub_day = "01"

        b = bibdata.entries[bib_id].fields

        try:
            pub_year = f'{b["year"]}'

            if "month" in b:
                try:
                    pub_month = _month_number(b["month"])
                except ValueError:
                    # Keep the default month rather than aborting the run on
                    # a month value that cannot be interpreted.
                    print(f'WARNING Unrecognized month {b["month"]!r} in entry {bib_id}; using {pub_month}')
                if "day" in b:
                    # Zero-pad so the date stays in YYYY-MM-DD form.
                    pub_day = str(b["day"]).strip().zfill(2)

            pub_date = pub_year + "-" + pub_month + "-" + pub_day

            # strip out {} as needed (some bibtex entries that maintain formatting)
            plain_title = _strip_bibtex_markup(b["title"])
            clean_title = plain_title.replace(" ", "-")

            url_slug = re.sub(r"\[.*\]|[^a-zA-Z0-9_-]", "", clean_title)
            url_slug = url_slug.replace("--", "-")

            md_filename = (str(pub_date) + "-" + url_slug + ".md").replace("--", "-")
            html_filename = (str(pub_date) + "-" + url_slug).replace("--", "-")

            # Build Citation from text
            citation = ""

            # citation authors - todo - add highlighting for primary author?
            for author in bibdata.entries[bib_id].persons["author"]:
                # Slicing instead of [0] tolerates authors that lack a first
                # or last name (e.g. single-name or institutional authors).
                name_parts = author.first_names[:1] + author.last_names[:1]
                citation = citation + " " + " ".join(name_parts) + ", "

            # citation title
            citation = citation + "\"" + html_escape(plain_title) + ".\""

            # add venue logic depending on citation type
            venue = source["venue-pretext"] + _strip_bibtex_markup(b[source["venuekey"]])

            citation = citation + " " + html_escape(venue)
            citation = citation + ", " + pub_year + "."

            ## YAML variables
            md = "---\ntitle: \"" + html_escape(plain_title) + '"\n'

            md += """collection: """ + source["collection"]["name"]

            md += """\npermalink: """ + source["collection"]["permalink"] + html_filename

            # Notes of five characters or fewer are ignored.
            note = False
            if "note" in b:
                if len(str(b["note"])) > 5:
                    md += "\nexcerpt: '" + html_escape(b["note"]) + "'"
                    note = True

            md += "\ndate: " + str(pub_date)

            md += "\nvenue: '" + html_escape(venue) + "'"

            # URLs of five characters or fewer are ignored.
            url = False
            if "url" in b:
                if len(str(b["url"])) > 5:
                    md += "\npaperurl: '" + b["url"] + "'"
                    url = True

            md += "\ncitation: '" + html_escape(citation) + "'"

            md += "\n---"

            ## Markdown description for individual page
            if note:
                md += "\n" + html_escape(b["note"]) + "\n"

            if url:
                md += "\n[Access paper here](" + b["url"] + "){:target=\"_blank\"}\n"
            else:
                md += "\nUse [Google Scholar](https://scholar.google.com/scholar?q="+html.escape(clean_title.replace("-","+"))+"){:target=\"_blank\"} for full citation"

            md_filename = os.path.basename(md_filename)

            # Explicit encoding so non-ASCII titles are written the same way
            # regardless of the platform's default encoding.
            with open("../_publications/" + md_filename, 'w', encoding="utf-8") as f:
                f.write(md)
            print(f'SUCESSFULLY PARSED {bib_id}: \"', b["title"][:60],"..."*(len(b['title'])>60),"\"")
        # field may not exist for a reference
        except KeyError as e:
            # The missing field may be the title itself, so do not index it
            # unconditionally while reporting the problem.
            title = b["title"] if "title" in b else ""
            print(f'WARNING Missing Expected Field {e} from entry {bib_id}: \"', title[:30],"..."*(len(title)>30),"\"")
            continue