Merge pull request #5 from alan-turing-institute/main
Merging development
J-A-Ha authored Aug 11, 2024
2 parents 2c90b48 + 76f8f88 commit c997c62
Showing 54 changed files with 159 additions and 146 deletions.
Binary file modified .DS_Store
11 changes: 7 additions & 4 deletions README.md
@@ -1,7 +1,3 @@
----
--title: 'Academic Review Tool (ART)'
----
-
 Academic Review Tool (ART)
 ===
 
@@ -41,6 +37,13 @@ The tool is object-oriented. It leverages Pandas, Numpy, iGraph, and other stand
 
 ART can read and write your results to a large variety of file types (e.g. .xlsx, .csv, .txt, .json, .graphML).
 
+## Installation
+
+To install using PyPi, run the following code in your command interface:
+```bash
+pip install academic-review-tool
+```
+
 ## Beginners Guide
 
 
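With this change the README gains an install section; the only other edit is dropping the YAML front matter. A hedged smoke test of the published package (the imported names come from the art/__init__.py diff below; anything beyond importing them is not shown in this commit):

```python
# Hedged smoke test after `pip install academic-review-tool`.
# Review, Results, and References are exported by art/__init__.py (see below);
# how they are used is an assumption, so this only checks that imports resolve.
import art
from art import Review, Results, References

print(art.__name__)  # prints 'art' if the install and imports succeeded
```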
Binary file modified art/.DS_Store
4 changes: 2 additions & 2 deletions art/__init__.py
@@ -32,11 +32,11 @@
 
 from .importers.crossref import lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
 from .importers.crossref import search_works as search_crossref
-from .importers.wos import search as search_wos
+# from .importers.wos import search as search_wos
 from .importers.scopus import search as search_scopus, lookup as lookup_scopus
 from .importers.orcid import lookup_orcid, search as search_orcid
 from .importers.search import search as api_search
-from .importers import pdf, orcid, crossref, scopus, jstor, wos
+# from .importers import pdf, orcid, crossref, scopus, jstor, wos
 from .classes import Results, References, Author, Authors, Funder, Funders, Affiliation, Affiliations, Review
 from .classes.networks import Network, Networks
 from .classes.citation_crawler import academic_scraper as scrape
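The two commented lines above disable the Web of Science importer at import time; the Crossref, Scopus, and ORCID entry points remain exported. A hedged sketch of one surviving export (the doi and timeout parameters mirror the Review.lookup_doi wrapper visible in the review.py diff below; the DOI string is a placeholder):

```python
# Hedged sketch: Crossref lookup via the package-level export kept above.
# The (doi, timeout) parameters mirror Review.lookup_doi in review.py below;
# the DOI value is a placeholder, not a real record.
from art import lookup_doi

record = lookup_doi(doi='10.1000/example-doi', timeout=60)
print(record)
```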
Binary file removed art/__pycache__/__init__.cpython-39.pyc
Binary file removed art/classes/__pycache__/__init__.cpython-39.pyc
Binary file removed art/classes/__pycache__/activitylog.cpython-39.pyc
Binary file removed art/classes/__pycache__/affiliations.cpython-39.pyc
Binary file removed art/classes/__pycache__/attrs.cpython-39.pyc
Binary file removed art/classes/__pycache__/authors.cpython-39.pyc
Binary file removed art/classes/__pycache__/entities.cpython-39.pyc
Binary file removed art/classes/__pycache__/funders.cpython-39.pyc
Binary file removed art/classes/__pycache__/networks.cpython-39.pyc
Binary file removed art/classes/__pycache__/properties.cpython-39.pyc
Binary file removed art/classes/__pycache__/references.cpython-39.pyc
Binary file removed art/classes/__pycache__/results.cpython-39.pyc
Binary file removed art/classes/__pycache__/review.cpython-39.pyc
2 changes: 1 addition & 1 deletion art/classes/results.py
@@ -31,7 +31,7 @@ def generate_work_id(work_data: pd.Series):
 
     else:
         if '.Authors' in auths_type_str:
-            work_data['authors'] = work_data['authors'].all['full_name'].sort_values().to_list()
+            work_data['authors'] = work_data['authors'].summary['full_name'].sort_values().to_list()
 
 
     work_data = work_data.astype(str).str.lower()
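The one-line fix swaps the accessor used to flatten an Authors object into a sortable list of names, from `.all` to `.summary`. The surrounding normalisation is plain pandas; a self-contained sketch of the same sort-then-lowercase step, with hypothetical data:

```python
# Self-contained pandas sketch of the normalisation in generate_work_id:
# sort the author names so ordering cannot change the ID, then lowercase
# every field. The record below is hypothetical.
import pandas as pd

work_data = pd.Series({
    'title': 'An Example Paper',
    'authors': ['Smith, Jane', 'Doe, John'],
})

# Mirrors the changed line: a sorted list of full names.
work_data['authors'] = pd.Series(work_data['authors']).sort_values().to_list()

# Mirrors the following line in the diff: stringify and lowercase everything.
work_data = work_data.astype(str).str.lower()
print(work_data.to_list())
```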
174 changes: 87 additions & 87 deletions art/classes/review.py
@@ -6,7 +6,7 @@
 from ..importers.crossref import search_works, lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
 from ..importers.crossref import query_builder as crossref_query_builder
 from ..importers.scopus import query_builder as scopus_query_builder, search as search_scopus, lookup as lookup_scopus
-from ..importers.wos import search as search_wos, query_builder as wos_query_builder
+# from ..importers.wos import search as search_wos, query_builder as wos_query_builder
 from ..importers.search import search as api_search
 
 from ..internet.scrapers import scrape_article, scrape_doi, scrape_google_scholar, scrape_google_scholar_search
@@ -1778,92 +1778,92 @@ def search_scopus(self,
 
         return df
 
-    def search_wos(self,
-                   all_fields = None,
-                   title = None,
-                   year = None,
-                   author = None,
-                   author_identifier = None,
-                   affiliation = None,
-                   doctype = None,
-                   doi = None,
-                   issn = None,
-                   isbn = None,
-                   pubmed_id = None,
-                   source_title = None,
-                   volume = None,
-                   page = None,
-                   issue = None,
-                   topics = None,
-                   default_operator = 'AND',
-                   database: str = 'WOK',
-                   limit: int = 10,
-                   page_limit: int = 1,
-                   sort_field: str = 'RS+D',
-                   modified_time_span = None,
-                   tc_modified_time_span = None,
-                   detail = None,
-                   add_to_results = False,
-                   drop_duplicates = False,
-                   drop_empty_rows = False
-                   ):
-
-        df = search_wos(
-                        all_fields = all_fields,
-                        title = title,
-                        year = year,
-                        author = author,
-                        author_identifier = author_identifier,
-                        affiliation = affiliation,
-                        doctype = doctype,
-                        doi = doi,
-                        issn = issn,
-                        isbn = isbn,
-                        pubmed_id = pubmed_id,
-                        source_title = source_title,
-                        volume = volume,
-                        page = page,
-                        issue = issue,
-                        topics = topics,
-                        default_operator = default_operator,
-                        database = database,
-                        limit = limit,
-                        page_limit = page_limit,
-                        sort_field = sort_field,
-                        modified_time_span = modified_time_span,
-                        tc_modified_time_span = tc_modified_time_span,
-                        detail = detail
-                        )
-
-        for c in df.columns:
-            if c not in self.results.columns:
-                df = df.drop(c, axis=1)
-
-        if add_to_results == True:
+    # def search_wos(self,
+    #                all_fields = None,
+    #                title = None,
+    #                year = None,
+    #                author = None,
+    #                author_identifier = None,
+    #                affiliation = None,
+    #                doctype = None,
+    #                doi = None,
+    #                issn = None,
+    #                isbn = None,
+    #                pubmed_id = None,
+    #                source_title = None,
+    #                volume = None,
+    #                page = None,
+    #                issue = None,
+    #                topics = None,
+    #                default_operator = 'AND',
+    #                database: str = 'WOK',
+    #                limit: int = 10,
+    #                page_limit: int = 1,
+    #                sort_field: str = 'RS+D',
+    #                modified_time_span = None,
+    #                tc_modified_time_span = None,
+    #                detail = None,
+    #                add_to_results = False,
+    #                drop_duplicates = False,
+    #                drop_empty_rows = False
+    #                ):
+
+    #     df = search_wos(
+    #                     all_fields = all_fields,
+    #                     title = title,
+    #                     year = year,
+    #                     author = author,
+    #                     author_identifier = author_identifier,
+    #                     affiliation = affiliation,
+    #                     doctype = doctype,
+    #                     doi = doi,
+    #                     issn = issn,
+    #                     isbn = isbn,
+    #                     pubmed_id = pubmed_id,
+    #                     source_title = source_title,
+    #                     volume = volume,
+    #                     page = page,
+    #                     issue = issue,
+    #                     topics = topics,
+    #                     default_operator = default_operator,
+    #                     database = database,
+    #                     limit = limit,
+    #                     page_limit = page_limit,
+    #                     sort_field = sort_field,
+    #                     modified_time_span = modified_time_span,
+    #                     tc_modified_time_span = tc_modified_time_span,
+    #                     detail = detail
+    #                     )
+
+    #     for c in df.columns:
+    #         if c not in self.results.columns:
+    #             df = df.drop(c, axis=1)
+
+    #     if add_to_results == True:
 
-            query = wos_query_builder(all_fields = all_fields,
-                                      title = title,
-                                      year = year,
-                                      author = author,
-                                      author_identifier = author_identifier,
-                                      affiliation = affiliation,
-                                      doctype = doctype,
-                                      doi = doi,
-                                      issn = issn,
-                                      isbn = isbn,
-                                      pubmed_id = pubmed_id,
-                                      source_title = source_title,
-                                      volume = volume,
-                                      page = page,
-                                      issue = issue,
-                                      topics = topics,
-                                      default_operator = default_operator)
-
-            self.activity_log.add_activity(type='API search', activity='searched World of Science and added to results', location=['results'], database=database, query=query)
-            self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore
-
-
-        return df
+        #     query = wos_query_builder(all_fields = all_fields,
+        #                               title = title,
+        #                               year = year,
+        #                               author = author,
+        #                               author_identifier = author_identifier,
+        #                               affiliation = affiliation,
+        #                               doctype = doctype,
+        #                               doi = doi,
+        #                               issn = issn,
+        #                               isbn = isbn,
+        #                               pubmed_id = pubmed_id,
+        #                               source_title = source_title,
+        #                               volume = volume,
+        #                               page = page,
+        #                               issue = issue,
+        #                               topics = topics,
+        #                               default_operator = default_operator)
+
+        #     self.activity_log.add_activity(type='API search', activity='searched World of Science and added to results', location=['results'], database=database, query=query)
+        #     self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore
+
+
+        # return df
 
     def lookup_doi(self, doi = 'request_input', timeout = 60):
         return lookup_doi(doi=doi, timeout=timeout)
@@ -2221,7 +2221,7 @@ def api_search(self,
                    timeout = 60,
                    crossref = True,
                    scopus = True,
-                   wos = True,
+                   wos = False,
                    add_to_results = False):
 
         df = api_search(default_query = default_query,
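This hunk flips the api_search default so Web of Science is skipped unless explicitly requested, matching the importer being commented out above. A hedged sketch of a call under the new default (only the keyword names are taken from the signature above; the Review constructor and the query string are assumptions):

```python
# Hedged sketch: under the new default, api_search hits Crossref and Scopus only.
from art import Review

review = Review()  # assumed default constructor; not shown in this diff
df = review.api_search(
    default_query='systematic review methods',  # placeholder query
    timeout=60,
    crossref=True,
    scopus=True,
    wos=False,            # new default in this commit; pass True to opt back in
    add_to_results=False,
)
```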
Binary file modified art/datasets/.DS_Store
24 changes: 12 additions & 12 deletions art/datasets/__init__.py
@@ -8,60 +8,60 @@
 import json
 from pathlib import Path
 
-here = Path(__file__).parent
+here = str(Path(__file__).parent)
 
-with open(f'{here}/names/all_personal_names.txt', 'r') as file:
+with open(f'{here}/names/all_personal_names.txt', 'r', encoding='utf-8') as file:
     all_personal_names = file.read()
     file.close()
 all_personal_names = all_personal_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/first_names.txt', 'r') as file:
+with open(f'{here}/names/first_names.txt', 'r', encoding='utf-8') as file:
     first_names = file.read()
     file.close()
 first_names = first_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/last_names.txt', 'r') as file:
+with open(f'{here}/names/last_names.txt', 'r', encoding='utf-8') as file:
     last_names = file.read()
     file.close()
 last_names = last_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/nltk_names.txt', 'r') as file:
+with open(f'{here}/names/nltk_names.txt', 'r', encoding='ascii') as file:
     nltk_names = file.read()
     file.close()
 nltk_names = nltk_names.replace("'", "").split(', ')
 
 # Corpus extracted from country_list module. Stored locally for efficiency.
 
-with open(f'{here}/countries/countries_all.txt', 'r') as file:
+with open(f'{here}/countries/countries_all.txt', 'r', encoding='utf-8') as file:
     countries_all = file.read()
     file.close()
 countries_all = countries_all.replace("'", "").split(', ')
 
-with open(f'{here}/countries/country_names.json', 'r') as file:
+with open(f'{here}/countries/country_names.json', 'r', encoding='utf-8') as file:
     country_names = json.load(file)
     file.close()
 
 # Corpus extracted from geonamescache module. Stored locally for efficiency.
-with open(f'{here}/cities/cities_all.txt', 'r') as file:
+with open(f'{here}/cities/cities_all.txt', 'r', encoding='utf-8') as file:
     cities_all = file.read()
     file.close()
 cities_all = cities_all.replace("'", "").split(', ')
 
-with open(f'{here}/cities/cities_en.json', 'r') as file:
+with open(f'{here}/cities/cities_en.json', 'r', encoding='ascii') as file:
     cities_en = json.load(file)
     file.close()
 
 # Corpus extracted from language_data and langcodes modules. Stored locally for efficiency.
 
-with open(f'{here}/languages/language_names.json', 'r') as file:
+with open(f'{here}/languages/language_names.json', 'r', encoding='ascii') as file:
     language_names = json.load(file)
     file.close()
 
-with open(f'{here}/languages/languages_en.json', 'r') as file:
+with open(f'{here}/languages/languages_en.json', 'r', encoding='ascii') as file:
     languages_en = json.load(file)
     file.close()
 
-with open(f'{here}/languages/language_codes.txt', 'r') as file:
+with open(f'{here}/languages/language_codes.txt', 'r', encoding='ascii') as file:
     language_codes = file.read()
     file.close()
 
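Two changes repeat through this file: `here` becomes a string so it can be interpolated into the f-string paths, and every `open()` gains an explicit encoding so the corpora parse identically across platforms. A self-contained sketch of the pattern (the corpus file name is hypothetical):

```python
# Sketch of the package-relative loading pattern adopted above: resolve paths
# against this module's directory and always pass an explicit encoding.
# 'example_corpus.txt' is a hypothetical file name.
from pathlib import Path

here = str(Path(__file__).parent)

with open(f'{here}/example_corpus.txt', 'r', encoding='utf-8') as file:
    corpus = file.read()

# Same post-processing as the corpora above: strip quotes, split on ', '.
corpus = corpus.replace("'", "").split(', ')
```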
Binary file removed art/datasets/__pycache__/__init__.cpython-39.pyc
13 changes: 8 additions & 5 deletions art/datasets/stopwords/stopwords.py
@@ -1,25 +1,28 @@
-from nltk import download
+from pathlib import Path
+from nltk import download # type: ignore
 import pandas as pd
 
 # Importing Stopwords corpus as an NLTK text
 try:
-    from nltk.corpus import stopwords as nltk_stopwords
+    from nltk.corpus import stopwords as nltk_stopwords # type: ignore
     nltk_stopwords.words()
 except:
     download('stopwords')
-    from nltk.corpus import stopwords as nltk_stopwords
+    from nltk.corpus import stopwords as nltk_stopwords # type: ignore
 
 nltk_stopwords = list(nltk_stopwords.words())
 
-with open('/Users/jhancock/Documents/Tool_dev/Investigative_data_analyser/Development/Current/idea/datasets/stopwords/en_stopwords.txt', 'r') as file:
+here = str(Path(__file__).parent)
+
+with open(f'{here}/en_stopwords.txt', 'r', encoding='ascii') as file:
     en_stopwords = file.read()
     file.close()
 en_stopwords = en_stopwords.replace("'", "").split(', ')
 
 en_stopwords_lower = pd.Series(en_stopwords).str.lower().to_list()
 en_stopwords = list(set(en_stopwords_lower + en_stopwords))
 
-with open('/Users/jhancock/Documents/Tool_dev/Investigative_data_analyser/Development/Current/idea/datasets/stopwords/html_stopwords.txt', 'r') as file:
+with open(f'{here}/html_stopwords.txt', 'r', encoding='utf-8') as file:
     html_stopwords = file.read()
     file.close()
 html_stopwords = html_stopwords.replace("'", "").split(', ')
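The substantive fix here replaces the developer's hardcoded absolute paths with package-relative ones; the download-on-demand guard for the NLTK corpus is unchanged. A minimal standalone version of that guard (the diff keeps a bare `except:`; this sketch narrows it to `LookupError`, which is what NLTK raises for a missing corpus):

```python
# Minimal sketch of the download-on-demand guard above: try the corpus,
# fetch it once if missing, then re-import.
from nltk import download  # type: ignore

try:
    from nltk.corpus import stopwords  # type: ignore
    stopwords.words()
except LookupError:
    download('stopwords')
    from nltk.corpus import stopwords  # type: ignore

print(len(stopwords.words('english')))  # list size varies by NLTK release
```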
Binary file removed art/exporters/__pycache__/__init__.cpython-39.pyc
Binary file removed art/importers/__pycache__/__init__.cpython-39.pyc
Binary file removed art/importers/__pycache__/bibtex.cpython-39.pyc
Binary file removed art/importers/__pycache__/crossref.cpython-39.pyc
Binary file removed art/importers/__pycache__/jstor.cpython-39.pyc
Binary file removed art/importers/__pycache__/orcid.cpython-39.pyc
Binary file removed art/importers/__pycache__/pdf.cpython-39.pyc
Binary file removed art/importers/__pycache__/scopus.cpython-39.pyc
Binary file removed art/importers/__pycache__/search.cpython-39.pyc
Binary file removed art/importers/__pycache__/wos.cpython-39.pyc
2 changes: 1 addition & 1 deletion art/importers/scopus.py
@@ -8,7 +8,7 @@
 import pybliometrics # type: ignore
 
 blockPrint()
-pybliometrics.scopus.create_config(keys = [api_key])
+pybliometrics.scopus.init(keys = [api_key])
 enablePrint()
 
 from pybliometrics.scopus import AbstractRetrieval, ScopusSearch # type: ignore
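This tracks pybliometrics 4.x, which replaced `create_config()` with an explicit `init()` that must run before any Scopus class is constructed. A hedged sketch of the initialise-then-search flow (the API key and query are placeholders; `keys=` is the keyword the diff itself uses):

```python
# Hedged sketch: initialise pybliometrics before constructing Scopus classes,
# as the updated import-time code above does. Key and query are placeholders.
import pybliometrics  # type: ignore

pybliometrics.scopus.init(keys=['YOUR-SCOPUS-API-KEY'])

from pybliometrics.scopus import ScopusSearch  # type: ignore

search = ScopusSearch('TITLE-ABS-KEY(systematic review)')
print(search.get_results_size())
```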
