Merge pull request #5 from alan-turing-institute/main
Merging development
J-A-Ha authored Aug 11, 2024
2 parents 2c90b48 + 76f8f88 commit c997c62
Showing 54 changed files with 159 additions and 146 deletions.
Binary file modified .DS_Store
11 changes: 7 additions & 4 deletions README.md
@@ -1,7 +1,3 @@
----
--title: 'Academic Review Tool (ART)'
----
-
 Academic Review Tool (ART)
 ===
 
@@ -41,6 +37,13 @@ The tool is object-oriented. It leverages Pandas, Numpy, iGraph, and other stand
 
 ART can read and write your results to a large variety of file types (e.g. .xlsx, .csv, .txt, .json, .graphML).
 
+## Installation
+
+To install using PyPi, run the following code in your command interface:
+```bash
+pip install academic-review-tool
+```
+
 ## Beginners Guide
 
 
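With this change the README gains an install section; the only other edit is dropping the YAML front matter. A hedged smoke test of the published package (the imported names come from the art/__init__.py diff below; anything beyond importing them is not shown in this commit):

```python
# Hedged smoke test after `pip install academic-review-tool`.
# Review, Results, and References are exported by art/__init__.py (see below);
# how they are used is an assumption, so this only checks that imports resolve.
import art
from art import Review, Results, References

print(art.__name__)  # prints 'art' if the install and imports succeeded
```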
Binary file modified art/.DS_Store
4 changes: 2 additions & 2 deletions art/__init__.py
@@ -32,11 +32,11 @@
 
 from .importers.crossref import lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
 from .importers.crossref import search_works as search_crossref
-from .importers.wos import search as search_wos
+# from .importers.wos import search as search_wos
 from .importers.scopus import search as search_scopus, lookup as lookup_scopus
 from .importers.orcid import lookup_orcid, search as search_orcid
 from .importers.search import search as api_search
-from .importers import pdf, orcid, crossref, scopus, jstor, wos
+# from .importers import pdf, orcid, crossref, scopus, jstor, wos
 from .classes import Results, References, Author, Authors, Funder, Funders, Affiliation, Affiliations, Review
 from .classes.networks import Network, Networks
 from .classes.citation_crawler import academic_scraper as scrape
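The two commented lines above disable the Web of Science importer at import time; the Crossref, Scopus, and ORCID entry points remain exported. A hedged sketch of one surviving export (the doi and timeout parameters mirror the Review.lookup_doi wrapper visible in the review.py diff below; the DOI string is a placeholder):

```python
# Hedged sketch: Crossref lookup via the package-level export kept above.
# The (doi, timeout) parameters mirror Review.lookup_doi in review.py below;
# the DOI value is a placeholder, not a real record.
from art import lookup_doi

record = lookup_doi(doi='10.1000/example-doi', timeout=60)
print(record)
```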
Binary file removed art/__pycache__/__init__.cpython-39.pyc
Binary file removed art/classes/__pycache__/__init__.cpython-39.pyc
Binary file removed art/classes/__pycache__/activitylog.cpython-39.pyc
Binary file removed art/classes/__pycache__/affiliations.cpython-39.pyc
Binary file removed art/classes/__pycache__/attrs.cpython-39.pyc
Binary file removed art/classes/__pycache__/authors.cpython-39.pyc
Binary file removed art/classes/__pycache__/entities.cpython-39.pyc
Binary file removed art/classes/__pycache__/funders.cpython-39.pyc
Binary file removed art/classes/__pycache__/networks.cpython-39.pyc
Binary file removed art/classes/__pycache__/properties.cpython-39.pyc
Binary file removed art/classes/__pycache__/references.cpython-39.pyc
Binary file removed art/classes/__pycache__/results.cpython-39.pyc
Binary file removed art/classes/__pycache__/review.cpython-39.pyc
2 changes: 1 addition & 1 deletion art/classes/results.py
@@ -31,7 +31,7 @@ def generate_work_id(work_data: pd.Series):
 
     else:
         if '.Authors' in auths_type_str:
-            work_data['authors'] = work_data['authors'].all['full_name'].sort_values().to_list()
+            work_data['authors'] = work_data['authors'].summary['full_name'].sort_values().to_list()
 
 
     work_data = work_data.astype(str).str.lower()
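The one-line fix swaps the accessor used to flatten an Authors object into a sortable list of names, from `.all` to `.summary`. The surrounding normalisation is plain pandas; a self-contained sketch of the same sort-then-lowercase step, with hypothetical data:

```python
# Self-contained pandas sketch of the normalisation in generate_work_id:
# sort the author names so ordering cannot change the ID, then lowercase
# every field. The record below is hypothetical.
import pandas as pd

work_data = pd.Series({
    'title': 'An Example Paper',
    'authors': ['Smith, Jane', 'Doe, John'],
})

# Mirrors the changed line: a sorted list of full names.
work_data['authors'] = pd.Series(work_data['authors']).sort_values().to_list()

# Mirrors the following line in the diff: stringify and lowercase everything.
work_data = work_data.astype(str).str.lower()
print(work_data.to_list())
```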
174 changes: 87 additions & 87 deletions art/classes/review.py
@@ -6,7 +6,7 @@
 from ..importers.crossref import search_works, lookup_doi, lookup_dois, lookup_journal, lookup_journals, search_journals, get_journal_entries, search_journal_entries, lookup_funder, lookup_funders, search_funders, get_funder_works, search_funder_works
 from ..importers.crossref import query_builder as crossref_query_builder
 from ..importers.scopus import query_builder as scopus_query_builder, search as search_scopus, lookup as lookup_scopus
-from ..importers.wos import search as search_wos, query_builder as wos_query_builder
+# from ..importers.wos import search as search_wos, query_builder as wos_query_builder
 from ..importers.search import search as api_search
 
 from ..internet.scrapers import scrape_article, scrape_doi, scrape_google_scholar, scrape_google_scholar_search
@@ -1778,92 +1778,92 @@ def search_scopus(self,
 
         return df
 
-    def search_wos(self,
-                   all_fields = None,
-                   title = None,
-                   year = None,
-                   author = None,
-                   author_identifier = None,
-                   affiliation = None,
-                   doctype = None,
-                   doi = None,
-                   issn = None,
-                   isbn = None,
-                   pubmed_id = None,
-                   source_title = None,
-                   volume = None,
-                   page = None,
-                   issue = None,
-                   topics = None,
-                   default_operator = 'AND',
-                   database: str = 'WOK',
-                   limit: int = 10,
-                   page_limit: int = 1,
-                   sort_field: str = 'RS+D',
-                   modified_time_span = None,
-                   tc_modified_time_span = None,
-                   detail = None,
-                   add_to_results = False,
-                   drop_duplicates = False,
-                   drop_empty_rows = False
-                   ):
-
-        df = search_wos(
-                        all_fields = all_fields,
-                        title = title,
-                        year = year,
-                        author = author,
-                        author_identifier = author_identifier,
-                        affiliation = affiliation,
-                        doctype = doctype,
-                        doi = doi,
-                        issn = issn,
-                        isbn = isbn,
-                        pubmed_id = pubmed_id,
-                        source_title = source_title,
-                        volume = volume,
-                        page = page,
-                        issue = issue,
-                        topics = topics,
-                        default_operator = default_operator,
-                        database = database,
-                        limit = limit,
-                        page_limit = page_limit,
-                        sort_field = sort_field,
-                        modified_time_span = modified_time_span,
-                        tc_modified_time_span = tc_modified_time_span,
-                        detail = detail
-                        )
-
-        for c in df.columns:
-            if c not in self.results.columns:
-                df = df.drop(c, axis=1)
-
-        if add_to_results == True:
+    # def search_wos(self,
+    #                all_fields = None,
+    #                title = None,
+    #                year = None,
+    #                author = None,
+    #                author_identifier = None,
+    #                affiliation = None,
+    #                doctype = None,
+    #                doi = None,
+    #                issn = None,
+    #                isbn = None,
+    #                pubmed_id = None,
+    #                source_title = None,
+    #                volume = None,
+    #                page = None,
+    #                issue = None,
+    #                topics = None,
+    #                default_operator = 'AND',
+    #                database: str = 'WOK',
+    #                limit: int = 10,
+    #                page_limit: int = 1,
+    #                sort_field: str = 'RS+D',
+    #                modified_time_span = None,
+    #                tc_modified_time_span = None,
+    #                detail = None,
+    #                add_to_results = False,
+    #                drop_duplicates = False,
+    #                drop_empty_rows = False
+    #                ):
+
+    #     df = search_wos(
+    #                     all_fields = all_fields,
+    #                     title = title,
+    #                     year = year,
+    #                     author = author,
+    #                     author_identifier = author_identifier,
+    #                     affiliation = affiliation,
+    #                     doctype = doctype,
+    #                     doi = doi,
+    #                     issn = issn,
+    #                     isbn = isbn,
+    #                     pubmed_id = pubmed_id,
+    #                     source_title = source_title,
+    #                     volume = volume,
+    #                     page = page,
+    #                     issue = issue,
+    #                     topics = topics,
+    #                     default_operator = default_operator,
+    #                     database = database,
+    #                     limit = limit,
+    #                     page_limit = page_limit,
+    #                     sort_field = sort_field,
+    #                     modified_time_span = modified_time_span,
+    #                     tc_modified_time_span = tc_modified_time_span,
+    #                     detail = detail
+    #                     )
+
+    #     for c in df.columns:
+    #         if c not in self.results.columns:
+    #             df = df.drop(c, axis=1)
+
+    #     if add_to_results == True:
 
-            query = wos_query_builder(all_fields = all_fields,
-                                      title = title,
-                                      year = year,
-                                      author = author,
-                                      author_identifier = author_identifier,
-                                      affiliation = affiliation,
-                                      doctype = doctype,
-                                      doi = doi,
-                                      issn = issn,
-                                      isbn = isbn,
-                                      pubmed_id = pubmed_id,
-                                      source_title = source_title,
-                                      volume = volume,
-                                      page = page,
-                                      issue = issue,
-                                      topics = topics,
-                                      default_operator = default_operator)
-
-            self.activity_log.add_activity(type='API search', activity='searched World of Science and added to results', location=['results'], database=database, query=query)
-            self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore
-
-
-        return df
+        #     query = wos_query_builder(all_fields = all_fields,
+        #                               title = title,
+        #                               year = year,
+        #                               author = author,
+        #                               author_identifier = author_identifier,
+        #                               affiliation = affiliation,
+        #                               doctype = doctype,
+        #                               doi = doi,
+        #                               issn = issn,
+        #                               isbn = isbn,
+        #                               pubmed_id = pubmed_id,
+        #                               source_title = source_title,
+        #                               volume = volume,
+        #                               page = page,
+        #                               issue = issue,
+        #                               topics = topics,
+        #                               default_operator = default_operator)
+
+        #     self.activity_log.add_activity(type='API search', activity='searched World of Science and added to results', location=['results'], database=database, query=query)
+        #     self.results.add_dataframe(dataframe=df, drop_duplicates=drop_duplicates, drop_empty_rows=drop_empty_rows) # type: ignore
+
+
+        # return df
 
     def lookup_doi(self, doi = 'request_input', timeout = 60):
         return lookup_doi(doi=doi, timeout=timeout)
@@ -2221,7 +2221,7 @@ def api_search(self,
                    timeout = 60,
                    crossref = True,
                    scopus = True,
-                   wos = True,
+                   wos = False,
                    add_to_results = False):
 
         df = api_search(default_query = default_query,
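This hunk flips the api_search default so Web of Science is skipped unless explicitly requested, matching the importer being commented out above. A hedged sketch of a call under the new default (only the keyword names are taken from the signature above; the Review constructor and the query string are assumptions):

```python
# Hedged sketch: under the new default, api_search hits Crossref and Scopus only.
from art import Review

review = Review()  # assumed default constructor; not shown in this diff
df = review.api_search(
    default_query='systematic review methods',  # placeholder query
    timeout=60,
    crossref=True,
    scopus=True,
    wos=False,            # new default in this commit; pass True to opt back in
    add_to_results=False,
)
```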
Binary file modified art/datasets/.DS_Store
24 changes: 12 additions & 12 deletions art/datasets/__init__.py
@@ -8,60 +8,60 @@
 import json
 from pathlib import Path
 
-here = Path(__file__).parent
+here = str(Path(__file__).parent)
 
-with open(f'{here}/names/all_personal_names.txt', 'r') as file:
+with open(f'{here}/names/all_personal_names.txt', 'r', encoding='utf-8') as file:
     all_personal_names = file.read()
     file.close()
 all_personal_names = all_personal_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/first_names.txt', 'r') as file:
+with open(f'{here}/names/first_names.txt', 'r', encoding='utf-8') as file:
     first_names = file.read()
     file.close()
 first_names = first_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/last_names.txt', 'r') as file:
+with open(f'{here}/names/last_names.txt', 'r', encoding='utf-8') as file:
     last_names = file.read()
     file.close()
 last_names = last_names.replace("'", "").split(', ')
 
-with open(f'{here}/names/nltk_names.txt', 'r') as file:
+with open(f'{here}/names/nltk_names.txt', 'r', encoding='ascii') as file:
     nltk_names = file.read()
     file.close()
 nltk_names = nltk_names.replace("'", "").split(', ')
 
 # Corpus extracted from country_list module. Stored locally for efficiency.
 
-with open(f'{here}/countries/countries_all.txt', 'r') as file:
+with open(f'{here}/countries/countries_all.txt', 'r', encoding='utf-8') as file:
     countries_all = file.read()
     file.close()
 countries_all = countries_all.replace("'", "").split(', ')
 
-with open(f'{here}/countries/country_names.json', 'r') as file:
+with open(f'{here}/countries/country_names.json', 'r', encoding='utf-8') as file:
     country_names = json.load(file)
     file.close()
 
 # Corpus extracted from geonamescache module. Stored locally for efficiency.
-with open(f'{here}/cities/cities_all.txt', 'r') as file:
+with open(f'{here}/cities/cities_all.txt', 'r', encoding='utf-8') as file:
     cities_all = file.read()
     file.close()
 cities_all = cities_all.replace("'", "").split(', ')
 
-with open(f'{here}/cities/cities_en.json', 'r') as file:
+with open(f'{here}/cities/cities_en.json', 'r', encoding='ascii') as file:
     cities_en = json.load(file)
     file.close()
 
 # Corpus extracted from language_data and langcodes modules. Stored locally for efficiency.
 
-with open(f'{here}/languages/language_names.json', 'r') as file:
+with open(f'{here}/languages/language_names.json', 'r', encoding='ascii') as file:
     language_names = json.load(file)
     file.close()
 
-with open(f'{here}/languages/languages_en.json', 'r') as file:
+with open(f'{here}/languages/languages_en.json', 'r', encoding='ascii') as file:
     languages_en = json.load(file)
     file.close()
 
-with open(f'{here}/languages/language_codes.txt', 'r') as file:
+with open(f'{here}/languages/language_codes.txt', 'r', encoding='ascii') as file:
     language_codes = file.read()
     file.close()
 
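Two changes repeat through this file: `here` becomes a string so it can be interpolated into the f-string paths, and every `open()` gains an explicit encoding so the corpora parse identically across platforms. A self-contained sketch of the pattern (the corpus file name is hypothetical):

```python
# Sketch of the package-relative loading pattern adopted above: resolve paths
# against this module's directory and always pass an explicit encoding.
# 'example_corpus.txt' is a hypothetical file name.
from pathlib import Path

here = str(Path(__file__).parent)

with open(f'{here}/example_corpus.txt', 'r', encoding='utf-8') as file:
    corpus = file.read()

# Same post-processing as the corpora above: strip quotes, split on ', '.
corpus = corpus.replace("'", "").split(', ')
```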
Binary file removed art/datasets/__pycache__/__init__.cpython-39.pyc
13 changes: 8 additions & 5 deletions art/datasets/stopwords/stopwords.py
@@ -1,25 +1,28 @@
-from nltk import download
+from pathlib import Path
+from nltk import download # type: ignore
 import pandas as pd
 
 # Importing Stopwords corpus as an NLTK text
 try:
-    from nltk.corpus import stopwords as nltk_stopwords
+    from nltk.corpus import stopwords as nltk_stopwords # type: ignore
     nltk_stopwords.words()
 except:
     download('stopwords')
-    from nltk.corpus import stopwords as nltk_stopwords
+    from nltk.corpus import stopwords as nltk_stopwords # type: ignore
 
 nltk_stopwords = list(nltk_stopwords.words())
 
-with open('/Users/jhancock/Documents/Tool_dev/Investigative_data_analyser/Development/Current/idea/datasets/stopwords/en_stopwords.txt', 'r') as file:
+here = str(Path(__file__).parent)
+
+with open(f'{here}/en_stopwords.txt', 'r', encoding='ascii') as file:
     en_stopwords = file.read()
     file.close()
 en_stopwords = en_stopwords.replace("'", "").split(', ')
 
 en_stopwords_lower = pd.Series(en_stopwords).str.lower().to_list()
 en_stopwords = list(set(en_stopwords_lower + en_stopwords))
 
-with open('/Users/jhancock/Documents/Tool_dev/Investigative_data_analyser/Development/Current/idea/datasets/stopwords/html_stopwords.txt', 'r') as file:
+with open(f'{here}/html_stopwords.txt', 'r', encoding='utf-8') as file:
     html_stopwords = file.read()
     file.close()
 html_stopwords = html_stopwords.replace("'", "").split(', ')
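The substantive fix here replaces the developer's hardcoded absolute paths with package-relative ones; the download-on-demand guard for the NLTK corpus is unchanged. A minimal standalone version of that guard (the diff keeps a bare `except:`; this sketch narrows it to `LookupError`, which is what NLTK raises for a missing corpus):

```python
# Minimal sketch of the download-on-demand guard above: try the corpus,
# fetch it once if missing, then re-import.
from nltk import download  # type: ignore

try:
    from nltk.corpus import stopwords  # type: ignore
    stopwords.words()
except LookupError:
    download('stopwords')
    from nltk.corpus import stopwords  # type: ignore

print(len(stopwords.words('english')))  # list size varies by NLTK release
```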
Binary file removed art/exporters/__pycache__/__init__.cpython-39.pyc
Binary file removed art/importers/__pycache__/__init__.cpython-39.pyc
Binary file removed art/importers/__pycache__/bibtex.cpython-39.pyc
Binary file removed art/importers/__pycache__/crossref.cpython-39.pyc
Binary file removed art/importers/__pycache__/jstor.cpython-39.pyc
Binary file removed art/importers/__pycache__/orcid.cpython-39.pyc
Binary file removed art/importers/__pycache__/pdf.cpython-39.pyc
Binary file removed art/importers/__pycache__/scopus.cpython-39.pyc
Binary file removed art/importers/__pycache__/search.cpython-39.pyc
Binary file removed art/importers/__pycache__/wos.cpython-39.pyc
2 changes: 1 addition & 1 deletion art/importers/scopus.py
@@ -8,7 +8,7 @@
 import pybliometrics # type: ignore
 
 blockPrint()
-pybliometrics.scopus.create_config(keys = [api_key])
+pybliometrics.scopus.init(keys = [api_key])
 enablePrint()
 
 from pybliometrics.scopus import AbstractRetrieval, ScopusSearch # type: ignore
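This tracks pybliometrics 4.x, which replaced `create_config()` with an explicit `init()` that must run before any Scopus class is constructed. A hedged sketch of the initialise-then-search flow (the API key and query are placeholders; `keys=` is the keyword the diff itself uses):

```python
# Hedged sketch: initialise pybliometrics before constructing Scopus classes,
# as the updated import-time code above does. Key and query are placeholders.
import pybliometrics  # type: ignore

pybliometrics.scopus.init(keys=['YOUR-SCOPUS-API-KEY'])

from pybliometrics.scopus import ScopusSearch  # type: ignore

search = ScopusSearch('TITLE-ABS-KEY(systematic review)')
print(search.get_results_size())
```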
