Skip to content

Commit

Permalink
deprecation fixes
Browse files (browse the repository at this point in the history)
  • Loading branch information
devincowan committed Mar 21, 2023
1 parent d14f307 commit b3ea2a6
Show file tree
Hide file tree
Showing 79 changed files with 43 additions and 20 deletions.
4 changes: 2 additions & 2 deletions jinja-report/collect_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def get_stats_data(users=True, resources=True,
skip=True, deidentify=False):

# standard query parameters
host = 'localhost'
host = 'elasticsearch'
port = 9200

ufile = os.path.join(dirname, 'users.pkl')
Expand Down Expand Up @@ -257,7 +257,7 @@ def get_stats_data(users=True, resources=True,
print(f'--> file exists: {afile}...skipping')
else:
print('--> downloading activity metrics')
elastic.get_es_data(host, port, aindex, query=aquery,
elastic.get_es_data(host=host, port=port, index=aindex, query=aquery,
outpik=afile, outfile=acsv, drop=drop,
return_es_index=True)
else:
Expand Down
26 changes: 15 additions & 11 deletions jinja-report/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,17 +119,21 @@ def citations(input_directory='.',
url = ('https://doi.crossref.org/servlet/getForwardLinks?'
f'usr={creds.username}&pwd={creds.password}&'
f'doi={doi}')
res = requests.get(url)
root = etree.fromstring(res.text.encode())
citations = root.findall('.//body/forward_link',
namespaces=root.nsmap)
dois = []
for citation in citations:
doi = citation.find('.//doi', namespaces=root.nsmap).text
dois.append(doi)

df.at[idx, 'citations'] = len(dois)
df.at[idx, 'citing_dois'] = ','.join(dois)
try:
res = requests.get(url)
root = etree.fromstring(res.text.encode())
citations = root.findall('.//body/forward_link',
namespaces=root.nsmap)
dois = []
for citation in citations:
doi = citation.find('.//doi', namespaces=root.nsmap).text
dois.append(doi)

df.at[idx, 'citations'] = len(dois)
df.at[idx, 'citing_dois'] = ','.join(dois)
except ConnectionError as e:
print(f'Warning: issue with DOI connection: {e}')
continue

df.to_pickle(os.path.join(input_directory, 'doi-citations.pkl'))
else:
Expand Down
14 changes: 12 additions & 2 deletions jinja-report/elastic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import argparse
from tqdm import tqdm
from elasticsearch import Elasticsearch
from pandas.io.json import json_normalize
from pandas import json_normalize
from dotenv import load_dotenv

load_dotenv('../.env')

# standard elasticsearch fields to trim from the dataframe
DEFAULT_TRIM = ['@version', 'beat.hostname', 'beat.name', 'count', 'fields',
Expand Down Expand Up @@ -59,7 +61,9 @@ def get_es_data(host,
return_es_index=False):

# connect to the hydroshare elasticsearch server
es = Elasticsearch(f"{scheme}://{host}:{port}", basic_auth=(os.getenv('ELASTIC_USERNAME', 'elastic'), os.getenv('ELASTIC_PASSWORD', 'changeme')))
elastic_url = f"{scheme}://{host}:{port}"
print(f"Connecting to: {elastic_url}")
es = Elasticsearch(elastic_url, basic_auth=(os.getenv('ELASTIC_USERNAME', 'elastic'), os.getenv('ELASTIC_PASSWORD', 'changeme')))

# perform search
try:
Expand All @@ -71,6 +75,12 @@ def get_es_data(host,
# get the total size of dataset
total_size = temp_r['hits']['total']

try:
total_size = int(total_size.get('value'))
except Exception as e:
print(f'Error attempting to access total_size: {e}')
print(f"Total size is: {total_size}")

# calculate the scroll size
min_scroll, max_scroll = 1000, 10000
inc_scroll = int(total_size / 25)
Expand Down
2 changes: 2 additions & 0 deletions jinja-report/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def line(plotObjs_ax1,
annotate = figure_dict.pop('annotate_series', False)
annotate_legend = figure_dict.pop('annotate_legend', False)
for pobj in plotObjs_ax1:
if len(pobj.y) == 0:
continue
label = pobj.label

if annotate_legend:
Expand Down
2 changes: 1 addition & 1 deletion jinja-report/users_pie.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def all(input_directory='.',
% (drp, df[drp].sum()))
df.drop(drp, inplace=True, axis=1)
except Exception as e:
print(e)
print(f'Error dropping from users pie df: {e}')

# calculate total and percentages for each user type
ds = df.sum()
Expand Down
14 changes: 10 additions & 4 deletions jinja-report/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,21 @@ def save_data_to_csv(data_dict, index='date'):

# # set the index
# d.set_index('date', inplace=True)
if d.empty:
continue

dfs.append(d)

# combine dataframes
df_concat = pandas.concat(dfs, axis=1)
try:
# combine dataframes
df_concat = pandas.concat(dfs, axis=1)

df_concat.to_csv(k)
df_concat.to_csv(k)

print(f'--> data saved to: {k}')
print(f'--> data saved to: {k}')
except ValueError as e:
print(f'Warning: looks like there is some data missing! {e}')
print(f'Attempted to save this dict to csv: {data_dict}')


def subset_by_date(dat, st, et, date_column='date'):
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ PyLaTeX==1.4.1
pyparsing==3.0.9
pyrepl==0.9.0
python-dateutil==2.8.2
python-dotenv==1.0.0
pytz==2022.7.1
PyYAML==6.0
rdflib==5.0.0
Expand Down

0 comments on commit b3ea2a6

Please sign in to comment.