Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for using local data files. #170

Merged
merged 2 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
=======
History
=======
2024.6.27 -- Added support for using local data files.
* Added support in the Flowchart and Node classes for using local data files,
e.g. forcefields. This allows the user to specify a local file, which is copied to
the working directory of the job.
* Also added a flag to both the Flowchart and Node classes indicating whether the job
is running in the JobServer rather than from the command line.

2024.5.27 -- Bugfix: Error saving results table.

2024.5.26 -- Bugfix: Error when clicking "Cancel" on some dialogs
Expand Down
35 changes: 30 additions & 5 deletions seamm/flowchart.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
however, the flow starts at the 'start' node and follows the connections,
so isolated nodes and fragments will not be executed."""

from datetime import datetime
import hashlib
import json
import logging
import os
import os.path
from pathlib import Path
import stat

from packaging.version import Version
Expand Down Expand Up @@ -76,15 +76,21 @@ def __init__(
# and make sure that the start node exists
self.add_node(seamm.StartNode(flowchart=self))

# And the root directory
# And the root directory and other information
self.root_directory = directory
self.in_jobserver = False

# And the parser associated with this flowchart
self._parser = None

# Iterating over the flowchart delegates to the iterator of its ``graph`` attribute.
def __iter__(self):
return self.graph.__iter__()

@property
def data_path(self):
    """The list of directories holding local and user data, such as forcefields.

    This is the value stored in ``_data_path``, which the ``in_jobserver``
    setter assigns.
    """
    search_path = self._data_path
    return search_path

@property
def executor(self):
"""The executor for tasks."""
Expand All @@ -94,6 +100,24 @@ def executor(self):
def executor(self, value):
self._executor = value

@property
def in_jobserver(self):
    """True if the flowchart is running in a JobServer."""
    return self._in_jobserver

@in_jobserver.setter
def in_jobserver(self, value):
    # Setting the flag also selects where local data files are looked for.
    self._in_jobserver = value
    if not value:
        # path for local data on local machine
        self._data_path = [
            Path.home() / ".seamm.d" / "data",
            Path.home() / "SEAMM" / "data",
        ]
        return
    # path for local data in JobServer
    self._data_path = [Path(self.root_directory) / "data"]

@property
def is_development(self):
"""Check if any of nodes are development versions."""
Expand All @@ -106,9 +130,7 @@ def is_development(self):
def root_directory(self):
    """The root directory for files, etc. for this flowchart.

    If no root directory has been set yet (the stored value is ``None``),
    the current working directory is recorded and returned.
    """
    if self._root_directory is None:
        # A timestamped subdirectory used to be assigned here and was then
        # immediately overwritten; only the effective assignment is kept.
        self._root_directory = os.getcwd()
    return self._root_directory

@root_directory.setter
Expand Down Expand Up @@ -214,6 +236,9 @@ def get_nodes(self):
nodes.append(next_node)
next_node = next_node.next()
logger.debug("Finished getting nodes")

self.reset_visited()

return nodes

def last_node(self, node="1"):
Expand Down
72 changes: 72 additions & 0 deletions seamm/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,11 @@ def calculation(self):
"""The type of calculation for filtering available results."""
return self._calculation

@property
def data_path(self):
    """The search path for local and user data, read from this node's flowchart."""
    flowchart = self.flowchart
    return flowchart.data_path

@property
def description(self):
"""A textual description of this node"""
Expand Down Expand Up @@ -254,6 +259,16 @@ def header(self):
".".join(str(e) for e in self._id), self.title, self.version
)

@property
def data_files(self):
    """The data files needed, as returned by ``list_data_files()``."""
    needed = self.list_data_files()
    return needed

@property
def in_jobserver(self):
    """The ``in_jobserver`` flag of this node's flowchart."""
    flowchart = self.flowchart
    return flowchart.in_jobserver

@property
def indent(self):
"""The amount to indent the output of this step in **job.out**."""
Expand Down Expand Up @@ -435,6 +450,31 @@ def reset_id(self):
"""Reset the id for node"""
self._id = None

def find_data_file(self, filename):
    """Find a file by searching the directories in ``data_path``.

    The directories are tried in order; the first one that contains the
    file determines the result.

    Parameters
    ----------
    filename : str or pathlib.Path
        Name of the file to find -- a relative path

    Returns
    -------
    path : pathlib.Path
        The resolved path to the file, with any leading ``~`` expanded.

    Raises
    ------
    FileNotFoundError
        If the file is not found in any directory of ``data_path``.
    """
    for directory in self.data_path:
        candidate = directory / filename
        self.logger.debug(f" trying {candidate}")
        # Expand "~" *before* testing for existence; testing the unexpanded
        # path means a search directory written with "~" can never match.
        candidate = candidate.expanduser()
        if candidate.exists():
            return candidate.resolve()
    self.logger.debug(f"Did not find {filename}")
    raise FileNotFoundError(f"Data file '{filename}' not found.")

def get_gui_data(self, key, gui=None):
"""Return an element from the GUI dictionary"""
if gui is None:
Expand Down Expand Up @@ -602,6 +642,28 @@ def get_table(self, tablename, create=True):
table_handle = self.get_variable(tablename)
return table_handle["table"]

def glob_data_files(self, pattern):
    """Glob for files in each directory of ``data_path``.

    Parameters
    ----------
    pattern : str
        The glob pattern, applied relative to each directory in
        ``data_path``.

    Returns
    -------
    paths : [pathlib.Path]
        The matching paths from all the directories, grouped in
        ``data_path`` order. The list is empty if nothing matches; this
        method does not raise an error for a lack of matches.
    """
    matches = []
    for directory in self.data_path:
        matches.extend(directory.glob(pattern))
    return matches

def connections(self):
"""Return a list of all the incoming and outgoing edges
for this node, giving the anchor points and other node
Expand Down Expand Up @@ -710,6 +772,16 @@ def existing_tables(self):

return sorted(tables)

def list_data_files(self):
    """Return the auxiliary data files needed, like forcefields.

    This implementation returns an empty list.

    Returns
    -------
    [(shortname, pathlib.Path)]
        A list of tuples, each with the local path or URI for a file and
        its full pathlib.Path.
    """
    no_files = []
    return no_files

def run(self, printer=None):
"""Do whatever we need to do! The base class does nothing except
return the next node.
Expand Down
Loading