Skip to content

Commit

Permalink
flake
Browse files Browse the repository at this point in the history
  • Loading branch information
porteratzo committed Feb 1, 2024
1 parent 8d7bc35 commit 56aefae
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 15 deletions.
26 changes: 17 additions & 9 deletions openfl/experimental/runtime/local_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def ray_group_assign(collaborators, number_of_actors=3):
Args:
collaborators (list): The list of collaborators.
number_of_actors (int, optional): Number of actors to distribute collaborators to. Defaults to 3.
number_of_actors (int, optional): Number of actors to distribute collaborators to.
Defaults to 3.
Returns:
list: A list of GroupMember instances.
Expand All @@ -83,7 +84,8 @@ class GroupMember:
"""
A utility class that manages the collaborator and its group.
This class maintains compatibility with runtime execution by assigning attributes for each function in the Collaborator interface in conjunction with RemoteHelper.
This class maintains compatibility with runtime execution by assigning attributes for each
function in the Collaborator interface in conjunction with RemoteHelper.
"""

def __init__(self, collaborator_actor, collaborator):
Expand Down Expand Up @@ -115,11 +117,15 @@ class RemoteHelper:
"""
A utility class to maintain compatibility with RayExecutor.
This class returns a lambda function that uses collaborator_actor.execute_from_col to run a given function from the given collaborator.
This class returns a lambda function that uses collaborator_actor.execute_from_col to run
a given function from the given collaborator.
"""

# once ray_grouped replaces the current ray runtime this class can be replaced with a function that returns the lambda function; using a function is necessary because this is used
# in setting multiple functions in a loop and lambda takes the reference to self.f_name and not the value, so we need to change scope to avoid self.f_name from changing as the loop progresses
# once ray_grouped replaces the current ray runtime this class can be replaced with a
# function that returns the lambda function; using a function is necessary because this is
# used in setting multiple functions in a loop and lambda takes the reference to self.f_name
# and not the value, so we need to change scope to avoid self.f_name from changing as the
# loop progresses
def __init__(self, collaborator_actor, collaborator, f_name) -> None:
"""
Initializes a new instance of the RemoteHelper class.
Expand Down Expand Up @@ -154,7 +160,8 @@ def remote(self, *args, **kwargs):
collaborator_ray_refs = []
collaborators_per_group = math.ceil(len(collaborators) / number_of_actors)
times_called = 0
# logic to sort collaborators by GPUs; if collaborators have the same number of GPUs then they are sorted by CPU
# logic to sort collaborators by GPUs; if collaborators have the same number of GPUs then
# they are sorted by CPU
cpu_magnitude = len(str(abs(max([i.num_cpus for i in collaborators]))))
min_gpu = min([i.num_gpus for i in collaborators])
min_gpu = max(min_gpu, 0.0001)
Expand All @@ -171,15 +178,15 @@ def remote(self, *args, **kwargs):
[
i.num_cpus
for i in collaborators_sorted_by_gpucpu[
times_called : times_called + collaborators_per_group
times_called: times_called + collaborators_per_group
]
]
)
max_num_gpus = max(
[
i.num_gpus
for i in collaborators_sorted_by_gpucpu[
times_called : times_called + collaborators_per_group
times_called: times_called + collaborators_per_group
]
]
)
Expand Down Expand Up @@ -238,7 +245,8 @@ def append(
Args:
name (str): The name of the collaborator.
private_attributes_callable (Callable): A callable that sets the private attributes of the collaborator.
private_attributes_callable (Callable): A callable that sets the private attributes of
the collaborator.
**kwargs: Additional keyword arguments.
"""
from openfl.experimental.interface import Collaborator
Expand Down
18 changes: 12 additions & 6 deletions openfl/experimental/utilities/runtime_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,17 @@ def checkpoint(ctx, parent_func, chkpnt_reserved_words=["next", "runtime"]):

def old_check_resource_allocation(num_gpus, each_participant_gpu_usage):
remaining_gpu_memory = {}
# TODO for each GPU the function tries to see if all participant usages fit into a GPU; if it doesn't it removes that
# participant from the participant list, and adds it to the remaining_gpu_memory dict. So any sum of GPU requirements above 1
# TODO for each GPU the function tries to see if all participant usages fit into a GPU; if
# it doesn't it removes that
# participant from the participant list, and adds it to the remaining_gpu_memory dict. So any
# sum of GPU requirements above 1
# triggers this.
# But at this point the function will raise an error because remaining_gpu_memory is never
# cleared.
# The participant list should remove the participant if it fits in the GPU and save the
# participant if it doesn't, and continue
# to the next GPU to see if it fits in that one; only if we run out of GPUs should this
# function raise an error.
for gpu in np.ones(num_gpus, dtype=int):
for i, (participant_name, participant_gpu_usage) in enumerate(
each_participant_gpu_usage.items()
Expand Down Expand Up @@ -147,7 +152,8 @@ def check_resource_allocation(num_gpus, each_participant_gpu_usage):
need_assigned.pop(participant_name)
gpu -= participant_gpu_usage

# raise error if after going through all GPUs there are still participants that need to be assigned
# raise error if after going through all GPUs there are still participants that need to be
# assigned
if len(need_assigned) > 0:
raise ResourcesAllocationError(
f"Failed to allocate Participant {list(need_assigned.keys())} "
Expand Down

0 comments on commit 56aefae

Please sign in to comment.