From 5a1881236510b41fb6e4da3bfc9e7fa38b7b4fc5 Mon Sep 17 00:00:00 2001 From: Ishant Thakare Date: Tue, 17 Dec 2024 05:31:51 +0530 Subject: [PATCH] Federated Runtime - Initial Implementation (#1190) * Introduction to FederatedRuntime Signed-off-by: Ishant Thakare * Updated aggregator Signed-off-by: Ishant Thakare * updated collaborator & added 101_MNIST federated_runtime tutorial Signed-off-by: Ishant Thakare * Updated code & fixed stream_stdout Signed-off-by: Ishant Thakare * Adding testcases for FederatedRuntime Signed-off-by: Ishant Thakare * Fixed formatting issues Signed-off-by: Ishant Thakare * Fix formatting issues Signed-off-by: Ishant Thakare * Incorporated internal review comments Signed-off-by: Ishant Thakare * Fix checkpoint issue Signed-off-by: Ishant Thakare * Updated FederatedRuntime Tutorials Signed-off-by: Ishant Thakare * Updated tutorial Signed-off-by: Ishant Thakare * Incorporated Teo's review comments Signed-off-by: Ishant Thakare * Incorporated Teo's review comments Signed-off-by: Ishant Thakare * Incorporated review comment Signed-off-by: Ishant Thakare * Incorporate review comment Co-authored-by: Patrick Foley * Updated Workflow Interface documentation Signed-off-by: Ishant Thakare * Fix certificates for federated_runtime.py Signed-off-by: Ishant Thakare --------- Signed-off-by: Ishant Thakare Co-authored-by: Patrick Foley --- .../features_index/workflowinterface.rst | 201 ++++- ...kspace_Creation_from_JupyterNotebook.ipynb | 4 +- .../101_MNIST/Portland/Portland_config.yaml | 2 + .../101_MNIST/Portland/private_attributes.py | 50 ++ .../101_MNIST/Portland/start_envoy.sh | 6 + .../FederatedRuntime/101_MNIST/README.md | 65 ++ .../101_MNIST/Seattle/Seattle_config.yaml | 2 + .../101_MNIST/Seattle/private_attributes.py | 50 ++ .../101_MNIST/Seattle/start_envoy.sh | 6 + .../101_MNIST/director/director_config.yaml | 4 + .../101_MNIST/director/start_director.sh | 4 + .../101_MNIST_FederatedRuntime.ipynb | 702 ++++++++++++++++++ .../Bangalore/Bangalore_config.yaml | 9 + .../Bangalore/private_attributes.py | 45 ++ .../Bangalore/requirements.txt | 8 + .../Bangalore/start_envoy.sh | 6 + .../Chandler/Chandler_config.yaml | 9 + .../Chandler/private_attributes.py | 45 ++ .../Chandler/requirements.txt | 8 + .../Chandler/start_envoy.sh | 6 + .../301_MNIST_Watermaking/README.md | 65 ++ .../director/director_config.yaml | 11 + .../director/private_attributes.py | 169 +++++ .../director/start_director.sh | 4 + .../workspace/MNIST_Watermarking.ipynb | 574 ++++++++++++++ .../workflow_interface_requirements.txt | 5 +- .../workflow/component/__init__.py | 2 + .../component/aggregator/aggregator.py | 155 ++-- .../component/collaborator/collaborator.py | 16 +- .../workflow/component/director/__init__.py | 6 + .../workflow/component/director/director.py | 317 ++++++++ .../workflow/component/director/experiment.py | 332 +++++++++ .../workflow/component/envoy/__init__.py | 6 + .../workflow/component/envoy/envoy.py | 224 ++++++ .../workflow/federated/plan/plan.py | 225 ++++-- .../workflow/interface/cli/aggregator.py | 9 +- .../workflow/interface/cli/director.py | 131 ++++ .../workflow/interface/cli/envoy.py | 151 ++++ .../workflow/interface/fl_spec.py | 203 +++-- .../workflow/protocols/director.proto | 107 +++ .../workflow/runtime/federated_runtime.py | 237 +++++- .../workflow/transport/__init__.py | 7 +- .../workflow/transport/grpc/__init__.py | 7 +- .../transport/grpc/aggregator_server.py | 18 +- .../transport/grpc/director_client.py | 269 +++++++ .../transport/grpc/director_server.py | 353 
+++++++++ .../workflow/transport/grpc/exceptions.py | 4 +- .../workflow/utilities/runtime_utils.py | 8 +- .../workflow/workspace_export/export.py | 135 +++- .../director/director_config.yaml | 4 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 10 + .../collaborator_private_attrs.py | 47 ++ .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 11 + .../collaborator_private_attrs.py | 47 ++ .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../workspace/testflow_datastore_cli.ipynb | 438 +++++++++++ .../director/director_config.yaml | 6 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 2 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 0 .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../workspace/testflow_include_exclude.ipynb | 364 +++++++++ .../director/director_config.yaml | 5 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 0 .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 0 .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../workspace/testflow_internal_loop.ipynb | 385 ++++++++++ .../director/director_config.yaml | 10 + .../aggregator_private_attrs.py | 7 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 6 + .../collaborator_private_attrs.py | 10 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 6 + .../collaborator_private_attrs.py | 10 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../testflow_privateattributes.ipynb | 411 ++++++++++ .../director/director_config.yaml | 11 + .../aggregator_private_attrs.py | 10 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 7 + .../collaborator_private_attrs.py | 16 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 6 + .../collaborator_private_attrs.py | 16 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../testflow_privateattributes.ipynb | 411 ++++++++++ .../director/director_config.yaml | 7 + .../aggregator_private_attrs.py | 6 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 2 + .../collaborator_private_attrs.py | 8 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 2 + .../collaborator_private_attrs.py | 9 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../testflow_private_attributes.ipynb | 407 ++++++++++ .../director/director_config.yaml | 4 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 4 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 5 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../workspace/testflow_reference.ipynb | 487 ++++++++++++ .../director/director_config.yaml | 6 + .../director/start_director.sh | 4 + .../envoy_one/envoy_config.yaml | 0 .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 1 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + ...tflow_reference_with_include_exclude.ipynb | 397 ++++++++++ .../director/director_config.yaml | 
5 + .../director/start_director.sh | 4 + .../envoy_four/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 5 + .../envoy_four/requirements.txt | 7 + .../envoy_four/start_envoy.sh | 6 + .../envoy_one/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 4 + .../envoy_one/requirements.txt | 7 + .../envoy_one/start_envoy.sh | 6 + .../envoy_three/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 5 + .../envoy_three/requirements.txt | 7 + .../envoy_three/start_envoy.sh | 6 + .../envoy_two/envoy_config.yaml | 5 + .../collaborator_private_attrs.py | 5 + .../envoy_two/requirements.txt | 7 + .../envoy_two/start_envoy.sh | 6 + .../testflow_subset_of_collaborators.ipynb | 299 ++++++++ 153 files changed, 8955 insertions(+), 277 deletions(-) create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/Portland_config.yaml create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/private_attributes.py create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/start_envoy.sh create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/README.md create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/Seattle_config.yaml create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/private_attributes.py create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/start_envoy.sh create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/director_config.yaml create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/start_director.sh create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/workspace/101_MNIST_FederatedRuntime.ipynb create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/Bangalore_config.yaml create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/private_attributes.py create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/requirements.txt create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/start_envoy.sh create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/Chandler_config.yaml create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/private_attributes.py create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/requirements.txt create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/start_envoy.sh create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/README.md create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/director_config.yaml create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/private_attributes.py create mode 100755 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/start_director.sh create mode 100644 openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/workspace/MNIST_Watermarking.ipynb create mode 100644 
openfl/experimental/workflow/component/director/__init__.py create mode 100644 openfl/experimental/workflow/component/director/director.py create mode 100644 openfl/experimental/workflow/component/director/experiment.py create mode 100644 openfl/experimental/workflow/component/envoy/__init__.py create mode 100644 openfl/experimental/workflow/component/envoy/envoy.py create mode 100644 openfl/experimental/workflow/interface/cli/director.py create mode 100644 openfl/experimental/workflow/interface/cli/envoy.py create mode 100644 openfl/experimental/workflow/protocols/director.proto create mode 100644 openfl/experimental/workflow/transport/grpc/director_client.py create mode 100644 openfl/experimental/workflow/transport/grpc/director_server.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/workspace/testflow_datastore_cli.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/workspace/testflow_include_exclude.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/start_director.sh create mode 100644 
tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/workspace/testflow_internal_loop.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/director_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/private_attributes/aggregator_private_attrs.py create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/workspace/testflow_privateattributes.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/director_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/private_attributes/aggregator_private_attrs.py create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/requirements.txt create mode 100755 
tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/workspace/testflow_privateattributes.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/director_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/private_attributes/aggregator_private_attrs.py create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/workspace/testflow_private_attributes.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 
tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference/workspace/testflow_reference.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/workspace/testflow_reference_with_include_exclude.ipynb create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/director_config.yaml create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/start_director.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/start_envoy.sh create mode 100644 
tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/envoy_config.yaml create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/private_attributes/collaborator_private_attrs.py create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/requirements.txt create mode 100755 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/start_envoy.sh create mode 100644 tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/workspace/testflow_subset_of_collaborators.ipynb diff --git a/docs/about/features_index/workflowinterface.rst b/docs/about/features_index/workflowinterface.rst index 942a6c97be..b076738b0b 100644 --- a/docs/about/features_index/workflowinterface.rst +++ b/docs/about/features_index/workflowinterface.rst @@ -1,4 +1,4 @@ -.. # Copyright (C) 2020-2023 Intel Corporation +.. # Copyright (C) 2020-2024 Intel Corporation .. # SPDX-License-Identifier: Apache-2.0 .. _workflow_interface: @@ -9,7 +9,7 @@ Workflow Interface **Important Note** -The OpenFL workflow interface is experimental, subject to change, and is currently limited to single node execution. To setup and launch a real federation, see :ref:`running_a_federation` +The OpenFL workflow interface is experimental and subject to change. For an overview of the supported options to set up a federation and run FL experiments, see `Features <../features.rst>`_ What is it? =========== @@ -23,7 +23,7 @@ A new OpenFL interface that gives significantly more flexility to researchers in There are several modifications we make in our reimagined version of this interface that are necessary for federated learning: 1. *Placement*: Metaflow's `@step` decorator is replaced by placement decorators that specify where a task will run. In horizontal federated learning, there are server (or aggregator) and client (or collaborator) nodes. Tasks decorated by `@aggregator` will run on the aggregator node, and `@collaborator` will run on the collaborator node. These placement decorators are interpreted by *Runtime* implementations: these do the heavy lifting of figuring out how to get the state of the current task to another process or node. -2. *Runtime*: Each flow has a `.runtime` attribute. The runtime encapsulates the details of the infrastucture where the flow will run. In this experimental release, we support only a `LocalRuntime` single node implementation, but as this work matures, we will extend to a `FederatedRuntime` that implements distributed operation across remote infrastructure. +2. *Runtime*: Each flow has a `.runtime` attribute. The runtime encapsulates the details of the infrastructure where the flow will run. We support the :code:`LocalRuntime` for simulating experiments on a local node and the :code:`FederatedRuntime` to launch experiments on distributed infrastructure. 3.
*Conditional branches*: Perform different tasks if a criteria is met 4. *Loops*: Internal loops are within a flow; this is necessary to support rounds of training where the same sequence of tasks is performed repeatedly. @@ -142,7 +142,18 @@ The workflow interface formulates the experiment as a series of tasks, or a flow Runtimes ======== -A :code:`Runtime` defines where the flow will be executed, who the participants are in the experiment, and the private information that each participant has access to. In this experimental release, single node execution is supported using the :code:`LocalRuntime`. Let's see how a :code:`LocalRuntime` is created. +A :code:`Runtime` defines where the flow will be executed, who the participants are in the experiment, and the private information that each participant has access to. In the current experimental release: + +* Single node execution is supported using the :code:`LocalRuntime`. +* Distributed node execution is supported using the :code:`FederatedRuntime`. + +Let us see how :code:`LocalRuntime` and :code:`FederatedRuntime` are created. + + +LocalRuntime +--------------- + +You can simulate a Federated Learning experiment locally using :code:`LocalRuntime`, which supports single-node execution. Let's see how a :code:`LocalRuntime` is created. .. code-block:: python @@ -214,7 +225,7 @@ In rare cases this can be a problem because certain python objects cannot be ser Participant *private attributes* are returned by the callback function in form of a dictionary, where the key is the name of the attribute and the value is the object. In this example callback function :code:`callable_to_initialize_collaborator_private_attributes()` returns :code:`train_loader` and :code:`test_loader` in the form of a dictionary. -**Note:**If both callable and private attributes are provided, the initialization will prioritize the private attributes through the :code:`callable` function. +**Note:** If both a callable and private attributes are provided, the initialization will prioritize the private attributes set through the :code:`callable` function. Some important points to remember while creating callback function and private attributes are: @@ -241,8 +252,8 @@ Now let's see how the runtime for a flow is assigned, and the flow gets run: And that's it! This will run an instance of the :code:`FederatedFlow` on a single node in a single process. -Runtime Backends -================ +LocalRuntime Backends +--------------------- The Runtime defines where code will run, but the Runtime has a :code:`Backend` - which defines the underlying implementation of *how* the flow will be executed. :code:`single_process` is the default in the :code:`LocalRuntime`: it executes all code sequentially within a single python process, and is well suited to run both on high spec and low spec hardware @@ -281,6 +292,151 @@ In the above example, we have used :code:`num_gpus=0.2` while instantiating Aggr **Note:** It is not necessary to have ALL the participants use GPUs. For e.g. only the Collaborator are allocated to GPUs. In this scenario user should ensure that the artifacts returned by Collaborators to Aggregator (e.g. locally trained model object) should be loaded back to CPU before exiting the collaborator step (i.e. before the join step). As Tensorflow manages the object allocation by default therefore this step is needed only for Pytorch.
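+Putting the pieces above together, here is a minimal, self-contained sketch of the callback pattern (a sketch only: the keyword arguments passed to the :code:`Collaborator` constructor are forwarded to the callback, and the MNIST sharding mirrors the 101_MNIST tutorial added in this change):
+
+.. code-block:: python
+
+    import torch
+    import torchvision
+
+    from openfl.experimental.workflow.interface import Aggregator, Collaborator
+    from openfl.experimental.workflow.runtime import LocalRuntime
+
+    def callable_to_initialize_collaborator_private_attributes(index, n_collaborators, batch_size):
+        # Download MNIST and shard it by collaborator index
+        transform = torchvision.transforms.Compose(
+            [torchvision.transforms.ToTensor(),
+             torchvision.transforms.Normalize((0.1307,), (0.3081,))]
+        )
+        train = torchvision.datasets.MNIST("../files/", train=True, download=True, transform=transform)
+        test = torchvision.datasets.MNIST("../files/", train=False, download=True, transform=transform)
+        for ds in (train, test):
+            ds.data = ds.data[index::n_collaborators]
+            ds.targets = ds.targets[index::n_collaborators]
+        # Private attributes are returned as a dictionary
+        return {
+            "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False),
+            "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False),
+        }
+
+    aggregator = Aggregator()
+    collaborators = [
+        Collaborator(
+            name=name,
+            private_attributes_callable=callable_to_initialize_collaborator_private_attributes,
+            index=idx,
+            n_collaborators=2,
+            batch_size=32,
+        )
+        for idx, name in enumerate(["Portland", "Seattle"])
+    ]
+    local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators, backend="single_process")
+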
+FederatedRuntime +---------------- + +The :code:`FederatedRuntime` facilitates distributed execution across long-lived components (Director & Envoys) and enables data scientists to deploy the experiment from the Jupyter notebook itself. Let’s explore the process of creating a :code:`FederatedRuntime`. + +The first step is to create the participants in the Federation: the Director and the Envoys. + +**Director: The central node in the Federation** + +The `fx director start` command is used to start the Director. You can run it with or without TLS, depending on your setup. + +**With TLS:** +Use the following command: + +.. code-block:: console + + $ fx director start -c <director_config_path> -rc <root_cert_path> -pk <private_key_path> -oc <api_cert_path> + +**Without TLS:** +Use the following command: + +.. code-block:: console + + $ fx director start --disable-tls -c <director_config_path> + +**Explanation of Command Options** + +- `-c <director_config_path>`: Path to the Director's configuration file. +- `-rc <root_cert_path>`: Path to the root certificate (used with TLS). +- `-pk <private_key_path>`: Path to the private key file (used with TLS). +- `-oc <api_cert_path>`: Path to the API certificate file (used with TLS). +- `--disable-tls`: Disables TLS encryption. + +**Configuration File** +The Director requires a configuration file in YAML format. This file contains essential settings such as: + +- Hostname (`listen_host`) +- Port (`listen_port`) +- Envoy health check period (`envoy_health_check_period`) +- Private attributes for the aggregator + +An example configuration file `director_config.yaml` is shown below: + +.. code-block:: yaml + + settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 + + aggregator: + private_attributes: private_attributes.aggregator_attrs + +**Envoy: Participating nodes in the Federation** + +The `fx envoy start` command is used to start the Envoy. You can run it with or without TLS, depending on your setup. + +**With TLS:** +Use the following command: + +.. code-block:: console + + $ fx envoy start -n <envoy_name> -ec <envoy_config_path> -dh <director_host> -dp <director_port> -rc <root_cert_path> -pk <private_key_path> -oc <api_cert_path> + +**Without TLS:** +Use the following command: + +.. code-block:: console + + $ fx envoy start -n <envoy_name> --disable-tls -ec <envoy_config_path> + +**Explanation of Command Options** + +- `-n <envoy_name>`: Specifies the name of the Envoy. +- `-ec <envoy_config_path>`: Path to the Envoy's configuration file. +- `-dh <director_host>`: Hostname or IP address of the Director. +- `-dp <director_port>`: Port on which the Director is running. +- `-rc <root_cert_path>`: Path to the root certificate (used with TLS). +- `-pk <private_key_path>`: Path to the private key file (used with TLS). +- `-oc <api_cert_path>`: Path to the API certificate file (used with TLS). +- `--disable-tls`: Disables TLS encryption. + +The Envoy configuration file includes details about the private attributes. An example configuration file `envoy_config.yaml` for `envoy_one` is shown below: + +.. code-block:: yaml + + envoy_one: + private_attributes: private_attributes.envoy_one_attrs + +**Note**: Private attributes for both the Director and Envoy can be configured in two ways, similar to :code:`LocalRuntime`. If both a callable and private attributes are provided, the initialization process will prioritize the private attributes set through the callable function. + +Now we proceed to instantiate the :code:`FederatedRuntime` to facilitate the deployment of the experiment on a distributed infrastructure. To initialize the :code:`FederatedRuntime`, the following inputs are required: + +1. **director_info** + + Details about the Director, including: + + - Fully Qualified Domain Name (FQDN) of the Director node. + - Port number on which the Director is listening.
+ - (Optional) Certificate information for TLS: + + - `cert_chain`: Path to the certificate chain. + - `api_cert`: Path to the API certificate. + - `api_private_key`: Path to the API private key. + +2. **collaborators** + + A list of collaborators participating in the federation. + Only Envoys hosting these collaborators will receive the experiment details from the Director. + +3. **notebook_path** + + File path to the Jupyter notebook defining the experiment logic. + +Below is an example of how to set up and instantiate a `FederatedRuntime`: + +.. code-block:: python + + # Define director information (TLS disabled) + director_info = { + 'director_node_fqdn': 'localhost', + 'director_port': 50050, + 'cert_chain': None, + 'api_cert': None, + 'api_private_key': None, + } + + # Instantiate the FederatedRuntime + federated_runtime = FederatedRuntime( + collaborators=collaborator_names, + director=director_info, + notebook_path='<path_to_notebook>', + tls=False + ) + +To distribute the experiment on the Federation, we now need to assign the :code:`federated_runtime` to the flow and execute it. + +.. code-block:: python + + flow = FederatedFlow() + flow.runtime = federated_runtime + flow.run() + +This will export the Jupyter notebook to a workspace and deploy it to the federation. The Director receives the experiment, distributes it to the Envoys, and initiates the execution of the experiment. + Debugging with the Metaflow Client ================================== @@ -293,6 +449,8 @@ Capturing this information requires just a one line change to the Flow object in .. code-block:: python flow = FederatedFlow(..., checkpoint=True) + +**LocalRuntime** After the flow has started running, you can use the Metaflow Client to get intermediate information from any of the participants tasks: @@ -390,22 +548,19 @@ Also, If we wanted to get the best model and the last model, you can just run: torch.save(last_model.state_dict(), PATH) torch.save(best_model.state_dict(), PATH) -While this information is useful for debugging, depending on your workflow it may require significant disk space. For this reason, `checkpoint` is disabled by default. +**FederatedRuntime** -Runtimes: Future Plans -====================== +In a distributed environment consisting of Director, Envoys and User Node (where the experiment is launched), the following debugging support is available: -Our goal is to make it a one line change to configure where and how a flow is executed. While we only support single node execution with the :code:`LocalRuntime` today, our aim in future releases is to make going from one to multiple nodes as easy as: +1. **Director Node**: If checkpointing is enabled, the Metaflow client can be launched on the Director and the same steps outlined for :code:`LocalRuntime` can be followed. +2. **User Node**: The stdout and stderr logs are printed directly in the Jupyter notebook. -.. code-block:: python - - flow = FederatedFlow() - # Run on a single node first - local_runtime = LocalRuntime(aggregator=aggregator, collaborators=collaborators) - flow.runtime = local_runtime - flow.run() - - # A future example of how the same flow could be run on distributed infrastructure - federated_runtime = FederatedRuntime(...) - flow.runtime = federated_runtime - flow.run() +**IMPORTANT**: While this information is useful for debugging, depending on your workflow it may require significant disk space. For this reason, checkpoint is disabled by default.
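+
+For the Director-node case above, a minimal sketch of using the Metaflow client (assuming :code:`checkpoint=True` was set and the flow class is named :code:`FederatedFlow`; substitute your own flow name):
+
+.. code-block:: python
+
+    from metaflow import Metaflow, Flow
+
+    print(Metaflow().flows)                  # flows checkpointed on this node
+    run = Flow("FederatedFlow").latest_run   # most recent run of this flow
+    for step in run:
+        for task in step:
+            print(step, task, task.stdout)   # stored artifacts and logs per task
+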
+ +Future Plans +============== +The following functionalities are planned for inclusion in future releases of the Workflow Interface: + +1. **Pre-trained Model Integration**: Enable the capability to pass a pre-trained model to FederatedFlow. +2. **Plan Review Mechanism**: Enable the capability for Director and Envoy admins to review submitted plans and either accept or reject them. +3. **Straggler Handling**: Implement mechanisms to manage and mitigate the impact of stragglers during federated experiments. diff --git a/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb b/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb index 525abfc312..ab31b26d50 100644 --- a/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb +++ b/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb @@ -1065,7 +1065,7 @@ ], "metadata": { "kernelspec": { - "display_name": "openfl-wip", + "display_name": "fed_run", "language": "python", "name": "python3" }, @@ -1079,7 +1079,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.19" + "version": "3.10.15" } }, "nbformat": 4, diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/Portland_config.yaml b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/Portland_config.yaml new file mode 100755 index 0000000000..318e22d71d --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/Portland_config.yaml @@ -0,0 +1,2 @@ +Portland: + private_attributes: private_attributes.portland_attrs \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/private_attributes.py b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/private_attributes.py new file mode 100644 index 0000000000..40910db415 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/private_attributes.py @@ -0,0 +1,50 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy + +import torch +import torchvision + +# Download Train and Test datasets +mnist_train = torchvision.datasets.MNIST( + "../files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +mnist_test = torchvision.datasets.MNIST( + "../files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +# shard the dataset according to collaborator index +portland_col_idx = 0 +n_collaborators = 2 +batch_size = 32 + +train = deepcopy(mnist_train) +test = deepcopy(mnist_test) + +train.data = mnist_train.data[portland_col_idx::n_collaborators] +train.targets = mnist_train.targets[portland_col_idx::n_collaborators] +test.data = mnist_test.data[portland_col_idx::n_collaborators] +test.targets = mnist_test.targets[portland_col_idx::n_collaborators] + +portland_attrs = { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False), + "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False), +} diff --git
a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/start_envoy.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Portland/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/README.md b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/README.md new file mode 100644 index 0000000000..e4f771b4c5 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/README.md @@ -0,0 +1,65 @@ +# 101_MNIST_FederatedRuntime + +## **How to run this tutorial (without TLS and locally as a simulation):** +
+<br/> + +### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip: + - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/get_started/installation.html). See the example setup after this step. + +
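+ - For example, one possible setup (assuming Python 3; the `venv` directory name matches the activation command used in step 2):
+
+   ```sh
+   python3 -m venv venv
+   source venv/bin/activate
+   pip install --upgrade pip
+   pip install openfl
+   ```
+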
+<br/> + +### 1. Split terminal into 4 (1 terminal for the director, 2 for the envoys, and 1 for the experiment) + +
+<br/> + +### 2. Do the following in each terminal: + - Activate the virtual environment from step 0: + + ```sh + source venv/bin/activate + ``` + - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals. + - Navigate to the tutorial: + + ```sh + cd openfl/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/ + ``` + +
+<br/> + +### 3. In the first terminal, activate experimental features and run the director: + +```sh +fx experimental activate +cd director +./start_director.sh +``` + +
+<br/> + +### 4. In the second and third terminals, run the envoys: + +#### 4.1 Second terminal +```sh +cd Portland +./start_envoy.sh Portland Portland_config.yaml +``` + +#### 4.2 Third terminal +```sh +cd Seattle +./start_envoy.sh Seattle Seattle_config.yaml +``` + +
+<br/> + +### 5. Now that your director and envoy terminals are set up, run the Jupyter Notebook in your experiment terminal: + +```sh +cd workspace +jupyter lab 101_MNIST_FederatedRuntime.ipynb +``` +- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the 101_MNIST_FederatedRuntime.ipynb file. +- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells". +- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully. + diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/Seattle_config.yaml b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/Seattle_config.yaml new file mode 100755 index 0000000000..b95c8242ef --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/Seattle_config.yaml @@ -0,0 +1,2 @@ +Seattle: + private_attributes: private_attributes.seattle_attrs \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/private_attributes.py b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/private_attributes.py new file mode 100644 index 0000000000..ba8c5ce7c8 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/private_attributes.py @@ -0,0 +1,50 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy + +import torch +import torchvision + +# Download Train and Test datasets +mnist_train = torchvision.datasets.MNIST( + "../files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +mnist_test = torchvision.datasets.MNIST( + "../files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +# shard the dataset according to collaborator index +seattle_col_idx = 1 +n_collaborators = 2 +batch_size = 32 + +train = deepcopy(mnist_train) +test = deepcopy(mnist_test) + +train.data = mnist_train.data[seattle_col_idx::n_collaborators] +train.targets = mnist_train.targets[seattle_col_idx::n_collaborators] +test.data = mnist_test.data[seattle_col_idx::n_collaborators] +test.targets = mnist_test.targets[seattle_col_idx::n_collaborators] + +seattle_attrs = { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=False), + "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False), +} diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/start_envoy.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/Seattle/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/director_config.yaml
b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/director_config.yaml new file mode 100755 index 0000000000..021cfc59c9 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/director_config.yaml @@ -0,0 +1,4 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/start_director.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/workspace/101_MNIST_FederatedRuntime.ipynb b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/workspace/101_MNIST_FederatedRuntime.ipynb new file mode 100644 index 0000000000..d7f3c1be8f --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/workspace/101_MNIST_FederatedRuntime.ipynb @@ -0,0 +1,702 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# FederatedRuntime 101: Quickstart with MNIST" + ] + }, + { + "cell_type": "markdown", + "id": "cbe52a4e", + "metadata": {}, + "source": [ + "Welcome to the first **FederatedRuntime** Tutorial! \n", + "This tutorial demonstrates how to deploy a Federated Learning experiment based on the Workflow Interface to a distributed computing infrastructure.\n", + "\n", + "Data scientists often start by developing and fine-tuning machine-learning models in a local environment before transitioning to a federated setup. OpenFL supports this methodology, and this tutorial guides the user through the following steps:\n", + "- **Simulate** a Federated Learning experiment locally using `LocalRuntime` \n", + "- **Deploy** this experiment on Federated Infrastructure using `FederatedRuntime` from a familiar Jupyter notebook environment\n", + "\n", + "**Key Features covered**: \n", + "1. **Simulate** a Federated Learning experiment using `LocalRuntime`. Explore [101 MNIST](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb) for insights\n", + "2. Enable creation of workspace content by annotating the Jupyter notebook with export directives. Explore [1001 Workspace Creation from JupyterNotebook](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/1001_Workspace_Creation_from_JupyterNotebook.ipynb) for insights\n", + "3. **Deploy** the experiment on Federated infrastructure (Director and Envoy nodes) using `FederatedRuntime`\n", + "\n", + "Let's get started!\n" + ] + }, + { + "cell_type": "markdown", + "id": "b3b0701e", + "metadata": {}, + "source": [ + "### Getting Started" + ] + }, + { + "cell_type": "markdown", + "id": "b62ffd86", + "metadata": {}, + "source": [ + "We begin by specifying the module where cells marked with the `#| export` directive will be automatically exported. The export directive is used to identify specific code cells in the Jupyter notebook that should be included in the generated python module.
This python module is required to distribute the FL experiment.\n", + "\n", + "The `#| default_exp` directive in the following cell sets the name of the python module to `experiment`. This name can be customized according to the user’s requirements and preferences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "f5860947", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *annotated* with the `#| export` directive as shown below. The user should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in cells marked with this directive." + ] + }, + { + "cell_type": "markdown", + "id": "d109332c", + "metadata": {}, + "source": [ + "### Installing Pre-requisites\n", + "We start by installing OpenFL and the dependencies of the workflow interface. These dependencies are exported and become requirements for the Federated Learning environment. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f7475cba", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "!pip install git+https://github.com/securefederatedai/openfl.git\n", + "!pip install -r ../../../workflow_interface_requirements.txt\n", + "!pip install torch==2.3.1\n", + "!pip install torchvision==0.18.1\n", + "!pip install -U ipywidgets\n" + ] + }, + { + "cell_type": "markdown", + "id": "a85485b8", + "metadata": {}, + "source": [ + "### Model definition" + ] + }, + { + "cell_type": "markdown", + "id": "6cc94801", + "metadata": {}, + "source": [ + "We begin with the quintessential example of a pytorch CNN model trained on the MNIST dataset.
Let's start by defining\n", + "- Hyperparameters\n", + "- Model definition, and \n", + "- Helper functions to train and validate the model like we would for any other deep learning experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bd8ac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "import torch\n", + "import numpy as np\n", + "import random\n", + "\n", + "# Hyperparameters\n", + "learning_rate = 0.01\n", + "momentum = 0.5\n", + "batch_size = 32\n", + "log_interval = 10\n", + "\n", + "# Model definition\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n", + " self.conv2 = nn.Conv2d(10, 20, kernel_size=5)\n", + " self.fc1 = nn.Linear(320, 50)\n", + " self.fc2 = nn.Linear(50, 10)\n", + "\n", + " def forward(self, x):\n", + " x = F.relu(F.max_pool2d(self.conv1(x), 2))\n", + " x = F.relu(F.max_pool2d(self.conv2(x), 2))\n", + " x = x.view(-1, 320)\n", + " x = F.relu(self.fc1(x))\n", + " x = self.fc2(x)\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + "\n", + "# Helper function to validate the model\n", + "def validate(model, test_loader):\n", + " model.eval()\n", + " correct = 0\n", + " with torch.no_grad():\n", + " for data, target in test_loader:\n", + " output = model(data)\n", + " pred = output.data.max(1, keepdim=True)[1]\n", + " correct += pred.eq(target.data.view_as(pred)).sum()\n", + " accuracy = float(correct / len(test_loader.dataset))\n", + " return accuracy\n", + "\n", + "\n", + "# Helper function to train the model\n", + "def train_model(model, optimizer, data_loader, round_number, log=False):\n", + " train_loss = 0\n", + " model.train()\n", + " for batch_idx, (X, y) in enumerate(data_loader):\n", + " optimizer.zero_grad()\n", + "\n", + " output = model(X)\n", + " loss = F.nll_loss(output, y)\n", + " loss.backward()\n", + "\n", + " optimizer.step()\n", + "\n", + " train_loss += loss.item() * len(X)\n", + " if batch_idx % log_interval == 0 and log:\n", + " print(\n", + " \"Train Epoch: {:3} [{:5}/{:<5} ({:<.0f}%)] Loss: {:<.4f}\".format(\n", + " round_number,\n", + " batch_idx * len(X),\n", + " len(data_loader.dataset),\n", + " 100.0 * batch_idx / len(data_loader),\n", + " loss.item(),\n", + " )\n", + " )\n", + "\n", + " train_loss /= len(data_loader.dataset)\n", + " return train_loss\n", + "\n", + "\n", + "# Helper function to initialize seed for reproducibility\n", + "def initialize_seed(random_seed=42):\n", + " torch.manual_seed(random_seed)\n", + " np.random.seed(random_seed)\n", + " random.seed(random_seed)" + ] + }, + { + "cell_type": "markdown", + "id": "475aa38c", + "metadata": {}, + "source": [ + "### Dataset definition\n", + "\n", + "We now download the training and test datasets of MNIST, a necessary step to demonstrate the functionality of the LocalRuntime."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9836c542", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import torchvision\n", + "\n", + "# Train and Test datasets\n", + "mnist_train = torchvision.datasets.MNIST(\n", + " \"../files/\",\n", + " train=True,\n", + " download=True,\n", + " transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")\n", + "\n", + "mnist_test = torchvision.datasets.MNIST(\n", + " \"../files/\",\n", + " train=False,\n", + " download=True,\n", + " transform=torchvision.transforms.Compose(\n", + " [\n", + " torchvision.transforms.ToTensor(),\n", + " torchvision.transforms.Normalize((0.1307,), (0.3081,)),\n", + " ]\n", + " ),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4770fe7c", + "metadata": {}, + "source": [ + "### Workflow definition" + ] + }, + { + "cell_type": "markdown", + "id": "6306f73d", + "metadata": {}, + "source": [ + "Next we import the `FLSpec`, placement decorators (`aggregator/collaborator`), and define the `FedAvg` helper function\n", + "\n", + "- `FLSpec` – Defines the flow specification. User defined flows are subclasses of this.\n", + "- `aggregator/collaborator` - placement decorators that define where the task will be assigned\n", + "- `FedAvg` - helper function for Federated Averaging\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "from copy import deepcopy\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "\n", + "# Helper function for federated averaging\n", + "def FedAvg(agg_model, models, weights=None):\n", + " state_dicts = [model.state_dict() for model in models]\n", + " agg_state_dict = agg_model.state_dict()\n", + " for key in models[0].state_dict():\n", + " agg_state_dict[key] = torch.from_numpy(\n", + " np.average([state[key].numpy() for state in state_dicts], axis=0, weights=weights)\n", + " )\n", + "\n", + " agg_model.load_state_dict(agg_state_dict)\n", + " return agg_model" + ] + }, + { + "cell_type": "markdown", + "id": "2a9d8a60", + "metadata": {}, + "source": [ + "Let us now define the Workflow. 
Here we use the same tasks as in the [101 MNIST](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/101_MNIST.ipynb) tutorial."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52c4a752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "class FederatedFlow_TorchMNIST(FLSpec):\n",
+    "    \"\"\"\n",
+    "    This flow trains a CNN on MNIST in a federated learning setting\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self, model=None, optimizer=None, learning_rate=1e-2, momentum=0.5, rounds=3, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "\n",
+    "        if model is not None:\n",
+    "            self.model = model\n",
+    "            self.optimizer = optimizer\n",
+    "        else:\n",
+    "            initialize_seed()\n",
+    "            self.model = Net()\n",
+    "            self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate, momentum=momentum)\n",
+    "\n",
+    "        self.learning_rate = learning_rate\n",
+    "        self.momentum = momentum\n",
+    "        self.rounds = rounds\n",
+    "        self.results = []\n",
+    "\n",
+    "    @aggregator\n",
+    "    def start(self):\n",
+    "        \"\"\"\n",
+    "        This is the start of the Flow.\n",
+    "        \"\"\"\n",
+    "        print(\"Initializing workflow ... \")\n",
+    "\n",
+    "        self.collaborators = self.runtime.collaborators\n",
+    "        self.current_round = 0\n",
+    "\n",
+    "        self.next(self.aggregated_model_validation, foreach=\"collaborators\")\n",
+    "\n",
+    "    @collaborator\n",
+    "    def aggregated_model_validation(self):\n",
+    "        \"\"\"\n",
+    "        Perform validation of aggregated model on collaborators.\n",
+    "        \"\"\"\n",
+    "        print(f\"<{self.input}> Performing validation on aggregated model ... \")\n",
+    "        self.agg_validation_score = validate(self.model, self.test_loader)\n",
+    "        print(f\"<{self.input}> Aggregated model validation score = {self.agg_validation_score:.4f}\")\n",
+    "\n",
+    "        self.next(self.train)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def train(self):\n",
+    "        \"\"\"\n",
+    "        Train the model on the local collaborator dataset.\n",
+    "        \"\"\"\n",
+    "        print(f\"<{self.input}> Training model on local dataset ... \")\n",
+    "\n",
+    "        self.optimizer = optim.SGD(self.model.parameters(), lr=self.learning_rate, momentum=self.momentum)\n",
+    "\n",
+    "        self.loss = train_model(\n",
+    "            model=self.model,\n",
+    "            optimizer=self.optimizer,\n",
+    "            data_loader=self.train_loader,\n",
+    "            round_number=self.current_round,\n",
+    "            log=True,\n",
+    "        )\n",
+    "\n",
+    "        self.next(self.local_model_validation)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def local_model_validation(self):\n",
+    "        \"\"\"\n",
+    "        Validate locally trained model.\n",
+    "        \"\"\"\n",
+    "        print(f\"<{self.input}> Performing validation on locally trained model ... \")\n",
+    "        self.local_validation_score = validate(self.model, self.test_loader)\n",
+    "        print(f\"<{self.input}> Local model validation score = {self.local_validation_score:.4f}\")\n",
+    "        self.next(self.join)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def join(self, inputs):\n",
+    "        \"\"\"\n",
+    "        Model aggregation step.\n",
+    "        \"\"\"\n",
+    "        print(\"Joining models from collaborators ...\")\n",
+    "\n",
+    "        # Average training loss, aggregated and locally trained model accuracy\n",
+    "        self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n",
+    "        self.aggregated_model_accuracy = sum(input.agg_validation_score for input in inputs) / len(inputs)\n",
+    "        self.local_model_accuracy = sum(input.local_validation_score for input in inputs) / len(inputs)\n",
+    "\n",
+    "        print(f\"Avg. aggregated model validation score = {self.aggregated_model_accuracy:.4f}\")\n",
+    "        print(f\"Avg. training loss = {self.average_loss:.4f}\")\n",
+    "        print(f\"Avg. local model validation score = {self.local_model_accuracy:.4f}\")\n",
+    "\n",
+    "        # FedAvg\n",
+    "        self.model = FedAvg(self.model, [input.model for input in inputs])\n",
+    "\n",
+    "        self.results.append(\n",
+    "            [\n",
+    "                self.current_round,\n",
+    "                self.aggregated_model_accuracy,\n",
+    "                self.average_loss,\n",
+    "                self.local_model_accuracy,\n",
+    "            ]\n",
+    "        )\n",
+    "\n",
+    "        self.current_round += 1\n",
+    "        if self.current_round < self.rounds:\n",
+    "            self.next(self.aggregated_model_validation, foreach=\"collaborators\")\n",
+    "        else:\n",
+    "            self.next(self.end)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def end(self):\n",
+    "        \"\"\"\n",
+    "        This is the last step in the Flow.\n",
+    "        \"\"\"\n",
+    "        print(\"This is the end of the flow\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0757812",
+   "metadata": {},
+   "source": [
+    "### Simulation: LocalRuntime"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3bccffd7",
+   "metadata": {},
+   "source": [
+    "We now import and define the `LocalRuntime` and the participants (`Aggregator`/`Collaborator`), and initialize the participants' private attributes\n",
+    "\n",
+    "- `Runtime` – Defines where the flow runs. `LocalRuntime` simulates the flow on the local node.\n",
+    "- `Aggregator`/`Collaborator` - (local) participants in the simulation\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bffcc141",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "from openfl.experimental.workflow.interface import Aggregator, Collaborator\n",
+    "from openfl.experimental.workflow.runtime import LocalRuntime\n",
+    "\n",
+    "# Setup Aggregator & initialize private attributes\n",
+    "aggregator = Aggregator()\n",
+    "aggregator.private_attributes = {}\n",
+    "\n",
+    "# Setup Collaborators & initialize shards of MNIST dataset as private attributes\n",
+    "n_collaborators = 2\n",
+    "collaborator_names = [\"Portland\", \"Seattle\"]\n",
+    "\n",
+    "collaborators = [Collaborator(name=name) for name in collaborator_names]\n",
+    "for idx, collaborator in enumerate(collaborators):\n",
+    "    local_train = deepcopy(mnist_train)\n",
+    "    local_test = deepcopy(mnist_test)\n",
+    "    local_train.data = mnist_train.data[idx::n_collaborators]\n",
+    "    local_train.targets = mnist_train.targets[idx::n_collaborators]\n",
+    "    local_test.data = mnist_test.data[idx::n_collaborators]\n",
+    "    local_test.targets = mnist_test.targets[idx::n_collaborators]\n",
+    "\n",
+    "    collaborator.private_attributes = {\n",
+    "        \"train_loader\": torch.utils.data.DataLoader(\n",
+    "            local_train, batch_size=batch_size, shuffle=False\n",
+    "        ),\n",
+    "        \"test_loader\": torch.utils.data.DataLoader(\n",
+    "            local_test, batch_size=batch_size, shuffle=False\n",
+    "        ),\n",
+    "    }\n",
+    "\n",
+    "local_runtime = LocalRuntime(\n",
+    "    aggregator=aggregator, collaborators=collaborators, backend=\"single_process\"\n",
+    ")\n",
+    "print(f\"Local runtime collaborators = {local_runtime.collaborators}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "78819357",
+   "metadata": {},
+   "source": [
+    "### Start Simulation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a2675ba",
+   "metadata": {},
+   "source": [
+    "Now that we have our flow and runtime defined, let's run the simulation!"
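+One detail worth noting before running: the `[idx::n_collaborators]` strided slicing above gives each collaborator a disjoint, near-equal shard of MNIST. An optional, illustrative check of the shard sizes (it relies only on the `collaborators` list built in the previous cell):
+
+```python
+# Optional: verify that the two shards are disjoint and near-equal in size.
+for name, collab in zip(collaborator_names, collaborators):
+    n_train = len(collab.private_attributes["train_loader"].dataset)
+    n_test = len(collab.private_attributes["test_loader"].dataset)
+    print(f"{name}: {n_train} train / {n_test} test samples")
+# Expected: 30000 train / 5000 test each, from MNIST's 60000/10000 split
+```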
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e5f10d5d", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "model = None\n", + "optimizer = None\n", + "flflow = FederatedFlow_TorchMNIST(model, optimizer, learning_rate, momentum, rounds=2, checkpoint=True)\n", + "flflow.runtime = local_runtime\n", + "flflow.run()" + ] + }, + { + "cell_type": "markdown", + "id": "50300fed", + "metadata": {}, + "source": [ + "Let us check the simulation results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5d77540", + "metadata": {}, + "outputs": [], + "source": [ + "from tabulate import tabulate \n", + "\n", + "headers = [\"Rounds\", \"Agg Model Validation Score\", \"Local Train loss\", \"Local Model Validation score\"]\n", + "print('********** Simulation results **********')\n", + "simulation_results = flflow.results\n", + "print(tabulate(simulation_results, headers=headers, tablefmt=\"outline\"))\n" + ] + }, + { + "cell_type": "markdown", + "id": "b5371b6d", + "metadata": {}, + "source": [ + "### Setup Federation: Director & Envoys" + ] + }, + { + "cell_type": "markdown", + "id": "f270e385", + "metadata": {}, + "source": [ + "Before we can deploy the experiment, let us create participants in Federation: Director and Envoys. As the Tutorial uses two collaborators we shall launch three participants:\n", + "1. Director: The central node in the Federation\n", + "2. Portland: The first envoy in the Federation\n", + "3. Seattle: The second envoy in the Federation \n", + "\n", + "The participants can be launched by following steps mentioned in [README]((https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/FederatedRuntime/101_MNIST/README.md))\n" + ] + }, + { + "cell_type": "markdown", + "id": "f9d556d0", + "metadata": {}, + "source": [ + "### Deploy: FederatedRuntime" + ] + }, + { + "cell_type": "markdown", + "id": "5ffd73b6", + "metadata": {}, + "source": [ + "We now import and instantiate `FederatedRuntime` to enable deployment of experiment on distributed infrastructure. Initializing the `FederatedRuntime` requires following inputs to be provided by the user:\n", + "\n", + "- `director_info` – director information including fqdn of the director node, port, and certificate information\n", + "- `collaborators` - names of the collaborators participating in experiment\n", + "- `notebook_path`- path to this jupyter notebook\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators=collaborator_names,\n", + " director=director_info, \n", + " notebook_path='./101_MNIST_FederatedRuntime.ipynb'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "58d22bbb", + "metadata": {}, + "source": [ + "Let us connect to federation & check if the envoys are connected to the director by using the `get_envoys` method of `FederatedRuntime`. 
If the participants are launched successful in previous step the status of `Portland` and `Seattle` should be displayed as `Online`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "markdown", + "id": "87c487cb", + "metadata": {}, + "source": [ + "Now that we have our distributed infrastructure ready, let us modify the flow runtime to `FederatedRuntime` instance and deploy the experiment. \n", + "\n", + "Progress of the flow is available on \n", + "1. Jupyter notebook: if `checkpoint` attribute of the flow object is set to `True`\n", + "2. Director and Envoy terminals \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.results = [] # clear results from previous run\n", + "flflow.runtime = federated_runtime\n", + "flflow.run()" + ] + }, + { + "cell_type": "markdown", + "id": "5e5ef3ea", + "metadata": {}, + "source": [ + "Let us compare the simulation results from `LocalRuntime` and federation results from `FederatedRuntime`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4b63ce0", + "metadata": {}, + "outputs": [], + "source": [ + "headers = [\"Rounds\", \"Agg Model Validation Score\", \"Local Train loss\", \"Local Model Validation score\"]\n", + "print('********** Simulation results **********')\n", + "print(tabulate(simulation_results, headers=headers, tablefmt=\"outline\"))\n", + "\n", + "print('********** Federation results **********')\n", + "federation_results = flflow.results\n", + "print(tabulate(federation_results, headers=headers, tablefmt=\"outline\"))\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fed_run", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/Bangalore_config.yaml b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/Bangalore_config.yaml new file mode 100644 index 0000000000..ec4a088af7 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/Bangalore_config.yaml @@ -0,0 +1,9 @@ +Bangalore: + callable_func: + settings: + batch_size: 64 + index: 1 + n_collaborators: 2 + test_dataset: private_attributes.test_dataset + train_dataset: private_attributes.train_dataset + template: private_attributes.bangalore_attrs \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/private_attributes.py b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/private_attributes.py new file mode 100644 index 0000000000..d19dd222c9 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/private_attributes.py @@ -0,0 +1,45 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy + +import torch +import torchvision + +train_dataset = torchvision.datasets.MNIST( + "./files/", + train=True, + download=True, + 
transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +test_dataset = torchvision.datasets.MNIST( + "./files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + + +def bangalore_attrs(index, n_collaborators, batch_size, train_dataset, test_dataset): + train = deepcopy(train_dataset) + test = deepcopy(test_dataset) + train.data = train_dataset.data[index::n_collaborators] + train.targets = train_dataset.targets[index::n_collaborators] + test.data = test_dataset.data[index::n_collaborators] + test.targets = test_dataset.targets[index::n_collaborators] + + return { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), + "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), + } diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/requirements.txt b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/requirements.txt new file mode 100644 index 0000000000..3a478314e8 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/requirements.txt @@ -0,0 +1,8 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch==2.3.1 +torchvision==0.18.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/start_envoy.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Bangalore/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/Chandler_config.yaml b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/Chandler_config.yaml new file mode 100644 index 0000000000..60f763ee55 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/Chandler_config.yaml @@ -0,0 +1,9 @@ +Chandler: + callable_func: + settings: + batch_size: 64 + index: 0 + n_collaborators: 2 + test_dataset: private_attributes.test_dataset + train_dataset: private_attributes.train_dataset + template: private_attributes.chandler_attrs \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/private_attributes.py b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/private_attributes.py new file mode 100644 index 0000000000..6ebcb1b0a6 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/private_attributes.py @@ -0,0 +1,45 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from copy import deepcopy + +import torch 
+import torchvision + +train_dataset = torchvision.datasets.MNIST( + "./files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +test_dataset = torchvision.datasets.MNIST( + "./files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + + +def chandler_attrs(index, n_collaborators, batch_size, train_dataset, test_dataset): + train = deepcopy(train_dataset) + test = deepcopy(test_dataset) + train.data = train_dataset.data[index::n_collaborators] + train.targets = train_dataset.targets[index::n_collaborators] + test.data = test_dataset.data[index::n_collaborators] + test.targets = test_dataset.targets[index::n_collaborators] + + return { + "train_loader": torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True), + "test_loader": torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True), + } diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/requirements.txt b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/requirements.txt new file mode 100644 index 0000000000..3a478314e8 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/requirements.txt @@ -0,0 +1,8 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch==2.3.1 +torchvision==0.18.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/start_envoy.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/Chandler/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/README.md b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/README.md new file mode 100644 index 0000000000..be10df4d4c --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/README.md @@ -0,0 +1,65 @@ +# 301_MNIST_Watermarking + +## **How to run this tutorial (without TLS and locally as a simulation):** +
+
+### 0. If you haven't done so already, create a virtual environment, install OpenFL, and upgrade pip:
+  - For help with this step, visit the "Install the Package" section of the [OpenFL installation instructions](https://openfl.readthedocs.io/en/latest/get_started/installation.html).
+
+<br/>
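+For example, one possible setup (commands are illustrative; see the linked instructions for the authoritative steps):
+
+```sh
+python3 -m venv venv
+source venv/bin/activate
+pip install --upgrade pip
+pip install openfl
+```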
+
+### 1. Split the terminal into 4 (1 terminal for the director, 2 for the envoys, and 1 for the experiment)
+
+<br/>
+
+### 2. Do the following in each terminal:
+   - Activate the virtual environment from step 0:
+
+   ```sh
+   source venv/bin/activate
+   ```
+   - If you are in a network environment with a proxy, ensure proxy environment variables are set in each of your terminals (see the example below).
+   - Navigate to the tutorial:
+
+   ```sh
+   cd openfl/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/
+   ```
+
+<br/>
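+For instance, proxies are typically configured per terminal via environment variables; the values below are placeholders for your own proxy settings:
+
+```sh
+export http_proxy=http://proxy.example.com:8080
+export https_proxy=http://proxy.example.com:8080
+export no_proxy=localhost,127.0.0.1   # keep director/envoy traffic off the proxy
+```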
+
+### 3. In the first terminal, activate experimental features and run the director:
+
+```sh
+fx experimental activate
+cd director
+./start_director.sh
+```
+
+<br/>
+
+### 4. In the second and third terminals, run the envoys:
+
+#### 4.1 Second terminal
+```sh
+cd Bangalore
+./start_envoy.sh Bangalore Bangalore_config.yaml
+```
+
+#### 4.2 Third terminal
+```sh
+cd Chandler
+./start_envoy.sh Chandler Chandler_config.yaml
+```
+
+<br/>
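+For reference, the two envoy helper scripts above are identical wrappers around a single `fx` CLI call, pointing at the local director via `-dh`/`-dp`, as defined in this tutorial's `start_envoy.sh`:
+
+```sh
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050
+```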
+
+### 5. Now that your director and envoy terminals are set up, run the Jupyter notebook in your experiment terminal:
+
+```sh
+cd workspace
+jupyter lab MNIST_Watermarking.ipynb
+```
+- A Jupyter Server URL will appear in your terminal. In your browser, proceed to that link. Once the webpage loads, click on the MNIST_Watermarking.ipynb file.
+- To run the experiment, select the icon that looks like two triangles to "Restart Kernel and Run All Cells".
+- You will notice activity in your terminals as the experiment runs, and when the experiment is finished the director terminal will display a message that the experiment has finished successfully.
+
diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/director_config.yaml b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/director_config.yaml
new file mode 100644
index 0000000000..9d4a7f480b
--- /dev/null
+++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/director_config.yaml
@@ -0,0 +1,11 @@
+settings:
+  listen_host: localhost
+  listen_port: 50050
+  envoy_health_check_period: 5 # in seconds
+
+aggregator:
+  callable_func:
+    settings:
+      batch_size: 50
+      watermark_data: private_attributes.watermark_data
+    template: private_attributes.aggregator_attrs
\ No newline at end of file
diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/private_attributes.py b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/private_attributes.py
new file mode 100644
index 0000000000..6c87f0d014
--- /dev/null
+++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/private_attributes.py
@@ -0,0 +1,169 @@
+# Copyright (C) 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+import torchvision
+import numpy as np
+import pathlib
+import os
+import matplotlib
+import matplotlib.pyplot as plt
+import PIL.Image as Image
+import imagen as ig
+import numbergen as ng
+
+
+watermark_dir = "./files/watermark-dataset/MWAFFLE/"
+
+
+def generate_watermark(
+    x_size=28, y_size=28, num_class=10, num_samples_per_class=10, img_dir=watermark_dir
+):
+    """
+    Generate a watermark by superimposing a pattern on a noisy background.
+ + Parameters + ---------- + x_size: x dimension of the image + y_size: y dimension of the image + num_class: number of classes in the original dataset + num_samples_per_class: number of samples to be generated per class + img_dir: directory for saving watermark dataset + + Reference + --------- + WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298) + + """ + x_pattern = int(x_size * 2 / 3.0 - 1) + y_pattern = int(y_size * 2 / 3.0 - 1) + + np.random.seed(0) + for cls in range(num_class): + patterns = [] + random_seed = 10 + cls + patterns.append( + ig.Line( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + scale=0.8, + ) + ) + patterns.append( + ig.Arc( + xdensity=x_pattern, + ydensity=y_pattern, + thickness=0.001, + orientation=np.pi * ng.UniformRandom(seed=random_seed), + x=ng.UniformRandom(seed=random_seed) - 0.5, + y=ng.UniformRandom(seed=random_seed) - 0.5, + size=0.33, + ) + ) + + pat = np.zeros((x_pattern, y_pattern)) + for i in range(6): + j = np.random.randint(len(patterns)) + pat += patterns[j]() + res = pat > 0.5 + pat = res.astype(int) + + x_offset = np.random.randint(x_size - x_pattern + 1) + y_offset = np.random.randint(y_size - y_pattern + 1) + + for i in range(num_samples_per_class): + base = np.random.rand(x_size, y_size) + base[ + x_offset: x_offset + pat.shape[0], + y_offset: y_offset + pat.shape[1], + ] += pat + d = np.ones((x_size, x_size)) + img = np.minimum(base, d) + if not os.path.exists(img_dir + str(cls) + "/"): + os.makedirs(img_dir + str(cls) + "/") + plt.imsave( + img_dir + str(cls) + "/wm_" + str(i + 1) + ".png", + img, + cmap=matplotlib.cm.gray, + ) + + +# If the Watermark dataset does not exist, generate and save the Watermark images +watermark_path = pathlib.Path(watermark_dir) +if watermark_path.exists() and watermark_path.is_dir(): + print( + f"Watermark dataset already exists at: {watermark_path}. Proceeding to next step ... " + ) + pass +else: + print("Generating Watermark dataset... 
") + generate_watermark() + + +class WatermarkDataset(torch.utils.data.Dataset): + def __init__(self, images_dir, label_dir=None, transforms=None): + self.images_dir = os.path.abspath(images_dir) + self.image_paths = [ + os.path.join(self.images_dir, d) for d in os.listdir(self.images_dir) + ] + self.label_paths = label_dir + self.transform = transforms + temp = [] + + # Recursively counting total number of images in the directory + for image_path in self.image_paths: + for path in os.walk(image_path): + if len(path) <= 1: + continue + path = path[2] + for im_n in [image_path + "/" + p for p in path]: + temp.append(im_n) + self.image_paths = temp + + if len(self.image_paths) == 0: + raise Exception(f"No file(s) found under {images_dir}") + + def __len__(self): + return len(self.image_paths) + + def __getitem__(self, idx): + image_filepath = self.image_paths[idx] + image = Image.open(image_filepath) + image = image.convert("RGB") + image = self.transform(image) + label = int(image_filepath.split("/")[-2]) + + return image, label + + +def get_watermark_transforms(): + return torchvision.transforms.Compose( + [ + torchvision.transforms.Grayscale(), + torchvision.transforms.Resize(28), + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize(mean=(0.5,), std=(0.5,)), # Normalize + ] + ) + + +watermark_data = WatermarkDataset( + images_dir=watermark_dir, + transforms=get_watermark_transforms(), +) + + +def aggregator_attrs(watermark_data, batch_size): + return { + "watermark_data_loader": torch.utils.data.DataLoader( + watermark_data, batch_size=batch_size, shuffle=True + ), + "pretrain_epochs": 25, + "retrain_epochs": 25, + "watermark_acc_threshold": 0.98, + "watermark_pretraining_completed": False, + } diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/start_director.sh b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/workspace/MNIST_Watermarking.ipynb b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/workspace/MNIST_Watermarking.ipynb new file mode 100644 index 0000000000..040fb2cb26 --- /dev/null +++ b/openfl-tutorials/experimental/workflow/FederatedRuntime/301_MNIST_Watermaking/workspace/MNIST_Watermarking.ipynb @@ -0,0 +1,574 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Federated Runtime: 301_MNIST_Watermarking" + ] + }, + { + "cell_type": "markdown", + "id": "3b7357ef", + "metadata": {}, + "source": [ + "This tutorial is based on the LocalRuntime example [301_MNIST_Watermarking](https://github.com/securefederatedai/openfl/blob/develop/openfl-tutorials/experimental/workflow/301_MNIST_Watermarking.ipynb). It has been adapted to demonstrate the FederatedRuntime version of the watermarking workflow. In this tutorial, we will guide you through the process of deploying the watermarking example within a federation, showcasing how to transition from a local setup to a federated environment effectively." 
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "a4394089",
+   "metadata": {},
+   "source": [
+    "# Getting Started"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "857f9995",
+   "metadata": {},
+   "source": [
+    "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported.\n",
+    "\n",
+    "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d79eacbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| default_exp experiment"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "62449b5f",
+   "metadata": {},
+   "source": [
+    "Once we have specified the name of the module, the subsequent cells of the notebook need to be *annotated* with the `#| export` directive as shown below. Users should ensure that *all* the notebook functionality required in the Federated Learning experiment is included under this directive"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2e19dcf2",
+   "metadata": {},
+   "source": [
+    "We start by installing OpenFL and the dependencies of the workflow interface\n",
+    "> These dependencies need to be exported and become the requirements for the Federated Learning workspace"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f7475cba",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "!pip install git+https://github.com/securefederatedai/openfl.git\n",
+    "!pip install -r ../../../workflow_interface_requirements.txt\n",
+    "!pip install matplotlib\n",
+    "!pip install torch==2.3.1\n",
+    "!pip install torchvision==0.18.1\n",
+    "!pip install git+https://github.com/pyviz-topics/imagen.git@master\n",
+    "!pip install holoviews==1.15.4\n",
+    "!pip install -U ipywidgets"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9a6ae8e2",
+   "metadata": {},
+   "source": [
+    "We now define our model, optimizer, and some helper functions like we would for any other deep learning experiment\n",
+    "\n",
+    "> This cell and all the subsequent cells are important ingredients of the Federated Learning experiment and are therefore annotated with the `#| export` directive"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9bd8ac2d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "import torch\n",
+    "import numpy as np\n",
+    "\n",
+    "random_seed = 1\n",
+    "torch.backends.cudnn.enabled = False\n",
+    "torch.manual_seed(random_seed)\n",
+    "\n",
+    "class Net(nn.Module):\n",
+    "    def __init__(self, dropout=0.0):\n",
+    "        super(Net, self).__init__()\n",
+    "        self.dropout = dropout\n",
+    "        self.block = nn.Sequential(\n",
+    "            nn.Conv2d(1, 32, 2),\n",
+    "            nn.MaxPool2d(2),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Conv2d(32, 64, 2),\n",
+    "            nn.MaxPool2d(2),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Conv2d(64, 128, 2),\n",
+    "            nn.ReLU(),\n",
+    "        )\n",
+    "        self.fc1 = nn.Linear(128 * 5**2, 200)\n",
+    "        self.fc2 = nn.Linear(200, 10)\n",
+    "        self.relu = nn.ReLU()\n",
+    "        self.dropout = nn.Dropout(p=dropout)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = self.dropout(x)\n",
+    "        out = self.block(x)\n",
+    "        out = out.view(-1, 128 * 5**2)\n",
+    "        out = self.dropout(out)\n",
+    "        out = self.relu(self.fc1(out))\n",
+    "        out = self.dropout(out)\n",
+    "        out = self.fc2(out)\n",
+    "        return F.log_softmax(out, 1)\n",
+    "\n",
+    "\n",
+    "def inference(network, test_loader):\n",
+    "    network.eval()\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for data, target in test_loader:\n",
+    "            output = network(data)\n",
+    "            pred = output.data.max(1, keepdim=True)[1]\n",
+    "            correct += pred.eq(target.data.view_as(pred)).sum()\n",
+    "    accuracy = float(correct / len(test_loader.dataset))\n",
+    "    return accuracy\n",
+    "\n",
+    "\n",
+    "def train_model(model, optimizer, data_loader, entity, round_number, log=False):\n",
+    "    # Helper function to train the model\n",
+    "    train_loss = 0\n",
+    "    log_interval = 20\n",
+    "    model.train()\n",
+    "    for batch_idx, (X, y) in enumerate(data_loader):\n",
+    "        optimizer.zero_grad()\n",
+    "\n",
+    "        output = model(X)\n",
+    "        loss = F.nll_loss(output, y)\n",
+    "        loss.backward()\n",
+    "\n",
+    "        optimizer.step()\n",
+    "\n",
+    "        train_loss += loss.item() * len(X)\n",
+    "        if batch_idx % log_interval == 0 and log:\n",
+    "            print(\"{:<20} Train Epoch: {:<3} [{:<3}/{:<4} ({:<.0f}%)] Loss: {:<.6f}\".format(\n",
+    "                entity,\n",
+    "                round_number,\n",
+    "                batch_idx * len(X),\n",
+    "                len(data_loader.dataset),\n",
+    "                100.0 * batch_idx / len(data_loader),\n",
+    "                loss.item(),\n",
+    "                )\n",
+    "            )\n",
+    "    train_loss /= len(data_loader.dataset)\n",
+    "    return train_loss"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d0849d57",
+   "metadata": {},
+   "source": [
+    "Next we import `FLSpec` and the placement decorators (`aggregator`/`collaborator`), and define the `FedAvg` helper function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "89cf4866",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "from openfl.experimental.workflow.interface import FLSpec\n",
+    "from openfl.experimental.workflow.placement import aggregator, collaborator\n",
+    "\n",
+    "def FedAvg(agg_model, models, weights=None):\n",
+    "    state_dicts = [model.state_dict() for model in models]\n",
+    "    state_dict = agg_model.state_dict()\n",
+    "    for key in models[0].state_dict():\n",
+    "        state_dict[key] = torch.from_numpy(np.average([state[key].numpy() for state in state_dicts],\n",
+    "                                                      axis=0,\n",
+    "                                                      weights=weights))\n",
+    "\n",
+    "    agg_model.load_state_dict(state_dict)\n",
+    "    return agg_model"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36ed5e31",
+   "metadata": {},
+   "source": [
+    "Let us now define the Workflow for Watermark embedding."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52c4a752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "class FederatedFlow_MNIST_Watermarking(FLSpec):\n",
+    "    \"\"\"\n",
+    "    This flow demonstrates watermarking a deep learning model in federated learning\n",
+    "    Ref: WAFFLE: Watermarking in Federated Learning (https://arxiv.org/abs/2008.07298)\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(\n",
+    "        self,\n",
+    "        model=None,\n",
+    "        optimizer=None,\n",
+    "        watermark_pretrain_optimizer=None,\n",
+    "        watermark_retrain_optimizer=None,\n",
+    "        round_number=0,\n",
+    "        n_rounds=1,\n",
+    "        **kwargs,\n",
+    "    ):\n",
+    "        super().__init__(**kwargs)\n",
+    "\n",
+    "        if model is not None:\n",
+    "            self.model = model\n",
+    "            self.optimizer = optimizer\n",
+    "            self.watermark_pretrain_optimizer = watermark_pretrain_optimizer\n",
+    "            self.watermark_retrain_optimizer = watermark_retrain_optimizer\n",
+    "        else:\n",
+    "            self.model = Net()\n",
+    "            self.optimizer = optim.SGD(\n",
+    "                self.model.parameters(), lr=learning_rate, momentum=momentum\n",
+    "            )\n",
+    "            self.watermark_pretrain_optimizer = optim.SGD(\n",
+    "                self.model.parameters(),\n",
+    "                lr=watermark_pretrain_learning_rate,\n",
+    "                momentum=watermark_pretrain_momentum,\n",
+    "                weight_decay=watermark_pretrain_weight_decay,\n",
+    "            )\n",
+    "            self.watermark_retrain_optimizer = optim.SGD(\n",
+    "                self.model.parameters(), lr=watermark_retrain_learning_rate\n",
+    "            )\n",
+    "        self.round_number = round_number\n",
+    "        self.n_rounds = n_rounds\n",
+    "        self.watermark_pretraining_completed = False\n",
+    "\n",
+    "    @aggregator\n",
+    "    def start(self):\n",
+    "        \"\"\"\n",
+    "        This is the start of the Flow.\n",
+    "        \"\"\"\n",
+    "        print(\"Start of flow ... \")\n",
+    "        self.collaborators = self.runtime.collaborators\n",
+    "\n",
+    "        self.next(self.watermark_pretrain)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def watermark_pretrain(self):\n",
+    "        \"\"\"\n",
+    "        Pre-train the model before starting federated learning.\n",
+    "        \"\"\"\n",
+    "        if not self.watermark_pretraining_completed:\n",
+    "\n",
+    "            print(\"Performing watermark pre-training\")\n",
+    "\n",
+    "            for i in range(self.pretrain_epochs):\n",
+    "\n",
+    "                watermark_pretrain_loss = train_model(\n",
+    "                    self.model,\n",
+    "                    self.watermark_pretrain_optimizer,\n",
+    "                    self.watermark_data_loader,\n",
+    "                    \"aggregator\",\n",
+    "                    i,\n",
+    "                    log=False,\n",
+    "                )\n",
+    "                watermark_pretrain_validation_score = inference(\n",
+    "                    self.model, self.watermark_data_loader\n",
+    "                )\n",
+    "\n",
+    "                print(f\"Watermark pretraining: Round: {i:<3}\"\n",
+    "                      + f\" Loss: {watermark_pretrain_loss:<.6f}\"\n",
+    "                      + f\" Acc: {watermark_pretrain_validation_score:<.6f}\")\n",
+    "\n",
+    "            self.watermark_pretraining_completed = True\n",
+    "\n",
+    "        self.next(\n",
+    "            self.aggregated_model_validation,\n",
+    "            foreach=\"collaborators\",\n",
+    "        )\n",
+    "\n",
+    "    @collaborator\n",
+    "    def aggregated_model_validation(self):\n",
+    "        \"\"\"\n",
+    "        Perform aggregated model validation on collaborators.\n",
+    "        \"\"\"\n",
+    "        self.agg_validation_score = inference(self.model, self.test_loader)\n",
+    "        print(f\"<{self.input}> Aggregated model validation score = {self.agg_validation_score}\")\n",
+    "\n",
+    "        self.next(self.train)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def train(self):\n",
+    "        \"\"\"\n",
+    "        Train the model on the local collaborator dataset.\n",
+    "        \"\"\"\n",
+    "        print(f\"<{self.input}> Performing model training on local dataset ... \")\n",
+    "\n",
+    "        self.optimizer = optim.SGD(\n",
+    "            self.model.parameters(), lr=learning_rate, momentum=momentum\n",
+    "        )\n",
+    "\n",
+    "        self.loss = train_model(\n",
+    "            self.model,\n",
+    "            self.optimizer,\n",
+    "            self.train_loader,\n",
+    "            f\"<{self.input}>\",\n",
+    "            self.round_number,\n",
+    "            log=True,\n",
+    "        )\n",
+    "\n",
+    "        self.next(self.local_model_validation)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def local_model_validation(self):\n",
+    "        \"\"\"\n",
+    "        Validate locally trained model.\n",
+    "        \"\"\"\n",
+    "        self.local_validation_score = inference(self.model, self.test_loader)\n",
+    "        print(f\"<{self.input}> Local model validation score = {self.local_validation_score}\")\n",
+    "        self.next(self.join)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def join(self, inputs):\n",
+    "        \"\"\"\n",
+    "        Model aggregation step.\n",
+    "        \"\"\"\n",
+    "        self.average_loss = sum(input.loss for input in inputs) / len(inputs)\n",
+    "        self.aggregated_model_accuracy = sum(\n",
+    "            input.agg_validation_score for input in inputs\n",
+    "        ) / len(inputs)\n",
+    "        self.local_model_accuracy = sum(\n",
+    "            input.local_validation_score for input in inputs\n",
+    "        ) / len(inputs)\n",
+    "\n",
+    "        print(\"Joining models from collaborators ...\")\n",
+    "\n",
+    "        print(f\"Aggregated model validation score = {self.aggregated_model_accuracy}\")\n",
+    "        print(f\"Average training loss = {self.average_loss}\")\n",
+    "        print(f\"Average local model validation score = {self.local_model_accuracy}\")\n",
+    "\n",
+    "        self.model = FedAvg(self.model, [input.model for input in inputs])\n",
+    "\n",
+    "        self.next(self.watermark_retrain)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def watermark_retrain(self):\n",
+    "        \"\"\"\n",
+    "        Retrain the aggregated model on the watermark dataset.\n",
+    "        \"\"\"\n",
+    "        print(\"Performing watermark retraining ... \")\n",
+    "        self.watermark_retrain_optimizer = optim.SGD(\n",
+    "            self.model.parameters(), lr=watermark_retrain_learning_rate\n",
+    "        )\n",
+    "\n",
+    "        retrain_round = 0\n",
+    "\n",
+    "        # Perform re-training until (accuracy >= acc_threshold) or\n",
+    "        # (retrain_round >= retrain_epochs)\n",
+    "        self.watermark_retrain_validation_score = inference(\n",
+    "            self.model, self.watermark_data_loader\n",
+    "        )\n",
+    "        while (\n",
+    "            self.watermark_retrain_validation_score < self.watermark_acc_threshold\n",
+    "        ) and (retrain_round < self.retrain_epochs):\n",
+    "            self.watermark_retrain_train_loss = train_model(\n",
+    "                self.model,\n",
+    "                self.watermark_retrain_optimizer,\n",
+    "                self.watermark_data_loader,\n",
+    "                \"aggregator\",\n",
+    "                retrain_round,\n",
+    "                log=False,\n",
+    "            )\n",
+    "            self.watermark_retrain_validation_score = inference(\n",
+    "                self.model, self.watermark_data_loader\n",
+    "            )\n",
+    "\n",
+    "            print(f\"Watermark retraining: Train Epoch: {self.round_number:<3}\"\n",
+    "                  + f\" Retrain Round: {retrain_round:<3}\"\n",
+    "                  + f\" Loss: {self.watermark_retrain_train_loss:<.6f},\"\n",
+    "                  + f\" Acc: {self.watermark_retrain_validation_score:<.6f}\")\n",
+    "            retrain_round += 1\n",
+    "\n",
+    "        self.next(self.end)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def end(self):\n",
+    "        \"\"\"\n",
+    "        This is the last step in the Flow.\n",
+    "        \"\"\"\n",
+    "        print(\"This is the end of the flow\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b5371b6d",
+   "metadata": {},
+   "source": [
+    "## Defining and Initializing the Federated Runtime\n",
+    "We initialize the Federated Runtime by providing:\n",
+    "- `director_info`: The director's connection information\n",
+    "- `authorized_collaborators`: A list of authorized collaborators\n",
+    "- `notebook_path`: Path to this Jupyter notebook."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1715a373",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "from openfl.experimental.workflow.runtime import FederatedRuntime\n",
+    "\n",
+    "director_info = {\n",
+    "    'director_node_fqdn':'localhost',\n",
+    "    'director_port':50050,\n",
+    "}\n",
+    "\n",
+    "authorized_collaborators = ['Bangalore', 'Chandler']\n",
+    "\n",
+    "federated_runtime = FederatedRuntime(\n",
+    "    collaborators=authorized_collaborators,\n",
+    "    director=director_info,\n",
+    "    notebook_path='./MNIST_Watermarking.ipynb'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6de9684f",
+   "metadata": {},
+   "source": [
+    "The status of the connected Envoys can be checked using the `get_envoys()` method of the `federated_runtime`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1f1be87f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "federated_runtime.get_envoys()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0eaeca25",
+   "metadata": {},
+   "source": [
+    "With the `federated_runtime` now instantiated, we can proceed to deploy the watermarking workspace and run the experiment!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "# Set random seed\n", + "random_seed = 42\n", + "torch.manual_seed(random_seed)\n", + "np.random.seed(random_seed)\n", + "torch.backends.cudnn.enabled = False\n", + "\n", + "# MNIST parameters\n", + "learning_rate = 5e-2\n", + "momentum = 5e-1\n", + "log_interval = 20\n", + "\n", + "# Watermarking parameters\n", + "watermark_pretrain_learning_rate = 1e-1\n", + "watermark_pretrain_momentum = 5e-1\n", + "watermark_pretrain_weight_decay = 5e-05\n", + "watermark_retrain_learning_rate = 5e-3\n", + "\n", + "model = Net()\n", + "optimizer = optim.SGD(\n", + " model.parameters(), lr=learning_rate, momentum=momentum\n", + ")\n", + "watermark_pretrain_optimizer = optim.SGD(\n", + " model.parameters(),\n", + " lr=watermark_pretrain_learning_rate,\n", + " momentum=watermark_pretrain_momentum,\n", + " weight_decay=watermark_pretrain_weight_decay,\n", + ")\n", + "watermark_retrain_optimizer = optim.SGD(\n", + " model.parameters(), lr=watermark_retrain_learning_rate\n", + ")\n", + "\n", + "flflow = FederatedFlow_MNIST_Watermarking(\n", + " model,\n", + " optimizer,\n", + " watermark_pretrain_optimizer,\n", + " watermark_retrain_optimizer,\n", + " checkpoint=True,\n", + ")\n", + "flflow.runtime = federated_runtime\n", + "flflow.run()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fed_run", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt b/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt index 4dc2bc32cb..dab7c8863d 100644 --- a/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt +++ b/openfl-tutorials/experimental/workflow/workflow_interface_requirements.txt @@ -6,5 +6,6 @@ metaflow==2.7.15 nbdev==2.3.12 nbformat==5.10.4 ray==2.9.2 -torch -torchvision +tabulate==0.9.0 +torch==2.3.1 +torchvision==0.18.1 diff --git a/openfl/experimental/workflow/component/__init__.py b/openfl/experimental/workflow/component/__init__.py index db588c6130..4c2c6ee45d 100644 --- a/openfl/experimental/workflow/component/__init__.py +++ b/openfl/experimental/workflow/component/__init__.py @@ -7,3 +7,5 @@ # FIXME: Too much recursion from openfl.experimental.workflow.component.aggregator import Aggregator from openfl.experimental.workflow.component.collaborator import Collaborator +from openfl.experimental.workflow.component.director import Director +from openfl.experimental.workflow.component.envoy import Envoy diff --git a/openfl/experimental/workflow/component/aggregator/aggregator.py b/openfl/experimental/workflow/component/aggregator/aggregator.py index 4e034aa315..568c3246fa 100644 --- a/openfl/experimental/workflow/component/aggregator/aggregator.py +++ b/openfl/experimental/workflow/component/aggregator/aggregator.py @@ -4,36 +4,56 @@ """Experimental Aggregator module.""" +import asyncio import inspect -import pickle import queue import time from logging import getLogger from threading import Event from typing import Any, Callable, Dict, List, Tuple +import dill + from openfl.experimental.workflow.interface import 
FLSpec from openfl.experimental.workflow.runtime import FederatedRuntime from openfl.experimental.workflow.utilities import aggregator_to_collaborator, checkpoint from openfl.experimental.workflow.utilities.metaflow_utils import MetaflowInterface +logger = getLogger(__name__) + class Aggregator: r"""An Aggregator is the central node in federated learning. Args: + single_col_cert_common_name (str): Common name for single + collaborator certificate. + log_metric_callback (optional): Callback for log metric. Defaults + to None. aggregator_uuid (str): Aggregation ID. federation_uuid (str): Federation ID. authorized_cols (list of str): The list of IDs of enrolled collaborators. - flow (Any): Flow class. rounds_to_train (int): External loop rounds. - checkpoint (bool): Whether to save checkpoint or noe (default=False). + current_round (int): Current round. + collaborators_counter (int): counter for the collaborators + quit_job_sent_to (list): A list of collaborators to whom quit jobs + will be sent. + time_to_quit (bool): Time to quit the experiment, + collaborator_task_results (Event): Event to inform aggregator that + collaborators have sent the results. + __collaborator_tasks_queue (Dict[Queue]): queue for each collaborator. + flow (Any): Flow class. + name (str): aggregator in string format. + checkpoint (bool): Whether to save checkpoint or not (default=False). private_attrs_callable (Callable): Function for Aggregator private - attriubtes + attributes. (default=None). - private_attrs_kwargs (Dict): Arguments to call private_attrs_callable + private_attrs (Dict): Private attributes dictionary. (default={}). + connected_collaborators (list): List of connected collaborators + tasks_sent_to_collaborators (int): count of tasks sent to collaborators. + stdout_queue (queue.Queue): Queue for stdout. Returns: None @@ -54,8 +74,27 @@ def __init__( log_metric_callback: Callable = None, **kwargs, ) -> None: - self.logger = getLogger(__name__) + """Initializes the Aggregator. + Args: + aggregator_uuid (int): Aggregation ID. + federation_uuid (str): Federation ID. + authorized_cols (list of str): The list of IDs of enrolled + collaborators. + flow (Any): Flow class. + rounds_to_train (int, optional): Number of rounds to train. + Defaults to 1. + checkpoint (bool): Whether to save checkpoint or not (default=False). + private_attributes_callable (Callable): Function for Aggregator private + attributes. (default=None). + private_attributes (Dict): Private attributes dictionary. + (default={}). + single_col_cert_common_name (str, optional): Common name for single + collaborator certificate. Defaults to None. + log_metric_callback (optional): Callback for log metric. Defaults + to None. + **kwargs: Additional keyword arguments. 
+ """ self.single_col_cert_common_name = single_col_cert_common_name if self.single_col_cert_common_name is not None: self._log_big_warning() @@ -67,7 +106,7 @@ def __init__( self.log_metric_callback = log_metric_callback if log_metric_callback is not None: self.log_metric = log_metric_callback - self.logger.info(f"Using custom log metric: {self.log_metric}") + logger.info(f"Using custom log metric: {self.log_metric}") self.uuid = aggregator_uuid self.federation_uuid = federation_uuid @@ -87,21 +126,21 @@ def __init__( self.flow = flow self.checkpoint = checkpoint self.flow._foreach_methods = [] - self.logger.info("MetaflowInterface creation.") + logger.info("MetaflowInterface creation.") self.flow._metaflow_interface = MetaflowInterface(self.flow.__class__, "single_process") self.flow._run_id = self.flow._metaflow_interface.create_run() self.flow.runtime = FederatedRuntime() - self.flow.runtime.aggregator = "aggregator" + self.name = "aggregator" self.flow.runtime.collaborators = self.authorized_cols self.__private_attrs_callable = private_attributes_callable self.__private_attrs = private_attributes self.connected_collaborators = [] self.tasks_sent_to_collaborators = 0 - self.collaborator_results_received = [] + self.stdout_queue = queue.Queue() if self.__private_attrs_callable is not None: - self.logger.info("Initializing aggregator private attributes...") + logger.info("Initializing aggregator private attributes...") self.__initialize_private_attributes(private_attributes_kwargs) def __initialize_private_attributes(self, kwargs: Dict) -> None: @@ -135,7 +174,7 @@ def __delete_private_attrs_from_clone(self, clone: Any, replace_str: str = None) def _log_big_warning(self) -> None: """Warn user about single collaborator cert mode.""" - self.logger.warning( + logger.warning( f"\n{the_dragon}\nYOU ARE RUNNING IN SINGLE COLLABORATOR CERT MODE! THIS IS" f" NOT PROPER PKI AND " f"SHOULD ONLY BE USED IN DEVELOPMENT SETTINGS!!!! YE HAVE BEEN" @@ -151,21 +190,26 @@ def _get_sleep_time() -> int: """ return 10 - def run_flow(self) -> None: - """Start the execution and run flow until transition.""" + async def run_flow(self) -> FLSpec: + """ + Start the execution and run flow until completion. + Returns the updated flow to the user. + + Returns: + flow (FLSpec): Updated instance. + """ # Start function will be the first step if any flow f_name = "start" # Creating a clones from the flow object FLSpec._reset_clones() FLSpec._create_clones(self.flow, self.flow.runtime.collaborators) - self.logger.info(f"Starting round {self.current_round}...") + logger.info(f"Starting round {self.current_round}...") while True: next_step = self.do_task(f_name) if self.time_to_quit: - self.logger.info("Experiment Completed.") - self.quit_job_sent_to = self.authorized_cols + logger.info("Experiment Completed.") break # Prepare queue for collaborator task, with clones @@ -173,32 +217,32 @@ def run_flow(self) -> None: if k in self.selected_collaborators: v.put((next_step, self.clones_dict[k])) else: - self.logger.info(f"Tasks will not be sent to {k}") + logger.info(f"Tasks will not be sent to {k}") while not self.collaborator_task_results.is_set(): len_sel_collabs = len(self.selected_collaborators) len_connected_collabs = len(self.connected_collaborators) if len_connected_collabs < len_sel_collabs: # Waiting for collaborators to connect. - self.logger.info( + logger.info( "Waiting for " + f"{len_connected_collabs}/{len_sel_collabs}" + " collaborators to connect..." 
) elif self.tasks_sent_to_collaborators != len_sel_collabs: - self.logger.info( + logger.info( "Waiting for " + f"{self.tasks_sent_to_collaborators}/{len_sel_collabs}" + " to make requests for tasks..." ) else: # Waiting for selected collaborators to send the results. - self.logger.info( + logger.info( "Waiting for " + f"{self.collaborators_counter}/{len_sel_collabs}" + " collaborators to send results..." ) - time.sleep(Aggregator._get_sleep_time()) + await asyncio.sleep(Aggregator._get_sleep_time()) self.collaborator_task_results.clear() f_name = self.next_step @@ -206,36 +250,43 @@ def run_flow(self) -> None: self.flow.restore_instance_snapshot(self.flow, list(self.instance_snapshot)) delattr(self, "instance_snapshot") - def call_checkpoint(self, ctx: Any, f: Callable, stream_buffer: bytes = None) -> None: + return self.flow + + def call_checkpoint( + self, name: str, ctx: Any, f: Callable, stream_buffer: bytes = None + ) -> None: """ - Perform checkpoint task. + Perform checkpoint task and log stdout Args: + name (str): name of the caller ctx (FLSpec / bytes): Collaborator FLSpec object for which checkpoint is to be performed. f (Callable / bytes): Collaborator Step (Function) which is to be checkpointed. stream_buffer (bytes): Captured object for output and error (default=None). - reserved_attributes (List[str]): List of attribute names which is - to be excluded from checkpoint (default=[]). - - Returns: - None """ + # Perform checkpoint if enabled if self.checkpoint: - # Check if arguments are pickled, if yes then unpickle if not isinstance(ctx, FLSpec): - ctx = pickle.loads(ctx) - # Updating metaflow interface object + ctx = dill.loads(ctx) + # Update metaflow interface object ctx._metaflow_interface = self.flow._metaflow_interface + # Deserialize objects if passed in serialized form if not isinstance(f, Callable): - f = pickle.loads(f) - if isinstance(stream_buffer, bytes): - # Set stream buffer as function parameter - f.__func__._stream_buffer = pickle.loads(stream_buffer) - - checkpoint(ctx, f) + f = dill.loads(f) + if stream_buffer and isinstance(stream_buffer, bytes): + f.__func__._stream_buffer = dill.loads(stream_buffer) + + stdout = checkpoint(ctx, f) + # Retrieve and log stdout + stream_info = { + "stdout_origin": name, + "task_name": f.__name__, + "stdout_value": str(stdout.getvalue()), + } + self.stdout_queue.put(stream_info) def get_tasks(self, collaborator_name: str) -> Tuple: """RPC called by a collaborator to determine which tasks to perform. @@ -251,10 +302,10 @@ def get_tasks(self, collaborator_name: str) -> Tuple: # If requesting collaborator is not registered as connected # collaborator, then register it if collaborator_name not in self.connected_collaborators: - self.logger.info(f"Collaborator {collaborator_name} is connected.") + logger.info(f"Collaborator {collaborator_name} is connected.") self.connected_collaborators.append(collaborator_name) - self.logger.debug( + logger.debug( f"Aggregator GetTasks function reached from collaborator {collaborator_name}..." ) @@ -262,9 +313,8 @@ def get_tasks(self, collaborator_name: str) -> Tuple: while self.__collaborator_tasks_queue[collaborator_name].qsize() == 0: # If it is time to then inform the collaborator if self.time_to_quit: - self.logger.info( - f"Sending signal to collaborator {collaborator_name} to shutdown..." 
- ) + logger.info(f"Sending signal to collaborator {collaborator_name} to shutdown...") + self.quit_job_sent_to.append(collaborator_name) # FIXME: 0, and "" instead of None is just for protobuf compatibility. # Cleaner solution? return ( @@ -282,14 +332,14 @@ def get_tasks(self, collaborator_name: str) -> Tuple: next_step, clone = self.__collaborator_tasks_queue[collaborator_name].get() self.tasks_sent_to_collaborators += 1 - self.logger.info( + logger.info( "Sending tasks to collaborator" + f" {collaborator_name} for round {self.current_round}..." ) return ( self.current_round, next_step, - pickle.dumps(clone), + dill.dumps(clone), 0, self.time_to_quit, ) @@ -320,7 +370,7 @@ def do_task(self, f_name: str) -> Any: self.__delete_private_attrs_from_clone( self.flow, "Private attributes: Not Available." ) - self.call_checkpoint(self.flow, f) + self.call_checkpoint(self.name, self.flow, f) self.__set_private_attrs_to_clone(self.flow) # Check if all rounds of external loop is executed if self.current_round is self.rounds_to_train: @@ -331,7 +381,7 @@ def do_task(self, f_name: str) -> Any: # Start next round of execution else: self.current_round += 1 - self.logger.info(f"Starting round {self.current_round}...") + logger.info(f"Starting round {self.current_round}...") f_name = "start" continue @@ -358,7 +408,7 @@ def do_task(self, f_name: str) -> Any: self.__delete_private_attrs_from_clone(self.flow, "Private attributes: Not Available.") # Take the checkpoint of executed step - self.call_checkpoint(self.flow, f) + self.call_checkpoint(self.name, self.flow, f) self.__set_private_attrs_to_clone(self.flow) # Next function in the flow @@ -406,19 +456,22 @@ def send_task_results( """ # Log a warning if collaborator is sending results for old round if round_number is not self.current_round: - self.logger.warning( + logger.warning( f"Collaborator {collab_name} is reporting results" f" for the wrong round: {round_number}. Ignoring..." ) else: - self.logger.info( + logger.info( f"Collaborator {collab_name} sent task results" f" for round {round_number}." ) # Unpickle the clone (FLSpec object) - clone = pickle.loads(clone_bytes) + clone = dill.loads(clone_bytes) # Update the clone in clones_dict dictionary self.clones_dict[clone.input] = clone self.next_step = next_step[0] + # Sync flow state with clone + self.flow._foreach_methods = list(set(clone._foreach_methods)) + self.flow.execute_task_args = clone.execute_task_args self.collaborators_counter += 1 # If selected collaborator have sent the results diff --git a/openfl/experimental/workflow/component/collaborator/collaborator.py b/openfl/experimental/workflow/component/collaborator/collaborator.py index a683bbfeb7..0cbb1de069 100644 --- a/openfl/experimental/workflow/component/collaborator/collaborator.py +++ b/openfl/experimental/workflow/component/collaborator/collaborator.py @@ -4,11 +4,12 @@ """Experimental Collaborator module.""" -import pickle import time from logging import getLogger from typing import Any, Callable, Dict, Tuple +import dill + class Collaborator: r"""The Collaborator object class. 
@@ -120,9 +121,9 @@ def call_checkpoint(self, ctx: Any, f: Callable, stream_buffer: Any) -> None: """ self.client.call_checkpoint( self.name, - pickle.dumps(ctx), - pickle.dumps(f), - pickle.dumps(stream_buffer), + dill.dumps(ctx), + dill.dumps(f), + dill.dumps(stream_buffer), ) def run(self) -> None: @@ -162,7 +163,7 @@ def send_task_results(self, next_step: str, clone: Any) -> None: self.logger.info( f"Round {self.round_number}," f" collaborator {self.name} is sending results..." ) - self.client.send_task_results(self.name, self.round_number, next_step, pickle.dumps(clone)) + self.client.send_task_results(self.name, self.round_number, next_step, dill.dumps(clone)) def get_tasks(self) -> Tuple: """Get tasks from the aggregator. @@ -179,8 +180,9 @@ def get_tasks(self) -> Tuple: self.logger.info("Waiting for tasks...") temp = self.client.get_tasks(self.name) self.round_number, next_step, clone_bytes, sleep_time, time_to_quit = temp - - return next_step, pickle.loads(clone_bytes), sleep_time, time_to_quit + if time_to_quit: + return next_step, "", sleep_time, time_to_quit + return next_step, dill.loads(clone_bytes), sleep_time, time_to_quit def do_task(self, f_name: str, ctx: Any) -> Tuple: """Run collaborator steps until transition. diff --git a/openfl/experimental/workflow/component/director/__init__.py b/openfl/experimental/workflow/component/director/__init__.py new file mode 100644 index 0000000000..bdb0eefff5 --- /dev/null +++ b/openfl/experimental/workflow/component/director/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Director package.""" + +from openfl.experimental.workflow.component.director.director import Director diff --git a/openfl/experimental/workflow/component/director/director.py b/openfl/experimental/workflow/component/director/director.py new file mode 100644 index 0000000000..103491c811 --- /dev/null +++ b/openfl/experimental/workflow/component/director/director.py @@ -0,0 +1,317 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +"""Experimental Director module.""" + +import asyncio +import logging +import time +from collections import defaultdict +from pathlib import Path +from typing import Any, AsyncGenerator, Dict, Iterable, Optional, Tuple, Union + +import dill + +from openfl.experimental.workflow.component.director.experiment import ( + Experiment, + ExperimentsRegistry, +) +from openfl.experimental.workflow.transport.grpc.exceptions import EnvoyNotFoundError + +logger = logging.getLogger(__name__) + + +class Director: + """Director class for managing experiments and envoys. + + Attributes: + tls (bool): A flag indicating if TLS should be used for connections. + root_certificate (Optional[Union[Path, str]]): The path to the root certificate + for TLS. + private_key (Optional[Union[Path, str]]): The path to the private key for TLS. + certificate (Optional[Union[Path, str]]): The path to the certificate for TLS. + director_config (Optional[Path]): Path to director_config file + install_requirements (bool): A flag indicating if the requirements + should be installed. + _flow_status (Queue): Stores the flow status + experiments_registry (ExperimentsRegistry): An object of + ExperimentsRegistry to store the experiments. + col_exp (dict): A dictionary to store the experiments for + collaborators. + col_exp_queues (defaultdict): A defaultdict to store the experiment + queues for collaborators. 
+        _envoy_registry (dict): A dictionary to store envoy info
+        envoy_health_check_period (int): The period for health check of envoys
+            in seconds.
+        authorized_cols (list): A list of authorized envoys.
+    """
+
+    def __init__(
+        self,
+        *,
+        tls: bool = True,
+        root_certificate: Optional[Union[Path, str]] = None,
+        private_key: Optional[Union[Path, str]] = None,
+        certificate: Optional[Union[Path, str]] = None,
+        director_config: Optional[Path] = None,
+        envoy_health_check_period: int = 60,
+        install_requirements: bool = True,
+    ) -> None:
+        """Initialize a Director object.
+
+        Args:
+            tls (bool, optional): A flag indicating if TLS should be used for
+                connections. Defaults to True.
+            root_certificate (Optional[Union[Path, str]]): The path to the
+                root certificate for TLS. Defaults to None.
+            private_key (Optional[Union[Path, str]]): The path to the private
+                key for TLS. Defaults to None.
+            certificate (Optional[Union[Path, str]]): The path to the
+                certificate for TLS. Defaults to None.
+            director_config (Optional[Path]): Path to director_config file.
+            envoy_health_check_period (int): The period for health check of envoys
+                in seconds.
+            install_requirements (bool, optional): A flag indicating if the
+                requirements should be installed. Defaults to True.
+        """
+        self.tls = tls
+        self.root_certificate = root_certificate
+        self.private_key = private_key
+        self.certificate = certificate
+        self.director_config = director_config
+        self.install_requirements = install_requirements
+        self._flow_status = asyncio.Queue()
+
+        self.experiments_registry = ExperimentsRegistry()
+        self.col_exp = {}
+        self.col_exp_queues = defaultdict(asyncio.Queue)
+        self._envoy_registry = {}
+        self.envoy_health_check_period = envoy_health_check_period
+        # authorized_cols refers to envoy & collaborator pair (one to one mapping)
+        self.authorized_cols = []
+
+    async def start_experiment_execution_loop(self) -> None:
+        """Run queued experiments one at a time, once authorized envoys connect."""
+        loop = asyncio.get_event_loop()
+        while True:
+            try:
+                async with self.experiments_registry.get_next_experiment() as experiment:
+                    await self._wait_for_authorized_envoys()
+                    run_aggregator_future = loop.create_task(
+                        experiment.start(
+                            root_certificate=self.root_certificate,
+                            certificate=self.certificate,
+                            private_key=self.private_key,
+                            tls=self.tls,
+                            director_config=self.director_config,
+                            install_requirements=False,
+                        )
+                    )
+                    # Add the experiment to each collaborator's queue
+                    for col_name in experiment.collaborators:
+                        queue = self.col_exp_queues[col_name]
+                        await queue.put(experiment.name)
+                    # Wait for the experiment to complete and save the result
+                    flow_status = await run_aggregator_future
+                    await self._flow_status.put(flow_status)
+            except Exception as e:
+                logger.error(f"Error while executing experiment: {e}")
+                raise
+
+    async def _wait_for_authorized_envoys(self) -> None:
+        """Wait until all authorized envoys are connected."""
+        while not all(envoy in self.get_envoys().keys() for envoy in self.authorized_cols):
+            connected_envoys = len(
+                [envoy for envoy in self.authorized_cols if envoy in self.get_envoys().keys()]
+            )
+            logger.info(
+                f"Waiting for {connected_envoys}/{len(self.authorized_cols)} "
+                "authorized envoys to connect..."
+            )
+            await asyncio.sleep(10)
+
+    async def get_flow_state(self) -> Tuple[bool, bytes]:
+        """Wait until the experiment flow status indicates completion
+        and return the status along with a serialized FLSpec object.
+
+        Returns:
+            status (bool): The flow status.
+            flspec_obj (bytes): A serialized FLSpec object (in bytes) using dill.
+        """
+        status, flspec_obj = await self._flow_status.get()
+        return status, dill.dumps(flspec_obj)
+
+    async def wait_experiment(self, envoy_name: str) -> str:
+        """Waits for an experiment to be ready for a given envoy.
+
+        Args:
+            envoy_name (str): The name of the envoy.
+
+        Returns:
+            str: The name of the experiment on the queue.
+        """
+        experiment_name = self.col_exp.get(envoy_name)
+        # If the envoy was disconnected mid-experiment, resume the ongoing experiment
+        if experiment_name and experiment_name in self.experiments_registry:
+            experiment = self.experiments_registry[experiment_name]
+            if experiment.aggregator.current_round < experiment.aggregator.rounds_to_train:
+                return experiment_name
+
+        self.col_exp[envoy_name] = None
+        queue = self.col_exp_queues[envoy_name]
+        experiment_name = await queue.get()
+        self.col_exp[envoy_name] = experiment_name
+
+        return experiment_name
+
+    async def set_new_experiment(
+        self,
+        experiment_name: str,
+        sender_name: str,
+        collaborator_names: Iterable[str],
+        experiment_archive_path: Path,
+    ) -> bool:
+        """Set new experiment.
+
+        Args:
+            experiment_name (str): String id for experiment.
+            sender_name (str): The name of the sender.
+            collaborator_names (Iterable[str]): Names of collaborators.
+            experiment_archive_path (Path): Path of the experiment archive.
+
+        Returns:
+            bool: True if the experiment was registered successfully.
+        """
+        experiment = Experiment(
+            name=experiment_name,
+            archive_path=experiment_archive_path,
+            collaborators=collaborator_names,
+            users=[sender_name],
+            sender=sender_name,
+        )
+
+        self.authorized_cols = collaborator_names
+        self.experiments_registry.add(experiment)
+        return True
+
+    async def stream_experiment_stdout(
+        self, experiment_name: str, caller: str
+    ) -> AsyncGenerator[Optional[Dict[str, Any]], None]:
+        """Stream stdout from the aggregator.
+
+        This method takes the next stdout dictionary from the aggregator's
+        queue and yields it to the caller.
+
+        Args:
+            experiment_name (str): String id for experiment.
+            caller (str): String id for experiment owner.
+
+        Yields:
+            Optional[Dict[str, str]]: A dictionary containing the keys
+                'stdout_origin', 'task_name', and 'stdout_value' if the queue is not empty,
+                or None if the queue is empty but the experiment is still running.
+        """
+        if (
+            experiment_name not in self.experiments_registry
+            or caller not in self.experiments_registry[experiment_name].users
+        ):
+            raise Exception(
+                f'No experiment name "{experiment_name}" in experiments list, or caller "{caller}"'
+                f" does not have access to this experiment"
+            )
+        while not self.experiments_registry[experiment_name].aggregator:
+            await asyncio.sleep(5)
+        aggregator = self.experiments_registry[experiment_name].aggregator
+        while True:
+            if not aggregator.stdout_queue.empty():
+                # Yield the next item from the queue
+                yield aggregator.stdout_queue.get()
+            elif aggregator.all_quit_jobs_sent():
+                # Stop iteration once all quit jobs are sent and the queue is empty
+                break
+            else:
+                # Yield None if the queue is empty but the experiment is still running.
+                yield None
+
+    def get_experiment_data(self, experiment_name: str) -> Path:
+        """Get experiment data.
+
+        Args:
+            experiment_name (str): String id for experiment.
+
+        Returns:
+            Path: Path of archive.
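+
+        Example (illustrative; mirrors the envoy-side sequence of
+        ``wait_experiment`` followed by ``get_experiment_data``)::
+
+            exp_name = await director.wait_experiment("envoy_one")
+            archive = director.get_experiment_data(exp_name)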
+ """ + return self.experiments_registry[experiment_name].archive_path + + def ack_envoy_connection_request(self, envoy_name: str) -> bool: + """Save the envoy info into _envoy_registry + + Args: + envoy_name (str): Name of the envoy + + Returns: + bool: Always returns True to indicate the envoy + has been successfully acknowledged. + """ + self._envoy_registry[envoy_name] = { + "name": envoy_name, + "is_online": True, + "is_experiment_running": False, + "last_updated": time.time(), + "valid_duration": 2 * self.envoy_health_check_period, + } + # Currently always returns True, indicating the envoy was added successfully. + # Future logic might change this to handle conditions. + return True + + def get_envoys(self) -> Dict[str, Any]: + """Gets list of connected envoys + + Returns: + dict: Dictionary with the status information about envoys. + """ + logger.debug("Envoy registry: %s", self._envoy_registry) + for envoy in self._envoy_registry.values(): + envoy["is_online"] = time.time() < envoy.get("last_updated", 0) + envoy.get( + "valid_duration", 0 + ) + envoy["experiment_name"] = self.col_exp.get(envoy["name"], "None") + + return self._envoy_registry + + def update_envoy_status( + self, + *, + envoy_name: str, + is_experiment_running: bool, + ) -> int: + """Accept health check from envoy. + + Args: + envoy_name (str): String id for envoy. + is_experiment_running (bool): Boolean value for the status of the + experiment. + + Raises: + EnvoyNotFoundError: When Unknown envoy {envoy_name}. + + Returns: + int: Value of the envoy_health_check_period. + """ + envoy_info = self._envoy_registry.get(envoy_name) + if not envoy_info: + logger.error(f"Unknown envoy {envoy_name}") + raise EnvoyNotFoundError(f"Unknown envoy {envoy_name}") + + envoy_info.update( + { + "is_online": True, + "is_experiment_running": is_experiment_running, + "valid_duration": 2 * self.envoy_health_check_period, + "last_updated": time.time(), + } + ) + + return self.envoy_health_check_period diff --git a/openfl/experimental/workflow/component/director/experiment.py b/openfl/experimental/workflow/component/director/experiment.py new file mode 100644 index 0000000000..c85d6b90d9 --- /dev/null +++ b/openfl/experimental/workflow/component/director/experiment.py @@ -0,0 +1,332 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + + +"""Experimental Experiment module.""" + +import asyncio +import logging +from contextlib import asynccontextmanager +from enum import Enum, auto +from pathlib import Path +from typing import Any, Iterable, List, Optional, Tuple, Union + +from openfl.experimental.workflow.federated import Plan +from openfl.experimental.workflow.transport import AggregatorGRPCServer +from openfl.utilities.workspace import ExperimentWorkspace + +logger = logging.getLogger(__name__) + + +class Status(Enum): + """Experiment's statuses.""" + + PENDING = auto() + FINISHED = auto() + IN_PROGRESS = auto() + FAILED = auto() + REJECTED = auto() + + +class Experiment: + """Experiment class. + + Attributes: + name (str): The name of the experiment. + archive_path (Union[Path, str]): The path to the experiment + archive. + collaborators (List[str]): The list of collaborators. + sender (str): The name of the sender. + init_tensor_dict (dict): The initial tensor dictionary. + plan_path (Union[Path, str]): The path to the plan. + users (Iterable[str]): The list of users. + status (str): The status of the experiment. + aggregator (Aggregator): The aggregator instance. 
+        updated_flow (FLSpec): Updated flow instance.
+    """
+
+    def __init__(
+        self,
+        *,
+        name: str,
+        archive_path: Union[Path, str],
+        collaborators: List[str],
+        sender: str,
+        plan_path: Union[Path, str] = "plan/plan.yaml",
+        users: Iterable[str] = None,
+    ) -> None:
+        """Initialize an experiment object.
+
+        Args:
+            name (str): The name of the experiment.
+            archive_path (Union[Path, str]): The path to the experiment
+                archive.
+            collaborators (List[str]): The list of collaborators.
+            sender (str): The name of the sender.
+            plan_path (Union[Path, str], optional): The path to the plan.
+                Defaults to 'plan/plan.yaml'.
+            users (Iterable[str], optional): The list of users. Defaults to
+                None.
+        """
+        self.name = name
+        self.archive_path = Path(archive_path).absolute()
+        self.collaborators = collaborators
+        self.sender = sender
+        # This plan path ("plan/plan.yaml") originates from the
+        # experiment workspace provided by the director
+        self.plan_path = Path(plan_path)
+        self.users = set() if users is None else set(users)
+        self.status = Status.PENDING
+        self.aggregator = None
+        self.updated_flow = None
+
+    async def start(
+        self,
+        *,
+        tls: bool = True,
+        root_certificate: Optional[Union[Path, str]] = None,
+        private_key: Optional[Union[Path, str]] = None,
+        certificate: Optional[Union[Path, str]] = None,
+        director_config: Path = None,
+        install_requirements: bool = False,
+    ) -> Tuple[bool, Any]:
+        """Run experiment.
+
+        Args:
+            tls (bool, optional): A flag indicating if TLS should be used for
+                connections. Defaults to True.
+            root_certificate (Optional[Union[Path, str]], optional): The path to the
+                root certificate for TLS. Defaults to None.
+            private_key (Optional[Union[Path, str]], optional): The path to the private
+                key for TLS. Defaults to None.
+            certificate (Optional[Union[Path, str]], optional): The path to the
+                certificate for TLS. Defaults to None.
+            director_config (Path): Path to director's config file.
+            install_requirements (bool, optional): A flag indicating if the
+                requirements should be installed. Defaults to False.
+
+        Returns:
+            Tuple[bool, Any]:
+                - status: status of the experiment.
+                - updated_flow: The updated flow object.
+        """
+        self.status = Status.IN_PROGRESS
+        try:
+            logger.info(f"New experiment {self.name} for collaborators {self.collaborators}")
+
+            with ExperimentWorkspace(
+                experiment_name=self.name,
+                data_file_path=self.archive_path,
+                install_requirements=install_requirements,
+            ):
+                aggregator_grpc_server = self._create_aggregator_grpc_server(
+                    tls=tls,
+                    root_certificate=root_certificate,
+                    private_key=private_key,
+                    certificate=certificate,
+                    director_config=director_config,
+                )
+                self.aggregator = aggregator_grpc_server.aggregator
+                _, self.updated_flow = await asyncio.gather(
+                    self._run_aggregator_grpc_server(
+                        aggregator_grpc_server,
+                    ),
+                    self.aggregator.run_flow(),
+                )
+            self.status = Status.FINISHED
+            logger.info("Experiment %s finished successfully.", self.name)
+        except Exception as e:
+            self.status = Status.FAILED
+            logger.error("Experiment %s failed with error: %s.", self.name, e)
+            raise
+
+        return self.status == Status.FINISHED, self.updated_flow
+
+    def _create_aggregator_grpc_server(
+        self,
+        *,
+        tls: bool = True,
+        root_certificate: Optional[Union[Path, str]] = None,
+        private_key: Optional[Union[Path, str]] = None,
+        certificate: Optional[Union[Path, str]] = None,
+        director_config: Path = None,
+    ) -> AggregatorGRPCServer:
+        """Create an aggregator gRPC server.
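+
+        A rough equivalent of what this method does (illustrative)::
+
+            plan = Plan.parse(plan_config_path=self.plan_path)
+            plan.authorized_cols = list(self.collaborators)
+            server = plan.get_server(tls=tls, director_config=director_config)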
+
+        Args:
+            tls (bool, optional): A flag indicating if TLS should be used for
+                connections. Defaults to True.
+            root_certificate (Optional[Union[Path, str]]): The path to the
+                root certificate for TLS. Defaults to None.
+            private_key (Optional[Union[Path, str]]): The path to the private
+                key for TLS. Defaults to None.
+            certificate (Optional[Union[Path, str]]): The path to the
+                certificate for TLS. Defaults to None.
+            director_config (Path): Path to director's config file.
+                Defaults to None.
+        Returns:
+            AggregatorGRPCServer: The created aggregator gRPC server.
+        """
+        plan = Plan.parse(plan_config_path=self.plan_path)
+        plan.authorized_cols = list(self.collaborators)
+
+        logger.info("🧿 Created an Aggregator Server for %s experiment.", self.name)
+        aggregator_grpc_server = plan.get_server(
+            root_certificate=root_certificate,
+            certificate=certificate,
+            private_key=private_key,
+            tls=tls,
+            director_config=director_config,
+        )
+        return aggregator_grpc_server
+
+    @staticmethod
+    async def _run_aggregator_grpc_server(
+        aggregator_grpc_server: AggregatorGRPCServer,
+    ) -> None:
+        """Run the aggregator gRPC server.
+
+        Args:
+            aggregator_grpc_server (AggregatorGRPCServer): The aggregator gRPC
+                server to run.
+        """
+        logger.info("🧿 Starting the Aggregator Service.")
+        grpc_server = aggregator_grpc_server.get_server()
+        grpc_server.start()
+        logger.info("Starting Aggregator gRPC Server")
+
+        try:
+            while not aggregator_grpc_server.aggregator.all_quit_jobs_sent():
+                # Wait until quit jobs are sent to all collaborators
+                await asyncio.sleep(10)
+            logger.debug("Aggregator sent quit job calls to all collaborators")
+        except KeyboardInterrupt:
+            logger.info("Keyboard interrupt received. Stopping the server.")
+        finally:
+            grpc_server.stop(0)
+
+
+class ExperimentsRegistry:
+    """Registry of pending, active, and archived experiments."""
+
+    def __init__(self) -> None:
+        """Initialize an experiments registry object."""
+        self.__active_experiment_name = None
+        self.__pending_experiments = []
+        self.__archived_experiments = []
+        self.__dict = {}
+
+    @property
+    def active(self) -> Union[Experiment, None]:
+        """Get active experiment.
+
+        Returns:
+            Union[Experiment, None]: The active experiment if it exists, None
+                otherwise.
+        """
+        if self.__active_experiment_name is None:
+            return None
+        return self.__dict[self.__active_experiment_name]
+
+    @property
+    def pending(self) -> List[str]:
+        """Get queue of not started experiments.
+
+        Returns:
+            List[str]: The list of pending experiments.
+        """
+        return self.__pending_experiments
+
+    def add(self, experiment: Experiment) -> None:
+        """Add experiment to queue of not started experiments.
+
+        Args:
+            experiment (Experiment): The experiment to add.
+        """
+        self.__dict[experiment.name] = experiment
+        self.__pending_experiments.append(experiment.name)
+
+    def remove(self, name: str) -> None:
+        """Remove an experiment from the registry entirely.
+
+        Args:
+            name (str): The name of the experiment to remove.
+        """
+        if self.__active_experiment_name == name:
+            self.__active_experiment_name = None
+        if name in self.__pending_experiments:
+            self.__pending_experiments.remove(name)
+        if name in self.__archived_experiments:
+            self.__archived_experiments.remove(name)
+        if name in self.__dict:
+            del self.__dict[name]
+
+    def __getitem__(self, key: str) -> Experiment:
+        """Get experiment by name.
+
+        Args:
+            key (str): The name of the experiment.
+
+        Returns:
+            Experiment: The experiment with the given name.
+        """
+        return self.__dict[key]
+
+    def get(self, key: str, default=None) -> Experiment:
+        """Get experiment by name.
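+
+        Unlike ``__getitem__``, a missing key returns ``default`` instead of
+        raising ``KeyError``. Example (illustrative)::
+
+            experiment = registry.get("mnist_experiment")  # None if absent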
+ + Args: + key (str): The name of the experiment. + default (optional): The default value to return if the experiment + does not exist. + + Returns: + Experiment: The experiment with the given name, or the default + value if the experiment does not exist. + """ + return self.__dict.get(key, default) + + def get_user_experiments(self, user: str) -> List[Experiment]: + """Get list of experiments for specific user. + + Args: + user (str): The name of the user. + + Returns: + List[Experiment]: The list of experiments for the specific user. + """ + return [exp for exp in self.__dict.values() if user in exp.users] + + def __contains__(self, key: str) -> bool: + """Check if experiment exists. + + Args: + key (str): The name of the experiment. + + Returns: + bool: True if the experiment exists, False otherwise. + """ + return key in self.__dict + + def finish_active(self) -> None: + """Finish active experiment.""" + self.__archived_experiments.insert(0, self.__active_experiment_name) + self.__active_experiment_name = None + + @asynccontextmanager + async def get_next_experiment(self): + """Context manager. + + On enter get experiment from pending experiments. On exit put finished + experiment to archive_experiments. + """ + while True: + if self.active is None and self.pending: + break + await asyncio.sleep(10) + + try: + self.__active_experiment_name = self.pending.pop(0) + yield self.active + finally: + self.finish_active() diff --git a/openfl/experimental/workflow/component/envoy/__init__.py b/openfl/experimental/workflow/component/envoy/__init__.py new file mode 100644 index 0000000000..279a46fc1f --- /dev/null +++ b/openfl/experimental/workflow/component/envoy/__init__.py @@ -0,0 +1,6 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Envoy package.""" + +from openfl.experimental.workflow.component.envoy.envoy import Envoy diff --git a/openfl/experimental/workflow/component/envoy/envoy.py b/openfl/experimental/workflow/component/envoy/envoy.py new file mode 100644 index 0000000000..c532ee1a6b --- /dev/null +++ b/openfl/experimental/workflow/component/envoy/envoy.py @@ -0,0 +1,224 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""Experimental Envoy module.""" + +import logging +import sys +import time +import uuid +from concurrent.futures import ThreadPoolExecutor +from pathlib import Path +from typing import Optional, Union + +from openfl.experimental.workflow.federated import Plan +from openfl.experimental.workflow.transport.grpc.director_client import DirectorClient +from openfl.experimental.workflow.transport.grpc.exceptions import EnvoyNotFoundError +from openfl.utilities.workspace import ExperimentWorkspace + +logger = logging.getLogger(__name__) + + +class Envoy: + """Envoy class. The Envoy is a long-lived entity that runs on collaborator + nodes connected to the Director. + + Attributes: + name (str): The name of the envoy. + envoy_config (Optional[Path]): Path to envoy_config.yaml + tls (bool, optional): A flag indicating if TLS should be used for + connections. Defaults to True. + root_certificate (Optional[Union[Path, str]]): The path to the root certificate + for TLS. + private_key (Optional[Union[Path, str]]): The path to the private key for TLS. + certificate (Optional[Union[Path, str]]): The path to the certificate for TLS. + director_client (DirectorClient): The director client. + install_requirements (bool): A flag indicating if the requirements + should be installed. 
+        is_experiment_running (bool): A flag indicating if an experiment is
+            running.
+        executor (ThreadPoolExecutor): The executor for running tasks.
+        plan (str): Path to plan.yaml.
+        _health_check_future (object): The future object for the health check.
+    """
+
+    DEFAULT_RETRY_TIMEOUT_IN_SECONDS = 5
+
+    def __init__(
+        self,
+        *,
+        envoy_name: str,
+        director_host: str,
+        director_port: int,
+        envoy_config: Optional[Path] = None,
+        root_certificate: Optional[Union[Path, str]] = None,
+        private_key: Optional[Union[Path, str]] = None,
+        certificate: Optional[Union[Path, str]] = None,
+        tls: bool = True,
+        install_requirements: bool = True,
+    ) -> None:
+        """Initialize an envoy object.
+
+        Args:
+            envoy_name (str): The name of the envoy.
+            director_host (str): The host of the director.
+            director_port (int): The port of the director.
+            envoy_config (Optional[Path]): Path to envoy_config.yaml.
+            root_certificate (Optional[Union[Path, str]]): The path
+                to the root certificate for TLS. Defaults to None.
+            private_key (Optional[Union[Path, str]]): The path to
+                the private key for TLS. Defaults to None.
+            certificate (Optional[Union[Path, str]]): The path to
+                the certificate for TLS. Defaults to None.
+            tls (bool, optional): A flag indicating if TLS should be used for
+                connections. Defaults to True.
+            install_requirements (bool, optional): A flag indicating if the
+                requirements should be installed. Defaults to True.
+        """
+        self.name = envoy_name
+        self.envoy_config = envoy_config
+        self.tls = tls
+        self._fill_certs(root_certificate, private_key, certificate)
+        self.install_requirements = install_requirements
+        self.director_client = self._create_director_client(director_host, director_port)
+        self.is_experiment_running = False
+        self.executor = ThreadPoolExecutor()
+        # This plan path ("plan/plan.yaml") originates from the
+        # experiment workspace provided by the director
+        self.plan = "plan/plan.yaml"
+        self._health_check_future = None
+
+    def _create_director_client(self, director_host: str, director_port: int) -> DirectorClient:
+        """Create a DirectorClient instance.
+
+        Args:
+            director_host (str): The host of the director.
+            director_port (int): The port of the director.
+
+        Returns:
+            DirectorClient: Instance of the client.
+        """
+        return DirectorClient(
+            director_host=director_host,
+            director_port=director_port,
+            envoy_name=self.name,
+            tls=self.tls,
+            root_certificate=self.root_certificate,
+            private_key=self.private_key,
+            certificate=self.certificate,
+        )
+
+    def _fill_certs(self, root_certificate, private_key, certificate) -> None:
+        """Fill certificates.
+
+        Args:
+            root_certificate (Union[Path, str]): The path to the root
+                certificate for the TLS connection.
+            private_key (Union[Path, str]): The path to the server's private
+                key for the TLS connection.
+            certificate (Union[Path, str]): The path to the server's
+                certificate for the TLS connection.
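+
+        Example (illustrative paths; only consulted when ``self.tls`` is
+        True, otherwise all three attributes are set to None)::
+
+            envoy._fill_certs("cert/root_ca.crt", "cert/envoy.key", "cert/envoy.crt")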
+ """ + if self.tls: + if not all([root_certificate, private_key, certificate]): + raise ValueError("Incomplete certificates provided") + + self.root_certificate = Path(root_certificate).absolute() + self.private_key = Path(private_key).absolute() + self.certificate = Path(certificate).absolute() + else: + self.root_certificate = self.private_key = self.certificate = None + + def _run(self) -> None: + """Run of the envoy working cycle.""" + while True: + try: + # Wait for experiment from Director server + experiment_name = self.director_client.wait_experiment() + data_stream = self.director_client.get_experiment_data(experiment_name) + except Exception as exc: + logger.exception("Failed to get experiment: %s", exc) + time.sleep(self.DEFAULT_RETRY_TIMEOUT_IN_SECONDS) + continue + data_file_path = self._save_data_stream_to_file(data_stream) + + try: + with ExperimentWorkspace( + experiment_name=f"{self.name}_{experiment_name}", + data_file_path=data_file_path, + install_requirements=self.install_requirements, + ): + self.is_experiment_running = True + self._run_collaborator() + except Exception as exc: + logger.exception("Collaborator failed with error: %s:", exc) + finally: + self.is_experiment_running = False + + @staticmethod + def _save_data_stream_to_file(data_stream) -> Path: + """Save data stream to file. + + Args: + data_stream: The data stream to save. + + Returns: + Path: The path to the saved data file. + """ + data_file_path = Path(str(uuid.uuid4())).absolute() + with open(data_file_path, "wb") as data_file: + for response in data_stream: + if response.size == len(response.exp_data): + data_file.write(response.exp_data) + else: + raise Exception("Broken archive") + return data_file_path + + def _send_health_check(self) -> None: + """Send health check to the director.""" + logger.debug("Sending envoy node status to director.") + timeout = self.DEFAULT_RETRY_TIMEOUT_IN_SECONDS + while True: + try: + timeout = self.director_client.send_health_check( + envoy_name=self.name, + is_experiment_running=self.is_experiment_running, + ) + except EnvoyNotFoundError: + logger.info( + "The director has lost information about current envoy. Reconnecting..." 
+ ) + self.director_client.connect_envoy(envoy_name=self.name) + time.sleep(timeout) + + def _run_collaborator(self) -> None: + """Run the collaborator for the experiment running.""" + plan = Plan.parse(plan_config_path=Path(self.plan)) + logger.info("🧿 Starting the Collaborator Service.") + + col = plan.get_collaborator( + self.name, + self.root_certificate, + self.private_key, + self.certificate, + envoy_config=self.envoy_config, + tls=self.tls, + ) + col.run() + + def start(self) -> None: + """Start the envoy""" + try: + is_accepted = self.director_client.connect_envoy(envoy_name=self.name) + except Exception as exc: + logger.exception("Failed to connect envoy: %s", exc) + sys.exit(1) + else: + if is_accepted: + logger.info(f"{self.name} is connected to the director") + self._health_check_future = self.executor.submit(self._send_health_check) + self._run() + else: + # Connection failed + logger.error(f"{self.name} failed to connect to the director") + sys.exit(1) diff --git a/openfl/experimental/workflow/federated/plan/plan.py b/openfl/experimental/workflow/federated/plan/plan.py index 6df27d8b1c..5e81a91a9d 100644 --- a/openfl/experimental/workflow/federated/plan/plan.py +++ b/openfl/experimental/workflow/federated/plan/plan.py @@ -7,10 +7,11 @@ import inspect import os from hashlib import sha384 -from importlib import import_module +from importlib import import_module, reload from logging import getLogger from os.path import splitext from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple from yaml import SafeDumper, dump, safe_load @@ -18,6 +19,10 @@ from openfl.experimental.workflow.transport import AggregatorGRPCClient, AggregatorGRPCServer from openfl.utilities.utils import getfqdn_env +if TYPE_CHECKING: + from openfl.experimental.workflow.component import Aggregator, Collaborator + from openfl.experimental.workflow.interface import FLSpec + SETTINGS = "settings" TEMPLATE = "template" DEFAULTS = "defaults" @@ -25,13 +30,37 @@ class Plan: - """Federated Learning plan.""" + """A class used to represent a Federated Learning plan. + + This class provides methods to manage and manipulate federated learning + plans. + + Attributes: + logger (Logger): Logger instance for the class. + config (dict): Dictionary containing patched plan definition. + authorized_cols (list): Authorized collaborator list. + cols_data_paths (dict): Collaborator data paths dictionary. + collaborator_ (Collaborator): Collaborator object. + aggregator_ (Aggregator): Aggregator object. + server_ (AggregatorGRPCServer): gRPC server object. + client_ (AggregatorGRPCClient): gRPC client object. + hash_ (str): Hash of the instance. + """ logger = getLogger(__name__) @staticmethod - def load(yaml_path: Path, default: dict = None): - """Load the plan from YAML file.""" + def load(yaml_path: Path, default: dict = None) -> dict: + """Load the plan from YAML file. + + Args: + yaml_path (Path): Path to the YAML file. + default (dict, optional): Default plan configuration. + Defaults to {}. + + Returns: + dict: Plan configuration loaded from the YAML file. + """ if default is None: default = {} if yaml_path and yaml_path.exists(): @@ -39,8 +68,15 @@ def load(yaml_path: Path, default: dict = None): return default @staticmethod - def dump(yaml_path, config, freeze=False): - """Dump the plan config to YAML file.""" + def dump(yaml_path, config, freeze=False) -> None: + """Dump the plan config to YAML file. + + Args: + yaml_path (Path): Path to the YAML file. 
+ config (dict): Plan configuration to be dumped. + freeze (bool, optional): Flag to freeze the plan. Defaults to + False. + """ class NoAliasDumper(SafeDumper): def ignore_aliases(self, data): @@ -65,7 +101,7 @@ def parse( cols_config_path: Path = None, data_config_path: Path = None, resolve=True, - ): + ) -> "Plan": """Parse the Federated Learning plan. Args: @@ -140,7 +176,7 @@ def parse( raise @staticmethod - def accept_args(cls): + def accept_args(cls) -> bool: """Determines whether a class's constructor (__init__ method) accepts variable positional arguments (*args). @@ -154,7 +190,7 @@ def accept_args(cls): return False @staticmethod - def build(template, settings, **override): + def build(template, settings, **override) -> object: """Create an instance of a openfl Component or Federated DataLoader/TaskRunner. @@ -174,6 +210,8 @@ def build(template, settings, **override): settings.update(**override) module = import_module(module_path) + # Reload the module to ensure the flow is rebuilt with updated changes + module = reload(module) if Plan.accept_args(getattr(module, class_name)): args = list(settings.values()) @@ -184,7 +222,7 @@ def build(template, settings, **override): return instance @staticmethod - def import_(template): + def import_(template) -> object: """Import an instance of a openfl Component or Federated DataLoader/TaskRunner. @@ -202,11 +240,13 @@ def import_(template): extra={"markup": True}, ) module = import_module(module_path) + # FIXME: Reload module to refresh private attributes; optimize this. + module = reload(module) instance = getattr(module, class_name) return instance - def __init__(self): + def __init__(self) -> None: """Initialize.""" self.config = {} # dictionary containing patched plan definition self.authorized_cols = [] # authorized collaborator list @@ -221,7 +261,7 @@ def __init__(self): self.hash_ = None @property - def hash(self): # NOQA + def hash(self) -> str: # NOQA """Generate hash for this instance.""" self.hash_ = sha384(dump(self.config).encode("utf-8")) Plan.logger.info( @@ -231,7 +271,7 @@ def hash(self): # NOQA return self.hash_.hexdigest() - def resolve(self): + def resolve(self) -> None: """Resolve the federation settings.""" self.federation_uuid = f"{self.name}_{self.hash[:8]}" self.aggregator_uuid = f"aggregator_{self.federation_uuid}" @@ -246,8 +286,16 @@ def resolve(self): int(self.hash[:8], 16) % (60999 - 49152) + 49152 ) - def get_aggregator(self): - """Get federation aggregator.""" + def get_aggregator(self, director_config=None) -> "Aggregator": + """Get federation aggregator. + + Args: + director_config: Path to director config file. + Defaults to None + + Returns: + self.aggregator_ (Aggregator): The federation aggregator. 
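+
+        Example (illustrative; under a director, ``director_config`` supplies
+        the aggregator's private attributes)::
+
+            aggregator = plan.get_aggregator(director_config=Path("director_config.yaml"))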
+ """ defaults = self.config.get( "aggregator", {TEMPLATE: "openfl.experimental.workflow.Aggregator", SETTINGS: {}}, @@ -258,17 +306,14 @@ def get_aggregator(self): defaults[SETTINGS]["authorized_cols"] = self.authorized_cols private_attrs_callable, private_attrs_kwargs, private_attributes = self.get_private_attr( - "aggregator" + "aggregator", director_config ) defaults[SETTINGS]["private_attributes_callable"] = private_attrs_callable defaults[SETTINGS]["private_attributes_kwargs"] = private_attrs_kwargs defaults[SETTINGS]["private_attributes"] = private_attributes defaults[SETTINGS]["flow"] = self.get_flow() - checkpoint = self.config.get("federated_flow", False) - if not checkpoint: - checkpoint = checkpoint["settings"]["checkpoint"] - defaults[SETTINGS]["checkpoint"] = checkpoint + defaults[SETTINGS]["checkpoint"] = defaults[SETTINGS]["flow"].checkpoint log_metric_callback = defaults[SETTINGS].get("log_metric_callback") if log_metric_callback: @@ -293,8 +338,32 @@ def get_collaborator( private_key=None, certificate=None, client=None, - ): - """Get collaborator.""" + tls=False, + envoy_config=None, + ) -> "Collaborator": + """Get collaborator. + + This method retrieves a collaborator. If the collaborator does not + exist, it is built using the configuration settings and the provided + parameters. + + Args: + collaborator_name (str): Name of the collaborator. + root_certificate (str, optional): Root certificate for the + collaborator. Defaults to None. + private_key (str, optional): Private key for the collaborator. + Defaults to None. + certificate (str, optional): Certificate for the collaborator. + Defaults to None. + client (Client, optional): Client for the collaborator. Defaults + to None. + tls (bool): Whether to use TLS for the connection. + envoy_config (Path): Path to envoy_config.yaml. Defaults + to None. + + Returns: + self.collaborator_ (Collaborator): The collaborator instance. + """ defaults = self.config.get( "collaborator", {TEMPLATE: "openfl.experimental.workflow.Collaborator", SETTINGS: {}}, @@ -305,7 +374,7 @@ def get_collaborator( defaults[SETTINGS]["federation_uuid"] = self.federation_uuid private_attrs_callable, private_attrs_kwargs, private_attributes = self.get_private_attr( - collaborator_name + collaborator_name, envoy_config ) defaults[SETTINGS]["private_attributes_callable"] = private_attrs_callable defaults[SETTINGS]["private_attributes_kwargs"] = private_attrs_kwargs @@ -321,6 +390,7 @@ def get_collaborator( root_certificate, private_key, certificate, + tls, ) if self.collaborator_ is None: @@ -336,8 +406,25 @@ def get_client( root_certificate=None, private_key=None, certificate=None, - ): - """Get gRPC client for the specified collaborator.""" + tls=False, + ) -> AggregatorGRPCClient: + """Get gRPC client for the specified collaborator. + + Args: + collaborator_name (str): Name of the collaborator. + aggregator_uuid (str): UUID of the aggregator. + federation_uuid (str): UUID of the federation. + root_certificate (str, optional): Root certificate for the + collaborator. Defaults to None. + private_key (str, optional): Private key for the collaborator. + Defaults to None. + certificate (str, optional): Certificate for the collaborator. + Defaults to None. + tls (bool): Whether to use TLS for the connection. + + Returns: + AggregatorGRPCClient: gRPC client for the specified collaborator. 
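+
+        Example (illustrative)::
+
+            client = plan.get_client(
+                "envoy_one",
+                plan.aggregator_uuid,
+                plan.federation_uuid,
+                tls=False,
+            )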
+ """ common_name = collaborator_name if not root_certificate or not private_key or not certificate: root_certificate = "cert/cert_chain.crt" @@ -351,6 +438,7 @@ def get_client( client_args["root_certificate"] = root_certificate client_args["certificate"] = certificate client_args["private_key"] = private_key + client_args["tls"] = tls client_args["aggregator_uuid"] = aggregator_uuid client_args["federation_uuid"] = federation_uuid @@ -365,9 +453,27 @@ def get_server( root_certificate=None, private_key=None, certificate=None, + tls=False, + director_config=None, **kwargs, - ): - """Get gRPC server of the aggregator instance.""" + ) -> AggregatorGRPCServer: + """Get gRPC server of the aggregator instance. + + Args: + root_certificate (str, optional): Root certificate for the server. + Defaults to None. + private_key (str, optional): Private key for the server. Defaults + to None. + certificate (str, optional): Certificate for the server. Defaults + to None. + tls (bool): Whether to use TLS for the connection. + director_config (Path): Path to director_config.yaml. Defaults + to None. + **kwargs: Additional keyword arguments. + + Returns: + AggregatorGRPCServer: gRPC server of the aggregator instance. + """ common_name = self.config["network"][SETTINGS]["agg_addr"].lower() if not root_certificate or not private_key or not certificate: @@ -383,16 +489,21 @@ def get_server( server_args["root_certificate"] = root_certificate server_args["certificate"] = certificate server_args["private_key"] = private_key + server_args["tls"] = tls - server_args["aggregator"] = self.get_aggregator() + server_args["aggregator"] = self.get_aggregator(director_config) if self.server_ is None: self.server_ = AggregatorGRPCServer(**server_args) return self.server_ - def get_flow(self): - """Instantiates federated flow object.""" + def get_flow(self) -> "FLSpec": + """Instantiates federated flow object. + + Returns: + flow_: FLSpec instance + """ defaults = self.config.get( "federated_flow", {TEMPLATE: self.config["federated_flow"]["template"], SETTINGS: {}}, @@ -402,7 +513,18 @@ def get_flow(self): self.flow_ = Plan.build(**defaults) return self.flow_ - def import_kwargs_modules(self, defaults): + def import_kwargs_modules(self, defaults) -> Dict[str, Any]: + """ + Imports and resolves class references in a nested settings structure. + Args: + defaults (Dict[str, Any]): A dictionary of settings, containing module paths + and class names as strings in its nested structure. + + Returns: + Dict[str, Any]: The updated settings dictionary with resolved classes and attributes + from the imported modules. + """ + def import_nested_settings(settings): for key, value in settings.items(): if isinstance(value, dict): @@ -431,17 +553,35 @@ def import_nested_settings(settings): defaults[SETTINGS] = import_nested_settings(defaults[SETTINGS]) return defaults - def get_private_attr(self, private_attr_name=None): - private_attrs_callable = None - private_attrs_kwargs = {} + def get_private_attr( + self, private_attr_name=None, config=None + ) -> Tuple[Optional[dict], Optional[dict], dict]: + """ + Retrieves private attributes defined in a configuration or data file. + + Args: + private_attr_name (str): The name of the participant (Aggregator or Collaborator) + whose private attribute is to be retrieved. + config (Path): Path to the config file. + + Returns: + Tuple: A tuple containing: + - private_attrs_callable (Optional[dict]): A dictionary containing callable + function information, or None if not applicable. 
+ - private_attrs_kwargs (Optional[dict]): A dictionary of arguments for the + callable function, or None if not applicable. + - private_attributes (dict): A dictionary of private attributes, + or an empty dictionary if none are found. + """ + private_attrs_callable = private_attrs_kwargs = None private_attributes = {} data_yaml = "plan/data.yaml" - if os.path.exists(data_yaml) and os.path.isfile(data_yaml): - d = Plan.load(Path(data_yaml).absolute()) + if config or (os.path.exists(data_yaml) and os.path.isfile(data_yaml)): + d = Plan.load(config) if config else Plan.load(Path(data_yaml).absolute()) - if d.get(private_attr_name, None): + if d and d.get(private_attr_name, None): callable_func = d.get(private_attr_name, {}).get("callable_func") private_attributes = d.get(private_attr_name, {}).get("private_attributes") if callable_func and private_attributes: @@ -474,9 +614,8 @@ def get_private_attr(self, private_attr_name=None): f"or be import from code part, get {private_attrs_callable}" ) - return ( - private_attrs_callable, - private_attrs_kwargs, - private_attributes, - ) - return None, None, {} + return ( + private_attrs_callable, + private_attrs_kwargs, + private_attributes, + ) diff --git a/openfl/experimental/workflow/interface/cli/aggregator.py b/openfl/experimental/workflow/interface/cli/aggregator.py index 2c458fa2fe..b51b78f480 100644 --- a/openfl/experimental/workflow/interface/cli/aggregator.py +++ b/openfl/experimental/workflow/interface/cli/aggregator.py @@ -6,7 +6,6 @@ import os import sys -import threading from logging import getLogger from pathlib import Path @@ -92,13 +91,7 @@ def start_(plan, authorized_cols, secure): agg_server = plan.get_server() agg_server.is_server_started = False - agg_grpc_server = threading.Thread(target=agg_server.serve) - agg_grpc_server.start() - - while True: - if agg_server.is_server_started: - plan.aggregator_.run_flow() - break + agg_server.run_server() @aggregator.command(name="generate-cert-request") diff --git a/openfl/experimental/workflow/interface/cli/director.py b/openfl/experimental/workflow/interface/cli/director.py new file mode 100644 index 0000000000..b7e202715e --- /dev/null +++ b/openfl/experimental/workflow/interface/cli/director.py @@ -0,0 +1,131 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Director CLI.""" + +import logging +import sys +from pathlib import Path + +import click +from click import Path as ClickPath +from click import group, option, pass_context +from dynaconf import Validator + +from openfl.experimental.workflow.component.director import Director +from openfl.experimental.workflow.transport import DirectorGRPCServer +from openfl.utilities import merge_configs +from openfl.utilities.path_check import is_directory_traversal + +logger = logging.getLogger(__name__) + + +@group() +@pass_context +def director(context): + """Manage Federated Learning Director. + + Args: + context (click.core.Context): Click context. 
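+
+    Example (illustrative; assumes this group is registered under the
+    ``fx`` entry point)::
+
+        fx director start -c director_config.yaml --disable-tls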
+ """ + context.obj["group"] = "director" + + +@director.command(name="start") +@option( + "-c", + "--director-config-path", + default="director.yaml", + help="The director config file path", + type=ClickPath(exists=True), +) +@option( + "--tls/--disable-tls", + default=True, + is_flag=True, + help="Use TLS or not (By default TLS is enabled)", +) +@option( + "-rc", + "--root-cert-path", + "root_certificate", + required=False, + type=ClickPath(exists=True), + default=None, + help="Path to a root CA cert", +) +@option( + "-pk", + "--private-key-path", + "private_key", + required=False, + type=ClickPath(exists=True), + default=None, + help="Path to a private key", +) +@option( + "-oc", + "--public-cert-path", + "certificate", + required=False, + type=ClickPath(exists=True), + default=None, + help="Path to a signed certificate", +) +def start(director_config_path, tls, root_certificate, private_key, certificate): + """Start the director service. + + Args: + director_config_path (str): The director config file path. + tls (bool): Use TLS or not. + root_certificate (str): Path to a root CA cert. + private_key (str): Path to a private key. + certificate (str): Path to a signed certificate. + """ + + director_config_path = Path(director_config_path).absolute() + logger.info("🧿 Starting the Director Service.") + if is_directory_traversal(director_config_path): + click.echo("The director config file path is out of the openfl workspace scope.") + sys.exit(1) + config = merge_configs( + settings_files=director_config_path, + overwrite_dict={ + "root_certificate": root_certificate, + "private_key": private_key, + "certificate": certificate, + }, + validators=[ + Validator("settings.listen_host", default="localhost"), + Validator("settings.listen_port", default=50051, gte=1024, lte=65535), + Validator("settings.install_requirements", default=False), + Validator( + "settings.envoy_health_check_period", + default=60, # in seconds + gte=1, + lte=24 * 60 * 60, + ), + ], + ) + + if config.root_certificate: + config.root_certificate = Path(config.root_certificate).absolute() + + if config.private_key: + config.private_key = Path(config.private_key).absolute() + + if config.certificate: + config.certificate = Path(config.certificate).absolute() + + director_server = DirectorGRPCServer( + director_cls=Director, + tls=tls, + root_certificate=config.root_certificate, + private_key=config.private_key, + certificate=config.certificate, + listen_host=config.settings.listen_host, + listen_port=config.settings.listen_port, + envoy_health_check_period=config.settings.envoy_health_check_period, + install_requirements=config.settings.install_requirements, + director_config=director_config_path, + ) + director_server.start() diff --git a/openfl/experimental/workflow/interface/cli/envoy.py b/openfl/experimental/workflow/interface/cli/envoy.py new file mode 100644 index 0000000000..dbb5d57ce9 --- /dev/null +++ b/openfl/experimental/workflow/interface/cli/envoy.py @@ -0,0 +1,151 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +"""Envoy CLI.""" + +import logging +import sys +from pathlib import Path + +import click +from click import Path as ClickPath +from click import group, option, pass_context +from dynaconf import Validator + +from openfl.experimental.workflow.component.envoy import Envoy +from openfl.utilities import click_types, merge_configs +from openfl.utilities.path_check import is_directory_traversal + +logger = logging.getLogger(__name__) + + +@group() +@pass_context +def 
envoy(context): + """Manage Federated Learning Envoy. + + Args: + context (click.core.Context): Click context. + """ + context.obj["group"] = "envoy" + + +@envoy.command(name="start") +@option("-n", "--envoy_name", required=True, help="Current shard name") +@option( + "-dh", + "--director-host", + required=True, + help="The FQDN of the federation director", + type=click_types.FQDN, +) +@option( + "-dp", + "--director-port", + required=True, + help="The federation director port", + type=click.IntRange(1, 65535), +) +@option( + "--tls/--disable-tls", + default=True, + is_flag=True, + help="Use TLS or not (By default TLS is enabled)", +) +@option( + "-ec", + "--envoy-config-path", + default="envoy_config.yaml", + help="The envoy config path", + type=ClickPath(exists=True), +) +@option( + "-rc", + "--root-cert-path", + "root_certificate", + default=None, + help="Path to a root CA cert", + type=ClickPath(exists=True), +) +@option( + "-pk", + "--private-key-path", + "private_key", + default=None, + help="Path to a private key", + type=ClickPath(exists=True), +) +@option( + "-oc", + "--public-cert-path", + "certificate", + default=None, + help="Path to a signed certificate", + type=ClickPath(exists=True), +) +def start_( + envoy_name, + director_host, + director_port, + tls, + envoy_config_path, + root_certificate, + private_key, + certificate, +): + """Start the Envoy. + + Args: + envoy_name (str): Name of the Envoy. + director_host (str): The FQDN of the federation director. + director_port (int): The federation director port. + tls (bool): Use TLS or not. + envoy_config_path (str): The envoy config path. + root_certificate (str): Path to a root CA cert. + private_key (str): Path to a private key. + certificate (str): Path to a signed certificate. + """ + + logger.info("🧿 Starting the Envoy.") + if is_directory_traversal(envoy_config_path): + click.echo("The envoy config path is out of the openfl workspace scope.") + sys.exit(1) + + config = merge_configs( + settings_files=envoy_config_path, + overwrite_dict={ + "root_certificate": root_certificate, + "private_key": private_key, + "certificate": certificate, + }, + validators=[ + Validator("params.install_requirements", default=True), + ], + ) + + # Parse envoy parameters + envoy_params = config.get("params", {}) + if envoy_params: + install_requirements = envoy_params["install_requirements"] + else: + install_requirements = False + + if config.root_certificate: + config.root_certificate = Path(config.root_certificate).absolute() + if config.private_key: + config.private_key = Path(config.private_key).absolute() + if config.certificate: + config.certificate = Path(config.certificate).absolute() + + envoy = Envoy( + envoy_name=envoy_name, + director_host=director_host, + director_port=director_port, + envoy_config=Path(envoy_config_path).absolute(), + root_certificate=config.root_certificate, + private_key=config.private_key, + certificate=config.certificate, + tls=tls, + install_requirements=install_requirements, + ) + + envoy.start() diff --git a/openfl/experimental/workflow/interface/fl_spec.py b/openfl/experimental/workflow/interface/fl_spec.py index 766dea553e..3e8365458b 100644 --- a/openfl/experimental/workflow/interface/fl_spec.py +++ b/openfl/experimental/workflow/interface/fl_spec.py @@ -8,7 +8,10 @@ import inspect from copy import deepcopy -from typing import Callable, List, Type +from typing import TYPE_CHECKING, Callable, List, Type, Union + +if TYPE_CHECKING: + from openfl.experimental.workflow.runtime import FederatedRuntime, 
LocalRuntime, Runtime from openfl.experimental.workflow.utilities import ( MetaflowInterface, @@ -23,10 +26,23 @@ class FLSpec: + """FLSpec Class + + A class representing a Federated Learning Specification. It manages clones, + maintains the initial state, and supports checkpointing. + + Attributes: + _clones (list): A list of clones created for the FLSpec instance. + _initial_state (FLSpec or None): The saved initial state of the FLSpec instance. + _foreach_methods (list): A list of methods to be applied iteratively. + _checkpoint (bool): A flag indicating whether checkpointing is enabled. + _runtime (RuntimeType): The runtime of the flow. + """ + _clones = [] _initial_state = None - def __init__(self, checkpoint: bool = False): + def __init__(self, checkpoint: bool = False) -> None: """Initializes the FLSpec object. Args: @@ -47,7 +63,7 @@ def _create_clones(cls, instance: Type[FLSpec], names: List[str]) -> None: cls._clones = {name: deepcopy(instance) for name in names} @classmethod - def _reset_clones(cls): + def _reset_clones(cls) -> None: """Resets the clones of the class.""" cls._clones = [] @@ -62,55 +78,31 @@ def save_initial_state(cls, instance: Type[FLSpec]) -> None: """ cls._initial_state = deepcopy(instance) - def run(self) -> None: - """Starts the execution of the flow.""" + @property + def checkpoint(self) -> bool: + """Getter for the checkpoint attribute. - # Submit flow to Runtime - if str(self._runtime) == "LocalRuntime": - self._metaflow_interface = MetaflowInterface(self.__class__, self.runtime.backend) - self._run_id = self._metaflow_interface.create_run() - # Initialize aggregator private attributes - self.runtime.initialize_aggregator() - self._foreach_methods = [] - FLSpec._reset_clones() - FLSpec._create_clones(self, self.runtime.collaborators) - # Initialize collaborator private attributes - self.runtime.initialize_collaborators() - if self._checkpoint: - print(f"Created flow {self.__class__.__name__}") - try: - # Execute all Participant (Aggregator & Collaborator) tasks and - # retrieve the final attributes - # start step is the first task & invoked on aggregator through - # runtime.execute_task - final_attributes = self.runtime.execute_task( - self, - self.start, - ) - except Exception as e: - if "cannot pickle" in str(e) or "Failed to unpickle" in str(e): - msg = ( - "\nA serialization error was encountered that could not" - "\nbe handled by the ray backend." - "\nTry rerunning the flow without ray as follows:\n" - "\nLocalRuntime(...,backend='single_process')\n" - "\n or for more information about the original error," - "\nPlease see the official Ray documentation" - "\nhttps://docs.ray.io/en/releases-2.2.0/ray-core/\ - objects/serialization.html" - ) - raise SerializationError(str(e) + msg) - else: - raise e - for name, attr in final_attributes: - setattr(self, name, attr) - elif str(self._runtime) == "FederatedRuntime": - pass - else: - raise Exception("Runtime not implemented") + Returns: + bool: The current value of the checkpoint. + """ + return self._checkpoint + + @checkpoint.setter + def checkpoint(self, value: bool) -> None: + """Setter for the checkpoint attribute. + + Args: + value (bool): The new value for the checkpoint. + + Raises: + ValueError: If the provided value is not a boolean. + """ + if not isinstance(value, bool): + raise ValueError("checkpoint must be a boolean value.") + self._checkpoint = value @property - def runtime(self): + def runtime(self) -> Type[Union[LocalRuntime, FederatedRuntime]]: """Returns flow runtime. 
        Returns:
@@ -119,7 +111,7 @@ def runtime(self):
         return self._runtime
 
     @runtime.setter
-    def runtime(self, runtime) -> None:
+    def runtime(self, runtime: Type[Runtime]) -> None:
         """Sets flow runtime.
 
         Args:
@@ -132,7 +124,110 @@ def runtime(self, runtime) -> None:
             raise TypeError(f"{runtime} is not a valid OpenFL Runtime")
         self._runtime = runtime
 
-    def _capture_instance_snapshot(self, kwargs):
+    def run(self) -> None:
+        """Starts the execution of the flow."""
+        # Submit flow to Runtime
+        if str(self._runtime) == "LocalRuntime":
+            self._run_local()
+        elif str(self._runtime) == "FederatedRuntime":
+            self._run_federated()
+        else:
+            raise Exception("Runtime not implemented")
+
+    def _run_local(self) -> None:
+        """Executes the flow using LocalRuntime."""
+        self._setup_initial_state()
+        try:
+            # Execute all Participant (Aggregator & Collaborator) tasks and
+            # retrieve the final attributes
+            # start step is the first task & invoked on aggregator through
+            # runtime.execute_task
+            final_attributes = self.runtime.execute_task(
+                self,
+                self.start,
+            )
+        except Exception as e:
+            if "cannot pickle" in str(e) or "Failed to unpickle" in str(e):
+                msg = (
+                    "\nA serialization error was encountered that could not"
+                    "\nbe handled by the ray backend."
+                    "\nTry rerunning the flow without ray as follows:\n"
+                    "\nLocalRuntime(...,backend='single_process')\n"
+                    "\n or for more information about the original error,"
+                    "\nPlease see the official Ray documentation"
+                    "\nhttps://docs.ray.io/en/releases-2.2.0/ray-core/\
+                    objects/serialization.html"
+                )
+                raise SerializationError(str(e) + msg)
+            else:
+                raise e
+        for name, attr in final_attributes:
+            setattr(self, name, attr)
+
+    def _setup_initial_state(self) -> None:
+        """
+        Sets up the flow's initial state, initializing private attributes for
+        collaborators and aggregators.
+        """
+        self._metaflow_interface = MetaflowInterface(self.__class__, self.runtime.backend)
+        self._run_id = self._metaflow_interface.create_run()
+        # Initialize aggregator private attributes
+        self.runtime.initialize_aggregator()
+        self._foreach_methods = []
+        FLSpec._reset_clones()
+        FLSpec._create_clones(self, self.runtime.collaborators)
+        # Initialize collaborator private attributes
+        self.runtime.initialize_collaborators()
+        if self._checkpoint:
+            print(f"Created flow {self.__class__.__name__}")
+
+    def _run_federated(self) -> None:
+        """Executes the flow using FederatedRuntime."""
+        # Prepare the workspace archive first so exp_name is always bound
+        archive_path, exp_name = self.runtime.prepare_workspace_archive()
+        try:
+            self.runtime.submit_experiment(archive_path, exp_name)
+            # Stream the experiment's stdout if the checkpoint is enabled
+            if self._checkpoint:
+                self.runtime.stream_experiment_stdout(exp_name)
+            # Retrieve the flspec object to update the experiment state
+            flspec_obj = self._get_flow_state()
+            # Update state of self
+            self._update_from_flspec_obj(flspec_obj)
+        except Exception as e:
+            raise Exception(
+                f"FederatedRuntime: Experiment {exp_name} failed to run due to error: {e}"
+            )
+
+    def _update_from_flspec_obj(self, flspec_obj: FLSpec) -> None:
+        """Update self with attributes from the updated flspec instance.
+
+        Args:
+            flspec_obj (FLSpec): Updated FLSpec instance
+        """
+        artifacts_iter, _ = generate_artifacts(ctx=flspec_obj)
+        for name, attr in artifacts_iter():
+            setattr(self, name, deepcopy(attr))
+
+        self._foreach_methods = flspec_obj._foreach_methods
+
+    def _get_flow_state(self) -> Union[FLSpec, None]:
+        """
+        Gets the updated flow state.
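+
+        Blocks on ``self.runtime.get_flow_state()`` until the director
+        reports a final status. Defensive caller-side sketch (illustrative)::
+
+            flspec_obj = self._get_flow_state()
+            if flspec_obj is not None:
+                self._update_from_flspec_obj(flspec_obj)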
+ + Returns: + flspec_obj (Union[FLSpec, None]): An updated FLSpec instance if the experiment + runs successfully. None if the experiment could not run. + """ + status, flspec_obj = self.runtime.get_flow_state() + if status: + print("Experiment ran successfully") + return flspec_obj + else: + print("Experiment could not run") + return None + + def _capture_instance_snapshot(self, kwargs) -> List: """Takes backup of self before exclude or include filtering. Args: @@ -184,7 +279,7 @@ def _display_transition_logs(self, f: Callable, parent_func: Callable) -> None: elif collaborator_to_aggregator(f, parent_func): print("Sending state from collaborator to aggregator") - def filter_exclude_include(self, f, **kwargs): + def filter_exclude_include(self, f, **kwargs) -> None: """Filters exclude/include attributes for a given task within the flow. Args: @@ -214,7 +309,7 @@ def filter_exclude_include(self, f, **kwargs): setattr(clone, name, deepcopy(attr)) clone._foreach_methods = self._foreach_methods - def restore_instance_snapshot(self, ctx: FLSpec, instance_snapshot: List[FLSpec]): + def restore_instance_snapshot(self, ctx: FLSpec, instance_snapshot: List[FLSpec]) -> None: """Restores attributes from backup (in instance snapshot) to ctx. Args: @@ -228,7 +323,7 @@ def restore_instance_snapshot(self, ctx: FLSpec, instance_snapshot: List[FLSpec] if not hasattr(ctx, name): setattr(ctx, name, attr) - def next(self, f, **kwargs): + def next(self, f, **kwargs) -> None: """Specifies the next task in the flow to execute. Args: diff --git a/openfl/experimental/workflow/protocols/director.proto b/openfl/experimental/workflow/protocols/director.proto new file mode 100644 index 0000000000..0e4a16e548 --- /dev/null +++ b/openfl/experimental/workflow/protocols/director.proto @@ -0,0 +1,107 @@ +// Copyright 2020-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +syntax = "proto3"; + +package openfl.experimental.workflow.director; + +import "google/protobuf/timestamp.proto"; +import "google/protobuf/duration.proto"; + +service Director { + //Envoy RPCs + rpc EnvoyConnectionRequest(SendConnectionRequest) returns (RequestAccepted) {} + rpc WaitExperiment(WaitExperimentRequest) returns (WaitExperimentResponse) {} + rpc GetExperimentData(GetExperimentDataRequest) returns (stream ExperimentData) {} + rpc UpdateEnvoyStatus(UpdateEnvoyStatusRequest) returns (UpdateEnvoyStatusResponse) {} + + //Runtime RPCs + rpc SetNewExperiment(stream ExperimentInfo) returns (SetNewExperimentResponse) {} + rpc GetEnvoys(GetEnvoysRequest) returns (GetEnvoysResponse) {} + rpc GetFlowState(GetFlowStateRequest) returns (GetFlowStateResponse) {} + rpc ConnectRuntime(SendRuntimeRequest) returns (RuntimeRequestResponse) {} + rpc GetExperimentStdout(GetExperimentStdoutRequest) returns (stream GetExperimentStdoutResponse) {} +} + +message SendConnectionRequest { + string envoy_name = 1; +} + +message RequestAccepted { + bool accepted = 1; +} + +message WaitExperimentRequest { + string collaborator_name = 1; +} + +message WaitExperimentResponse { + string experiment_name = 1; +} + +message GetExperimentDataRequest { + string experiment_name = 1; + string collaborator_name = 2; +} + +message ExperimentData { + uint32 size = 1; + bytes exp_data = 2; +} + +message UpdateEnvoyStatusRequest { + string name = 1; + bool is_experiment_running = 2; +} + +message UpdateEnvoyStatusResponse { + google.protobuf.Duration health_check_period = 1; +} + +message ExperimentInfo { + string name = 1; + repeated string collaborator_names = 2; + 
  ExperimentData experiment_data = 3;
+}
+
+message SetNewExperimentResponse {
+  bool status = 1;
+}
+
+message EnvoyInfo {
+  string envoy_name = 1;
+  string experiment_name = 2;
+  bool is_online = 3;
+  bool is_experiment_running = 4;
+  google.protobuf.Timestamp last_updated = 5;
+  google.protobuf.Duration valid_duration = 6;
+}
+
+message GetEnvoysRequest {}
+
+message GetEnvoysResponse {
+  repeated EnvoyInfo envoy_infos = 1;
+}
+
+message GetFlowStateRequest {}
+
+message GetFlowStateResponse {
+  bool completed = 1;
+  bytes flspec_obj = 2;
+}
+
+message SendRuntimeRequest {}
+
+message RuntimeRequestResponse {
+  bool accepted = 1;
+}
+
+message GetExperimentStdoutRequest {
+  string experiment_name = 1;
+}
+
+message GetExperimentStdoutResponse {
+  string stdout_origin = 1;
+  string task_name = 2;
+  string stdout_value = 3;
+}
diff --git a/openfl/experimental/workflow/runtime/federated_runtime.py b/openfl/experimental/workflow/runtime/federated_runtime.py
index 9684fad404..604f13ce88 100644
--- a/openfl/experimental/workflow/runtime/federated_runtime.py
+++ b/openfl/experimental/workflow/runtime/federated_runtime.py
@@ -2,71 +2,88 @@
 # SPDX-License-Identifier: Apache-2.0
 
-"""openfl.experimental.workflow.runtime package LocalRuntime class."""
+"""openfl.experimental.workflow.runtime package FederatedRuntime class."""
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+import logging
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
 
-from openfl.experimental.workflow.runtime.runtime import Runtime
+import dill
+from tabulate import tabulate
 
-if TYPE_CHECKING:
-    from openfl.experimental.workflow.interface import Aggregator, Collaborator
+from openfl.experimental.workflow.runtime.runtime import Runtime
+from openfl.experimental.workflow.transport.grpc.director_client import DirectorClient
+from openfl.experimental.workflow.workspace_export import WorkspaceExport
 
-from typing import List, Type
+logger = logging.getLogger(__name__)
 
 
 class FederatedRuntime(Runtime):
-    """Class for a federated runtime, derived from the Runtime class.
+    """FederatedRuntime class, derived from Runtime class.
 
     Attributes:
-        aggregator (Type[Aggregator]): The aggregator participant.
-        collaborators (List[Type[Collaborator]]): The list of collaborator
-            participants.
+        __collaborators (Optional[List[str]]): List of authorized collaborators.
+        tls (bool): A flag indicating if TLS should be used for
+            connections. Defaults to False.
+        director (Optional[Dict[str, Any]]): Dictionary containing director info.
+        _dir_client (DirectorClient): The director client.
+        notebook_path (Optional[str]): Path to the Jupyter notebook.
+        experiment_submitted (bool): Whether the experiment has been submitted.
+        generated_workspace_path (Path): Path to the generated workspace.
     """
 
     def __init__(
         self,
-        aggregator: str = None,
-        collaborators: List[str] = None,
-        **kwargs,
+        collaborators: Optional[List[str]] = None,
+        director: Optional[Dict[str, Any]] = None,
+        notebook_path: Optional[str] = None,
+        tls: bool = False,
     ) -> None:
         """Initializes the FederatedRuntime object.
 
-        Use single node to run the flow.
-
         Args:
-            aggregator (str, optional): Name of the aggregator. Defaults to
-                None.
-            collaborators (List[str], optional): List of collaborator names.
+            collaborators (Optional[List[str]]): List of authorized collaborators.
                 Defaults to None.
-            **kwargs: Additional keyword arguments.
+            director (Optional[Dict[str, Any]]): Director information.
Defaults to None + notebook_path (Optional[str]): Jupyter notebook path + tls (bool): Whether to use TLS for the connection. """ super().__init__() - if aggregator is not None: - self.aggregator = aggregator - - if collaborators is not None: - self.collaborators = collaborators - - @property - def aggregator(self) -> str: - """Returns name of _aggregator.""" - return self._aggregator + self.__collaborators = collaborators - @aggregator.setter - def aggregator(self, aggregator_name: Type[Aggregator]): - """Set LocalRuntime _aggregator. + self.tls = tls + if director: + self.director = director + self._fill_certs( + self.director.get("cert_chain", None), + self.director.get("api_private_key", None), + self.director.get("api_cert", None), + ) + self._dir_client = self._create_director_client() + + self.notebook_path = notebook_path + self.experiment_submitted = False + self.generated_workspace_path = Path("./generated_workspace").resolve() + + @staticmethod + def remove_workspace_archive(archive_path) -> None: + """ + Removes workspace archive Args: - aggregator_name (Type[Aggregator]): The name of the aggregator to - set. + archive_path (str): Archive file path containing the workspace. """ - self._aggregator = aggregator_name + if os.path.exists(archive_path): + os.remove(archive_path) @property def collaborators(self) -> List[str]: - """Return names of collaborators. + """Get the names of collaborators. Don't give direct access to private attributes. @@ -76,14 +93,154 @@ def collaborators(self) -> List[str]: return self.__collaborators @collaborators.setter - def collaborators(self, collaborators: List[Type[Collaborator]]): - """Set LocalRuntime collaborators. + def collaborators(self, collaborators: List[str]) -> None: + """Set the collaborators. Args: - collaborators (List[Type[Collaborator]]): The list of + collaborators (List[str]): The list of collaborators to set. """ self.__collaborators = collaborators - def __repr__(self): + def _fill_certs(self, root_certificate, private_key, certificate) -> None: + """Fill certificates. + + Args: + root_certificate (Union[Path, str]): The path to the root + certificate for the TLS connection. + private_key (Union[Path, str]): The path to the server's private + key for the TLS connection. + certificate (Union[Path, str]): The path to the server's + certificate for the TLS connection. + """ + if self.tls: + if not all([root_certificate, private_key, certificate]): + raise ValueError("Incomplete certificates provided") + + self.root_certificate = Path(root_certificate).absolute() + self.private_key = Path(private_key).absolute() + self.certificate = Path(certificate).absolute() + else: + self.root_certificate = self.private_key = self.certificate = None + + def _create_director_client(self) -> DirectorClient: + """Create a DirectorClient instance. + + Returns: + DirectorClient: Instance of the client + """ + return DirectorClient( + director_host=self.director["director_node_fqdn"], + director_port=self.director["director_port"], + tls=self.tls, + root_certificate=self.root_certificate, + private_key=self.private_key, + certificate=self.certificate, + ) + + def prepare_workspace_archive(self) -> Tuple[Path, str]: + """ + Prepare workspace archive using WorkspaceExport. + + Returns: + Tuple[Path, str]: A tuple containing the path of the created + archive and the experiment name. 
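+
+        Example (illustrative sketch; collaborator names, port, and
+        notebook path below are placeholders):
+
+            runtime = FederatedRuntime(
+                collaborators=["envoy_one", "envoy_two"],
+                director={"director_node_fqdn": "localhost", "director_port": 50050},
+                notebook_path="./my_experiment.ipynb",
+            )
+            archive_path, exp_name = runtime.prepare_workspace_archive()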
+ """ + archive_path, exp_name = WorkspaceExport.export_federated( + notebook_path=self.notebook_path, + output_workspace="./generated_workspace", + ) + return archive_path, exp_name + + def submit_experiment(self, archive_path, exp_name) -> None: + """ + Submits experiment archive to the director + + Args: + archive_path (str): Archive file path containing the workspace. + exp_name (str): The name of the experiment to be submitted. + """ + try: + response = self._dir_client.set_new_experiment( + archive_path=archive_path, experiment_name=exp_name, col_names=self.__collaborators + ) + self.experiment_submitted = response.status + + if self.experiment_submitted: + print( + f"\033[92mExperiment {exp_name} was successfully " + "submitted to the director!\033[0m" + ) + else: + print(f"\033[91mFailed to submit experiment '{exp_name}' to the director.\033[0m") + finally: + self.remove_workspace_archive(archive_path) + + def get_flow_state(self) -> Tuple[bool, Any]: + """ + Retrieve the updated flow status and deserialized flow object. + + Returns: + status (bool): The flow status. + flow_object: The deserialized flow object. + """ + status, flspec_obj = self._dir_client.get_flow_state() + + # Append generated workspace path to sys.path + # to allow unpickling of flspec_obj + sys.path.append(str(self.generated_workspace_path)) + flow_object = dill.loads(flspec_obj) + + return status, flow_object + + def get_envoys(self) -> None: + """Prints the status of Envoys in a formatted way.""" + # Fetch envoy data + envoys = self._dir_client.get_envoys() + DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S" + now = datetime.now().strftime(DATETIME_FORMAT) + + # Prepare the table headers + headers = ["Name", "Online", "Last Updated", "Experiment Running", "Experiment Name"] + # Prepare the table rows + rows = [] + for envoy in envoys.envoy_infos: + rows.append( + [ + envoy.envoy_name, + "Yes" if envoy.is_online else "No", + datetime.fromtimestamp(envoy.last_updated.seconds).strftime(DATETIME_FORMAT), + "Yes" if envoy.is_experiment_running else "No", + envoy.experiment_name if envoy.experiment_name else "None", + ] + ) + # Use tabulate to format the table + result = tabulate(rows, headers=headers, tablefmt="grid") + # Display the current timestamp + print(f"Status of Envoys connected to Federation at: {now}\n") + print(result) + + def stream_experiment_stdout(self, experiment_name) -> None: + """Stream experiment stdout. + + Args: + experiment_name (str): Name of the experiment. + """ + if not self.experiment_submitted: + print("No experiment has been submitted yet.") + return + print(f"Getting standard output for experiment: {experiment_name}...") + for stdout_message_dict in self._dir_client.stream_experiment_stdout(experiment_name): + print( + f'Origin: {stdout_message_dict["stdout_origin"]}, ' + f'Task: {stdout_message_dict["task_name"]}' + f'\n{stdout_message_dict["stdout_value"]}' + ) + + def __repr__(self) -> str: + """Returns the string representation of the FederatedRuntime object. + + Returns: + str: The string representation of the FederatedRuntime object. 
+ """ return "FederatedRuntime" diff --git a/openfl/experimental/workflow/transport/__init__.py b/openfl/experimental/workflow/transport/__init__.py index 6a8abc0d3f..3397c94b7e 100644 --- a/openfl/experimental/workflow/transport/__init__.py +++ b/openfl/experimental/workflow/transport/__init__.py @@ -4,4 +4,9 @@ """openfl.experimental.workflow.transport package.""" -from openfl.experimental.workflow.transport.grpc import AggregatorGRPCClient, AggregatorGRPCServer +from openfl.experimental.workflow.transport.grpc import ( + AggregatorGRPCClient, + AggregatorGRPCServer, + DirectorClient, + DirectorGRPCServer, +) diff --git a/openfl/experimental/workflow/transport/grpc/__init__.py b/openfl/experimental/workflow/transport/grpc/__init__.py index e849e35af9..7406f1dda5 100644 --- a/openfl/experimental/workflow/transport/grpc/__init__.py +++ b/openfl/experimental/workflow/transport/grpc/__init__.py @@ -6,8 +6,5 @@ from openfl.experimental.workflow.transport.grpc.aggregator_client import AggregatorGRPCClient from openfl.experimental.workflow.transport.grpc.aggregator_server import AggregatorGRPCServer - - -# FIXME: Not the right place for exceptions -class ShardNotFoundError(Exception): - """Indicates that director has no information about that shard.""" +from openfl.experimental.workflow.transport.grpc.director_client import DirectorClient +from openfl.experimental.workflow.transport.grpc.director_server import DirectorGRPCServer diff --git a/openfl/experimental/workflow/transport/grpc/aggregator_server.py b/openfl/experimental/workflow/transport/grpc/aggregator_server.py index 557ae03cf2..41c990d366 100644 --- a/openfl/experimental/workflow/transport/grpc/aggregator_server.py +++ b/openfl/experimental/workflow/transport/grpc/aggregator_server.py @@ -4,6 +4,7 @@ """AggregatorGRPCServer module.""" +import asyncio import logging from concurrent.futures import ThreadPoolExecutor from multiprocessing import cpu_count @@ -181,7 +182,9 @@ def CallCheckpoint(self, request, context): # NOQA:N802 function = request.function stream_buffer = request.stream_buffer - self.aggregator.call_checkpoint(execution_environment, function, stream_buffer) + self.aggregator.call_checkpoint( + collaborator_name, execution_environment, function, stream_buffer + ) return aggregator_pb2.CheckpointResponse(header=self.get_header(collaborator_name)) @@ -217,7 +220,7 @@ def get_server(self): return self.server - def serve(self): + async def serve(self): """Start an aggregator gRPC service.""" self.get_server() @@ -226,9 +229,18 @@ def serve(self): self.is_server_started = True try: while not self.aggregator.all_quit_jobs_sent(): - sleep(5) + await asyncio.sleep(5) except KeyboardInterrupt: pass + finally: + self.logger.info("All Jobs Sent Successfully, Exiting...") + self.stop_server() + + def run_server(self): + """Launch the aggregator gRPC server and aggregator flow concurrently""" + loop = asyncio.get_event_loop() + loop.create_task(self.aggregator.run_flow()) + loop.run_until_complete(self.serve()) def stop_server(self): self.server.stop(0) diff --git a/openfl/experimental/workflow/transport/grpc/director_client.py b/openfl/experimental/workflow/transport/grpc/director_client.py new file mode 100644 index 0000000000..91eea331e8 --- /dev/null +++ b/openfl/experimental/workflow/transport/grpc/director_client.py @@ -0,0 +1,269 @@ +# Copyright 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""DirectorClient module.""" + +import logging +from pathlib import Path +from typing import Any, Dict, Iterator, 
Optional, Tuple, Union  # type: ignore
+
+import grpc
+from grpc._channel import _MultiThreadedRendezvous as DataStream
+
+from openfl.experimental.workflow.protocols import director_pb2, director_pb2_grpc
+from openfl.experimental.workflow.transport.grpc.exceptions import EnvoyNotFoundError
+
+from .grpc_channel_options import channel_options
+
+logger = logging.getLogger(__name__)
+
+
+class DirectorClient:
+    """Director client class for experiment managers/envoys.
+
+    This class communicates with the director to manage the envoys'
+    participation in the federation.
+
+    Attributes:
+        director_addr (str): The director address (host:port).
+        envoy_name (Optional[str]): The name of the envoy.
+        stub (director_pb2_grpc.DirectorStub): The gRPC stub for communication
+            with the director.
+    """
+
+    def __init__(
+        self,
+        *,
+        director_host: str,
+        director_port: int,
+        envoy_name: Optional[str] = None,
+        tls: bool = False,
+        root_certificate: Optional[Union[Path, str]] = None,
+        private_key: Optional[Union[Path, str]] = None,
+        certificate: Optional[Union[Path, str]] = None,
+    ) -> None:
+        """
+        Initialize director client object.
+
+        Args:
+            director_host (str): The host name for Director server.
+            director_port (int): The port number for Director server.
+            envoy_name (Optional[str]): The name of the envoy.
+            tls (bool): Whether to use TLS for the connection.
+            root_certificate (Optional[Union[Path, str]]): The path to the root certificate for the
+                TLS connection.
+            private_key (Optional[Union[Path, str]]): The path to the private key for the TLS
+                connection.
+            certificate (Optional[Union[Path, str]]): The path to the certificate for the TLS
+                connection.
+        """
+        director_addr = f"{director_host}:{director_port}"
+        self.envoy_name = envoy_name
+        if not tls:
+            channel = grpc.insecure_channel(director_addr, options=channel_options)
+        else:
+            if not (root_certificate and private_key and certificate):
+                raise Exception("No certificates provided for TLS connection")
+            try:
+                with open(root_certificate, "rb") as f:
+                    root_certificate_b = f.read()
+                with open(private_key, "rb") as f:
+                    private_key_b = f.read()
+                with open(certificate, "rb") as f:
+                    certificate_b = f.read()
+            except FileNotFoundError as exc:
+                raise Exception(f"Provided certificate file does not exist: {exc.filename}")
+
+            credentials = grpc.ssl_channel_credentials(
+                root_certificates=root_certificate_b,
+                private_key=private_key_b,
+                certificate_chain=certificate_b,
+            )
+            channel = grpc.secure_channel(director_addr, credentials, options=channel_options)
+        self.stub = director_pb2_grpc.DirectorStub(channel)
+
+    def connect_envoy(self, envoy_name: str) -> bool:
+        """Attempt to establish a connection with the director.
+
+        Args:
+            envoy_name (str): Name of the envoy
+
+        Returns:
+            response.accepted (bool): Whether the Envoy connection was accepted
+        """
+        logger.info(f"Sending {envoy_name} connection request to director")
+
+        request = director_pb2.SendConnectionRequest(envoy_name=envoy_name)
+        response = self.stub.EnvoyConnectionRequest(request)
+
+        return response.accepted
+
+    def wait_experiment(self) -> str:
+        """
+        Waits for experiment data from the director.
+
+        Returns:
+            experiment_name (str): The name of the experiment.
+        """
+        logger.info("Waiting for an experiment to run...")
+        response = self.stub.WaitExperiment(self._get_experiment_data())
+        logger.info("New experiment received: %s", response)
+        if not response.experiment_name:
+            raise ValueError("No experiment name received")
+        return response.experiment_name
+
+    def get_experiment_data(self, experiment_name) -> DataStream:
+        """
+        Get experiment data from the director.
+
+        Args:
+            experiment_name (str): The name of the experiment.
+
+        Returns:
+            data_stream (grpc._channel._MultiThreadedRendezvous): The data
+                stream of the experiment data.
+        """
+        logger.info("Getting experiment data for %s...", experiment_name)
+        request = director_pb2.GetExperimentDataRequest(
+            experiment_name=experiment_name, collaborator_name=self.envoy_name
+        )
+        data_stream = self.stub.GetExperimentData(request)
+
+        return data_stream
+
+    def _get_experiment_data(self) -> director_pb2.WaitExperimentRequest:
+        """Generate the experiment data request.
+
+        Returns:
+            director_pb2.WaitExperimentRequest: The request for experiment
+                data.
+        """
+        return director_pb2.WaitExperimentRequest(collaborator_name=self.envoy_name)
+
+    def set_new_experiment(
+        self, experiment_name, col_names, archive_path
+    ) -> director_pb2.SetNewExperimentResponse:
+        """
+        Send the new experiment to director to launch.
+
+        Args:
+            experiment_name (str): The name of the experiment.
+            col_names (List[str]): The names of the collaborators.
+            archive_path (str): The path to the workspace archive.
+
+        Returns:
+            resp (director_pb2.SetNewExperimentResponse): The response from
+                the director.
+        """
+        logger.info("Submitting new experiment %s to director", experiment_name)
+
+        experiment_info_gen = self._get_experiment_info(
+            arch_path=archive_path,
+            name=experiment_name,
+            col_names=col_names,
+        )
+        resp = self.stub.SetNewExperiment(experiment_info_gen)
+        return resp
+
+    def _get_experiment_info(
+        self, arch_path, name, col_names
+    ) -> Iterator[director_pb2.ExperimentInfo]:
+        """
+        Generate the experiment data request.
+
+        This method generates a stream of experiment data to be sent to the
+        director.
+
+        Args:
+            arch_path (str): The path to the workspace archive.
+            name (str): The name of the experiment.
+            col_names (List[str]): The names of the collaborators.
+
+        Yields:
+            director_pb2.ExperimentInfo: The experiment data.
+        """
+        with open(arch_path, "rb") as arch:
+            max_buffer_size = 2 * 1024 * 1024
+            chunk = arch.read(max_buffer_size)
+            while chunk != b"":
+                experiment_info = director_pb2.ExperimentInfo(
+                    name=name,
+                    collaborator_names=col_names,
+                )
+                experiment_info.experiment_data.size = len(chunk)
+                experiment_info.experiment_data.exp_data = chunk
+                yield experiment_info
+                chunk = arch.read(max_buffer_size)
+
+    def get_envoys(self) -> director_pb2.GetEnvoysResponse:
+        """Fetch envoy statuses from the director.
+
+        Returns:
+            envoys (director_pb2.GetEnvoysResponse): The envoy status response
+                from the gRPC server.
+        """
+        envoys = self.stub.GetEnvoys(director_pb2.GetEnvoysRequest())
+        return envoys
+
+    def get_flow_state(self) -> Tuple:
+        """
+        Gets updated state of the flow.
+
+        Returns:
+            tuple: A tuple containing:
+                - completed (bool): Indicates whether the flow has completed.
+                - flspec_obj (object): The FLSpec object containing
+                  details of the updated flow state.
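+
+        Note: flspec_obj is returned as serialized bytes; callers such as
+        FederatedRuntime.get_flow_state are expected to deserialize it
+        (e.g. with dill.loads).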
+        """
+        response = self.stub.GetFlowState(director_pb2.GetFlowStateRequest())
+
+        return response.completed, response.flspec_obj
+
+    def send_health_check(
+        self,
+        *,
+        envoy_name: str,
+        is_experiment_running: bool,
+    ) -> int:
+        """Send envoy health check.
+
+        Args:
+            envoy_name (str): The name of the envoy.
+            is_experiment_running (bool): Whether an experiment is currently
+                running.
+
+        Returns:
+            health_check_period (int): The period for health checks.
+        """
+        status = director_pb2.UpdateEnvoyStatusRequest(
+            name=envoy_name,
+            is_experiment_running=is_experiment_running,
+        )
+
+        logger.debug("Sending health check status: %s", status)
+        try:
+            response = self.stub.UpdateEnvoyStatus(status)
+        except grpc.RpcError as rpc_error:
+            logger.error(rpc_error)
+            if rpc_error.code() == grpc.StatusCode.NOT_FOUND:
+                raise EnvoyNotFoundError
+            # Re-raise other RPC errors; otherwise health_check_period
+            # would be referenced before assignment below
+            raise
+        else:
+            health_check_period = response.health_check_period.seconds
+
+        return health_check_period
+
+    def stream_experiment_stdout(self, experiment_name) -> Iterator[Dict[str, Any]]:
+        """Stream experiment stdout RPC.
+
+        Args:
+            experiment_name (str): The name of the experiment.
+
+        Yields:
+            Dict[str, Any]: The stdout.
+        """
+        request = director_pb2.GetExperimentStdoutRequest(experiment_name=experiment_name)
+        for stdout_message in self.stub.GetExperimentStdout(request):
+            yield {
+                "stdout_origin": stdout_message.stdout_origin,
+                "task_name": stdout_message.task_name,
+                "stdout_value": stdout_message.stdout_value,
+            }
diff --git a/openfl/experimental/workflow/transport/grpc/director_server.py b/openfl/experimental/workflow/transport/grpc/director_server.py
new file mode 100644
index 0000000000..608d2a7743
--- /dev/null
+++ b/openfl/experimental/workflow/transport/grpc/director_server.py
@@ -0,0 +1,353 @@
+# Copyright 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+"""DirectorGRPCServer module."""
+
+import asyncio
+import logging
+import uuid
+from pathlib import Path
+from typing import AsyncIterator, Optional, Union
+
+import grpc
+from grpc import aio, ssl_server_credentials
+
+from openfl.experimental.workflow.protocols import director_pb2, director_pb2_grpc
+from openfl.experimental.workflow.transport.grpc.exceptions import EnvoyNotFoundError
+from openfl.experimental.workflow.transport.grpc.grpc_channel_options import channel_options
+from openfl.protocols.utils import get_headers
+
+logger = logging.getLogger(__name__)
+
+CLIENT_ID_DEFAULT = "__default__"
+
+
+class DirectorGRPCServer(director_pb2_grpc.DirectorServicer):
+    """
+    Director transport class.
+
+    This class implements a gRPC server for the Director, allowing it to
+    communicate with envoys.
+
+    Attributes:
+        listen_uri (str): The URI that the server is serving on.
+        tls (bool): Whether to use TLS for the connection.
+        root_certificate (Optional[Union[Path, str]]): The path to the root certificate for the TLS
+            connection.
+        private_key (Optional[Union[Path, str]]): The path to the server's private key for the TLS
+            connection.
+        certificate (Optional[Union[Path, str]]): The path to the server's certificate for the TLS
+            connection.
+        server (grpc.Server): The gRPC server.
+        root_dir (Path): Path to the root directory
+        director (Director): The director that this server is serving.
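+
+    Example (illustrative sketch; the Director class and listen address
+    below are assumptions for the example, not defaults):
+
+        server = DirectorGRPCServer(
+            director_cls=Director,
+            tls=False,
+            listen_host="localhost",
+            listen_port=50051,
+        )
+        server.start()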
+ """ + + def __init__( + self, + *, + director_cls, + tls: bool = True, + root_certificate: Optional[Union[Path, str]] = None, + private_key: Optional[Union[Path, str]] = None, + certificate: Optional[Union[Path, str]] = None, + listen_host: str = "[::]", + listen_port: int = 50051, + envoy_health_check_period: int = 0, + director_config: Optional[Path] = None, + **kwargs, + ) -> None: + """ + Initialize a DirectorGRPCServer object. + + Args: + director_cls (Type[Director]): The class of the director. + tls (bool, optional): Whether to use TLS for the connection. + Defaults to True. + root_certificate (Optional[Union[Path, str]]): The path + to the root certificate for the TLS connection. Defaults to + None. + private_key (Optional[Union[Path, str]]): The path to + the server's private key for the TLS connection. Defaults to + None. + certificate (Optional[Union[Path, str]]): The path to + the server's certificate for the TLS connection. Defaults to + None. + listen_host (str, optional): The host to listen on. Defaults to + '[::]'. + listen_port (int, optional): The port to listen on. Defaults to + 50051. + director_config (Optional[Path]): Path to director_config file + **kwargs: Additional keyword arguments. + """ + super().__init__() + self.listen_uri = f"{listen_host}:{listen_port}" + self.tls = tls + self._fill_certs(root_certificate, private_key, certificate) + self.server = None + self.root_dir = Path.cwd() + self.director = director_cls( + tls=self.tls, + root_certificate=self.root_certificate, + private_key=self.private_key, + certificate=self.certificate, + envoy_health_check_period=envoy_health_check_period, + director_config=director_config, + **kwargs, + ) + + def _fill_certs(self, root_certificate, private_key, certificate) -> None: + """Fill certificates. + + Args: + root_certificate (Union[Path, str]): The path to the root + certificate for the TLS connection. + private_key (Union[Path, str]): The path to the server's private + key for the TLS connection. + certificate (Union[Path, str]): The path to the server's + certificate for the TLS connection. 
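+
+        Raises:
+            ValueError: If TLS is enabled and any of the certificate
+                paths is missing.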
+        """
+        if self.tls:
+            if not all([root_certificate, private_key, certificate]):
+                raise ValueError("Incomplete certificates provided")
+
+            self.root_certificate = Path(root_certificate).absolute()
+            self.private_key = Path(private_key).absolute()
+            self.certificate = Path(certificate).absolute()
+        else:
+            self.root_certificate = self.private_key = self.certificate = None
+
+    def start(self) -> None:
+        """Launch the DirectorGRPCServer."""
+        loop = asyncio.get_event_loop()
+        loop.create_task(self.director.start_experiment_execution_loop())
+        loop.run_until_complete(self._run_server())
+
+    async def _run_server(self) -> None:
+        """Run the gRPC server."""
+        self.server = aio.server(options=channel_options)
+        director_pb2_grpc.add_DirectorServicer_to_server(self, self.server)
+
+        if not self.tls:
+            self.server.add_insecure_port(self.listen_uri)
+        else:
+            with open(self.private_key, "rb") as f:
+                private_key_b = f.read()
+            with open(self.certificate, "rb") as f:
+                certificate_b = f.read()
+            with open(self.root_certificate, "rb") as f:
+                root_certificate_b = f.read()
+            server_credentials = ssl_server_credentials(
+                ((private_key_b, certificate_b),),
+                root_certificates=root_certificate_b,
+                require_client_auth=True,
+            )
+            self.server.add_secure_port(self.listen_uri, server_credentials)
+        logger.info(f"Starting director server on {self.listen_uri}")
+        await self.server.start()
+        await self.server.wait_for_termination()
+
+    def get_caller(self, context) -> str:
+        """Get caller name from context.
+
+        If TLS is enabled, the caller name is taken from the auth context;
+        otherwise it is taken from the context header 'client_id'.
+
+        Args:
+            context (grpc.ServicerContext): The context of the request.
+
+        Returns:
+            str: The name of the caller.
+        """
+        if self.tls:
+            return context.auth_context()["x509_common_name"][0].decode("utf-8")
+        headers = get_headers(context)
+        client_id = headers.get("client_id", CLIENT_ID_DEFAULT)
+        return client_id
+
+    def EnvoyConnectionRequest(self, request, context) -> director_pb2.RequestAccepted:
+        """Handles a connection request from an Envoy.
+
+        Args:
+            request (director_pb2.SendConnectionRequest): The request from
+                the envoy
+            context (grpc.ServicerContext): The context of the request.
+
+        Returns:
+            director_pb2.RequestAccepted: Indicates whether the connection was accepted
+        """
+        logger.info(f"Envoy {request.envoy_name} is attempting to connect")
+        is_accepted = self.director.ack_envoy_connection_request(request.envoy_name)
+        if is_accepted:
+            logger.info(f"Envoy {request.envoy_name} is connected")
+
+        return director_pb2.RequestAccepted(accepted=is_accepted)
+
+    async def UpdateEnvoyStatus(self, request, context) -> director_pb2.UpdateEnvoyStatusResponse:
+        """Accept health check from envoy.
+
+        Args:
+            request (director_pb2.UpdateEnvoyStatusRequest): The request from
+                the envoy.
+            context (grpc.ServicerContext): The context of the request.
+
+        Returns:
+            resp (director_pb2.UpdateEnvoyStatusResponse): The response to the
+                request.
+ """ + logger.debug("Updating envoy status: %s", request) + try: + health_check_period = self.director.update_envoy_status( + envoy_name=request.name, + is_experiment_running=request.is_experiment_running, + ) + except EnvoyNotFoundError as exc: + logger.error(exc) + await context.abort(grpc.StatusCode.NOT_FOUND, str(exc)) + else: + resp = director_pb2.UpdateEnvoyStatusResponse() + resp.health_check_period.seconds = health_check_period + + return resp + + async def GetEnvoys(self, request, context) -> director_pb2.GetEnvoysResponse: + """Get status of connected envoys. + + Args: + request (director_pb2.GetEnvoysRequest): The request from + the envoy. + context (grpc.ServicerContext): The context of the request. + + Returns: + director_pb2.GetEnvoysResponse: The response to the request. + """ + envoy_infos = self.director.get_envoys() + envoy_statuses = [] + for envoy_name, envoy_info in envoy_infos.items(): + envoy_info_message = director_pb2.EnvoyInfo( + envoy_name=envoy_name, + is_online=envoy_info["is_online"], + is_experiment_running=envoy_info["is_experiment_running"], + experiment_name=envoy_info["experiment_name"], + ) + envoy_info_message.valid_duration.seconds = envoy_info["valid_duration"] + envoy_info_message.last_updated.seconds = int(envoy_info["last_updated"]) + + envoy_statuses.append(envoy_info_message) + + return director_pb2.GetEnvoysResponse(envoy_infos=envoy_statuses) + + async def GetExperimentData( + self, request, context + ) -> AsyncIterator[director_pb2.ExperimentData]: + """Receive experiment data. + + Args: + request (director_pb2.GetExperimentDataRequest): The request from + the collaborator. + context (grpc.ServicerContext): The context of the request. + + Yields: + director_pb2.ExperimentData: The experiment data. + """ + data_file_path = self.director.get_experiment_data(request.experiment_name) + max_buffer_size = 2 * 1024 * 1024 + with open(data_file_path, "rb") as df: + while True: + data = df.read(max_buffer_size) + if len(data) == 0: + break + yield director_pb2.ExperimentData(size=len(data), exp_data=data) + + async def WaitExperiment(self, request, context) -> director_pb2.WaitExperimentResponse: + """Handles a request to wait for an experiment to be ready. + + Args: + request (director_pb2.WaitExperimentRequest): The request from the + collaborator. + context (grpc.ServicerContext): The context of the request. + + Returns: + director_pb2.WaitExperimentResponse: The response to the request. + """ + logger.debug( + "Request WaitExperiment received from envoy %s", + request.collaborator_name, + ) + experiment_name = await self.director.wait_experiment(request.collaborator_name) + logger.debug( + "Experiment %s is ready for %s", + experiment_name, + request.collaborator_name, + ) + + return director_pb2.WaitExperimentResponse(experiment_name=experiment_name) + + async def SetNewExperiment(self, stream, context) -> director_pb2.SetNewExperimentResponse: + """Request to set new experiment. + + Args: + stream (grpc.aio._MultiThreadedRendezvous): The stream of + experiment data. + context (grpc.ServicerContext): The context of the request. + + Returns: + director_pb2.SetNewExperimentResponse: The response to the request. 
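+
+        Note: each streamed ExperimentInfo message carries one chunk of the
+        workspace archive; a chunk is written only when its declared size
+        matches the payload length, otherwise registration fails.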
+        """
+        data_file_path = self.root_dir / str(uuid.uuid4())
+        with open(data_file_path, "wb") as data_file:
+            async for request in stream:
+                if request.experiment_data.size == len(request.experiment_data.exp_data):
+                    data_file.write(request.experiment_data.exp_data)
+                else:
+                    raise Exception("Could not register new experiment")
+
+        caller = self.get_caller(context)
+
+        is_accepted = await self.director.set_new_experiment(
+            experiment_name=request.name,
+            sender_name=caller,
+            collaborator_names=request.collaborator_names,
+            experiment_archive_path=data_file_path,
+        )
+
+        logger.info("Experiment %s registered", request.name)
+        return director_pb2.SetNewExperimentResponse(status=is_accepted)
+
+    async def GetFlowState(self, request, context) -> director_pb2.GetFlowStateResponse:
+        """Get updated flow after experiment is finished.
+
+        Args:
+            request (director_pb2.GetFlowStateRequest): The request from
+                the experiment manager
+            context (grpc.ServicerContext): The context of the request.
+
+        Returns:
+            director_pb2.GetFlowStateResponse: The response to the request.
+        """
+        status, flspec_obj = await self.director.get_flow_state()
+        return director_pb2.GetFlowStateResponse(completed=status, flspec_obj=flspec_obj)
+
+    async def GetExperimentStdout(
+        self, request, context
+    ) -> AsyncIterator[director_pb2.GetExperimentStdoutResponse]:
+        """
+        Request to stream stdout from the aggregator to frontend.
+
+        Args:
+            request (director_pb2.GetExperimentStdoutRequest): The request from
+                the experiment manager.
+            context (grpc.ServicerContext): The context of the request.
+
+        Yields:
+            director_pb2.GetExperimentStdoutResponse: The stdout messages.
+        """
+        logger.info("Getting standard output for experiment: %s...", request.experiment_name)
+        caller = self.get_caller(context)
+        async for stdout_dict in self.director.stream_experiment_stdout(
+            experiment_name=request.experiment_name, caller=caller
+        ):
+            if stdout_dict is None:
+                await asyncio.sleep(1)
+                continue
+            yield director_pb2.GetExperimentStdoutResponse(**stdout_dict)
diff --git a/openfl/experimental/workflow/transport/grpc/exceptions.py b/openfl/experimental/workflow/transport/grpc/exceptions.py
index a61807aa75..a83cdced09 100644
--- a/openfl/experimental/workflow/transport/grpc/exceptions.py
+++ b/openfl/experimental/workflow/transport/grpc/exceptions.py
@@ -5,5 +5,5 @@
 """Exceptions that occur during service interaction."""
 
 
-class ShardNotFoundError(Exception):
-    """Indicates that director has no information about that shard."""
+class EnvoyNotFoundError(Exception):
+    """Indicates that director has no information about that Envoy."""
diff --git a/openfl/experimental/workflow/utilities/runtime_utils.py b/openfl/experimental/workflow/utilities/runtime_utils.py
index 1aba29605a..d2b097fd8c 100644
--- a/openfl/experimental/workflow/utilities/runtime_utils.py
+++ b/openfl/experimental/workflow/utilities/runtime_utils.py
@@ -45,14 +45,14 @@ def parse_attrs(ctx, exclude=[], reserved_words=["next", "runtime", "input"]):
     return cls_attrs, valid_artifacts
 
 
-def generate_artifacts(ctx, reserved_words=["next", "runtime", "input"]):
+def generate_artifacts(ctx, reserved_words=["next", "runtime", "input", "checkpoint"]):
     """Generates artifacts from the given context, excluding specified
     reserved words.
 
     Args:
         ctx (any): The context to generate artifacts from.
        reserved_words (list, optional): A list of reserved words to exclude.
-            Defaults to ["next", "runtime", "input"].
+            Defaults to ["next", "runtime", "input", "checkpoint"].
 
     Returns:
         tuple: A tuple containing a generator of artifacts and a list of
@@ -114,6 +114,9 @@ def checkpoint(ctx, parent_func, chkpnt_reserved_words=["next", "runtime"]):
         parent_func (function): The function that was just executed.
         chkpnt_reserved_words (list, optional): A list of reserved words to
             exclude from checkpointing. Defaults to ["next", "runtime"].
+
+    Returns:
+        step_stdout (io.StringIO): The stdout captured from parent_func.
     """
 
     # Extract the stdout & stderr from the buffer
@@ -134,6 +137,7 @@ def checkpoint(ctx, parent_func, chkpnt_reserved_words=["next", "runtime"]):
             buffer_err=step_stderr,
         )
         print(f"Saved data artifacts for {parent_func.__name__}")
+        return step_stdout
 
 
 def old_check_resource_allocation(num_gpus, each_participant_gpu_usage):
diff --git a/openfl/experimental/workflow/workspace_export/export.py b/openfl/experimental/workflow/workspace_export/export.py
index 0e00e2efd6..3975e83e44 100644
--- a/openfl/experimental/workflow/workspace_export/export.py
+++ b/openfl/experimental/workflow/workspace_export/export.py
@@ -2,16 +2,18 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-"""Workspace Builder module."""
+"""Workspace Export module."""
 
 import ast
 import importlib
 import inspect
 import re
+import shutil
 import sys
 from logging import getLogger
 from pathlib import Path
 from shutil import copytree
+from typing import Any, Dict, Optional, Tuple
 
 import nbformat
 import yaml
@@ -19,27 +21,39 @@
 
 from openfl.experimental.workflow.interface.cli.cli_helper import print_tree
 
+logger = getLogger(__name__)
+
 
 class WorkspaceExport:
     """Convert a LocalRuntime Jupyter Notebook to Aggregator based FederatedRuntime Workflow.
 
-    Args:
+    Attributes:
         notebook_path: Absolute path of jupyter notebook.
         template_workspace_path: Path to template workspace provided with
             OpenFL.
-        output_dir: Output directory for new generated workspace
+        output_workspace_path: Output directory for the newly generated workspace
            (default="/tmp").
-
-    Returns:
-        None
     """
 
     def __init__(self, notebook_path: str, output_workspace: str) -> None:
-        self.logger = getLogger(__name__)
+        """Initialize a WorkspaceExport object.
+
+        Args:
+            notebook_path (str): Path to Jupyter notebook.
+            output_workspace (str): Path to the output workspace to be
+                generated.
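+
+        Raises:
+            FileNotFoundError: If the Jupyter notebook at notebook_path
+                does not exist.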
+        """
         self.notebook_path = Path(notebook_path).resolve()
+        # Check if the Jupyter notebook exists
+        if not self.notebook_path.exists() or not self.notebook_path.is_file():
+            raise FileNotFoundError(f"The Jupyter notebook at {notebook_path} does not exist.")
+
         self.output_workspace_path = Path(output_workspace).resolve()
+        # Regenerate the workspace if it already exists
+        if self.output_workspace_path.exists():
+            shutil.rmtree(self.output_workspace_path)
         self.output_workspace_path.parent.mkdir(parents=True, exist_ok=True)
 
         self.template_workspace_path = (
@@ -58,9 +72,9 @@ def __init__(self, notebook_path: str, output_workspace: str) -> None:
         self.created_workspace_path = Path(
             copytree(self.template_workspace_path, self.output_workspace_path)
         )
-        self.logger.info(f"Copied template workspace to {self.created_workspace_path}")
+        logger.info(f"Copied template workspace to {self.created_workspace_path}")
 
-        self.logger.info("Converting jupter notebook to python script...")
+        logger.info("Converting Jupyter notebook to python script...")
         export_filename = self.__get_exp_name()
         if export_filename is None:
             raise NameError(
@@ -85,7 +99,7 @@ def __init__(self, notebook_path: str, output_workspace: str) -> None:
         # backend="ray" # NOQA
         self.__change_runtime()
 
-    def __get_exp_name(self):
+    def __get_exp_name(self) -> Optional[str]:
         """Fetch the experiment name from the Jupyter notebook."""
         with open(str(self.notebook_path), "r") as f:
             notebook_content = nbformat.read(f, as_version=nbformat.NO_CONVERT)
@@ -95,16 +109,25 @@ def __get_exp_name(self):
             code = cell.source
             match = re.search(r"#\s*\|\s*default_exp\s+(\w+)", code)
             if match:
-                self.logger.info(f"Retrieved {match.group(1)} from default_exp")
+                logger.info(f"Retrieved {match.group(1)} from default_exp")
                 return match.group(1)
         return None
 
-    def __convert_to_python(self, notebook_path: Path, output_path: Path, export_filename):
+    def __convert_to_python(self, notebook_path: Path, output_path: Path, export_filename) -> Path:
+        """Converts a Jupyter notebook to a Python script.
+
+        Args:
+            notebook_path (Path): The path to the Jupyter notebook file
+                to be converted.
+            output_path (Path): The directory where the exported Python
+                script should be saved.
+            export_filename: The name of the exported Python script file.
+
+        Returns:
+            Path: The path to the exported Python script.
+        """
         nb_export(notebook_path, output_path)
 
         return Path(output_path).joinpath(export_filename).resolve()
 
-    def __comment_flow_execution(self):
+    def __comment_flow_execution(self) -> None:
         """In the python script search for ".run()" and comment it."""
         with open(self.script_path, "r") as f:
             data = f.readlines()
@@ -114,7 +137,7 @@ def __comment_flow_execution(self):
         with open(self.script_path, "w") as f:
             f.writelines(data)
 
-    def __change_runtime(self):
+    def __change_runtime(self) -> None:
         """Change the LocalRuntime backend from ray to single_process."""
         with open(self.script_path, "r") as f:
             data = f.read()
@@ -127,8 +150,12 @@ def __change_runtime(self):
         with open(self.script_path, "w") as f:
             f.write(data)
 
-    def __get_class_arguments(self, class_name):
-        """Given the class name returns expected class arguments."""
+    def __get_class_arguments(self, class_name) -> list:
+        """Given the class name returns expected class arguments.
+
+        Args:
+            class_name (str): Name of the class
+        """
         # Import python script if not already
         if not hasattr(self, "exported_script_module"):
             self.__import_exported_script()
@@ -158,11 +185,17 @@ def __get_class_arguments(self, class_name):
                 ]
                 return arg_names
             return []
-        self.logger.error(f"{cls} is not a class")
+        logger.error(f"{cls} is not a class")
 
-    def __get_class_name_and_sourcecode_from_parent_class(self, parent_class):
+    def __get_class_name_and_sourcecode_from_parent_class(
+        self, parent_class
+    ) -> Optional[Tuple[Optional[str], Optional[str]]]:
         """Provided the parent_class name returns derived class source code and
-        name."""
+        name.
+
+        Args:
+            parent_class: The parent class (e.g. FLSpec).
+        """
         # Import python script if not already
         if not hasattr(self, "exported_script_module"):
             self.__import_exported_script()
@@ -175,9 +208,13 @@ def __get_class_name_and_sourcecode_from_parent_class(self, parent_class):
 
         return None, None
 
-    def __extract_class_initializing_args(self, class_name):  # noqa: C901
+    def __extract_class_initializing_args(self, class_name) -> Dict[str, Any]:  # noqa: C901
         """Provided name of the class returns expected arguments and it's
-        values in form of dictionary."""
+        values in the form of a dictionary.
+
+        Args:
+            class_name (str): Name of the class
+        """
         instantiation_args = {"args": {}, "kwargs": {}}
 
         with open(self.script_path, "r") as s:
@@ -221,7 +258,7 @@ def __extract_class_initializing_args(self, class_name):  # noqa: C901
 
         return instantiation_args
 
-    def __import_exported_script(self):
+    def __import_exported_script(self) -> None:
         """
         Imports generated python script with help of importlib
         """
@@ -230,35 +267,65 @@ def __import_exported_script(self):
         self.exported_script_module = importlib.import_module(self.script_name)
         self.available_modules_in_exported_script = dir(self.exported_script_module)
 
-    def __read_yaml(self, path):
+    def __read_yaml(self, path) -> Any:
         with open(path, "r") as y:
             return yaml.safe_load(y)
 
-    def __write_yaml(self, path, data):
+    def __write_yaml(self, path, data) -> None:
         with open(path, "w") as y:
             yaml.safe_dump(data, y)
 
     @classmethod
-    def export(cls, notebook_path: str, output_workspace: str) -> None:
-        """Exports workspace to `output_dir`.
+    def export_federated(cls, notebook_path: str, output_workspace: str) -> Tuple[str, str]:
+        """Exports workspace for FederatedRuntime.
 
         Args:
-            notebook_path: Jupyter notebook path.
-            output_dir: Path for generated workspace directory.
-            template_workspace_path: Path to template workspace provided with
-                OpenFL (default="/tmp").
+            notebook_path (str): Path to the Jupyter notebook.
+            output_workspace (str): Path for the generated workspace directory.
 
         Returns:
-            None
+            Tuple[str, str]: A tuple containing:
+                (archive_path, flow_class_name).
+        """
+        instance = cls(notebook_path, output_workspace)
+        instance.generate_requirements()
+        instance.generate_plan_yaml()
+        return instance.generate_experiment_archive()
+
+    @classmethod
+    def export(cls, notebook_path: str, output_workspace: str) -> None:
+        """Exports workspace to output_workspace.
+
+        Args:
+            notebook_path (str): Path to the Jupyter notebook.
+            output_workspace (str): Path for the generated workspace directory.
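+
+        Example (illustrative; paths are placeholders):
+
+            WorkspaceExport.export(
+                notebook_path="./my_experiment.ipynb",
+                output_workspace="./generated_workspace",
+            )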
""" instance = cls(notebook_path, output_workspace) instance.generate_requirements() instance.generate_plan_yaml() instance.generate_data_yaml() + def generate_experiment_archive(self) -> Tuple[str, str]: + """ + Create archive of the generated workspace + + Returns: + Tuple[str, str]: A tuple containing: + (generated_workspace_path, archive_path, flow_class_name). + """ + parent_directory = self.output_workspace_path.parent + archive_path = parent_directory / "experiment" + + # Create a ZIP archive of the generated_workspace directory + arch_path = shutil.make_archive(str(archive_path), "zip", str(self.output_workspace_path)) + + print(f"Archive created at {archive_path}.zip") + + return arch_path, self.flow_class_name + # Have to do generate_requirements before anything else # because these !pip commands needs to be removed from python script - def generate_requirements(self): + def generate_requirements(self) -> None: """Finds pip libraries mentioned in exported python script and append in workspace/requirements.txt.""" data = None @@ -290,7 +357,7 @@ def generate_requirements(self): if i not in line_nos: f.write(line) - def generate_plan_yaml(self): + def generate_plan_yaml(self) -> None: """ Generates plan.yaml """ @@ -333,7 +400,7 @@ def update_dictionary(args: dict, data: dict, dtype: str = "args"): self.__write_yaml(plan, data) - def generate_data_yaml(self): # noqa: C901 + def generate_data_yaml(self) -> None: # noqa: C901 """Generates data.yaml.""" # Import python script if not already if not hasattr(self, "exported_script_module"): diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/director_config.yaml new file mode 100644 index 0000000000..9882f72c63 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/director_config.yaml @@ -0,0 +1,4 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..f6f96d3d6b --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/envoy_config.yaml @@ -0,0 +1,10 @@ +envoy_one: + callable_func: + settings: + batch_size_train: 64 + index: 1 + n_collaborators: 2 + test_dataset: private_attributes.collaborator_private_attrs.test_dataset + train_dataset: private_attributes.collaborator_private_attrs.train_dataset + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/private_attributes/collaborator_private_attrs.py 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/private_attributes/collaborator_private_attrs.py
new file mode 100644
index 0000000000..fcf5d81e98
--- /dev/null
+++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/private_attributes/collaborator_private_attrs.py
@@ -0,0 +1,47 @@
+# Copyright (C) 2020-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+import torch
+from copy import deepcopy
+import torchvision
+
+train_dataset = torchvision.datasets.MNIST(
+    "files/",
+    train=True,
+    download=True,
+    transform=torchvision.transforms.Compose(
+        [
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
+        ]
+    ),
+)
+
+test_dataset = torchvision.datasets.MNIST(
+    "files/",
+    train=False,
+    download=True,
+    transform=torchvision.transforms.Compose(
+        [
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize((0.1307,), (0.3081,)),
+        ]
+    ),
+)
+
+
+def collaborator_private_attrs(index, n_collaborators, train_dataset,
+                               test_dataset, batch_size_train):
+    local_train = deepcopy(train_dataset)
+    local_test = deepcopy(test_dataset)
+    local_train.data = train_dataset.data[index:: n_collaborators]
+    local_train.targets = train_dataset.targets[index:: n_collaborators]
+    local_test.data = test_dataset.data[index:: n_collaborators]
+    local_test.targets = test_dataset.targets[index:: n_collaborators]
+    return {
+        "train_loader": torch.utils.data.DataLoader(
+            local_train, batch_size=batch_size_train, shuffle=True
+        ),
+        "test_loader": torch.utils.data.DataLoader(
+            local_test, batch_size=batch_size_train, shuffle=True
+        ),
+    }
diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/requirements.txt
new file mode 100644
index 0000000000..acfef16953
--- /dev/null
+++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/requirements.txt
@@ -0,0 +1,7 @@
+mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability
+numpy>=1.13.3
+openfl>=1.2.1
+scikit-learn>=0.24.1
+setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability
+torch>=1.13.1
+wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability
diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/start_envoy.sh
new file mode 100755
index 0000000000..4da07821af
--- /dev/null
+++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_one/start_envoy.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+set -e
+ENVOY_NAME=$1
+ENVOY_CONF=$2
+
+fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050
diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/envoy_config.yaml
new file mode 100644
index 0000000000..eaf34da1df
--- /dev/null
+++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/envoy_config.yaml
@@ -0,0 +1,11 @@
+envoy_two:
+  callable_func:
+    settings:
+      batch_size_train: 64
+      index: 2
+      n_collaborators: 2
+      test_dataset: private_attributes.collaborator_private_attrs.test_dataset
+      
train_dataset: private_attributes.collaborator_private_attrs.train_dataset + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..fcf5d81e98 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,47 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import torch +from copy import deepcopy +import torchvision + +train_dataset = torchvision.datasets.MNIST( + "files/", + train=True, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + +test_dataset = torchvision.datasets.MNIST( + "files/", + train=False, + download=True, + transform=torchvision.transforms.Compose( + [ + torchvision.transforms.ToTensor(), + torchvision.transforms.Normalize((0.1307,), (0.3081,)), + ] + ), +) + + +def collaborator_private_attrs(index, n_collaborators, train_dataset, + test_dataset, batch_size_train): + local_train = deepcopy(train_dataset) + local_test = deepcopy(test_dataset) + local_train.data = train_dataset.data[index:: n_collaborators] + local_train.targets = train_dataset.targets[index:: n_collaborators] + local_test.data = test_dataset.data[index:: n_collaborators] + local_test.targets = test_dataset.targets[index:: n_collaborators] + return { + "train_loader": torch.utils.data.DataLoader( + local_train, batch_size=batch_size_train, shuffle=True + ), + "test_loader": torch.utils.data.DataLoader( + local_test, batch_size=batch_size_train, shuffle=True + ), + } diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/workspace/testflow_datastore_cli.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/workspace/testflow_datastore_cli.ipynb new file mode 100644 index 
0000000000..9e518febcb
--- /dev/null
+++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_datastore_cli/workspace/testflow_datastore_cli.ipynb
@@ -0,0 +1,453 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "dc13070c",
+   "metadata": {},
+   "source": [
+    "# Testcase: Datastore CLI"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "a4394089",
+   "metadata": {},
+   "source": [
+    "# Getting Started"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "id": "857f9995",
+   "metadata": {},
+   "source": [
+    "We start by specifying the module to which cells marked with the `#| export` directive will be automatically exported. \n",
+    "\n",
+    "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d79eacbd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| default_exp experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9bd8ac2d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "import torch.optim as optim\n",
+    "import torch\n",
+    "\n",
+    "from openfl.experimental.workflow.interface import FLSpec\n",
+    "from openfl.experimental.workflow.placement import aggregator, collaborator\n",
+    "\n",
+    "batch_size_train = 64\n",
+    "learning_rate = 0.01\n",
+    "momentum = 0.5\n",
+    "log_interval = 10\n",
+    "\n",
+    "random_seed = 1\n",
+    "torch.backends.cudnn.enabled = False\n",
+    "torch.manual_seed(random_seed)\n",
+    "\n",
+    "\n",
+    "class Bcolors:\n",
+    "    HEADER = \"\\033[95m\"\n",
+    "    OKBLUE = \"\\033[94m\"\n",
+    "    OKCYAN = \"\\033[96m\"\n",
+    "    OKGREEN = \"\\033[92m\"\n",
+    "    WARNING = \"\\033[93m\"\n",
+    "    FAIL = \"\\033[91m\"\n",
+    "    ENDC = \"\\033[0m\"\n",
+    "    BOLD = \"\\033[1m\"\n",
+    "    UNDERLINE = \"\\033[4m\"\n",
+    "\n",
+    "\n",
+    "class Net(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super(Net, self).__init__()\n",
+    "        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)\n",
+    "        self.fc1 = nn.Linear(1440, 10)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = F.relu(F.max_pool2d(self.conv1(x), 2))\n",
+    "        x = x.view(-1, 1440)\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        return F.log_softmax(x, dim=1)\n",
+    "\n",
+    "\n",
+    "def inference(network, test_loader):\n",
+    "    network.eval()\n",
+    "    test_loss = 0\n",
+    "    correct = 0\n",
+    "    with torch.no_grad():\n",
+    "        for data, target in test_loader:\n",
+    "            output = network(data)\n",
+    "            test_loss += F.nll_loss(output, target, reduction=\"sum\").item()\n",
+    "            pred = output.data.max(1, keepdim=True)[1]\n",
+    "            correct += pred.eq(target.data.view_as(pred)).sum()\n",
+    "    test_loss /= len(test_loader.dataset)\n",
+    "    accuracy = float(correct / len(test_loader.dataset))\n",
+    "    return accuracy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "36ed5e31",
+   "metadata": {},
+   "source": [
+    "Let us now define the flow for the datastore CLI test case"
+   ]
+  },
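+  {
+   "cell_type": "markdown",
+   "id": "36ed5e32",
+   "metadata": {},
+   "source": [
+    "A quick note on how this test works: because the flow below is instantiated with `checkpoint=True`, every step's state and captured stdout are persisted to the local Metaflow datastore, and `validate_datastore_cli` later reads them back through the Metaflow client. A minimal sketch of that read-back path (variable names here are illustrative):\n",
+    "\n",
+    "```python\n",
+    "from metaflow import Flow\n",
+    "\n",
+    "run = list(Flow(\"TestFlowDatastoreAndCli\"))[0]  # most recent run\n",
+    "steps = [step.id for step in run]               # step names recorded in the datastore\n",
+    "```"
+   ]
+  },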
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "52c4a752",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "\n",
+    "class TestFlowDatastoreAndCli(FLSpec):\n",
+    "    \"\"\"\n",
+    "    Testflow for Dataflow and CLI Functionality\n",
+    "    \"\"\"\n",
+    "    def __init__(self, model=None, optimizer=None, rounds=3, **kwargs):\n",
+    "        super().__init__(**kwargs)\n",
+    "        if model is not None:\n",
+    "            self.model = model\n",
+    "            self.optimizer = optimizer\n",
+    "        else:\n",
+    "            self.model = Net()\n",
+    "            self.optimizer = optim.SGD(\n",
+    "                self.model.parameters(), lr=learning_rate, momentum=momentum\n",
+    "            )\n",
+    "        self.num_rounds = rounds\n",
+    "        self.current_round = 0\n",
+    "\n",
+    "    @aggregator\n",
+    "    def start(self):\n",
+    "        print(\n",
+    "            \"Testing FederatedFlow - Starting Test for Dataflow and CLI Functionality\"\n",
+    "        )\n",
+    "        self.collaborators = self.runtime.collaborators\n",
+    "        self.private = 10\n",
+    "        self.next(\n",
+    "            self.aggregated_model_validation,\n",
+    "            foreach=\"collaborators\",\n",
+    "            exclude=[\"private\"],\n",
+    "        )\n",
+    "\n",
+    "    @collaborator\n",
+    "    def aggregated_model_validation(self):\n",
+    "        print(\"Performing aggregated model validation for collaborator\")\n",
+    "        self.agg_validation_score = inference(self.model, self.test_loader)\n",
+    "        self.next(self.train)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def train(self):\n",
+    "        print(\"Train the model\")\n",
+    "        self.model.train()\n",
+    "        self.optimizer = optim.SGD(\n",
+    "            self.model.parameters(), lr=learning_rate, momentum=momentum\n",
+    "        )\n",
+    "        for batch_idx, (data, target) in enumerate(self.train_loader):\n",
+    "            self.optimizer.zero_grad()\n",
+    "            output = self.model(data)\n",
+    "            loss = F.nll_loss(output, target)\n",
+    "            loss.backward()\n",
+    "            self.optimizer.step()\n",
+    "            if batch_idx % log_interval == 0:\n",
+    "                self.loss = loss.item()\n",
+    "                torch.save(self.model.state_dict(), \"model.pth\")\n",
+    "                torch.save(self.optimizer.state_dict(), \"optimizer.pth\")\n",
+    "        self.training_completed = True\n",
+    "        self.next(self.local_model_validation)\n",
+    "\n",
+    "    @collaborator\n",
+    "    def local_model_validation(self):\n",
+    "        self.local_validation_score = inference(self.model, self.test_loader)\n",
+    "        print(\"Doing local model validation for collaborator\")\n",
+    "        self.next(self.join, exclude=[\"training_completed\"])\n",
+    "\n",
+    "    @aggregator\n",
+    "    def join(self, inputs):\n",
+    "        print(\"Executing join\")\n",
+    "        self.current_round += 1\n",
+    "        if self.current_round < self.num_rounds:\n",
+    "            self.next(self.start)\n",
+    "        else:\n",
+    "            self.next(self.end)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def end(self):\n",
+    "        print(\"This is the end of the flow\")\n",
+    "\n",
+    "        expected_flow_steps = [\n",
+    "            \"start\",\n",
+    "            \"aggregated_model_validation\",\n",
+    "            \"train\",\n",
+    "            \"local_model_validation\",\n",
+    "            \"join\",\n",
+    "        ]  # List to verify expected steps\n",
+    "        validate_datastore_cli(\n",
+    "            self, expected_flow_steps, self.num_rounds\n",
+    "        )  # Function to validate datastore and cli\n",
+    "\n",
+    "\n",
+    "def validate_datastore_cli(flow_obj, expected_flow_steps, num_rounds):\n",
+    "    \"\"\"\n",
+    "    This function tests the flow as follows:\n",
+    "    1. Verify datastore steps and expected steps are matching\n",
+    "    2. Verify task stdout and task stderr fetched through \\\n",
+    "       cli are as expected\n",
+    "    3. 
Verify no of tasks executed is aligned with the total \\\n", + " number of rounds and total number of collaborators\n", + " \"\"\"\n", + " validate_flow_error = []\n", + "\n", + " verify_stdout = {\n", + " \"start\":\n", + " \"\\x1b[94mTesting FederatedFlow - Starting Test for Dataflow\"\n", + " + \" and CLI Functionality\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " \"aggregated_model_validation\":\n", + " \"\\x1b[94mPerforming aggregated model validation for\"\n", + " + \" collaborator\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " \"train\": \"\\x1b[94mTrain the model\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " \"local_model_validation\":\n", + " \"\\x1b[94mDoing local model validation for collaborator\"\n", + " + \"\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " \"join\": \"\\x1b[94mExecuting join\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " \"end\": \"\\x1b[94mThis is the end of the flow\\x1b[0m\\x1b[94m\\n\\x1b[0m\\n\",\n", + " }\n", + "\n", + " # fetch data from metaflow\n", + " from metaflow import Flow\n", + "\n", + " cli_flow_obj = Flow(\"TestFlowDatastoreAndCli\")\n", + " cli_flow_steps = list(list(cli_flow_obj)[0])\n", + " cli_step_names = [step.id for step in cli_flow_steps]\n", + "\n", + " steps_present_in_cli = [\n", + " step for step in expected_flow_steps if step in cli_step_names\n", + " ]\n", + " missing_steps_in_cli = [\n", + " step for step in expected_flow_steps if step not in cli_step_names\n", + " ]\n", + " extra_steps_in_cli = [\n", + " step for step in cli_step_names if step not in expected_flow_steps\n", + " ]\n", + "\n", + " if len(steps_present_in_cli) != len(expected_flow_steps):\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : Number of steps fetched from \\\n", + " Datastore through CLI do not match the Expected steps provided {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if len(missing_steps_in_cli) != 0:\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : Following steps missing from Datastore: \\\n", + " {missing_steps_in_cli} {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if len(extra_steps_in_cli) != 0:\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : Following steps are extra in Datastore: \\\n", + " {extra_steps_in_cli} {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " for step in cli_flow_steps:\n", + " task_count = 0\n", + " func = getattr(flow_obj, step.id)\n", + " for task in list(step):\n", + " task_count = task_count + 1\n", + " if verify_stdout.get(step.id) != task.stdout:\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : task stdout detected issues : \\\n", + " {step} {task} {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if (\n", + " (func.aggregator_step)\n", + " and (task_count != num_rounds)\n", + " and (func.__name__ != \"end\")\n", + " ):\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : More than one execution detected \\\n", + " for Aggregator Step: {step} {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if (\n", + " (func.aggregator_step)\n", + " and (task_count != 1)\n", + " and (func.__name__ == \"end\")\n", + " ):\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... Error : More than one execution detected \\\n", + " for Aggregator Step: {step} {Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if (func.collaborator_step) and (\n", + " task_count != len(flow_obj.collaborators) * num_rounds\n", + " ):\n", + " validate_flow_error.append(\n", + " f\"{Bcolors.FAIL}... 
Error : Incorrect number of execution \\\n", + " detected for Collaborator Step: {step}. \\\n", + " Expected: {num_rounds*len(flow_obj.collaborators)} \\\n", + " Actual: {task_count}{Bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if validate_flow_error:\n", + " display_validate_errors(validate_flow_error)\n", + " else:\n", + " print(f\"\"\"{Bcolors.OKGREEN}\\n**** Summary of internal flow testing ****\n", + " No issues found and below are the tests that ran successfully\n", + " 1. Datastore steps and expected steps are matching\n", + " 2. Task stdout and task stderr verified through metaflow cli is as expected\n", + " 3. Number of tasks are aligned with number of rounds and number \"\"\"\n", + " f\"\"\"of collaborators {Bcolors.ENDC}\"\"\")\n", + "\n", + "\n", + "def display_validate_errors(validate_flow_error):\n", + " \"\"\"\n", + " Function to display error that is captured during datastore and cli test\n", + " \"\"\"\n", + " print(\n", + " f\"{Bcolors.OKBLUE}Testing FederatedFlow - Ending test for validatng \\\n", + " the Datastore and Cli Testing {Bcolors.ENDC}\"\n", + " )\n", + " print(\"\".join(validate_flow_error))\n", + " print(f\"{Bcolors.FAIL}\\n ... Test case failed ... {Bcolors.ENDC}\")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "b5371b6d", + "metadata": {}, + "source": [ + "## Workspace creation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'],\n", + " director=director_info,\n", + " notebook_path='./testflow_datastore_cli.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowDatastoreAndCli(checkpoint=True)\n", + "flflow.runtime = federated_runtime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fed_run", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/director_config.yaml new file mode 100644 index 0000000000..b604d6463a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/director_config.yaml @@ -0,0 +1,6 @@ +settings: 
+ listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..3d8b4e2882 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/envoy_config.yaml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..bfc8af2959 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/start_envoy.sh new file mode 
100755 index 0000000000..bfc8af2959 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/workspace/testflow_include_exclude.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/workspace/testflow_include_exclude.ipynb new file mode 100644 index 0000000000..77b876ff93 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_include_exclude/workspace/testflow_include_exclude.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Include and Exclude" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment `indicates that the exported file will be named 'experiment'. This name can be modified based on user's requirement & preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "62449b5f", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. 
User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of the testcase include_exclude" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowIncludeExclude(FLSpec):\n", + " \"\"\"\n", + " Testflow to validate include and exclude functionality in Federated Flow.\n", + " \"\"\"\n", + "\n", + " include_exclude_error_list = []\n", + "\n", + " def __init__(self, checkpoint: bool = False):\n", + " super().__init__(checkpoint)\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for Include and Exclude \"\n", + " + f\"Attributes {bcolors.ENDC}\"\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " self.exclude_agg_to_agg = 10\n", + " self.include_agg_to_agg = 100\n", + " self.next(self.test_include_exclude_agg_to_agg, exclude=[\"exclude_agg_to_agg\"])\n", + "\n", + " @aggregator\n", + " def test_include_exclude_agg_to_agg(self):\n", + " \"\"\"\n", + " Testing whether attributes are excluded from agg to agg\n", + " \"\"\"\n", + " if (\n", + " hasattr(self, \"include_agg_to_agg\") is True\n", + " and hasattr(self, \"exclude_agg_to_agg\") is False\n", + " ):\n", + " print(\n", + " f\"{bcolors.OKGREEN} ... Exclude test passed in test_include_exclude_agg_to_agg \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " TestFlowIncludeExclude.include_exclude_error_list.append(\n", + " \"test_include_exclude_agg_to_agg\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Exclude test failed in test_incude_exclude_agg_to_agg \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.include_agg_to_collab = 100\n", + " self.exclude_agg_to_collab = 78\n", + " self.next(\n", + " self.test_include_exclude_agg_to_collab,\n", + " foreach=\"collaborators\",\n", + " include=[\"include_agg_to_collab\", \"collaborators\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def test_include_exclude_agg_to_collab(self):\n", + " \"\"\"\n", + " Testing whether attributes are included from agg to collab\n", + " \"\"\"\n", + " if (\n", + " hasattr(self, \"include_agg_to_agg\") is False\n", + " and hasattr(self, \"exclude_agg_to_agg\") is False\n", + " and hasattr(self, \"exclude_agg_to_collab\") is False\n", + " and hasattr(self, \"include_agg_to_collab\") is True\n", + " ):\n", + " print(\n", + " f\"{bcolors.OKGREEN} ... 
Include test passed in test_include_exclude_agg_to_collab \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " TestFlowIncludeExclude.include_exclude_error_list.append(\n", + " \"test_incude_exclude_agg_to_collab\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Include test failed in test_include_exclude_agg_to_collab \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " self.exclude_collab_to_collab = 10\n", + " self.include_collab_to_collab = 44\n", + " self.next(\n", + " self.test_include_exclude_collab_to_collab,\n", + " exclude=[\"exclude_collab_to_collab\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def test_include_exclude_collab_to_collab(self):\n", + " \"\"\"\n", + " Testing whether attributes are excluded from collab to collab\n", + " \"\"\"\n", + " if (\n", + " hasattr(self, \"include_agg_to_agg\") is False\n", + " and hasattr(self, \"include_agg_to_collab\") is True\n", + " and hasattr(self, \"include_collab_to_collab\") is True\n", + " and hasattr(self, \"exclude_agg_to_agg\") is False\n", + " and hasattr(self, \"exclude_agg_to_collab\") is False\n", + " and hasattr(self, \"exclude_collab_to_collab\") is False\n", + " ):\n", + " print(\n", + " f\"{bcolors.OKGREEN} ... Exclude test passed in \"\n", + " + f\"test_include_exclude_collab_to_collab {bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " TestFlowIncludeExclude.include_exclude_error_list.append(\n", + " \"test_incude_exclude_collab_to_collab\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Exclude test failed in test_include_exclude_collab_to_collab \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.exclude_collab_to_agg = 20\n", + " self.include_collab_to_agg = 56\n", + " self.next(self.join, include=[\"include_collab_to_agg\"])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Testing whether attributes are included from collab to agg\n", + " \"\"\"\n", + " # Aggregator attribute check\n", + " validate = (\n", + " hasattr(self, \"include_agg_to_agg\") is True\n", + " and hasattr(self, \"include_agg_to_collab\") is True\n", + " and hasattr(self, \"exclude_agg_to_collab\") is True\n", + " and hasattr(self, \"exclude_agg_to_agg\") is False\n", + " )\n", + "\n", + " # Collaborator attribute check\n", + " for input in inputs:\n", + " validation = validate and (\n", + " hasattr(input, \"include_collab_to_collab\") is False\n", + " and hasattr(input, \"exclude_collab_to_collab\") is False\n", + " and hasattr(input, \"exclude_collab_to_agg\") is False\n", + " and hasattr(input, \"include_collab_to_agg\") is True\n", + " )\n", + "\n", + " if validation:\n", + " print(\n", + " f\"{bcolors.OKGREEN} ... Include and Exclude tests passed in join {bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " TestFlowIncludeExclude.include_exclude_error_list.append(\"join\")\n", + " print(\n", + " f\"{bcolors.FAIL} ... 
Include and Exclude tests failed in join {bcolors.ENDC}\"\n", + " )\n", + "\n", + " print(\n", + " f\"\\n{bcolors.UNDERLINE} Include and exclude attributes test summary: {bcolors.ENDC}\\n\"\n", + " )\n", + "\n", + " if TestFlowIncludeExclude.include_exclude_error_list:\n", + " validated_include_exclude_variables = \",\".join(\n", + " TestFlowIncludeExclude.include_exclude_error_list\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ...Test case failed for {validated_include_exclude_variables} \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the 'end' step. All flows must have an 'end' step, which is the\n", + " last step in the flow.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending Test for Include and Exclude \"\n", + " + f\"Attributes {bcolors.ENDC}\"\n", + " )\n", + " if TestFlowIncludeExclude.include_exclude_error_list:\n", + " raise (\n", + " AssertionError(\n", + " f\"{bcolors.FAIL}\\n ...Test case failed ... {bcolors.ENDC}\"\n", + " )\n", + " )\n", + " print(f\"{bcolors.OKBLUE}End of Testing FederatedFlow {bcolors.ENDC}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'], \n", + " director=director_info, \n", + " notebook_path='./testflow_include_exclude.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowIncludeExclude(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir-wip", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/director_config.yaml new file mode 100644 index 0000000000..e8dcc1e12a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/director_config.yaml @@ -0,0 +1,5 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + diff --git 
a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path 
"$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/workspace/testflow_internal_loop.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/workspace/testflow_internal_loop.ipynb new file mode 100644 index 0000000000..190a6eac0e --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_internalloop/workspace/testflow_internal_loop.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Internal_loop" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment `indicates that the exported file will be named 'experiment'. This name can be modified based on user's requirement & preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "62449b5f", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.interface.fl_spec import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "import numpy as np\n", + "\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of internalloop testcase\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowInternalLoop(FLSpec):\n", + " def __init__(self, model=None, optimizer=None, rounds= 3, **kwargs):\n", + " super().__init__(**kwargs)\n", + " self.training_rounds = rounds\n", + " self.train_count = 0\n", + " self.end_count = 0\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - \"\n", + " + f\"Test for Internal Loops - Round: {self.train_count}\"\n", + " + f\" of Training Rounds: {self.training_rounds}{bcolors.ENDC}\"\n", + " )\n", + " self.model = np.zeros((10, 10, 10)) # Test model\n", + " self.collaborators = self.runtime.collaborators\n", + " self.next(self.agg_model_mean, foreach=\"collaborators\")\n", + "\n", + " @collaborator\n", + " def agg_model_mean(self):\n", + " \"\"\"\n", + " 
Calculating the mean of the model created in start.\n", + " \"\"\"\n", + " self.agg_mean_value = np.mean(self.model)\n", + " print(f\": {self.input} Mean of Agg model: {self.agg_mean_value} \")\n", + " self.next(self.collab_model_update)\n", + "\n", + " @collaborator\n", + " def collab_model_update(self):\n", + " \"\"\"\n", + " Initializing the model with random numbers.\n", + " \"\"\"\n", + " print(f\": {self.input} Initializing the model randomly \")\n", + " self.model = np.random.randint(1, len(self.input), (10, 10, 10))\n", + " self.next(self.local_model_mean)\n", + "\n", + " @collaborator\n", + " def local_model_mean(self):\n", + " \"\"\"\n", + " Calculating the mean of the model created in train.\n", + " \"\"\"\n", + " self.local_mean_value = np.mean(self.model)\n", + " print(f\": {self.input} Local mean: {self.local_mean_value} \")\n", + " self.next(self.join)\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Joining inputs from collaborators\n", + " \"\"\"\n", + " self.agg_mean = sum(input.local_mean_value for input in inputs) / len(inputs)\n", + " print(f\"Aggregated mean : {self.agg_mean}\")\n", + " self.next(self.internal_loop)\n", + "\n", + " @aggregator\n", + " def internal_loop(self):\n", + " \"\"\"\n", + " Internally Loop for training rounds\n", + " \"\"\"\n", + " self.train_count = self.train_count + 1\n", + " if self.training_rounds == self.train_count:\n", + " self.next(self.end)\n", + " else:\n", + " self.next(self.start)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the 'end' step. All flows must have an 'end' step, which is the\n", + " last step in the flow.\n", + " \"\"\"\n", + " self.end_count += 1\n", + " print(\"This is the end of the flow\")\n", + "\n", + " flflow = self\n", + " # Flow Test Begins\n", + " expected_flow_steps = [\n", + " \"join\",\n", + " \"internal_loop\",\n", + " \"agg_model_mean\",\n", + " \"collab_model_update\",\n", + " \"local_model_mean\",\n", + " \"start\",\n", + " ] # List to verify expected steps\n", + " try:\n", + " validate_flow(\n", + " flflow, expected_flow_steps\n", + " ) # Function to validate the internal flow\n", + " except Exception as e:\n", + " raise e\n", + " # Flow Test Ends\n", + "\n", + "\n", + "def validate_flow(flow_obj, expected_flow_steps):\n", + " \"\"\"\n", + " Validate:\n", + " 1. If the given training round were completed\n", + " 2. If all the steps were executed\n", + " 3. If each collaborator step was executed\n", + " 4. If end was executed once\n", + " \"\"\"\n", + " validate_flow_error = [] # List to capture any errors in the flow\n", + "\n", + " from metaflow import Flow\n", + "\n", + " cli_flow_obj = Flow(\"TestFlowInternalLoop\") # Flow object from CLI\n", + " cli_flow_steps = list(cli_flow_obj.latest_run) # Steps from CLI\n", + " cli_step_names = [step.id for step in cli_flow_steps]\n", + "\n", + " # 1. If the given training round were completed\n", + " if not flow_obj.training_rounds == flow_obj.train_count:\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... 
Error : Number of training completed is not equal\"\n", + " + f\" to training rounds {bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " for step in cli_flow_steps:\n", + " task_count = 0\n", + " func = getattr(flow_obj, step.id)\n", + " for task in list(step):\n", + " task_count = task_count + 1\n", + "\n", + " # Each aggregator step should be executed for training rounds times\n", + " if (\n", + " (func.aggregator_step is True)\n", + " and (task_count != flow_obj.training_rounds)\n", + " and (step.id != \"end\")\n", + " ):\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : More than one execution detected for \"\n", + " + f\"Aggregator Step: {step} {bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " # Each collaborator step is executed for (training rounds)*(number of collaborator) times\n", + " if (func.collaborator_step is True) and (\n", + " task_count != len(flow_obj.collaborators) * flow_obj.training_rounds\n", + " ):\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : Incorrect number of execution detected for \"\n", + " + f\"Collaborator Step: {step}. Expected: \"\n", + " + f\"{flow_obj.training_rounds*len(flow_obj.collaborators)} \"\n", + " + f\"Actual: {task_count}{bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " steps_present_in_cli = [\n", + " step for step in expected_flow_steps if step in cli_step_names\n", + " ]\n", + " missing_steps_in_cli = [\n", + " step for step in expected_flow_steps if step not in cli_step_names\n", + " ]\n", + " extra_steps_in_cli = [\n", + " step for step in cli_step_names if step not in expected_flow_steps\n", + " ]\n", + "\n", + " if len(steps_present_in_cli) != len(expected_flow_steps):\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : Number of steps fetched from Datastore through CLI do not \"\n", + " + f\"match the Expected steps provided {bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if len(missing_steps_in_cli) != 0:\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : Following steps missing from Datastore: \"\n", + " + f\"{missing_steps_in_cli} {bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if len(extra_steps_in_cli) != 0:\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : Following steps are extra in Datastore: \"\n", + " + f\"{extra_steps_in_cli} {bcolors.ENDC} \\n\"\n", + " )\n", + "\n", + " if not flow_obj.end_count == 1:\n", + " validate_flow_error.append(\n", + " f\"{bcolors.FAIL}... Error : End function called more than one time...{bcolors.ENDC}\"\n", + " )\n", + "\n", + " if validate_flow_error:\n", + " display_validate_errors(validate_flow_error)\n", + " raise Exception(f\"{bcolors.FAIL}Test for Internal Loop FAILED\")\n", + " else:\n", + " print(\n", + " f\"\"\"{bcolors.OKGREEN}\\n **** Summary of internal flow testing ****\n", + " No issues found and below are the tests that ran successfully\n", + " 1. Number of training completed is equal to training rounds\n", + " 2. Cli steps and Expected steps are matching\n", + " 3. Number of tasks are aligned with number of rounds and number of collaborators\n", + " 4. 
End function executed one time {bcolors.ENDC}\"\"\"\n", + " )\n", + "\n", + "\n", + "def display_validate_errors(validate_flow_error):\n", + " \"\"\"\n", + " Function to display error that is captured during flow test\n", + " \"\"\"\n", + " print(\"\".join(validate_flow_error))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'],\n", + " director=director_info, \n", + " notebook_path='./testflow_internal_loop.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowInternalLoop(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fed_run", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/director_config.yaml new file mode 100644 index 0000000000..95d4f007e5 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/director_config.yaml @@ -0,0 +1,10 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + +aggregator: + callable_func: + settings: {} + template: private_attributes.aggregator_private_attrs.aggregator_private_attrs + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/private_attributes/aggregator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/private_attributes/aggregator_private_attrs.py new file mode 100644 index 0000000000..d57490a699 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/private_attributes/aggregator_private_attrs.py @@ -0,0 +1,7 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def aggregator_private_attrs(): + return {"test_loader_agg": np.random.rand(10, 28, 28)} # Random data diff --git 
a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..73a1ce09f4 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/envoy_config.yaml @@ -0,0 +1,6 @@ +envoy_one: + callable_func: + settings: + index: 1 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..0cf51214ea --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,10 @@ +# Copyright (C) 2020-2024 Intel Corporation +# # SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def collaborator_private_attrs(index): + return { + "train_loader": np.random.rand(index * 50, 28, 28), + "test_loader": np.random.rand(index * 10, 28, 28), + } diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..ec859a79cd --- /dev/null +++ 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/envoy_config.yaml @@ -0,0 +1,6 @@ +envoy_two: + callable_func: + settings: + index: 2 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..caee86a1a7 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,10 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def collaborator_private_attrs(index): + return { + "train_loader": np.random.rand(index * 50, 28, 28), + "test_loader": np.random.rand(index * 10, 28, 28), + } diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/workspace/testflow_privateattributes.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/workspace/testflow_privateattributes.ipynb new file mode 100644 index 0000000000..bbf5cf2f34 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes/workspace/testflow_privateattributes.ipynb @@ -0,0 +1,411 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Private Attributes" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment `indicates that the exported file will be named 'experiment'. 
This name can be modified based on user's requirement & preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "62449b5f", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *appended* by the `#| export` directive as shown below. User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "\n", + "from openfl.experimental.workflow.component import Aggregator\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of Private attribute testcase\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowPrivateAttributes(FLSpec):\n", + " \"\"\"\n", + " Testflow to validate Aggregator private attributes are not accessible to collaborators\n", + " and vice versa\n", + " \"\"\"\n", + "\n", + " ERROR_LIST = []\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for accessibility of private \"\n", + " + f\"attributes {bcolors.ENDC}\"\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " validate_agg_private_attr(self, \"start\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"])\n", + "\n", + " self.exclude_agg_to_agg = 10\n", + " self.include_agg_to_agg = 100\n", + " self.next(self.aggregator_step, exclude=[\"exclude_agg_to_agg\"])\n", + "\n", + " @aggregator\n", + " def aggregator_step(self):\n", + " \"\"\"\n", + " Testing whether Agg private attributes are accessible in next agg step.\n", + " Collab private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_agg_private_attr(self, \"aggregator_step\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"])\n", + "\n", + " self.include_agg_to_collab = 42\n", + " self.exclude_agg_to_collab = 40\n", + " self.next(\n", + " self.collaborator_step_a,\n", + " foreach=\"collaborators\",\n", + " exclude=[\"exclude_agg_to_collab\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def collaborator_step_a(self):\n", + " \"\"\"\n", + " Testing whether Collab private attributes are accessible in collab step\n", + " Aggregator private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_collab_private_attrs(\n", + " self, \"collaborator_step_a\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"]\n", + " )\n", + "\n", + " self.exclude_collab_to_collab = 2\n", + " self.include_collab_to_collab = 
22\n",
+    "        self.next(self.collaborator_step_b, exclude=[\"exclude_collab_to_collab\"])\n",
+    "\n",
+    "    @collaborator\n",
+    "    def collaborator_step_b(self):\n",
+    "        \"\"\"\n",
+    "        Testing whether Collab private attributes are accessible in collab step\n",
+    "        Aggregator private attributes should not be accessible here\n",
+    "        \"\"\"\n",
+    "\n",
+    "        validate_collab_private_attrs(\n",
+    "            self, \"collaborator_step_b\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"]\n",
+    "        )\n",
+    "        self.exclude_collab_to_agg = 10\n",
+    "        self.include_collab_to_agg = 12\n",
+    "        self.next(self.join, exclude=[\"exclude_collab_to_agg\"])\n",
+    "\n",
+    "    @aggregator\n",
+    "    def join(self, inputs):\n",
+    "        \"\"\"\n",
+    "        Testing whether attributes are excluded from collab to agg\n",
+    "        \"\"\"\n",
+    "        # Aggregator should only be able to access its own attributes\n",
+    "        if hasattr(self, \"test_loader_agg\") is False:\n",
+    "            TestFlowPrivateAttributes.ERROR_LIST.append(\n",
+    "                \"aggregator_join_aggregator_attributes_missing\"\n",
+    "            )\n",
+    "            print(\n",
+    "                f\"{bcolors.FAIL} ... Attribute test failed in join - aggregator private attributes\"\n",
+    "                + f\" not accessible {bcolors.ENDC}\"\n",
+    "            )\n",
+    "\n",
+    "        # Check each collaborator result object directly; iterating over\n",
+    "        # enumerate(inputs) would run hasattr() on (index, item) tuples and\n",
+    "        # the check would never fire.\n",
+    "        for collab_input in inputs:\n",
+    "            collab = collab_input.input\n",
+    "            if (\n",
+    "                hasattr(collab_input, \"train_loader\")\n",
+    "                or hasattr(collab_input, \"test_loader\")\n",
+    "            ):\n",
+    "                # Error - we are able to access collaborator attributes\n",
+    "                TestFlowPrivateAttributes.ERROR_LIST.append(\n",
+    "                    \"join_collaborator_attributes_found\"\n",
+    "                )\n",
+    "                print(\n",
+    "                    f\"{bcolors.FAIL} ... Attribute test failed in Join - Collaborator: {collab}\"\n",
+    "                    + f\" private attributes accessible {bcolors.ENDC}\"\n",
+    "                )\n",
+    "\n",
+    "        self.next(self.end)\n",
+    "\n",
+    "    @aggregator\n",
+    "    def end(self):\n",
+    "        \"\"\"\n",
+    "        This is the 'end' step. All flows must have an 'end' step, which is the\n",
+    "        last step in the flow.\n",
+    "\n",
+    "        \"\"\"\n",
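+    "        # ERROR_LIST is a class attribute shared by every step above, so a\n",
+    "        # failure recorded by any validation helper is reported here, once,\n",
+    "        # at the end of the flow.\n",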
{bcolors.ENDC}\")\n", + "\n", + " TestFlowPrivateAttributes.ERROR_LIST = []\n", + "\n", + "\n", + "def validate_agg_private_attr(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that aggregator can only access their own attributes\n", + "\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attr_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + " # Aggregator should only be able to access its own attributes\n", + "\n", + " # check for missing aggregator attributes\n", + " inaccessible_agg_attrs = [attr for attr in agg_attrs if not hasattr(self, attr)]\n", + " if inaccessible_agg_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" aggregator_attributes_missing\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ...Failed in {step_name} - aggregator private attributes not \"\n", + " + f\"accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # check for collaborator private attributes that should not be accessible\n", + " breached_collab_attrs = [attr for attr in collab_attrs if hasattr(self, attr)]\n", + " if breached_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - collaborator\"\n", + " + f\"private attributes accessible:{','.join(breached_collab_attrs)} {bcolors.ENDC}\"\n", + " )\n", + " for idx, collab in enumerate(self.collaborators):\n", + " # Collaborator attributes should not be accessible in aggregator step\n", + " if (\n", + " type(self.collaborators[idx]) is not str\n", + " or hasattr(self.runtime, \"_collaborators\")\n", + " or hasattr(self.runtime, \"__collaborators\")\n", + " ):\n", + " # Error - we are able to access collaborator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - collaborator {collab} \"\n", + " + f\"private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + "\n", + "def validate_collab_private_attrs(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that collaborators can only access their own attributes\n", + "\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attr_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + "\n", + " # Collaborator should only be able to access its own attributes\n", + "\n", + " # check for missing collaborators attributes\n", + " inaccessible_collab_attrs = [attr for attr in collab_attrs if not hasattr(self, attr)]\n", + "\n", + " if inaccessible_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" collab_attributes_not_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... 
Attribute test failed in {step_name} - Collab \"\n", + " + f\"private attributes not accessible {bcolors.ENDC}\"\n", + " )\n", + " # check for aggregator private attributes that should not be accessible\n", + " breached_agg_attr = [attr for attr in agg_attrs if hasattr(self, attr)]\n", + " if breached_agg_attr:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + "\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible: {','.join(breached_agg_attr)} {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # Aggregator attributes should not be accessible in collaborator step\n", + " if hasattr(self.runtime, \"_aggregator\") and isinstance(self.runtime._aggregator, Aggregator):\n", + " # Error - we are able to access aggregator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "# TODO: Is there a way to get the notebook path without passing it?\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'],\n", + " director=director_info,\n", + " notebook_path='./testflow_privateattributes.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowPrivateAttributes(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fed_run", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.15" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/director_config.yaml new file mode 100644 index 0000000000..12944c1243 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/director_config.yaml 
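Note: the director_config.yaml below exercises both private-attribute initialization options at once: a static `private_attributes` dotted path pointing at a ready-made dict, and a `callable_func` entry whose `template` names a function and whose `settings` supply its keyword arguments. As a rough illustration of how such a config can be resolved into a single attribute dict, here is a minimal sketch; the helper name `resolve_private_attributes` is hypothetical, and the real wiring lives in the director/envoy components added by this patch:

```python
import importlib


def resolve_private_attributes(config: dict) -> dict:
    # Hypothetical helper, for illustration only.
    attrs = {}
    # Option 1: a dotted path to a module-level dict of attributes.
    static_path = config.get("private_attributes")
    if static_path:
        module_path, _, name = static_path.rpartition(".")
        attrs.update(getattr(importlib.import_module(module_path), name))
    # Option 2: a dotted path to a callable, invoked with its settings.
    callable_cfg = config.get("callable_func") or {}
    if callable_cfg.get("template"):
        module_path, _, name = callable_cfg["template"].rpartition(".")
        func = getattr(importlib.import_module(module_path), name)
        attrs.update(func(**callable_cfg.get("settings", {})))
    return attrs
```

Under this config the aggregator would end up with both `test_loader_agg` (declared directly) and `test_loader_agg_via_callable` (returned by the callable), which is exactly what the "both options" notebook later asserts.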
@@ -0,0 +1,11 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + +aggregator: + callable_func: + settings: {} + template: private_attributes.aggregator_private_attrs.aggregator_private_attrs + private_attributes: private_attributes.aggregator_private_attrs.aggregator_private_attributes + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/private_attributes/aggregator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/private_attributes/aggregator_private_attrs.py new file mode 100644 index 0000000000..c5e4b754b3 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/private_attributes/aggregator_private_attrs.py @@ -0,0 +1,10 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def aggregator_private_attrs(): + return {"test_loader_agg_via_callable": np.random.rand(10, 28, 28)} # Random data + + +aggregator_private_attributes = {"test_loader_agg": np.random.rand(10, 28, 28)} # Random data diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..afce54bcd2 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/envoy_config.yaml @@ -0,0 +1,7 @@ +envoy_one: + callable_func: + settings: + index: 1 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + private_attributes: private_attributes.collaborator_private_attrs.collaborator_private_attributes + \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..449c72876b --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,16 @@ +# Copyright (C) 2020-2024 Intel Corporation +# # SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def collaborator_private_attrs(index): + return { + "train_loader_via_callable": np.random.rand(index * 50, 28, 28), + 
"test_loader_via_callable": np.random.rand(index * 10, 28, 28), + } + + +collaborator_private_attributes = { + "train_loader": np.random.rand(1 * 50, 28, 28), + "test_loader": np.random.rand(1 * 10, 28, 28), +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..51c4e7be6e --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/envoy_config.yaml @@ -0,0 +1,6 @@ +envoy_two: + callable_func: + settings: + index: 2 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs + private_attributes: private_attributes.collaborator_private_attrs.collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..2cfb219a45 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,16 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +def collaborator_private_attrs(index): + return { + "train_loader_via_callable": np.random.rand(index * 50, 28, 28), + "test_loader_via_callable": np.random.rand(index * 10, 28, 28), + } + + +collaborator_private_attributes = { + "train_loader": np.random.rand(1 * 50, 28, 28), + "test_loader": np.random.rand(1 * 10, 
28, 28), +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/workspace/testflow_privateattributes.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/workspace/testflow_privateattributes.ipynb new file mode 100644 index 0000000000..87a6ac01e9 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_with_both_options/workspace/testflow_privateattributes.ipynb @@ -0,0 +1,411 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Private Attributes Initialization with Both Options" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "markdown", + "id": "62449b5f", + "metadata": {}, + "source": [ + "Once we have specified the name of the module, subsequent cells of the notebook need to be *prefixed* with the `#| export` directive as shown below. 
User should ensure that *all* the notebook functionality required in the Federated Learning experiment is included in this directive" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "\n", + "from openfl.experimental.workflow.component import Aggregator\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of Private attribute testcase\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowPrivateAttributes(FLSpec):\n", + " \"\"\"\n", + " Testflow to validate Aggregator private attributes are not accessible to collaborators\n", + " and vice versa\n", + " \"\"\"\n", + "\n", + " ERROR_LIST = []\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for accessibility of private \"\n", + " + f\"attributes {bcolors.ENDC}\"\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " validate_agg_private_attr(self, \"start\", aggr=[\"test_loader_agg_via_callable\"], collabs=[\"train_loader_via_callable\", \"test_loader_via_callable\"])\n", + "\n", + " self.exclude_agg_to_agg = 10\n", + " self.include_agg_to_agg = 100\n", + " self.next(self.aggregator_step, exclude=[\"exclude_agg_to_agg\"])\n", + "\n", + " @aggregator\n", + " def aggregator_step(self):\n", + " \"\"\"\n", + " Testing whether Agg private attributes are accessible in next agg step.\n", + " Collab private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_agg_private_attr(self, \"aggregator_step\", aggr=[\"test_loader_agg_via_callable\"], collabs=[\"train_loader_via_callable\", \"test_loader_via_callable\"])\n", + "\n", + " self.include_agg_to_collab = 42\n", + " self.exclude_agg_to_collab = 40\n", + " self.next(\n", + " self.collaborator_step_a,\n", + " foreach=\"collaborators\",\n", + " exclude=[\"exclude_agg_to_collab\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def collaborator_step_a(self):\n", + " \"\"\"\n", + " Testing whether Collab private attributes are accessible in collab step\n", + " Aggregator private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_collab_private_attrs(\n", + " self, \"collaborator_step_a\", aggr=[\"test_loader_agg_via_callable\"], collabs=[\"train_loader_via_callable\", \"test_loader_via_callable\"]\n", + " )\n", + "\n", + " self.exclude_collab_to_collab = 2\n", + " self.include_collab_to_collab = 22\n", + " self.next(self.collaborator_step_b, exclude=[\"exclude_collab_to_collab\"])\n", + "\n", + " @collaborator\n", + " def collaborator_step_b(self):\n", + " \"\"\"\n", + " Testing whether Collab private attributes are accessible in collab step\n", + " Aggregator private attributes should not be accessible here\n", + " \"\"\"\n", + "\n", + " 
validate_collab_private_attrs(\n", + " self, \"collaborator_step_b\", aggr=[\"test_loader_agg_via_callable\"], collabs=[\"train_loader_via_callable\", \"test_loader_via_callable\"]\n", + " )\n", + " self.exclude_collab_to_agg = 10\n", + " self.include_collab_to_agg = 12\n", + " self.next(self.join, exclude=[\"exclude_collab_to_agg\"])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Testing whether attributes are excluded from collab to agg\n", + " \"\"\"\n", + " # Aggregator should only be able to access its own attributes\n", + " if hasattr(self, \"test_loader_agg_via_callable\") is False:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " \"aggregator_join_aggregator_attributes_missing\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in join - aggregator private attributes\"\n", + " + f\" not accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " for inp in inputs:\n", + " collab = inp.input\n", + " if (\n", + " hasattr(inp, \"train_loader_via_callable\")\n", + " or hasattr(inp, \"test_loader_via_callable\")\n", + " ):\n", + " # Error - we are able to access collaborator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " \"join_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in Join - Collaborator: {collab}\"\n", + " + f\" private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the 'end' step. All flows must have an 'end' step, which is the\n", + " last step in the flow.\n", + "\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending Test for accessibility of private \"\n", + " + f\"attributes {bcolors.ENDC}\"\n", + " )\n", + "\n", + " if TestFlowPrivateAttributes.ERROR_LIST:\n", + " raise (\n", + " AssertionError(\n", + " f\"{bcolors.FAIL}\\n ...Test case failed ... {bcolors.ENDC}\"\n", + " )\n", + " )\n", + " else:\n", + " print(f\"{bcolors.OKGREEN}\\n ...Test case passed ... 
{bcolors.ENDC}\")\n", + "\n", + " TestFlowPrivateAttributes.ERROR_LIST = []\n", + "\n", + "\n", + "def validate_agg_private_attr(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that aggregator can only access their own attributes\n", + "\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attr_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + " # Aggregator should only be able to access its own attributes\n", + "\n", + " # check for missing aggregator attributes\n", + " inaccessible_agg_attrs = [attr for attr in agg_attrs if not hasattr(self, attr)]\n", + " if inaccessible_agg_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" aggregator_attributes_missing\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ...Failed in {step_name} - aggregator private attributes not \"\n", + " + f\"accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # check for collaborator private attributes that should not be accessible\n", + " breached_collab_attrs = [attr for attr in collab_attrs if hasattr(self, attr)]\n", + " if breached_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - collaborator\"\n", + " + f\"private attributes accessible:{','.join(breached_collab_attrs)} {bcolors.ENDC}\"\n", + " )\n", + " for idx, collab in enumerate(self.collaborators):\n", + " # Collaborator attributes should not be accessible in aggregator step\n", + " if (\n", + " type(self.collaborators[idx]) is not str\n", + " or hasattr(self.runtime, \"_collaborators\")\n", + " or hasattr(self.runtime, \"__collaborators\")\n", + " ):\n", + " # Error - we are able to access collaborator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - collaborator {collab} \"\n", + " + f\"private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + "\n", + "def validate_collab_private_attrs(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that collaborators can only access their own attributes\n", + "\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attr_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + "\n", + " # Collaborator should only be able to access its own attributes\n", + "\n", + " # check for missing collaborators attributes\n", + " inaccessible_collab_attrs = [attr for attr in collab_attrs if not hasattr(self, attr)]\n", + "\n", + " if inaccessible_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" collab_attributes_not_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... 
Attribute test failed in {step_name} - Collab \"\n", + " + f\"private attributes not accessible {bcolors.ENDC}\"\n", + " )\n", + " # check for aggregator private attributes that should not be accessible\n", + " breached_agg_attr = [attr for attr in agg_attrs if hasattr(self, attr)]\n", + " if breached_agg_attr:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + "\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible: {','.join(breached_agg_attr)} {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # Aggregator attributes should not be accessible in collaborator step\n", + " if hasattr(self.runtime, \"_aggregator\") and isinstance(self.runtime._aggregator, Aggregator):\n", + " # Error - we are able to access aggregator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "# TODO: Is there a way to get the notebook path without passing it?\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators = ['envoy_one', 'envoy_two'], \n", + " director=director_info, \n", + " notebook_path='./testflow_privateattributes.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowPrivateAttributes(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir_shift", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.20" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/director_config.yaml new file mode 100644 index 0000000000..18521aa671 --- /dev/null +++ 
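Note: in the notebook above, every certificate field in `director_info` is left as `None`, which matches the `--disable-tls` flag used by the start scripts in these test cases. A minimal sketch of the connection logic this implies, assuming plain gRPC (the helper name `make_director_channel` is hypothetical; the actual client wiring lives in the `director_client.py` added by this patch):

```python
import grpc


def make_director_channel(info: dict) -> grpc.Channel:
    # Hypothetical sketch: fall back to an insecure channel when no
    # certificates are supplied, otherwise build mTLS credentials
    # from the provided PEM files.
    address = f"{info['director_node_fqdn']}:{info['director_port']}"
    if not (info.get("cert_chain") and info.get("api_cert") and info.get("api_private_key")):
        return grpc.insecure_channel(address)
    with open(info["cert_chain"], "rb") as ca, \
            open(info["api_private_key"], "rb") as key, \
            open(info["api_cert"], "rb") as cert:
        credentials = grpc.ssl_channel_credentials(
            root_certificates=ca.read(),
            private_key=key.read(),
            certificate_chain=cert.read(),
        )
    return grpc.secure_channel(address, credentials)
```

For these localhost test fixtures the insecure path is taken; a production federation would populate all three certificate fields.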
b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/director_config.yaml @@ -0,0 +1,7 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + +aggregator: + private_attributes: private_attributes.aggregator_private_attrs.aggregator_private_attributes diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/private_attributes/aggregator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/private_attributes/aggregator_private_attrs.py new file mode 100644 index 0000000000..e081e4a31d --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/private_attributes/aggregator_private_attrs.py @@ -0,0 +1,6 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + +aggregator_private_attributes = {"test_loader_agg": np.random.rand(10, 28, 28)} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..a4a2ecb2d8 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/envoy_config.yaml @@ -0,0 +1,2 @@ +envoy_one: + private_attributes: private_attributes.collaborator_private_attrs.collaborator_private_attributes diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..56b822d7b8 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,8 @@ +# Copyright (C) 2020-2024 Intel Corporation +# # SPDX-License-Identifier: Apache-2.0 +import numpy as np + +collaborator_private_attributes = { + "train_loader": np.random.rand(1 * 50, 28, 28), + "test_loader": np.random.rand(1 * 10, 28, 28), +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/requirements.txt 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..9bce3671b7 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/envoy_config.yaml @@ -0,0 +1,2 @@ +envoy_two: + private_attributes: private_attributes.collaborator_private_attrs.collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..cbe0503e49 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,9 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +import numpy as np + + +collaborator_private_attributes = { + "train_loader": np.random.rand(1 * 50, 28, 28), + "test_loader": np.random.rand(1 * 10, 28, 28), +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not 
directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/workspace/testflow_private_attributes.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/workspace/testflow_private_attributes.ipynb new file mode 100644 index 0000000000..bdbf091bee --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_private_attributes_initialization_without_callable/workspace/testflow_private_attributes.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Private Attributes Without Callable " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. 
This name can be modified based on the user's requirements and preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.component import Aggregator\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow for the private attributes test case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowPrivateAttributes(FLSpec):\n", + " \"\"\"\n", + " Testflow to validate Aggregator private attributes are not accessible to collaborators\n", + " and vice versa\n", + " \"\"\"\n", + "\n", + " ERROR_LIST = []\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for accessibility of private \"\n", + " + f\"attributes {bcolors.ENDC}\"\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " validate_agg_private_attr(self, \"start\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"])\n", + "\n", + " self.exclude_agg_to_agg = 10\n", + " self.include_agg_to_agg = 100\n", + " self.next(self.aggregator_step, exclude=[\"exclude_agg_to_agg\"])\n", + "\n", + " @aggregator\n", + " def aggregator_step(self):\n", + " \"\"\"\n", + " Testing whether Agg private attributes are accessible in next agg step.\n", + " Collab private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_agg_private_attr(self, \"aggregator_step\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"])\n", + "\n", + " self.include_agg_to_collab = 42\n", + " self.exclude_agg_to_collab = 40\n", + " self.next(\n", + " self.collaborator_step_a,\n", + " foreach=\"collaborators\",\n", + " exclude=[\"exclude_agg_to_collab\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def collaborator_step_a(self):\n", + " \"\"\"\n", + " Testing whether Collab private attributes are accessible in collab step\n", + " Aggregator private attributes should not be accessible here\n", + " \"\"\"\n", + " validate_collab_private_attrs(\n", + " self, \"collaborator_step_a\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"]\n", + " )\n", + "\n", + " self.exclude_collab_to_collab = 2\n", + " self.include_collab_to_collab = 22\n", + " self.next(self.collaborator_step_b, exclude=[\"exclude_collab_to_collab\"])\n", + "\n", + " @collaborator\n", + " def collaborator_step_b(self):\n", + " \"\"\"\n", + " Testing whether Collab private attributes are accessible in collab step\n", + " Aggregator private attributes should not be accessible here\n", + " \"\"\"\n", + "\n", + " validate_collab_private_attrs(\n", + " 
self, \"collaborator_step_b\", aggr=[\"test_loader_agg\"], collabs=[\"train_loader\", \"test_loader\"]\n", + " )\n", + " self.exclude_collab_to_agg = 10\n", + " self.include_collab_to_agg = 12\n", + " self.next(self.join, exclude=[\"exclude_collab_to_agg\"])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Testing whether attributes are excluded from collab to agg\n", + " \"\"\"\n", + " # Aggregator should only be able to access its own attributes\n", + " if hasattr(self, \"test_loader_agg\") is False:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " \"aggregator_join_aggregator_attributes_missing\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in join - aggregator private attributes\"\n", + " + f\" not accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " for input in enumerate(inputs):\n", + " collab = input[1].input\n", + " if (\n", + " hasattr(input, \"train_loader\")\n", + " or hasattr(input, \"test_loader\")\n", + " ):\n", + " # Error - we are able to access collaborator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " \"join_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in Join - Collaborator: {collab}\"\n", + " + f\" private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the 'end' step. All flows must have an 'end' step, which is the\n", + " last step in the flow.\n", + "\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending Test for accessibility of private \"\n", + " + f\"attributes {bcolors.ENDC}\"\n", + " )\n", + "\n", + " if TestFlowPrivateAttributes.ERROR_LIST:\n", + " raise (\n", + " AssertionError(\n", + " f\"{bcolors.FAIL}\\n ...Test case failed ... {bcolors.ENDC}\"\n", + " )\n", + " )\n", + " else:\n", + " print(f\"{bcolors.OKGREEN}\\n ...Test case passed ... {bcolors.ENDC}\")\n", + "\n", + " TestFlowPrivateAttributes.ERROR_LIST = []\n", + "\n", + "\n", + "def validate_agg_private_attr(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that aggregator can only access their own attributes\n", + "\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attr_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + " # Aggregator should only be able to access its own attributes\n", + "\n", + " # check for missing aggregator attributes\n", + " inaccessible_agg_attrs = [attr for attr in agg_attrs if not hasattr(self, attr)]\n", + " if inaccessible_agg_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" aggregator_attributes_missing\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ...Failed in {step_name} - aggregator private attributes not \"\n", + " + f\"accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # check for collaborator private attributes that should not be accessible\n", + " breached_collab_attrs = [attr for attr in collab_attrs if hasattr(self, attr)]\n", + " if breached_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... 
Attribute test failed in {step_name} - collaborator \"\n", + " + f\"private attributes accessible: {','.join(breached_collab_attrs)} {bcolors.ENDC}\"\n", + " )\n", + " for idx, collab in enumerate(self.collaborators):\n", + " # Collaborator attributes should not be accessible in aggregator step\n", + " if (\n", + " type(self.collaborators[idx]) is not str\n", + " or hasattr(self.runtime, \"_collaborators\")\n", + " or hasattr(self.runtime, \"__collaborators\")\n", + " ):\n", + " # Error - we are able to access collaborator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_collaborator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - collaborator {collab} \"\n", + " + f\"private attributes accessible {bcolors.ENDC}\"\n", + " )\n", + "\n", + "\n", + "def validate_collab_private_attrs(self, step_name, **private_attrs_kwargs):\n", + "\n", + " \"\"\"\n", + " Validate that collaborators can only access their own attributes\n", + " Args:\n", + " step_name: Name of the step being validated\n", + " private_attrs_kwargs: Keyword arguments specifying the names of private attributes for the aggregator and collaborators.\n", + " \"\"\"\n", + " agg_attrs = private_attrs_kwargs.get('aggr', [])\n", + " collab_attrs = private_attrs_kwargs.get('collabs', [])\n", + "\n", + " # Collaborator should only be able to access its own attributes\n", + "\n", + " # check for missing collaborators attributes\n", + " inaccessible_collab_attrs = [attr for attr in collab_attrs if not hasattr(self, attr)]\n", + "\n", + " if inaccessible_collab_attrs:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \" collab_attributes_not_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Collab \"\n", + " + f\"private attributes not accessible {bcolors.ENDC}\"\n", + " )\n", + " # check for aggregator private attributes that should not be accessible\n", + " breached_agg_attr = [attr for attr in agg_attrs if hasattr(self, attr)]\n", + " if breached_agg_attr:\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + "\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible: {','.join(breached_agg_attr)} {bcolors.ENDC}\"\n", + " )\n", + "\n", + " # Aggregator attributes should not be accessible in collaborator step\n", + " if hasattr(self.runtime, \"_aggregator\") and isinstance(self.runtime._aggregator, Aggregator):\n", + " # Error - we are able to access aggregator attributes\n", + " TestFlowPrivateAttributes.ERROR_LIST.append(\n", + " step_name + \"_aggregator_attributes_found\"\n", + " )\n", + " print(\n", + " f\"{bcolors.FAIL} ... Attribute test failed in {step_name} - Aggregator\"\n", + " + f\" private attributes accessible {bcolors.ENDC}\"\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "c61813ab", + "metadata": {}, + "source": [ + "Now that we have our flow and runtime defined, let's run the experiment! 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'],\n", + " director=director_info, \n", + " notebook_path='./testflow_private_attributes.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowPrivateAttributes(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir-wip", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/director_config.yaml new file mode 100644 index 0000000000..9882f72c63 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/director_config.yaml @@ -0,0 +1,4 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..bb76d4845e --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_one: + callable_func: + settings: + index: 1 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/private_attributes/collaborator_private_attrs.py 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..2815174759 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,4 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +def collaborator_private_attrs(index): + return {"index": index + 1} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..2c44953a5e --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_two: + callable_func: + settings: + index: 2 + template: private_attributes.collaborator_private_attrs.collaborator_private_attrs \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..188261e469 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +def collaborator_private_attrs(index): + return {"index": index + 1} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk 
to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/workspace/testflow_reference.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/workspace/testflow_reference.ipynb new file mode 100644 index 0000000000..311a443f09 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference/workspace/testflow_reference.ipynb @@ -0,0 +1,487 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Reference" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bd8ac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "import io\n", + "import math\n", + "import logging\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "import inspect\n", + "from types import MethodType\n", + "\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"\n", + "\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.linear1 = nn.Linear(60, 100)\n", + " self.linear2 = nn.Linear(100, 10)\n", + "\n", + " def forward(self, x):\n", + " x = self.linear1(x)\n", + " x = self.linear2(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of the reference test case" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowReference(FLSpec):\n", + "\n", + " \"\"\"\n", + " Testflow to validate references of collaborator attributes in Federated Flow.\n", + 
"\n", + " \"\"\"\n", + "\n", + " step_one_collab_attrs = []\n", + " step_two_collab_attrs = []\n", + " all_ref_error_dict = {}\n", + " agg_attr_dict = {}\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + "\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for validating references. \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " self.next(self.test_create_agg_attr)\n", + "\n", + " @aggregator\n", + " def test_create_agg_attr(self):\n", + " \"\"\"\n", + " Create different types of objects.\n", + " \"\"\"\n", + "\n", + " self.agg_attr_str = \"Test string data\"\n", + " self.agg_attr_list = [1, 2, 5, 6, 7, 8]\n", + " self.agg_attr_dict = {key: key for key in range(5)}\n", + " self.agg_attr_file = io.StringIO(\"Test file data in aggregator\")\n", + " self.agg_attr_math = math.sqrt(2)\n", + " self.agg_attr_complex_num = complex(2, 3)\n", + " self.agg_attr_log = logging.getLogger(\"Test logger data in aggregator\")\n", + " self.agg_attr_model = Net()\n", + " self.agg_attr_optimizer = optim.SGD(\n", + " self.agg_attr_model.parameters(), lr=1e-3, momentum=1e-2\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " # get aggregator attributes\n", + " agg_attr_list = filter_attrs(inspect.getmembers(self))\n", + " for attr in agg_attr_list:\n", + " agg_attr_id = id(getattr(self, attr))\n", + " TestFlowReference.agg_attr_dict[attr] = agg_attr_id\n", + " self.next(self.test_create_collab_attr, foreach=\"collaborators\")\n", + "\n", + " @collaborator\n", + " def test_create_collab_attr(self):\n", + " \"\"\"\n", + " Modify the attirbutes of aggregator to validate the references.\n", + " Create different types of objects.\n", + " \"\"\"\n", + "\n", + " self.agg_attr_str = self.agg_attr_str + \" \" + self.input\n", + " self.agg_attr_complex_num += complex(self.index, self.index)\n", + " self.agg_attr_math += self.index\n", + " self.agg_attr_log = \" \" + self.input\n", + "\n", + " self.collab_attr_str_one = \"Test string data in collab \" + self.input\n", + " self.collab_attr_list_one = [1, 2, 5, 6, 7, 8]\n", + " self.collab_attr_dict_one = {key: key for key in range(5)}\n", + " self.collab_attr_file_one = io.StringIO(\"Test file data in collaborator\")\n", + " self.collab_attr_math_one = math.sqrt(self.index)\n", + " self.collab_attr_complex_num_one = complex(self.index, self.index)\n", + "\n", + " # append attributes of collaborator\n", + " TestFlowReference.step_one_collab_attrs.append(self)\n", + "\n", + " if len(TestFlowReference.step_one_collab_attrs) >= 2:\n", + " collab_attr_list = filter_attrs(inspect.getmembers(self))\n", + " matched_ref_dict = find_matched_references(\n", + " collab_attr_list, TestFlowReference.step_one_collab_attrs\n", + " )\n", + " validate_collab_references(matched_ref_dict)\n", + "\n", + " self.next(self.test_create_more_collab_attr)\n", + "\n", + " @collaborator\n", + " def test_create_more_collab_attr(self):\n", + " \"\"\"\n", + " Create different types of objects.\n", + " \"\"\"\n", + "\n", + " self.collab_attr_str_two = \"String reference three \" + self.input\n", + " self.collab_attr_list_two = [1, 2, 3, 5, 6, 8]\n", + " self.collab_attr_dict_two = {key: key for key in range(5)}\n", + " self.collab_attr_file_two = io.StringIO(\"Test file reference one\")\n", + " self.collab_attr_math_two = math.sqrt(2)\n", + " self.collab_attr_complex_num_two = complex(2, 3)\n", + "\n", + " TestFlowReference.step_two_collab_attrs.append(self)\n", + "\n", + " if 
len(TestFlowReference.step_two_collab_attrs) >= 2:\n", + " collab_attr_list = filter_attrs(inspect.getmembers(self))\n", + " matched_ref_dict = find_matched_references(\n", + " collab_attr_list, TestFlowReference.step_two_collab_attrs\n", + " )\n", + " validate_collab_references(matched_ref_dict)\n", + "\n", + " self.next(self.join)\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Iterate over the references of collaborator attributes\n", + " validate uniqueness of attributes and raise assertion\n", + " \"\"\"\n", + "\n", + " all_attr_list = filter_attrs(inspect.getmembers(inputs[0]))\n", + " agg_attrs = filter_attrs(inspect.getmembers(self))\n", + "\n", + " # validate aggregator references are intact after coming out of collaborators.\n", + " validate_agg_attr_ref(agg_attrs, self)\n", + "\n", + " # validate collaborators references are not shared in between.\n", + " matched_ref_dict = find_matched_references(all_attr_list, inputs)\n", + " validate_collab_references(matched_ref_dict)\n", + "\n", + " # validate aggregator references are not shared with any of the collaborators .\n", + " validate_agg_collab_references(inputs, self, agg_attrs)\n", + "\n", + " all_shared_attr = \"\"\n", + " print(f\"\\n{bcolors.UNDERLINE}Reference test summary: {bcolors.ENDC}\\n\")\n", + " for val in TestFlowReference.all_ref_error_dict.values():\n", + " all_shared_attr = all_shared_attr + \",\".join(val)\n", + " if all_shared_attr:\n", + " print(\n", + " f\"{bcolors.FAIL}...Test case failed for {all_shared_attr} {bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"{bcolors.OKGREEN}...Test case passed for all the attributes.{bcolors.ENDC}\"\n", + " )\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " This is the 'end' step. All flows must have an 'end' step, which is the\n", + " last step in the flow.\n", + "\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending test for validating the references. \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " if TestFlowReference.all_ref_error_dict:\n", + " raise (\n", + " AssertionError(\n", + " f\"{bcolors.FAIL}\\n ...Test case failed ... 
{bcolors.ENDC}\"\n", + " )\n", + " )\n", + "\n", + " TestFlowReference.step_one_collab_attrs = []\n", + " TestFlowReference.step_two_collab_attrs = []\n", + " TestFlowReference.all_ref_error_dict = {}\n", + "\n", + "\n", + "def filter_attrs(attr_list):\n", + " valid_attrs = []\n", + " reserved_words = [\"next\", \"runtime\", \"execute_next\"]\n", + " for attr in attr_list:\n", + " if (\n", + " not attr[0].startswith(\"_\")\n", + " and attr[0] not in reserved_words\n", + " and not hasattr(TestFlowReference, attr[0])\n", + " ):\n", + " if not isinstance(attr[1], MethodType):\n", + " valid_attrs.append(attr[0])\n", + " return valid_attrs\n", + "\n", + "\n", + "def find_matched_references(collab_attr_list, all_collaborators):\n", + " \"\"\"\n", + " Iterate attributes of collborator and capture the duplicate reference\n", + " return: dict: {\n", + " 'Portland': ['failed attributes'], 'Seattle': [],\n", + " }\n", + " \"\"\"\n", + " matched_ref_dict = {}\n", + " for i in range(len(all_collaborators)):\n", + " matched_ref_dict[all_collaborators[i].input] = []\n", + "\n", + " # For each attribute in the collaborator attribute list, check if any of the collaborator\n", + " # attributes are shared with another collaborator\n", + " for attr_name in collab_attr_list:\n", + " for i, curr_collab in enumerate(all_collaborators):\n", + " # Compare the current collaborator with the collaborator(s) that come(s) after it.\n", + " for next_collab in all_collaborators[i + 1:]:\n", + " # Check if both collaborators have the current attribute\n", + " if hasattr(curr_collab, attr_name) and hasattr(next_collab, attr_name):\n", + " # Check if both collaborators are sharing same reference\n", + " if id(getattr(curr_collab, attr_name)) is id(getattr(\n", + " next_collab, attr_name\n", + " )):\n", + " matched_ref_dict[curr_collab.input].append(attr_name)\n", + " print(\n", + " f\"{bcolors.FAIL} ... Reference test failed - {curr_collab.input} \\\n", + " sharing same \"\n", + " + f\"{attr_name} reference with {next_collab.input} {bcolors.ENDC}\"\n", + " )\n", + "\n", + " return matched_ref_dict\n", + "\n", + "\n", + "def validate_collab_references(matched_ref_dict):\n", + " \"\"\"\n", + " Iterate reference list and raise assertion for conflicts\n", + " \"\"\"\n", + " collborators_sharing_ref = []\n", + " reference_flag = False\n", + "\n", + " for collab, val in matched_ref_dict.items():\n", + " if val:\n", + " collborators_sharing_ref.append(collab)\n", + " reference_flag = True\n", + " if collborators_sharing_ref:\n", + " for collab in collborators_sharing_ref:\n", + " if collab not in TestFlowReference.all_ref_error_dict:\n", + " TestFlowReference.all_ref_error_dict[collab] = matched_ref_dict.get(\n", + " collab\n", + " )\n", + "\n", + " if not reference_flag:\n", + " print(\n", + " f\"{bcolors.OKGREEN} Pass : Reference test passed for collaborators. 
{bcolors.ENDC}\"\n", + " )\n", + "\n", + "\n", + "def validate_agg_attr_ref(agg_attrs, agg_obj):\n", + " \"\"\"\n", + " Verifies aggregator attributes are retained after\n", + " collaborator execution\n", + " \"\"\"\n", + " attr_flag = False\n", + " for attr in agg_attrs:\n", + " if TestFlowReference.agg_attr_dict.get(attr) == id(getattr(agg_obj, attr)):\n", + " attr_flag = True\n", + " if not attr_flag:\n", + " print(\n", + " f\"{bcolors.FAIL}...Aggregator references are not intact after coming out of \"\n", + " + f\"collaborators.{bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"{bcolors.OKGREEN} Pass : Aggregator references are intact after coming out of \"\n", + " + f\"collaborators.{bcolors.ENDC}\"\n", + " )\n", + "\n", + "\n", + "def validate_agg_collab_references(all_collborators, agg_obj, agg_attrs):\n", + " \"\"\"\n", + " Iterate attributes of aggregator and collborator to capture the mismatched references.\n", + " \"\"\"\n", + "\n", + " mis_matched_ref = {}\n", + " for collab in all_collborators:\n", + " mis_matched_ref[collab.input] = []\n", + "\n", + " attr_ref_flag = False\n", + " for attr in agg_attrs:\n", + " agg_attr_id = id(getattr(agg_obj, attr))\n", + " for collab in all_collborators:\n", + " collab_attr_id = id(getattr(collab, attr))\n", + " if agg_attr_id is collab_attr_id:\n", + " attr_ref_flag = True\n", + " mis_matched_ref.get(collab).append(attr)\n", + "\n", + " if attr_ref_flag:\n", + " print(\n", + " f\"{bcolors.FAIL}...Aggregator references are shared between collaborators.\"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " print(\n", + " f\"{bcolors.OKGREEN} Pass : Reference test passed for aggregator.{bcolors.ENDC}\"\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "b5371b6d", + "metadata": {}, + "source": [ + "## Workspace creation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one','envoy_two'], \n", + " director=director_info, \n", + " notebook_path='./testflow_reference.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowReference(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir_shift", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.20" 
+ } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/director_config.yaml new file mode 100644 index 0000000000..b604d6463a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/director_config.yaml @@ -0,0 +1,6 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/start_director.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/envoy_config.yaml @@ -0,0 +1 @@ + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/requirements.txt 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/workspace/testflow_reference_with_include_exclude.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/workspace/testflow_reference_with_include_exclude.ipynb new file mode 100644 index 0000000000..b584487489 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_reference_with_include_exclude/workspace/testflow_reference_with_include_exclude.ipynb @@ -0,0 +1,397 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Reference with Include and Exclude" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89cf4866", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.interface import FLSpec\n", + "from openfl.experimental.workflow.placement import aggregator, collaborator\n", + "\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "import inspect\n", + "from types import MethodType\n", + "\n", + "MIN_COLLECTION_COUNT = 2\n", + "\n", + "\n", + "class bcolors: # NOQA: N801\n", + " HEADER = \"\\033[95m\"\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " ENDC = \"\\033[0m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self):\n", + " super(Net, self).__init__()\n", + " self.linear1 = nn.Linear(60, 100)\n", + " self.linear2 = nn.Linear(100, 10)\n", + "\n", + " def forward(self, x):\n", + " x = self.linear1(x)\n", + " x = self.linear2(x)\n", + " return x\n" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of the reference testcase with include and exclude" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowReferenceWithIncludeExclude(FLSpec):\n", + "\n", + " \"\"\"\n", + " Testflow to validate references of collaborator attributes in Federated Flow with include and exclude.\n", + "\n", + " \"\"\"\n", + " step_one_collab_attrs = []\n", + " step_two_collab_attrs = []\n", + " all_ref_error_dict = {}\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Flow start.\n", + "\n", + " \"\"\"\n", + " self.agg_agg_attr_dict = {key: key for key in range(5)}\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for validating references \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " self.next(self.test_create_agg_attr, exclude=[\"agg_agg_attr_dict\"])\n", + "\n", + " @aggregator\n", + " def test_create_agg_attr(self):\n", + " \"\"\"\n", + " Create different types of objects\n", + " \"\"\"\n", + "\n", + " self.agg_attr_list = [1, 2, 5, 6, 7, 8]\n", + " self.agg_attr_dict = {key: key for key in range(5)}\n", + "\n", + " self.agg_attr_model = Net()\n", + " self.agg_attr_optimizer = optim.SGD(\n", + " self.agg_attr_model.parameters(), lr=1e-3, momentum=1e-2\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + " self.next(\n", + " self.test_create_collab_attr,\n", + " foreach=\"collaborators\",\n", + " include=[\"collaborators\", \"agg_attr_list\"],\n", + " )\n", + "\n", + " @collaborator\n", + " def test_create_collab_attr(self):\n", + " \"\"\"\n", + " Modify the attributes of the aggregator to validate the references.\n", + " Create different types of objects.\n", + " \"\"\"\n", + "\n", + " self.collab_attr_list_one = [1, 2, 5, 6, 7, 8]\n", + " self.collab_attr_dict_one = {key: key for key in range(5)}\n", + "\n", + " # append self attributes of collaborators\n", + " TestFlowReferenceWithIncludeExclude.step_one_collab_attrs.append(self)\n", + "\n", + " if (\n", + " len(TestFlowReferenceWithIncludeExclude.step_one_collab_attrs)\n", + " >= MIN_COLLECTION_COUNT\n", + " ):\n", + " collab_attr_list = filter_attrs(inspect.getmembers(self))\n", + " matched_ref_dict = find_matched_references(\n", + " collab_attr_list,\n", + " TestFlowReferenceWithIncludeExclude.step_one_collab_attrs,\n", + " )\n", + " validate_references(matched_ref_dict)\n", + "\n", + " self.next(self.test_create_more_collab_attr, exclude=[\"collab_attr_dict_one\"])\n", + "\n", + " @collaborator\n", + " def test_create_more_collab_attr(self):\n", + " \"\"\"\n", + " Create different types of objects.\n", + " \"\"\"\n", + "\n", + " self.collab_attr_list_two = [1, 2, 3, 5, 6, 8]\n", + " self.collab_attr_dict_two = {key: key for key in range(5)}\n", + "\n", + " TestFlowReferenceWithIncludeExclude.step_two_collab_attrs.append(self)\n", + "\n", + " if (\n", + " len(TestFlowReferenceWithIncludeExclude.step_two_collab_attrs)\n", + " >= MIN_COLLECTION_COUNT\n", + " ):\n", + " collab_attr_list = filter_attrs(inspect.getmembers(self))\n", + " matched_ref_dict = find_matched_references(\n", + " collab_attr_list,\n", + " TestFlowReferenceWithIncludeExclude.step_two_collab_attrs,\n", + " )\n", + " validate_references(matched_ref_dict)\n", + "\n", + " self.next(self.join, include=[\"collab_attr_dict_two\"])\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " Iterate over the references of collaborator attributes,\n", + " validate uniqueness of attributes and raise an assertion on conflicts.\n", + " \"\"\"\n", + "\n", + " all_attr_list = filter_attrs(inspect.getmembers(inputs[0]))\n", + "\n", + " matched_ref_dict = find_matched_references(all_attr_list, inputs)\n", + " validate_references(matched_ref_dict)\n", + " all_shared_attr = \"\"\n", + " print(f\"\\n{bcolors.UNDERLINE}Reference test summary: {bcolors.ENDC}\\n\")\n", + " for val in TestFlowReferenceWithIncludeExclude.all_ref_error_dict.values():\n", + " all_shared_attr = all_shared_attr + \",\".join(val)\n", + " if all_shared_attr:\n", + " print(\n", + " f\"{bcolors.FAIL}...Test case failed for {all_shared_attr} {bcolors.ENDC}\"\n", + " )\n", + " else:\n", + " print(f\"{bcolors.OKGREEN}...Test case passed for all the attributes.{bcolors.ENDC}\")\n", + "\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending test for validating the references. \"\n", + " + f\"{bcolors.ENDC}\"\n", + " )\n", + " if TestFlowReferenceWithIncludeExclude.all_ref_error_dict:\n", + " raise (\n", + " AssertionError(\n", + " f\"{bcolors.FAIL}\\n ...Test case failed ... {bcolors.ENDC}\"\n", + " )\n", + " )\n", + "\n", + " TestFlowReferenceWithIncludeExclude.step_one_collab_attrs = []\n", + " TestFlowReferenceWithIncludeExclude.step_two_collab_attrs = []\n", + " TestFlowReferenceWithIncludeExclude.all_ref_error_dict = {}\n", + "\n", + "\n", + "def filter_attrs(attr_list):\n", + " valid_attrs = []\n", + " reserved_words = [\"next\", \"runtime\", \"execute_next\"]\n", + " for attr in attr_list:\n", + " if (\n", + " not attr[0].startswith(\"_\")\n", + " and attr[0] not in reserved_words\n", + " and not hasattr(TestFlowReferenceWithIncludeExclude, attr[0])\n", + " ):\n", + " if not isinstance(attr[1], MethodType):\n", + " valid_attrs.append(attr[0])\n", + " return valid_attrs\n", + "\n", + "\n", + "def find_matched_references(collab_attr_list, all_collaborators):\n", + " \"\"\"\n", + " Iterate attributes of collaborators and capture duplicate references.\n", + " return: dict: {\n", + " 'Portland': ['failed attributes'], 'Seattle': [],\n", + " }\n", + " \"\"\"\n", + " matched_ref_dict = {}\n", + " for i in range(len(all_collaborators)):\n", + " matched_ref_dict[all_collaborators[i].input] = []\n", + "\n", + " # For each attribute in the collaborator attribute list, check if any of the collaborator\n", + " # attributes are shared with another collaborator\n", + " for attr_name in collab_attr_list:\n", + " for i, curr_collab in enumerate(all_collaborators):\n", + " # Compare the current collaborator with the collaborator(s) that come(s) after it.\n", + " for next_collab in all_collaborators[i + 1:]:\n", + " # Check if both collaborators have the current attribute\n", + " if hasattr(curr_collab, attr_name) and hasattr(next_collab, attr_name):\n", + " # Check if both collaborators are sharing same reference\n", + " if getattr(curr_collab, attr_name) is getattr(\n", + " next_collab, attr_name\n", + " ):\n", + " matched_ref_dict[curr_collab.input].append(attr_name)\n", + " print(\n", + " f\"{bcolors.FAIL} ... Reference test failed - {curr_collab.input} \\\n", + " sharing same \"\n", + " + f\"{attr_name} reference with {next_collab.input} {bcolors.ENDC}\"\n", + " )\n", + "\n", + " return matched_ref_dict\n", + "\n", + "\n", + "def validate_references(matched_ref_dict):\n", + " \"\"\"\n", + " Iterate reference list and raise assertion for conflicts\n", + " \"\"\"\n", + " collaborators_sharing_ref = []\n", + " reference_flag = False\n", + "\n", + " for collab, val in matched_ref_dict.items():\n", + " if val:\n", + " collaborators_sharing_ref.append(collab)\n", + " reference_flag = True\n", + " if collaborators_sharing_ref:\n", + " for collab in collaborators_sharing_ref:\n", + " if collab not in TestFlowReferenceWithIncludeExclude.all_ref_error_dict:\n", + " TestFlowReferenceWithIncludeExclude.all_ref_error_dict[\n", + " collab\n", + " ] = matched_ref_dict.get(collab)\n", + "\n", + " if not reference_flag:\n", + " print(f\"{bcolors.OKGREEN} Pass : Reference test passed {bcolors.ENDC}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators=['envoy_one', 'envoy_two'],\n", + " director=director_info,\n", + " notebook_path='./testflow_reference_with_include_exclude.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowReferenceWithIncludeExclude(checkpoint=True)\n", + "flflow.runtime = federated_runtime\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir-wip", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/director_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/director_config.yaml new file mode 100644 index 0000000000..e8dcc1e12a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/director_config.yaml @@ -0,0 +1,5 @@ +settings: + listen_host: localhost + listen_port: 50050 + envoy_health_check_period: 5 # in seconds + diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/start_director.sh
b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/start_director.sh new file mode 100755 index 0000000000..5806a6cc0a --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/director/start_director.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +fx director start --disable-tls -c director_config.yaml \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/envoy_config.yaml new file mode 100644 index 0000000000..d5c49635f3 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_four: + callable_func: + settings: + collab_name: envoy_four + template: private_attributes.collaborator_private_attrs.callable_to_initialize_collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..49172286cc --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +def callable_to_initialize_collaborator_private_attributes(collab_name): + return {"name": collab_name} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_four/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/envoy_config.yaml new file mode 100644 index 0000000000..04e7ad4fd8 --- /dev/null +++ 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_one: + callable_func: + settings: + collab_name: envoy_one + template: private_attributes.collaborator_private_attrs.callable_to_initialize_collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..5be7b9edb8 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,4 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +def callable_to_initialize_collaborator_private_attributes(collab_name): + return {"name": collab_name} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_one/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/envoy_config.yaml new file mode 100644 index 0000000000..f78af08e58 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_three: + callable_func: + settings: + collab_name: envoy_three + template: private_attributes.collaborator_private_attrs.callable_to_initialize_collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..49172286cc --- /dev/null +++ 
b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +def callable_to_initialize_collaborator_private_attributes(collab_name): + return {"name": collab_name} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_three/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/envoy_config.yaml b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/envoy_config.yaml new file mode 100644 index 0000000000..6149ff9f1d --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/envoy_config.yaml @@ -0,0 +1,5 @@ +envoy_two: + callable_func: + settings: + collab_name: envoy_two + template: private_attributes.collaborator_private_attrs.callable_to_initialize_collaborator_private_attributes \ No newline at end of file diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/private_attributes/collaborator_private_attrs.py b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/private_attributes/collaborator_private_attrs.py new file mode 100644 index 0000000000..49172286cc --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/private_attributes/collaborator_private_attrs.py @@ -0,0 +1,5 @@ +# Copyright (C) 2020-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +def callable_to_initialize_collaborator_private_attributes(collab_name): + return {"name": collab_name} diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/requirements.txt b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/requirements.txt new file mode 100644 index 0000000000..acfef16953 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/requirements.txt @@ -0,0 +1,7 @@ +mistune>=2.0.3 # not directly required, 
pinned by Snyk to avoid a vulnerability +numpy>=1.13.3 +openfl>=1.2.1 +scikit-learn>=0.24.1 +setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability +torch>=1.13.1 +wheel>=0.38.0 # not directly required, pinned by Snyk to avoid a vulnerability diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/start_envoy.sh b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/start_envoy.sh new file mode 100755 index 0000000000..4da07821af --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/envoy_two/start_envoy.sh @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +ENVOY_NAME=$1 +ENVOY_CONF=$2 + +fx envoy start -n "$ENVOY_NAME" --disable-tls --envoy-config-path "$ENVOY_CONF" -dh localhost -dp 50050 diff --git a/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/workspace/testflow_subset_of_collaborators.ipynb b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/workspace/testflow_subset_of_collaborators.ipynb new file mode 100644 index 0000000000..3620e8e4a1 --- /dev/null +++ b/tests/github/experimental/workflow/FederatedRuntime/testcase_subset_of_collaborators/workspace/testflow_subset_of_collaborators.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "dc13070c", + "metadata": {}, + "source": [ + "# Testcase: Subset of Collaborators" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "a4394089", + "metadata": {}, + "source": [ + "# Getting Started" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "857f9995", + "metadata": {}, + "source": [ + "Initially, we start by specifying the module where cells marked with the `#| export` directive will be automatically exported. \n", + "\n", + "In the following cell, `#| default_exp experiment` indicates that the exported file will be named 'experiment'. This name can be modified based on the user's requirements and preferences." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d79eacbd", + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp experiment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9bd8ac2d", + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "\n", + "from metaflow import Flow\n", + "\n", + "from openfl.experimental.workflow.interface.fl_spec import FLSpec\n", + "from openfl.experimental.workflow.placement.placement import aggregator, collaborator\n", + "\n", + "\n", + "class bcolors: # NOQA: N801\n", + " OKBLUE = \"\\033[94m\"\n", + " OKCYAN = \"\\033[96m\"\n", + " OKGREEN = \"\\033[92m\"\n", + " HEADER = \"\\033[95m\"\n", + " WARNING = \"\\033[93m\"\n", + " FAIL = \"\\033[91m\"\n", + " BOLD = \"\\033[1m\"\n", + " UNDERLINE = \"\\033[4m\"\n", + " ENDC = \"\\033[0m\"\n" + ] + }, + { + "cell_type": "markdown", + "id": "36ed5e31", + "metadata": {}, + "source": [ + "Let us now define the flow of the subset-of-collaborators testcase" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52c4a752", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "class TestFlowSubsetCollaborators(FLSpec):\n", + " \"\"\"\n", + " Testflow to validate working of Subset Collaborators in Federated Flow.\n", + " \"\"\"\n", + "\n", + " def __init__(self, **kwargs) -> None:\n", + " super().__init__(**kwargs)\n", + "\n", + " @aggregator\n", + " def start(self):\n", + " \"\"\"\n", + " Starting the flow with a subset of collaborators\n", + " \"\"\"\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Starting Test for \"\n", + " + f\"validating Subset of collaborators {bcolors.ENDC}\"\n", + " )\n", + " self.collaborators = self.runtime.collaborators\n", + "\n", + " # select subset of collaborators\n", + " self.subset_collaborators = self.collaborators[:2]\n", + "\n", + " print(\n", + " f\"... Executing flow for {len(self.subset_collaborators)} collaborators out of total: \"\n", + " + f\"{len(self.collaborators)}\"\n", + " )\n", + "\n", + " self.next(self.test_valid_collaborators, foreach=\"subset_collaborators\")\n", + "\n", + " @collaborator\n", + " def test_valid_collaborators(self):\n", + " \"\"\"\n", + " Set the collaborator name.\n", + " \"\"\"\n", + " print(\"Executing collaborator step test_valid_collaborators for \"\n", + " + f\"collaborator {self.name}.\")\n", + " self.collaborator_ran = self.name\n", + " self.next(self.join)\n", + "\n", + " @aggregator\n", + " def join(self, inputs):\n", + " \"\"\"\n", + " List the collaborators that ran successfully.\n", + " \"\"\"\n", + " print(\"inside join\")\n", + " self.collaborators_ran = [input.collaborator_ran for input in inputs]\n", + " self.next(self.end)\n", + "\n", + " @aggregator\n", + " def end(self):\n", + " \"\"\"\n", + " End of the flow.\n", + " \"\"\"\n", + " print(f\"End of the test case {TestFlowSubsetCollaborators.__name__} reached.\")\n", + " testcase()\n", + "\n", + "\n", + "def testcase():\n", + " tc_pass_fail = {\n", + " \"passed\": [], \"failed\": []\n", + " }\n", + " subset_collaborators = [\"envoy_one\", \"envoy_two\"]\n", + " f = Flow(\"TestFlowSubsetCollaborators/\")\n", + " r = f.latest_run\n", + " # Collaborator test_valid_collaborators step\n", + " step = list(r)[1]\n", + " # Aggregator join step\n", + " join = list(r)[0]\n", + "\n", + " collaborators_ran = list(join)[0].data.collaborators_ran\n", + " print(f\"collaborators_ran: {collaborators_ran}\")\n", + "\n", + " if len(list(step)) != len(subset_collaborators):\n", + " tc_pass_fail[\"failed\"].append(\n", + " f\"{bcolors.FAIL}...Flow only ran for {len(list(step))} \"\n", + " + f\"instead of the {len(subset_collaborators)} expected \"\n", + " + f\"collaborators - Testcase Failed.{bcolors.ENDC} \"\n", + " )\n", + " else:\n", + " tc_pass_fail[\"passed\"].append(\n", + " f\"{bcolors.OKGREEN}Found {len(list(step))} tasks for each of the \"\n", + " + f\"{len(subset_collaborators)} collaborators - \"\n", + " + f\"Testcase Passed.{bcolors.ENDC}\"\n", + " )\n", + " passed = True\n", + " for collaborator_name in subset_collaborators:\n", + " if collaborator_name not in collaborators_ran:\n", + " passed = False\n", + " tc_pass_fail[\"failed\"].append(\n", + " f\"{bcolors.FAIL}...Flow did not execute for \"\n", + " + f\"collaborator {collaborator_name}\"\n", + " + f\" - Testcase Failed.{bcolors.ENDC}\"\n", + " )\n", + "\n", + " if passed:\n", + " tc_pass_fail[\"passed\"].append(\n", + " f\"{bcolors.OKGREEN}Flow executed for all collaborators \"\n", + " + f\"- Testcase Passed.{bcolors.ENDC}\"\n", + " )\n", + " for values in tc_pass_fail.values():\n", + " print(*values, sep=\"\\n\")\n", + "\n", + " print(\n", + " f\"{bcolors.OKBLUE}Testing FederatedFlow - Ending test for validating \"\n", + " + f\"the subset of collaborators. {bcolors.ENDC}\"\n", + " )\n", + " if tc_pass_fail.get(\"failed\"):\n", + " tc_pass_fail_len = len(tc_pass_fail.get(\"failed\"))\n", + " raise AssertionError(\n", + " f\"{bcolors.FAIL}\\n {tc_pass_fail_len} Test \"\n", + " + f\"case(s) failed ...
{bcolors.ENDC}\"\n", + " )\n" + ] + }, + { + "cell_type": "markdown", + "id": "b5371b6d", + "metadata": {}, + "source": [ + "## Workspace creation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1715a373", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "from openfl.experimental.workflow.runtime import FederatedRuntime\n", + "\n", + "director_info = {\n", + " 'director_node_fqdn':'localhost',\n", + " 'director_port':50050,\n", + " 'cert_chain': None,\n", + " 'api_cert': None,\n", + " 'api_private_key': None,\n", + "}\n", + "\n", + "federated_runtime = FederatedRuntime(\n", + " collaborators= ['envoy_one', 'envoy_two', 'envoy_three', 'envoy_four'], \n", + " director=director_info, \n", + " notebook_path='./testflow_subset_of_collaborators.ipynb'\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1f1be87f", + "metadata": {}, + "outputs": [], + "source": [ + "federated_runtime.get_envoys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6d19819", + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "flflow = TestFlowSubsetCollaborators(checkpoint=True)\n", + "flflow.runtime = federated_runtime" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0c639b1", + "metadata": {}, + "outputs": [], + "source": [ + "flflow.run()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "664ec7f5", + "metadata": {}, + "outputs": [], + "source": [ + "vars(flflow)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dir-wip", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}
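A note on the notebook convention shared by all of the FederatedRuntime testcases above: the `#| default_exp` / `#| export` directives mark which cells the workspace export collects into the experiment module, as each notebook's opening markdown cell explains. The sketch below is a simplified, hypothetical reader for these directives, for illustration only; it is not the actual openfl.experimental.workflow.workspace_export implementation.

    # Simplified sketch of the "#| default_exp" / "#| export" convention.
    # Hypothetical helper, not OpenFL's actual workspace exporter.
    import json

    def collect_exported_cells(notebook_path):
        """Return (module_name, source) gathered from cells marked '#| export'."""
        with open(notebook_path) as fh:
            nb = json.load(fh)
        module_name, chunks = "experiment", []
        for cell in nb.get("cells", []):
            if cell.get("cell_type") != "code":
                continue
            lines = "".join(cell.get("source", [])).strip().splitlines()
            directive = lines[0].replace(" ", "") if lines else ""
            if directive.startswith("#|default_exp"):
                module_name = lines[0].split()[-1]  # e.g. "experiment"
            elif directive.startswith("#|export"):
                chunks.append("\n".join(lines[1:]))  # keep cell body, drop directive
        return module_name, "\n\n".join(chunks)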
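A note on the reference checks that testflow_reference and testflow_reference_with_include_exclude share: each flow snapshots collaborator state after a fan-out step and asserts that no two collaborators (and no collaborator/aggregator pair) hold the same underlying attribute objects. A minimal, OpenFL-independent sketch of that technique follows; the Collab class and shared_attrs helper are hypothetical names for illustration. Objects are compared with `is` (or their stored `id()` values with `==`); writing `id(a) is id(b)` instead would test the identity of two temporary ints, which is unreliable in CPython.

    # Minimal sketch of the shared-reference check performed by the testflows.
    # "Collab" and "shared_attrs" are illustrative names, not part of this patch.
    class Collab:
        def __init__(self, name, payload):
            self.input = name      # mirrors the `input` field used by the flows
            self.payload = payload

    def shared_attrs(collabs, attr_names):
        """Map each collaborator name to attributes shared with a later one."""
        shared = {c.input: [] for c in collabs}
        for attr in attr_names:
            for i, cur in enumerate(collabs):
                for nxt in collabs[i + 1:]:
                    if getattr(cur, attr) is getattr(nxt, attr):
                        shared[cur.input].append(attr)
        return shared

    data = [1, 2, 3]
    assert shared_attrs([Collab("Portland", data), Collab("Seattle", data)],
                        ["payload"]) == {"Portland": ["payload"], "Seattle": []}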
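A note on result validation in testflow_subset_of_collaborators: because the flow is created with checkpoint=True, its steps are recorded in Metaflow's local datastore, and testcase() reads them back through the Metaflow client API. The pattern, condensed from the notebook above (the [0]/[1] indices follow the notebook's own comments, where list(run)[0] is the aggregator join step and list(run)[1] the collaborator step at that point in the run):

    from metaflow import Flow

    run = Flow("TestFlowSubsetCollaborators/").latest_run
    join_step = list(run)[0]       # aggregator join step (per the notebook)
    collab_step = list(run)[1]     # collaborator test_valid_collaborators step
    tasks = list(collab_step)      # one task per collaborator that actually ran
    collaborators_ran = list(join_step)[0].data.collaborators_ran
    print(len(tasks), collaborators_ran)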