Added config parsing check to GaNDLF task runner (securefederatedai#908)

* added config parsing check
* updated usage
* using latest gandlf tag, and updated plan initialization command
* updated message
* renamed
* minor rename
* no need for this comment
* let's see if this works
* updated ignore
* trying to set the paths
* fixed paths
* added debug
* added an assert
* better check for empty dict
* no need for this `mkdir` command
* this is not really needed in the workflow
* added the plan initialization in the test
* Add default value for --gandlf_config argument
* checking if this worked
* trying using `pwd`
* should fix lint
* check file presence
* removed trailing whitespace
* checking another path
* checking copy to a known location
* different path
* trying something else
* Fix plan initialization
* Fix plan initialization for GaNDLF

  Signed-off-by: Patrick Foley <[email protected]>
* Attempt to add missing param

  Signed-off-by: Patrick Foley <[email protected]>
* better way to initialize default
* using the 3d patch instead of the default 2d one
* lint fix
* this should be there
* lint fix
* this should fix it
* using 2d data for unit test instead of 3d
* trying something else
* added a few comments

---------

Signed-off-by: Patrick Foley <[email protected]>
Co-authored-by: Patrick Foley <[email protected]>
Signed-off-by: nammbash <[email protected]>
nammbash · Feb 27, 2024 · e6eedb4 · e6eedb4
1 parent 88b11dc
commit e6eedb4
Show file tree

Hide file tree

Showing 6 changed files with 70 additions and 25 deletions.
diff --git a/.github/workflows/fets-challenge.yml b/.github/workflows/fets-challenge.yml
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: FeTS Challenge TaskRunner
+name: GaNDLF TaskRunner
 
 on:
   pull_request:
@@ -26,25 +26,49 @@ jobs:
         python -m pip install --upgrade pip
         pip install torch==2.1.0+cpu torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu
         pip install .
-    - name: Setup FeTS Challenge Prerequisites
-      uses: actions/checkout@master
-      with:
-          repository: MLCommons/GaNDLF
-          ref: master
-          fetch-depth: 1
-          path: fets_challenge
-    - name: FeTS Challenge Task Runner Test
+    - name: Install GaNDLF
+      run: |
+        git clone https://github.com/MLCommons/GaNDLF.git ./gandlf
+        cd gandlf
+        git fetch --tags
+        echo "Checkout the latest GaNDLF tag"
+        latestTag=$(git describe --tags "$(git rev-list --tags --max-count=1)")
+        git checkout $latestTag
+    - name: GaNDLF Task Runner Test
       run: |
-        cd fets_challenge
+        cd gandlf
         pwd
         pip install -e .
         pip uninstall onnx -y
-        # Download data and Split CSVs into training and validation
+        cat ./GANDLF/version.py
+        echo "Download data and Split CSVs into training and validation"
         python -c "from testing.test_full import test_generic_download_data, test_generic_constructTrainingCSV; test_generic_download_data(); test_generic_constructTrainingCSV()"
-        head -n 1 testing/data/train_3d_rad_segmentation.csv > /home/runner/work/openfl/openfl/valid.csv
-        tail -n +9 testing/data/train_3d_rad_segmentation.csv >> /home/runner/work/openfl/openfl/valid.csv
-        head -n 8 testing/data/train_3d_rad_segmentation.csv > /home/runner/work/openfl/openfl/train.csv
+        head -n 1 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/openfl/openfl/valid.csv
+        tail -n +9 testing/data/train_2d_rad_segmentation.csv >> /home/runner/work/openfl/openfl/valid.csv
+        head -n 8 testing/data/train_2d_rad_segmentation.csv > /home/runner/work/openfl/openfl/train.csv
+        cp testing/config_segmentation.yaml /home/runner/work/openfl/openfl/config_segmentation.yaml
+        echo "DEBUG display the config file"
+        cat /home/runner/work/openfl/openfl/config_segmentation.yaml
+        echo "Initialize OpenFL plan"
+        ## from docs
+        export WORKSPACE_TEMPLATE=gandlf_seg_test
+        export WORKSPACE_PATH=./my_federation
+        fx workspace create --prefix ${WORKSPACE_PATH} --template ${WORKSPACE_TEMPLATE}
+        cd ${WORKSPACE_PATH}
+        mkdir ./data/one
+        mkdir ./data/two
+        cp /home/runner/work/openfl/openfl/*.csv ./data/one/
+        cp /home/runner/work/openfl/openfl/*.csv ./data/two/
+        ## from docs
+        # fx plan initialize --gandlf_config ../testing/config_segmentation.yaml
         cd /home/runner/work/openfl/openfl
         ls
-        python -m tests.github.test_gandlf --template gandlf_seg_test --fed_workspace aggregator --col1 one --col2 two --rounds-to-train 1
+        file "/home/runner/work/openfl/openfl/config_segmentation.yaml"
+        ## for 2d data, only a single change is needed in the gandlf config
+        sed -i 's/# n_channels: 3/num_channels: 3/g' "/home/runner/work/openfl/openfl/config_segmentation.yaml"
+        ## for 3d data, the following changes are needed in the gandlf config -- commented out for now
+        # sed -i 's/dimension: 2/dimension: 3/g' "/home/runner/work/openfl/openfl/config_segmentation.yaml"
+        # sed -i 's/0,255/0,1/g' "/home/runner/work/openfl/openfl/config_segmentation.yaml"
+        # sed -i 's/128,128/32,32,32/g' "/home/runner/work/openfl/openfl/config_segmentation.yaml"
+        python -m tests.github.test_gandlf --template gandlf_seg_test --fed_workspace aggregator --col1 one --col2 two --rounds-to-train 1 --gandlf_config "/home/runner/work/openfl/openfl/config_segmentation.yaml"
         
diff --git a/.gitignore b/.gitignore
@@ -13,4 +13,6 @@ venv/*
 *.jpg
 *.crt
 *.key
-.eggs
+.eggs
+eggs/*
+*.pyi
diff --git a/openfl/federated/plan/plan.py b/openfl/federated/plan/plan.py
@@ -131,6 +131,8 @@ def parse(plan_config_path: Path, cols_config_path: Path = None,
                     extra={'markup': True})
 
                 gandlf_config = Plan.load(Path(gandlf_config_path))
+                # check for some defaults
+                gandlf_config['output_dir'] = gandlf_config.get('output_dir', '.')
                 plan.config['task_runner']['settings']['gandlf_config'] = gandlf_config
 
             plan.authorized_cols = Plan.load(cols_config_path).get(

diff --git a/openfl/federated/task/runner_gandlf.py b/openfl/federated/task/runner_gandlf.py
@@ -19,6 +19,7 @@
 from GANDLF.compute.generic import create_pytorch_objects
 from GANDLF.compute.training_loop import train_network
 from GANDLF.compute.forward_pass import validate_network
+from GANDLF.parseConfig import parseConfig
 
 
 class GaNDLFTaskRunner(TaskRunner):
@@ -37,6 +38,8 @@ def __init__(
         """
         super().__init__(**kwargs)
 
+        assert bool(gandlf_config), "gandlf_config must be specified"
+
         # allow pass-through of a gandlf config as a file or a dict
 
         train_csv = self.data_loader.train_csv
@@ -45,6 +48,11 @@ def __init__(
         if isinstance(gandlf_config, str) and os.path.exists(gandlf_config):
             gandlf_config = yaml.safe_load(open(gandlf_config, "r"))
 
+        try:
+            gandlf_config = parseConfig(gandlf_config)
+        except Exception:
+            self.logger.info("WARNING: GANDLF.parseConfig did not work as expected.")
+
         (
             model,
             optimizer,

diff --git a/openfl/interface/plan.py b/openfl/interface/plan.py
@@ -63,6 +63,8 @@ def initialize(context, plan_config, cols_config, data_config,
     plan_config = Path(plan_config).absolute()
     cols_config = Path(cols_config).absolute()
     data_config = Path(data_config).absolute()
+    if gandlf_config is not None:
+        gandlf_config = Path(gandlf_config).absolute()
 
     plan = Plan.parse(plan_config_path=plan_config,
                       cols_config_path=cols_config,
@@ -79,7 +81,6 @@ def initialize(context, plan_config, cols_config, data_config,
     #         exit('You must specify either a feature
     #         shape or authorized collaborator
     #         list in order for the script to determine the input layer shape')
-    print(plan.cols_data_paths)
 
     collaborator_cname = list(plan.cols_data_paths)[0]
 
@@ -105,23 +106,26 @@ def initialize(context, plan_config, cols_config, data_config,
 
     utils.dump_proto(model_proto=model_snap, fpath=init_state_path)
 
-    plan_origin = Plan.parse(plan_config, resolve=False).config
+    plan_origin = Plan.parse(plan_config_path=plan_config,
+                             gandlf_config_path=gandlf_config,
+                             resolve=False)
 
-    if (plan_origin['network']['settings']['agg_addr'] == 'auto'
+    if (plan_origin.config['network']['settings']['agg_addr'] == 'auto'
             or aggregator_address):
-        plan_origin['network']['settings']['agg_addr'] = aggregator_address or getfqdn_env()
+        plan_origin.config['network']['settings']['agg_addr'] = aggregator_address or getfqdn_env()
 
         logger.warn(f'Patching Aggregator Addr in Plan'
-                    f" 🠆 {plan_origin['network']['settings']['agg_addr']}")
+                    f" 🠆 {plan_origin.config['network']['settings']['agg_addr']}")
 
-        Plan.dump(plan_config, plan_origin)
+        Plan.dump(plan_config, plan_origin.config)
 
-    plan.config = plan_origin
+    if gandlf_config is not None:
+        Plan.dump(plan_config, plan_origin.config)
 
     # Record that plan with this hash has been initialized
     if 'plans' not in context.obj:
         context.obj['plans'] = []
-    context.obj['plans'].append(f'{plan_config.stem}_{plan.hash[:8]}')
+    context.obj['plans'].append(f'{plan_config.stem}_{plan_origin.hash[:8]}')
     logger.info(f"{context.obj['plans']}")
 
 

diff --git a/tests/github/test_gandlf.py b/tests/github/test_gandlf.py
@@ -28,6 +28,7 @@ def exec(command, directory):
     parser.add_argument('--rounds-to-train')
     parser.add_argument('--col1-data-path', default='data/one')
     parser.add_argument('--col2-data-path', default='data/two')
+    parser.add_argument('--gandlf_config', default=None)
     parser.add_argument('--ujjwal', action='store_true')
 
     origin_dir = Path().resolve()
@@ -49,7 +50,11 @@ def exec(command, directory):
             if re.match(r'.*\.csv$', entry.name):
                 shutil.copy(entry.path, Path.cwd().resolve() / 'data' / col1)
     # Initialize FL plan
-    check_call(['fx', 'plan', 'initialize', '-a', fqdn])
+    if args.gandlf_config:
+        check_call(['fx', 'plan', 'initialize', '-a', fqdn,
+                    '--gandlf_config', str(args.gandlf_config)])
+    else:
+        check_call(['fx', 'plan', 'initialize', '-a', fqdn])
     plan_path = Path('plan/plan.yaml')
     try:
         rounds_to_train = int(rounds_to_train)
-Original file line number
+Diff line change
@@ Expand Up / @@ -13,4 +13,6 @@ venv/* @@
     *.jpg
     *.crt
     *.key
-    .eggs
+    .eggs
+    eggs/*
+    *.pyi