From 5766437271e37d323909113ee225ef1e809a91a3 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Thu, 16 Jan 2025 09:30:51 -0800 Subject: [PATCH 1/8] Move to experiment-based Hydra config. Refactor logging. --- examples/cfd/lagrangian_mgn/conf/config.yaml | 99 +++++++++ .../conf/data/lagrangian_dataset.yaml | 29 +++ .../lagrangian_mgn/conf/experiment/goop.yaml | 45 +++++ .../lagrangian_mgn/conf/experiment/sand.yaml | 45 +++++ .../lagrangian_mgn/conf/experiment/water.yaml | 38 ++++ .../conf/experiment/water_3d.yaml | 48 +++++ .../conf/experiment/water_ramps.yaml | 38 ++++ .../conf/logging/python/default.yaml | 59 ++++++ .../cfd/lagrangian_mgn/conf/loss/mseloss.yaml | 18 ++ .../conf/lr_scheduler/cosine.yaml | 20 ++ .../conf/lr_scheduler/exponentiallr.yaml | 18 ++ .../conf/lr_scheduler/onecyclelr.yaml | 20 ++ .../cfd/lagrangian_mgn/conf/model/mgn.yaml | 33 +++ .../cfd/lagrangian_mgn/conf/model/mgn_2d.yaml | 22 ++ .../cfd/lagrangian_mgn/conf/model/mgn_3d.yaml | 22 ++ .../lagrangian_mgn/conf/optimizer/adam.yaml | 19 ++ .../conf/optimizer/fused_adam.yaml | 20 ++ examples/cfd/lagrangian_mgn/inference.py | 111 +++++------ examples/cfd/lagrangian_mgn/loggers.py | 188 ++++++++++++++++++ examples/cfd/lagrangian_mgn/train.py | 186 +++++++---------- modulus/datapipes/gnn/lagrangian_dataset.py | 27 ++- 21 files changed, 923 insertions(+), 182 deletions(-) create mode 100644 examples/cfd/lagrangian_mgn/conf/config.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/experiment/water.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/experiment/water_3d.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/experiment/water_ramps.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/logging/python/default.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/loss/mseloss.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/lr_scheduler/cosine.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/lr_scheduler/exponentiallr.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/lr_scheduler/onecyclelr.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/model/mgn.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/model/mgn_2d.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/model/mgn_3d.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/optimizer/adam.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/optimizer/fused_adam.yaml create mode 100644 examples/cfd/lagrangian_mgn/loggers.py diff --git a/examples/cfd/lagrangian_mgn/conf/config.yaml b/examples/cfd/lagrangian_mgn/conf/config.yaml new file mode 100644 index 0000000000..6364ed97a2 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/config.yaml @@ -0,0 +1,99 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - /logging/python: default + - override hydra/job_logging: disabled # We use rank-aware logger configuration instead. + - _self_ + +hydra: + run: + dir: ${output} + output_subdir: hydra # Default is .hydra which causes files not being uploaded in W&B. + +# Dimensionality of the problem (2D or 3D). +dim: 2 + +# Main output directory. +output: outputs + +# The directory to search for checkpoints to continue training. +resume_dir: ${output} + +# The dataset directory must be set either in command line or config. +data: + data_dir: ??? + train: + split: train + valid: + split: valid + test: + split: test + +# The loss should be set in the experiment. +loss: ??? + +# The optimizer should be set in the experiment. +optimizer: ??? + +# The scheduler should be set in the experiment. +lr_scheduler: ??? + +train: + batch_size: 20 + epochs: 20 + checkpoint_save_freq: 5 + dataloader: + batch_size: ${..batch_size} + shuffle: true + num_workers: 1 + pin_memory: true + drop_last: true + +test: + batch_size: 1 + device: cuda + dataloader: + batch_size: ${..batch_size} + shuffle: false + num_workers: 1 + pin_memory: true + drop_last: false + +compile: + enabled: false + args: + backend: inductor + +amp: + enabled: false + +loggers: + wandb: + _target_: loggers.WandBLogger + project: meshgraphnet + entity: modulus + name: l-mgn + group: l-mgn + mode: disabled + dir: ${output} + id: + wandb_key: + watch_model: false + +inference: + frame_skip: 1 + frame_interval: 1 diff --git a/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml new file mode 100644 index 0000000000..832060ddec --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: modulus.datapipes.gnn.lagrangian_dataset.LagrangianDataset +_convert_: all + +name: ${data.name} +data_dir: ${data.data_dir} +split: ??? +num_samples: ??? +num_history: 5 +num_steps: 600 +num_node_types: 6 +noise_std: 0.0003 +radius: 0.015 +dt: 0.0025 diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml new file mode 100644 index 0000000000..0b3a061c78 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml @@ -0,0 +1,45 @@ +# @package _global_ + +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - /data@data.train: lagrangian_dataset + - /data@data.valid: lagrangian_dataset + - /data@data.test: lagrangian_dataset + - /model: mgn_2d + - /loss: mseloss + - /optimizer: fused_adam + - /lr_scheduler: cosine + +data: + name: Goop + num_node_types: 9 + train: + num_samples: 1000 + num_steps: 395 # 400 - ${num_history} + num_node_types: ${..num_node_types} + valid: + num_samples: 30 + num_steps: 100 + num_node_types: ${..num_node_types} + test: + num_samples: 30 + num_steps: 100 + num_node_types: ${..num_node_types} + +model: + input_dim_nodes: 25 # 9 node types instead of 6. diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml new file mode 100644 index 0000000000..6d4064259a --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml @@ -0,0 +1,45 @@ +# @package _global_ + +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - /data@data.train: lagrangian_dataset + - /data@data.valid: lagrangian_dataset + - /data@data.test: lagrangian_dataset + - /model: mgn_2d + - /loss: mseloss + - /optimizer: fused_adam + - /lr_scheduler: cosine + +data: + name: Sand + num_node_types: 9 + train: + num_samples: 1000 + num_steps: 315 # 320 - ${num_history} + num_node_types: ${..num_node_types} + valid: + num_samples: 30 + num_steps: 100 + num_node_types: ${..num_node_types} + test: + num_samples: 30 + num_steps: 100 + num_node_types: ${..num_node_types} + +model: + input_dim_nodes: 25 # 9 node types instead of 6. diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/water.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/water.yaml new file mode 100644 index 0000000000..ed9d27b6bb --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/experiment/water.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - /data@data.train: lagrangian_dataset + - /data@data.valid: lagrangian_dataset + - /data@data.test: lagrangian_dataset + - /model: mgn_2d + - /loss: mseloss + - /optimizer: fused_adam + - /lr_scheduler: cosine + +data: + name: Water + train: + num_samples: 1000 + num_steps: 995 # 1000 - ${num_history} + valid: + num_samples: 30 + num_steps: 200 + test: + num_samples: 30 + num_steps: 200 diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/water_3d.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/water_3d.yaml new file mode 100644 index 0000000000..7e6f2cc2af --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/experiment/water_3d.yaml @@ -0,0 +1,48 @@ +# @package _global_ + +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - /data@data.train: lagrangian_dataset + - /data@data.valid: lagrangian_dataset + - /data@data.test: lagrangian_dataset + - /model: mgn_3d + - /loss: mseloss + - /optimizer: fused_adam + - /lr_scheduler: cosine + +dim: 3 + +data: + name: Water + dt: 0.005 + radius: 0.035 + train: + num_samples: 1000 + num_steps: 795 # 800 - ${num_history} + radius: ${..radius} + dt: ${..dt} + valid: + num_samples: 100 + num_steps: 195 + radius: ${..radius} + dt: ${..dt} + test: + num_samples: 100 + num_steps: 195 + radius: ${..radius} + dt: ${..dt} diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/water_ramps.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/water_ramps.yaml new file mode 100644 index 0000000000..22eac4aaf6 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/experiment/water_ramps.yaml @@ -0,0 +1,38 @@ +# @package _global_ + +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +defaults: + - /data@data.train: lagrangian_dataset + - /data@data.valid: lagrangian_dataset + - /data@data.test: lagrangian_dataset + - /model: mgn_2d + - /loss: mseloss + - /optimizer: fused_adam + - /lr_scheduler: cosine + +data: + name: WaterRamps + train: + num_samples: 1000 + num_steps: 595 # 600 - ${num_history} + valid: + num_samples: 30 + num_steps: 200 + test: + num_samples: 30 + num_steps: 200 diff --git a/examples/cfd/lagrangian_mgn/conf/logging/python/default.yaml b/examples/cfd/lagrangian_mgn/conf/logging/python/default.yaml new file mode 100644 index 0000000000..c7bafaa3af --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/logging/python/default.yaml @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Standard Python logging configuration, as described here: +# https://docs.python.org/3.10/library/logging.config.html + +version: 1 +disable_existing_loggers: false + +output: ??? +rank: ??? +rank0_only: true +base_filename: train + +formatters: + default: + (): loggers.TermColorFormatter + format: "[%(asctime)s - %(name)s - %(levelname)s] %(message)s" + datefmt: "%H:%M:%S" + log_colors: + DEBUG: blue + INFO: light_blue + WARNING: light_yellow + ERROR: light_red + CRITICAL: red + +handlers: + console: + class: logging.StreamHandler + level: ${...loggers.lmgn.level} + formatter: default + + file: + class: logging.FileHandler + filename: ${...output}/${...base_filename}_${...rank}.log + level: ${...loggers.lmgn.level} + formatter: default + +loggers: + root: + level: INFO + handlers: [console, file] + lmgn: + handlers: [console, file] + level: INFO + propagate: false diff --git a/examples/cfd/lagrangian_mgn/conf/loss/mseloss.yaml b/examples/cfd/lagrangian_mgn/conf/loss/mseloss.yaml new file mode 100644 index 0000000000..500ddd02b6 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/loss/mseloss.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +_target_: torch.nn.MSELoss +reduction: mean diff --git a/examples/cfd/lagrangian_mgn/conf/lr_scheduler/cosine.yaml b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/cosine.yaml new file mode 100644 index 0000000000..0a181df337 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/cosine.yaml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: torch.optim.lr_scheduler.CosineAnnealingLR + +T_max: # if not set via the command line, will be set in the code. +eta_min: 1e-6 diff --git a/examples/cfd/lagrangian_mgn/conf/lr_scheduler/exponentiallr.yaml b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/exponentiallr.yaml new file mode 100644 index 0000000000..92658f7add --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/exponentiallr.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: torch.optim.lr_scheduler.ExponentialLR +gamma: 0.99985 diff --git a/examples/cfd/lagrangian_mgn/conf/lr_scheduler/onecyclelr.yaml b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/onecyclelr.yaml new file mode 100644 index 0000000000..450b07d772 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/lr_scheduler/onecyclelr.yaml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: torch.optim.lr_scheduler.OneCycleLR + +max_lr: 1e-4 +total_steps: # if not set via the command line, will be set in the code. 
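For reference, the `T_max`/`total_steps` placeholders above are filled in at runtime. A condensed sketch of that logic, taken from the train.py changes later in this patch (`cfg` is the composed Hydra config; `optimizer` and `dataloader` are assumed to exist already):

```python
from hydra.utils import instantiate

# One optimizer step per batch, for the whole run.
num_iterations = cfg.train.epochs * len(dataloader)
lrs_with_num_iter = {
    "torch.optim.lr_scheduler.CosineAnnealingLR": "T_max",
    "torch.optim.lr_scheduler.OneCycleLR": "total_steps",
}
# Fill in the schedule length only if it was not set via the command line.
if (num_iter_key := lrs_with_num_iter.get(cfg.lr_scheduler._target_)) is not None:
    if cfg.lr_scheduler[num_iter_key] is None:
        cfg.lr_scheduler[num_iter_key] = num_iterations
scheduler = instantiate(cfg.lr_scheduler, optimizer)
```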
diff --git a/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml b/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml new file mode 100644 index 0000000000..3351cee5b3 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: modulus.models.meshgraphnet.MeshGraphNet +_convert_: all + +input_dim_nodes: ??? # can be set in 2D/3D versions of the model. +input_dim_edges: ??? +output_dim: ??? +processor_size: 10 +aggregation: sum +hidden_dim_node_encoder: 256 +hidden_dim_edge_encoder: 256 +hidden_dim_node_decoder: 256 +mlp_activation_fn: relu +do_concat_trick: false +num_processor_checkpoint_segments: 0 +recompute_activation: false + +# See MeshGraphNet implementation for more details and additional arguments. diff --git a/examples/cfd/lagrangian_mgn/conf/model/mgn_2d.yaml b/examples/cfd/lagrangian_mgn/conf/model/mgn_2d.yaml new file mode 100644 index 0000000000..7bd422ce23 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/model/mgn_2d.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - mgn # Use MGN model as a base. + +input_dim_nodes: 22 # 2 (pos) + 2*5 (history of velocity) + 4 boundary features + 6 (node type) +output_dim: 2 # 2 acceleration +input_dim_edges: 3 # 2 displacement + 1 distance diff --git a/examples/cfd/lagrangian_mgn/conf/model/mgn_3d.yaml b/examples/cfd/lagrangian_mgn/conf/model/mgn_3d.yaml new file mode 100644 index 0000000000..8f3396959f --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/model/mgn_3d.yaml @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - mgn # Use MGN model as a base. + +input_dim_nodes: 30 # 3 (pos) + 3*5 (history of velocity) + 6 boundary features + 6 (node type) +output_dim: 3 # 3 acceleration +input_dim_edges: 4 # 3 displacement + 1 distance diff --git a/examples/cfd/lagrangian_mgn/conf/optimizer/adam.yaml b/examples/cfd/lagrangian_mgn/conf/optimizer/adam.yaml new file mode 100644 index 0000000000..1f723d9133 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/optimizer/adam.yaml @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +_target_: torch.optim.Adam +lr: 1e-4 +weight_decay: 1e-5 diff --git a/examples/cfd/lagrangian_mgn/conf/optimizer/fused_adam.yaml b/examples/cfd/lagrangian_mgn/conf/optimizer/fused_adam.yaml new file mode 100644 index 0000000000..09f78682a5 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/optimizer/fused_adam.yaml @@ -0,0 +1,20 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +defaults: + - adam + +_target_: apex.optimizers.FusedAdam diff --git a/examples/cfd/lagrangian_mgn/inference.py b/examples/cfd/lagrangian_mgn/inference.py index d618922684..d8a119901f 100644 --- a/examples/cfd/lagrangian_mgn/inference.py +++ b/examples/cfd/lagrangian_mgn/inference.py @@ -14,56 +14,53 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging
 import os
-import time
+
 import hydra
-from hydra.utils import to_absolute_path
+from hydra.utils import instantiate, to_absolute_path
 
 import dgl
 from dgl.dataloading import GraphDataLoader
-import matplotlib.pyplot as plt
+
+import matplotlib
 from matplotlib import animation
-from matplotlib import tri as mtri
-from matplotlib.patches import Rectangle
-import matplotlib  #
+from matplotlib import pyplot as plt
 
 matplotlib.use("TkAgg")  # for plotting
-import numpy as np
-from networkx import radius
-from omegaconf import DictConfig
+from omegaconf import DictConfig, OmegaConf
+
 import torch
 
-from modulus.models.meshgraphnet import MeshGraphNet
-from modulus.datapipes.gnn.lagrangian_dataset import LagrangianDataset, graph_update
-from modulus.launch.logging import PythonLogger
+from modulus.datapipes.gnn.lagrangian_dataset import graph_update
 from modulus.launch.utils import load_checkpoint
 
+from loggers import init_python_logging
+
+
+logger = logging.getLogger("lmgn")
+
 
 class MGNRollout:
-    def __init__(self, cfg: DictConfig, logger: PythonLogger):
-        self.num_test_samples = cfg.num_test_samples
-        self.num_test_time_steps = cfg.num_test_time_steps
-        self.dim = cfg.num_output_features
-        self.frame_skip = cfg.frame_skip
+    def __init__(self, cfg: DictConfig):
+        self.num_steps = cfg.data.test.num_steps
+        self.dim = cfg.dim
+        self.frame_skip = cfg.inference.frame_skip
         self.num_history = 5
-        self.num_node_type = 6
+        self.num_node_type = cfg.data.test.num_node_types
         self.plotting_index = 0
-        self.radius = cfg.radius
+        self.radius = cfg.data.test.radius
 
         # set device
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        self.device = cfg.test.device
         logger.info(f"Using {self.device} device")
 
         # instantiate dataset
-        self.dataset = LagrangianDataset(
-            name="Water",
-            data_dir=to_absolute_path(cfg.data_dir),
-            split="valid",
-            num_samples=cfg.num_test_samples,
-            num_steps=cfg.num_test_time_steps,
-            radius=cfg.radius,
-        )
+        logger.info("Loading the test dataset...")
+        self.dataset = instantiate(cfg.data.test)
+        logger.info(f"Using {len(self.dataset)} test samples.")
+
         self.dim = self.dataset.dim
         self.dt = self.dataset.dt
         self.bound = self.dataset.bound
@@ -80,33 +77,24 @@ def __init__(self, cfg: DictConfig, logger: PythonLogger):
         # instantiate dataloader
         self.dataloader = GraphDataLoader(
             self.dataset,
-            batch_size=1,
-            shuffle=False,
-            drop_last=False,
+            **cfg.test.dataloader,
         )
 
         # instantiate the model
-        self.model = MeshGraphNet(
-            cfg.num_input_features,
-            cfg.num_edge_features,
-            cfg.num_output_features,
-            cfg.processor_size,
-            mlp_activation_fn=cfg.activation,
-            do_concat_trick=cfg.do_concat_trick,
-            num_processor_checkpoint_segments=cfg.num_processor_checkpoint_segments,
-            recompute_activation=cfg.recompute_activation,
-        )
-        if cfg.jit:
-            self.model = torch.jit.script(self.model).to(self.device)
-        else:
-            self.model = self.model.to(self.device)
+        logger.info("Creating the model...")
+        self.model = instantiate(cfg.model)
+
+        if cfg.compile.enabled:
+            self.model = torch.compile(self.model, **cfg.compile.args)
+        self.model = self.model.to(self.device)
 
-        # enable train mode
+        # set the model to evaluation mode
         self.model.eval()
 
         # load checkpoint
         load_checkpoint(
-            to_absolute_path(cfg.ckpt_path),
+            to_absolute_path(cfg.resume_dir),
             models=self.model,
             device=self.device,
         )
@@ -248,7 +236,7 @@ def init_animation2d(self, index=0):
 
     def animate2d(self, num):
         num *= self.frame_skip
-        num = num + self.plotting_index * self.num_test_time_steps
+        num = num + self.plotting_index * self.num_steps
node_type = self.node_type[num] node_type = ( torch.argmax(node_type, dim=1).numpy() / self.num_node_type @@ -290,7 +278,7 @@ def init_animation3d(self, index=0): def animate3d(self, num): num *= self.frame_skip - num = num + self.plotting_index * self.num_test_time_steps + num = num + self.plotting_index * self.num_steps node_type = self.node_type[num] node_type = ( torch.argmax(node_type, dim=1).numpy() / self.num_node_type @@ -335,28 +323,31 @@ def plot_error(self, pred, target): return plt -@hydra.main(version_base="1.3", config_path="conf", config_name="config_2d") +@hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: - logger = PythonLogger("main") # General python logger - logger.file_logging() + init_python_logging(cfg, base_filename="inference") + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + logger.info("Rollout started...") - rollout = MGNRollout(cfg, logger) + rollout = MGNRollout(cfg) # test on dataset rollout.predict() # unit test - # rollout.unit_test_example(t=cfg.num_test_time_steps) + # rollout.unit_test_example(t=cfg.num_steps) # compute the roll out loss pred = torch.stack([tensor.reshape(-1) for tensor in rollout.pred], dim=0) target = torch.stack([tensor.reshape(-1) for tensor in rollout.exact], dim=0) loss = torch.nn.functional.mse_loss(pred, target) - print(f"the rollout loss is {loss}") + logger.info(f"The rollout loss is {loss:.5f}") # plot the roll out loss error_plt = rollout.plot_error(pred, target) - error_plt.savefig("animations/error.png") + out_dir = os.path.join(cfg.output, "animations") + os.makedirs(out_dir, exist_ok=True) + error_plt.savefig(os.path.join(out_dir, "error.png")) # plot if cfg.dim == 2: @@ -364,19 +355,19 @@ def main(cfg: DictConfig) -> None: ani = animation.FuncAnimation( rollout.fig, rollout.animate2d, - frames=(cfg.num_test_time_steps - 5) // cfg.frame_skip, - interval=cfg.frame_interval, + frames=(cfg.data.test.num_steps - 5) // cfg.inference.frame_skip, + interval=cfg.inference.frame_interval, ) elif cfg.dim == 3: rollout.init_animation3d(index=0) ani = animation.FuncAnimation( rollout.fig, rollout.animate3d, - frames=(cfg.num_test_time_steps - 5) // cfg.frame_skip, - interval=cfg.frame_interval, + frames=(cfg.data.test.num_steps - 5) // cfg.inference.frame_skip, + interval=cfg.inference.frame_interval, ) - ani.save("animations/animation.gif") + ani.save(os.path.join(out_dir, "animation.gif")) logger.info(f"Created animation") diff --git a/examples/cfd/lagrangian_mgn/loggers.py b/examples/cfd/lagrangian_mgn/loggers.py new file mode 100644 index 0000000000..5a768a441f --- /dev/null +++ b/examples/cfd/lagrangian_mgn/loggers.py @@ -0,0 +1,188 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+from abc import ABC, abstractmethod
+import functools
+import logging
+import logging.config
+import os
+from typing import Any, Mapping, Optional
+
+from hydra.utils import instantiate
+from omegaconf import DictConfig, OmegaConf
+
+from termcolor import colored
+
+from torch import nn
+
+import wandb
+
+from modulus.distributed import DistributedManager
+
+logger = logging.getLogger("lmgn")
+
+
+class TermColorFormatter(logging.Formatter):
+    """Custom logging formatter that colors the log output based on log level."""
+
+    def __init__(
+        self,
+        fmt: Optional[str] = None,
+        datefmt: Optional[str] = None,
+        style: str = "%",
+        validate: bool = True,
+        log_colors: Optional[Mapping[str, str]] = None,
+        *,
+        defaults=None,
+    ):
+        super().__init__(fmt, datefmt, style, validate, defaults=defaults)
+        self.log_colors = log_colors if log_colors is not None else {}
+
+    def format(self, record):
+        log_message = super().format(record)
+        color = self.log_colors.get(record.levelname, "white")
+        return colored(log_message, color)
+
+
+def init_python_logging(
+    config: DictConfig, rank: int = 0, base_filename: str = "train"
+) -> None:
+    """Initializes Python logging."""
+
+    pylog_cfg = OmegaConf.select(config, "logging.python")
+    if pylog_cfg is None:
+        return
+
+    # Set up Python loggers.
+    pylog_cfg.output = config.output
+    pylog_cfg.rank = rank
+    pylog_cfg.base_filename = base_filename
+    # Enable logging only on rank 0, if requested.
+    if pylog_cfg.rank0_only and pylog_cfg.rank != 0:
+        pylog_cfg.handlers = {}
+        for lg in pylog_cfg.loggers.values():
+            lg.handlers = []
+    # Configure logging.
+    logging.config.dictConfig(OmegaConf.to_container(pylog_cfg, resolve=True))
+
+
+def rank0(func):
+    """Decorator that runs the wrapped function only in the rank 0 process."""
+
+    @functools.wraps(func)
+    def rank0_only(*args, **kwargs):
+        if DistributedManager().rank == 0:
+            func(*args, **kwargs)
+
+    return rank0_only
+
+
+class ExperimentLogger(ABC):
+    """Provides a unified interface to an experiment logger."""
+
+    @abstractmethod
+    def log_scalar(self, tag: str, value: float, step: int) -> None:
+        pass
+
+    @abstractmethod
+    def log_image(self, tag: str, value, step: int) -> None:
+        pass
+
+    @abstractmethod
+    def log(self, data: Mapping[str, Any], step: int) -> None:
+        pass
+
+    @abstractmethod
+    def watch_model(self, model: nn.Module) -> None:
+        pass
+
+
+class WandBLogger(ExperimentLogger):
+    """Wrapper for the Weights & Biases logger."""
+
+    def __init__(self, **kwargs) -> None:
+        if DistributedManager().rank != 0:
+            return
+
+        if (wandb_key := kwargs.pop("wandb_key", None)) is not None:
+            logger.warning("Passing the W&B key via the config is not recommended.")
+            wandb.login(key=wandb_key)
+
+        # If an id is not provided to resume the experiment, create a new id
+        # if wandb_id.txt does not exist; otherwise, load the id from the file.
+        if (wandb_id := kwargs.pop("id", None)) is None:
+            wandb_id_file = os.path.join(kwargs["dir"], "wandb_id.txt")
+            if not os.path.exists(wandb_id_file):
+                wandb_id = wandb.util.generate_id()
+                with open(wandb_id_file, "w", encoding="utf-8") as f:
+                    f.write(wandb_id)
+                logger.info(f"Starting new wandb run: {wandb_id}")
+            else:
+                with open(wandb_id_file, encoding="utf-8") as f:
+                    wandb_id = f.read()
+                logger.info(f"Resuming wandb run: {wandb_id}")
+        resume = kwargs.pop("resume", "allow")
+
+        self.watch = kwargs.pop("watch_model", False)
+
+        wandb.init(**kwargs, id=wandb_id, resume=resume)
+
+    def log_scalar(self, tag: str, value: float, step: int) -> None:
+        wandb.log({tag: value}, step=step)
+
+    def log_image(self, tag: str, value, step: int) -> None:
+        wandb.log({tag: wandb.Image(value)}, step=step)
+
+    def log(self, data: Mapping[str, Any], step: int) -> None:
+        wandb.log(data, step=step)
+
+    def watch_model(self, model: nn.Module) -> None:
+        if self.watch:
+            wandb.watch(model)
+
+
+class CompositeLogger(ExperimentLogger):
+    """Wraps a collection of loggers behind a single, unified interface."""
+
+    loggers: Optional[dict[str, ExperimentLogger]] = None
+
+    def __init__(self, config: DictConfig) -> None:
+        if DistributedManager().rank != 0:
+            self.loggers = {}
+            return
+        # Instantiate loggers only when running on rank 0.
+        self.loggers = instantiate(config.loggers)
+
+    @rank0
+    def log_scalar(self, tag: str, value: float, step: int) -> None:
+        for lg in self.loggers.values():
+            lg.log_scalar(tag, value, step)
+
+    @rank0
+    def log_image(self, tag: str, value, step: int) -> None:
+        for lg in self.loggers.values():
+            lg.log_image(tag, value, step)
+
+    @rank0
+    def log(self, data: Mapping[str, Any], step: int) -> None:
+        for lg in self.loggers.values():
+            lg.log(data, step)
+
+    @rank0
+    def watch_model(self, model: nn.Module) -> None:
+        for lg in self.loggers.values():
+            lg.watch_model(model)
diff --git a/examples/cfd/lagrangian_mgn/train.py b/examples/cfd/lagrangian_mgn/train.py
index ece2c71ebc..aadede45d0 100644
--- a/examples/cfd/lagrangian_mgn/train.py
+++ b/examples/cfd/lagrangian_mgn/train.py
@@ -14,98 +14,79 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import time
-
-import hydra
-from hydra.utils import to_absolute_path
-import torch
-import wandb
+import logging
 import time
+from typing import Optional
 
 from dgl.dataloading import GraphDataLoader
-from omegaconf import DictConfig
+import hydra
+from hydra.utils import instantiate, to_absolute_path
+from omegaconf import DictConfig, OmegaConf
+
+import torch
 from torch.cuda.amp import GradScaler, autocast
 from torch.nn.parallel import DistributedDataParallel
 
-from modulus.datapipes.gnn.lagrangian_dataset import LagrangianDataset
 from modulus.distributed.manager import DistributedManager
-from modulus.launch.logging import (
-    PythonLogger,
-    RankZeroLoggingWrapper,
-    initialize_wandb,
-)
 from modulus.launch.utils import load_checkpoint, save_checkpoint
-from modulus.models.meshgraphnet import MeshGraphNet
+
+from loggers import CompositeLogger, ExperimentLogger, init_python_logging
+
+
+logger = logging.getLogger("lmgn")
+
+# Experiment logger will be set later during initialization.
+elogger: Optional[ExperimentLogger] = None
 
 
 class MGNTrainer:
-    def __init__(self, cfg: DictConfig, rank_zero_logger: RankZeroLoggingWrapper):
+    def __init__(self, cfg: DictConfig):
         assert DistributedManager.is_initialized()
         self.dist = DistributedManager()
-        self.amp = cfg.amp
-        self.radius = cfg.radius
-        self.dt = cfg.dt
+
+        self.dt = cfg.data.train.dt
         self.dim = cfg.dim
-        self.gravity = torch.zeros(self.dim, device=self.dist.device)
-        self.gravity[-1] = -9.8
+
+        self.amp = cfg.amp.enabled
 
         # MGN with recompute_activation currently supports only SiLU activation function.
-        mlp_act = cfg.activation
-        if cfg.recompute_activation and cfg.activation.lower() != "silu":
+        mlp_act = cfg.model.mlp_activation_fn
+        if cfg.model.recompute_activation and mlp_act.lower() != "silu":
             raise ValueError(
                 f"recompute_activation only supports SiLU activation function, "
-                f"but got {cfg.activation}. Please either set activation='silu' "
+                f"but got {mlp_act}. Please either set model.mlp_activation_fn='silu' "
                 f"or disable recompute_activation."
             )
 
         # instantiate dataset
-        self.dataset = LagrangianDataset(
-            name="Water",
-            data_dir=to_absolute_path(cfg.data_dir),
-            split="train",
-            num_samples=cfg.num_training_samples,
-            num_steps=cfg.num_training_time_steps,
-            radius=cfg.radius,
-            dt=cfg.dt,
-        )
+        logger.info("Loading the training dataset...")
+        self.dataset = instantiate(cfg.data.train)
+        logger.info(f"Using {len(self.dataset)} training samples.")
+
         self.dataset.set_normalizer_device(device=self.dist.device)
         self.time_integrator = self.dataset.time_integrator
 
         # instantiate dataloader
         self.dataloader = GraphDataLoader(
             self.dataset,
-            batch_size=cfg.batch_size,
-            shuffle=True,
-            drop_last=True,
-            pin_memory=True,
+            **cfg.train.dataloader,
             use_ddp=self.dist.world_size > 1,
-            num_workers=cfg.num_dataloader_workers,
         )
 
         # instantiate the model
-        self.model = MeshGraphNet(
-            cfg.num_input_features,
-            cfg.num_edge_features,
-            cfg.num_output_features,
-            cfg.processor_size,
-            mlp_activation_fn=mlp_act,
-            do_concat_trick=cfg.do_concat_trick,
-            num_processor_checkpoint_segments=cfg.num_processor_checkpoint_segments,
-            recompute_activation=cfg.recompute_activation,
-            # aggregation="mean",
-        )
-        if cfg.jit:
-            if not self.model.meta.jit:
-                raise ValueError("MeshGraphNet is not yet JIT-compatible.")
-            self.model = torch.jit.script(self.model).to(self.dist.device)
+        logger.info("Creating the model...")
+        self.model = instantiate(cfg.model)
+
+        if cfg.compile.enabled:
+            self.model = torch.compile(self.model, **cfg.compile.args).to(
+                self.dist.device
+            )
         else:
             self.model = self.model.to(self.dist.device)
 
-        if cfg.watch_model and not cfg.jit and self.dist.rank == 0:
-            wandb.watch(self.model)
+        elogger.watch_model(self.model)
 
         # distributed data parallel for multi-node training
-        if self.dist.world_size > 1:
+        if self.dist.distributed:
             self.model = DistributedDataParallel(
                 self.model,
                 device_ids=[self.dist.local_rank],
@@ -117,49 +98,37 @@ def __init__(self, cfg: DictConfig, rank_zero_logger: RankZeroLoggingWrapper):
         # enable train mode
         self.model.train()
 
-        # instantiate loss, optimizer, and scheduler
-        # self.criterion = self.l2loss
-        self.criterion = torch.nn.MSELoss()
+        # instantiate loss
+        self.criterion = instantiate(cfg.loss)
 
-        self.optimizer = None
-        try:
-            if cfg.use_apex:
-                from apex.optimizers import FusedAdam
+        # instantiate optimizer and scheduler
+        self.optimizer = instantiate(cfg.optimizer, self.model.parameters())
 
-                self.optimizer = FusedAdam(self.model.parameters(), lr=cfg.lr)
-        except ImportError:
rank_zero_logger.warning( - "NVIDIA Apex (https://github.com/nvidia/apex) is not installed, " - "FusedAdam optimizer will not be used." - ) - if self.optimizer is None: - self.optimizer = torch.optim.Adam( - self.model.parameters(), lr=cfg.lr, weight_decay=1e-5 - ) - rank_zero_logger.info(f"Using {self.optimizer.__class__.__name__} optimizer") + num_iterations = cfg.train.epochs * len(self.dataloader) + lrs_cfg = cfg.lr_scheduler + lrs_with_num_iter = { + "torch.optim.lr_scheduler.CosineAnnealingLR": "T_max", + "torch.optim.lr_scheduler.OneCycleLR": "total_steps", + } + if (num_iter_key := lrs_with_num_iter.get(lrs_cfg._target_)) is not None: + if lrs_cfg[num_iter_key] is None: + lrs_cfg[num_iter_key] = num_iterations + self.scheduler = instantiate(cfg.lr_scheduler, self.optimizer) - num_iteration = ( - cfg.epochs - * cfg.num_training_samples - * cfg.num_training_time_steps - // cfg.batch_size - ) - self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( - self.optimizer, T_max=num_iteration, eta_min=cfg.lr_min - ) self.scaler = GradScaler() # load checkpoint if self.dist.world_size > 1: torch.distributed.barrier() self.epoch_init = load_checkpoint( - to_absolute_path(cfg.ckpt_path), + to_absolute_path(cfg.resume_dir), models=self.model, optimizer=self.optimizer, scheduler=self.scheduler, scaler=self.scaler, device=self.dist.device, ) + self.epoch_init += 1 def train(self, graph): graph = graph.to(self.dist.device) @@ -202,38 +171,24 @@ def backward(self, loss): loss.backward() self.optimizer.step() - def l2loss(self, input, target, p=2, eps=1e-5): - input = input.flatten(start_dim=1) - target = target.flatten(start_dim=1) - l2loss = torch.norm(input - target, dim=1, p=p) / ( - torch.norm(target, dim=1, p=p) + eps - ) - l2loss = torch.mean(l2loss) - return l2loss - -@hydra.main(version_base="1.3", config_path="conf", config_name="config_2d") +@hydra.main(version_base="1.3", config_path="conf", config_name="config") def main(cfg: DictConfig) -> None: # initialize distributed manager DistributedManager.initialize() dist = DistributedManager() + init_python_logging(cfg, dist.rank) + logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + # Initialize loggers. 
- wandb.login(key=cfg.wandb_key) - initialize_wandb( - project=cfg.wandb_project, - entity=cfg.wandb_entity, - name=cfg.wandb_name, - mode=cfg.wandb_mode, - ) # Wandb logger - logger = PythonLogger("main") # General python logger - rank_zero_logger = RankZeroLoggingWrapper(logger, dist) # Rank 0 logger - rank_zero_logger.file_logging() - - trainer = MGNTrainer(cfg, rank_zero_logger) + global elogger + elogger = CompositeLogger(cfg) + + trainer = MGNTrainer(cfg) start = time.time() - rank_zero_logger.info("Training started...") - for epoch in range(trainer.epoch_init, cfg.epochs): + logger.info("Training started...") + for epoch in range(trainer.epoch_init, cfg.train.epochs + 1): loss_list = [] loss_pos_list = [] loss_vel_list = [] @@ -248,12 +203,14 @@ def main(cfg: DictConfig) -> None: mean_loss_pos = sum(loss_pos_list) / len(loss_pos_list) mean_loss_vel = sum(loss_vel_list) / len(loss_vel_list) mean_loss_acc = sum(loss_acc_list) / len(loss_acc_list) - rank_zero_logger.info( - f"epoch: {epoch}, loss: {mean_loss:10.3e}, " + last_lr = trainer.scheduler.get_last_lr()[0] + logger.info( + f"epoch: {epoch:5,}, loss: {mean_loss:10.3e}, " f"position loss: {mean_loss_pos:10.3e}, " f"velocity loss: {mean_loss_vel:10.3e}, " f"acceleration loss: {mean_loss_acc:10.3e}, " - f"time per epoch: {(time.time()-start):10.3e}" + f"lr: {last_lr:10.3e}, " + f"time per epoch: {(time.time() - start):10.3e}" ) losses = { "loss": mean_loss, @@ -261,14 +218,15 @@ def main(cfg: DictConfig) -> None: "loss_vel": mean_loss_vel, "loss_acc": mean_loss_acc, } - wandb.log(losses) + elogger.log(losses, epoch) + elogger.log_scalar("lr", last_lr, epoch) # save checkpoint if dist.world_size > 1: torch.distributed.barrier() - if dist.rank == 0: + if dist.rank == 0 and epoch % cfg.train.checkpoint_save_freq == 0: save_checkpoint( - to_absolute_path(cfg.ckpt_path), + cfg.output, models=trainer.model, optimizer=trainer.optimizer, scheduler=trainer.scheduler, @@ -277,7 +235,7 @@ def main(cfg: DictConfig) -> None: ) logger.info(f"Saved model on rank {dist.rank}") start = time.time() - rank_zero_logger.info("Training completed!") + logger.info("Training completed!") if __name__ == "__main__": diff --git a/modulus/datapipes/gnn/lagrangian_dataset.py b/modulus/datapipes/gnn/lagrangian_dataset.py index 1207cea5c0..9bd2376104 100644 --- a/modulus/datapipes/gnn/lagrangian_dataset.py +++ b/modulus/datapipes/gnn/lagrangian_dataset.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- +# ruff: noqa: S101 import functools import json +import logging import os import torch @@ -44,6 +45,8 @@ # Hide GPU from visible devices for TF tf.config.set_visible_devices([], "GPU") +logger = logging.getLogger("lmgn") + def compute_edge_index(mesh_pos, radius): # compute the graph connectivity using pairwise distance @@ -96,7 +99,7 @@ class LagrangianDataset(DGLDataset): data_dir : _type_, optional Specifying the directory that stores the raw data in .TFRecord format., by default None split : str, optional - Dataset split ["train", "eval", "test"], by default "train" + Dataset split ["train", "valid", "test"], by default "train" num_samples : int, optional Number of samples, by default 1000 num_steps : int, optional @@ -121,6 +124,7 @@ def __init__( radius=0.015, dt=0.0025, bound=[0.1, 0.9], + num_node_types=6, force_reload=False, verbose=False, ): @@ -136,6 +140,7 @@ def __init__( self.num_steps = num_steps self.noise_std = noise_std self.length = num_samples * (num_steps - 1) + self.num_node_types = num_node_types path_metadata = os.path.join(data_dir, "metadata.json") with open(path_metadata, "r") as file: @@ -150,11 +155,13 @@ def __init__( self.acc_std = torch.tensor(metadata["acc_std"]).reshape(1, self.dim) # override from config + # TODO(akamenev): this is an unconditional overwrite of values that + # could be potentially set from metadata.json (see above). self.radius = radius self.dt = dt # create the node features - print(f"Preparing the {split} dataset...") + logger.info(f"Preparing the {split} dataset...") dataset_iterator = self._load_tf_data(self.data_dir, self.split) self.node_type = [] self.rollout_mask = [] @@ -162,12 +169,16 @@ def __init__( for i in range(self.num_samples): data_np = dataset_iterator.get_next() + total_steps = self.num_steps + self.num_history + 1 position = torch.from_numpy( - data_np[1]["position"][: self.num_steps + self.num_history + 1].numpy() - ) # (600, 1515, 2), dtype=torch.float + data_np[1]["position"][:total_steps].numpy() + ) # (t, num_particles, 2) + assert position.shape[0] == total_steps, f"{total_steps=}, {i=}" + node_type = torch.from_numpy( data_np[0]["particle_type"].numpy() - ) # (1515,), dtype=torch.long + ) # (num_particles,) + assert node_type.shape[0] == position.shape[1], f"{i=}" # noise_mask.append(torch.eq(node_type, torch.zeros_like(node_type))) @@ -184,10 +195,10 @@ def __init__( features["velocity"] = velocity[: self.num_steps + self.num_history] features["acceleration"] = acceleration[: self.num_steps + self.num_history] - self.node_type.append(F.one_hot(node_type, num_classes=6)) + self.node_type.append(F.one_hot(node_type, num_classes=self.num_node_types)) self.node_features.append(features) - print("dataset preparation completes") + logger.info("Finished dataset preparation.") def __getitem__(self, idx): gidx = idx // (self.num_steps - 1) # graph index From f67179324f4249e88e8dda1f0ab65d04da32b8ab Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Tue, 28 Jan 2025 11:24:29 -0800 Subject: [PATCH 2/8] Update README and configs. 
---
 examples/cfd/lagrangian_mgn/README.md        | 126 ++++++++++++------
 examples/cfd/lagrangian_mgn/conf/config.yaml |   3 +-
 .../conf/data/lagrangian_dataset.yaml        |   2 +-
 .../lagrangian_mgn/conf/experiment/goop.yaml |   3 -
 .../lagrangian_mgn/conf/experiment/sand.yaml |   3 -
 .../cfd/lagrangian_mgn/conf/model/mgn.yaml   |   1 +
 6 files changed, 87 insertions(+), 51 deletions(-)

diff --git a/examples/cfd/lagrangian_mgn/README.md b/examples/cfd/lagrangian_mgn/README.md
index da67d559e8..50027cd45e 100644
--- a/examples/cfd/lagrangian_mgn/README.md
+++ b/examples/cfd/lagrangian_mgn/README.md
@@ -1,11 +1,9 @@
 # MeshGraphNet with Lagrangian mesh
 
-This is an example of Meshgraphnet for particle-based simulation on the
-water dataset based on
-
-in PyTorch.
-It demonstrates how to train a Graph Neural Network (GNN) for evaluation
-of the Lagrangian fluid.
+This is an example of MeshGraphNet for particle-based simulation, based on the
+[Learning to Simulate](https://sites.google.com/view/learning-to-simulate/)
+work. It demonstrates how to use Modulus to train a Graph Neural Network (GNN)
+to simulate Lagrangian fluids, solids, and deformable materials.
 
 ## Problem overview
 
@@ -22,38 +20,46 @@ steps to maintain physically valid prediction.
 
 ## Dataset
 
-We rely on [DeepMind's particle physics datasets](https://sites.google.com/view/learning-to-simulate)
-for this example. They datasets are particle-based simulation of fluid splashing
-and bouncing in a box or cube.
+For this example, we use [DeepMind's particle physics datasets](https://sites.google.com/view/learning-to-simulate).
+Some of these datasets contain particle-based simulations of fluid splashing and bouncing
+within a box or cube, while others use materials like sand or goop.
+There are a total of 17 datasets, with some of them listed below:
 
 | Datasets     | Num Particles | Num Time Steps | dt       | Ground Truth Simulator |
 |--------------|---------------|----------------|----------|------------------------|
 | Water-3D     | 14k           | 800            | 5ms      | SPH                    |
 | Water-2D     | 2k            | 1000           | 2.5ms    | MPM                    |
 | WaterRamp    | 2.5k          | 600            | 2.5ms    | MPM                    |
+| Sand         | 2k            | 320            | 2.5ms    | MPM                    |
+| Goop         | 1.9k          | 400            | 2.5ms    | MPM                    |
+
+See section **B.1** of the [original paper](https://arxiv.org/abs/2002.09405) for details.
 
 ## Model overview and architecture
 
-In this model, we utilize a Meshgraphnet to capture the fluid system’s dynamics.
-We represent the system as a graph, with vertices corresponding to fluid particles
-and edges representing their interactions. The model is autoregressive, using
-historical data to predict future states. The input features for the vertices
-include the current position, current velocity, node type (e.g., fluid, sand,
-boundary), and historical velocity. The model's output is the acceleration,
-defined as the difference between the current and next velocity. Both velocity
-and acceleration are derived from the position sequence and normalized to a
-standard Gaussian distribution for consistency.
+This model uses MeshGraphNet to capture the dynamics of the fluid system.
+The system is represented as a graph, where vertices correspond to fluid particles,
+and edges represent their interactions. The model is autoregressive,
+utilizing historical data to predict future states. Input features for the vertices
+include current position, velocity, node type (e.g., fluid, sand, boundary),
+and historical velocity. The model’s output is acceleration, defined as the difference
+between the current and next velocity.
+Both velocity and acceleration are derived from
+the position sequence and normalized to a standard Gaussian distribution
+for consistency.
 
 For computational efficiency, we do not explicitly construct wall nodes for
 square or cubic domains. Instead, we assign a wall feature to each interior
 particle node, representing its distance from the domain boundaries. For a
-system dimensionality of \(d = 2\) or \(d = 3\), the features are structured
+system dimensionality of $d = 2$ or $d = 3$, the features are structured
 as follows:
 
-- **Node features**: position (\(d\)), historical velocity (\(t \times d\)),
-  one-hot encoding of node type (6), wall feature (\(2 \times d\))
-- **Edge features**: displacement (\(d\)), distance (1)
-- **Node target**: acceleration (\(d\))
+- **Node features**:
+  - position ($d$)
+  - historical velocity ($t \times d$)
+  - one-hot encoding of node type (e.g., 6)
+  - wall feature ($2 \times d$)
+- **Edge features**: displacement ($d$), distance (1)
+- **Node target**: acceleration ($d$)
 
 We construct edges based on a predefined radius, connecting pairs of particle
 nodes if their pairwise distance is within this radius. During training, we
@@ -65,54 +71,88 @@ a small amount of noise is added during training.
 
-The model uses a hidden dimensionality of 128 for the encoder, processor, and
-decoder. The encoder and decoder each contain two hidden layers, while the
-processor consists of eight message-passing layers. We use a batch size of
-20 per GPU, and summation aggregation is applied for message passing in the
-processor. The learning rate is set to 0.0001 and decays exponentially with
-a rate of 0.9999991. These hyperparameters can be configured in the config file.
+The model uses a hidden dimensionality of 256 for the encoder and decoder and
+128 for the processor. The encoder and decoder each contain two hidden layers,
+while the processor consists of ten message-passing layers. We use a batch size of
+20 per GPU (for the Water dataset), and summation aggregation is applied for
+message passing in the processor. The learning rate is set to 0.0001 and decays
+using a cosine annealing schedule. These hyperparameters can be configured via
+the command line or in the config file.
 
 ## Getting Started
 
 This example requires the `tensorflow` library to load the data in the `.tfrecord`
-format. Install with
+format. Install with:
 
 ```bash
-pip install tensorflow
+pip install "tensorflow<=2.17.1"
 ```
 
-To download the data from DeepMind's repo, run
+To download the data from DeepMind's repo, run:
 
 ```bash
 cd raw_dataset
 bash download_dataset.sh Water /data/
 ```
 
-Change the data path in `conf/config_2d.yaml` correspondingly
+This example uses [Hydra](https://hydra.cc/docs/intro/) for [experiment](https://hydra.cc/docs/patterns/configuring_experiments/)
+configuration. Hydra offers a convenient way to modify nearly any experiment parameter,
+such as dataset settings, model configurations, and optimizer options,
+either through the command line or config files.
+
+To view the full set of training script options, run the following command:
+
+```bash
+python train.py --help
+```
 
-To train the model, run
+If you encounter issues with the Hydra config, you may receive an error message
+that isn’t very helpful. In that case, set the `HYDRA_FULL_ERROR=1` environment
+variable for more detailed error information:
 
 ```bash
-python train.py
+HYDRA_FULL_ERROR=1 python train.py ...
 ```
 
-Progress and loss logs can be monitored using Weights & Biases. To activatethat,
-set `wandb_mode` to `online` in the `conf/config_2d.yaml` This requires to have an active
-Weights & Biases account. You also need to provide your API key in the config file.
+To train the model with the Water dataset, run: ```bash -wandb_key: +python train.py +experiment=water data.data_dir=/data/Water ``` -The URL to the dashboard will be displayed in the terminal after the run is launched. -Alternatively, the logging utility in `train.py` can be switched to MLFlow. +Progress and loss logs can be monitored using Weights & Biases. To activate that, +set `loggers.wandb.mode` to `online` in the command line: + +```bash +python train.py +experiment=water data.data_dir=/data/Water loggers.wandb.mode=online +``` -Once the model is trained, run +An active Weights & Biases account is required. You will also need to set your +API key either through the command line option `loggers.wandb.wandb_key` +or by using the `WANDB_API_KEY` environment variable: ```bash -python inference.py +export WANDB_API_KEY=key +python train.py ... ``` -This will save the predictions for the test dataset in `.gif` format in the `animations` -directory. +## Inference + +The inference script, `inference.py`, also supports Hydra configuration, ensuring +consistency between training and inference runs. + +Once the model is trained, run the following command: + +```bash +python inference.py +experiment=water \ + data.data_dir=/data/Water \ + data.test.num_samples=1 \ + resume_dir=/data/models/lmgn/water \ + output=/data/models/lmgn/water/inference +``` + +Use the `resume_dir` parameter to specify the location of the model checkpoints. + +This will save the predictions for the test dataset as `.gif` files in the +`/data/models/lmgn/water/inference/animations` directory. ## References diff --git a/examples/cfd/lagrangian_mgn/conf/config.yaml b/examples/cfd/lagrangian_mgn/conf/config.yaml index 6364ed97a2..9cf658dd97 100644 --- a/examples/cfd/lagrangian_mgn/conf/config.yaml +++ b/examples/cfd/lagrangian_mgn/conf/config.yaml @@ -36,6 +36,7 @@ resume_dir: ${output} # The dataset directory must be set either in command line or config. data: data_dir: ??? + num_node_types: 6 train: split: train valid: @@ -59,7 +60,7 @@ train: dataloader: batch_size: ${..batch_size} shuffle: true - num_workers: 1 + num_workers: 8 pin_memory: true drop_last: true diff --git a/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml index 832060ddec..8b94f3bc5c 100644 --- a/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml +++ b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml @@ -23,7 +23,7 @@ split: ??? num_samples: ??? num_history: 5 num_steps: 600 -num_node_types: 6 +num_node_types: ${..num_node_types} noise_std: 0.0003 radius: 0.015 dt: 0.0025 diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml index 0b3a061c78..d82f34a1ec 100644 --- a/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml +++ b/examples/cfd/lagrangian_mgn/conf/experiment/goop.yaml @@ -31,15 +31,12 @@ data: train: num_samples: 1000 num_steps: 395 # 400 - ${num_history} - num_node_types: ${..num_node_types} valid: num_samples: 30 num_steps: 100 - num_node_types: ${..num_node_types} test: num_samples: 30 num_steps: 100 - num_node_types: ${..num_node_types} model: input_dim_nodes: 25 # 9 node types instead of 6. 
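The `input_dim_nodes` values in these experiment configs follow directly from the feature layout described in the README: position ($d$), velocity history (`num_history` $\times d$), one-hot node type, and wall features ($2 \times d$). A quick sanity check of that arithmetic (a standalone sketch; `node_input_dim` is a hypothetical helper, not part of the example code):

```python
def node_input_dim(d: int, num_history: int, num_node_types: int) -> int:
    """Per-node input width: position (d) + velocity history (num_history * d)
    + one-hot node type + wall distances (2 * d)."""
    return d + num_history * d + num_node_types + 2 * d


# Water (2D, 6 node types): 2 + 10 + 6 + 4 = 22
assert node_input_dim(d=2, num_history=5, num_node_types=6) == 22
# Goop and Sand (2D, 9 node types): 2 + 10 + 9 + 4 = 25, matching the configs above
assert node_input_dim(d=2, num_history=5, num_node_types=9) == 25
```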
diff --git a/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml b/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml index 6d4064259a..327d909f4b 100644 --- a/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml +++ b/examples/cfd/lagrangian_mgn/conf/experiment/sand.yaml @@ -31,15 +31,12 @@ data: train: num_samples: 1000 num_steps: 315 # 320 - ${num_history} - num_node_types: ${..num_node_types} valid: num_samples: 30 num_steps: 100 - num_node_types: ${..num_node_types} test: num_samples: 30 num_steps: 100 - num_node_types: ${..num_node_types} model: input_dim_nodes: 25 # 9 node types instead of 6. diff --git a/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml b/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml index 3351cee5b3..10c5442bcf 100644 --- a/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml +++ b/examples/cfd/lagrangian_mgn/conf/model/mgn.yaml @@ -22,6 +22,7 @@ input_dim_edges: ??? output_dim: ??? processor_size: 10 aggregation: sum +hidden_dim_processor: 128 hidden_dim_node_encoder: 256 hidden_dim_edge_encoder: 256 hidden_dim_node_decoder: 256 From 5c13a1fafcf648e90f10573ae505cdc30ca91e27 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Tue, 28 Jan 2025 12:15:18 -0800 Subject: [PATCH 3/8] Delete old configs. --- .../cfd/lagrangian_mgn/conf/config_2d.yaml | 67 ------------------- .../cfd/lagrangian_mgn/conf/config_3d.yaml | 67 ------------------- 2 files changed, 134 deletions(-) delete mode 100644 examples/cfd/lagrangian_mgn/conf/config_2d.yaml delete mode 100644 examples/cfd/lagrangian_mgn/conf/config_3d.yaml diff --git a/examples/cfd/lagrangian_mgn/conf/config_2d.yaml b/examples/cfd/lagrangian_mgn/conf/config_2d.yaml deleted file mode 100644 index 0107b10802..0000000000 --- a/examples/cfd/lagrangian_mgn/conf/config_2d.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -hydra: - job: - chdir: True - run: - dir: ./outputs/ - -# data configs -data_dir: /data/Water -dim: 2 - -# model config -activation: "silu" - -# training configs -batch_size: 20 -epochs: 20 -num_training_samples: 1000 # 400 -num_training_time_steps: 990 # 600 - 5 (history) -lr: 1e-4 -lr_min: 1e-6 -lr_decay_rate: 0.999 # every 10 epoch decays to 35% -num_input_features: 22 # 2 (pos) + 2*5 (history of velocity) + 4 boundary features + 6 (node type) -num_output_features: 2 # 2 acceleration -num_edge_features: 3 # 2 displacement + 1 distance -processor_size: 8 -radius: 0.015 -dt: 0.0025 - -# performance configs -use_apex: True -amp: False -jit: False -num_dataloader_workers: 10 # 4 -do_concat_trick: False -num_processor_checkpoint_segments: 0 -recompute_activation: False - -# wandb configs -wandb_mode: offline -watch_model: False -wandb_key: -wandb_project: "meshgraphnet" -wandb_entity: -wandb_name: -ckpt_path: "./checkpoints_2d" - -# test & visualization configs -num_test_samples: 1 -num_test_time_steps: 200 -frame_skip: 1 -frame_interval: 1 diff --git a/examples/cfd/lagrangian_mgn/conf/config_3d.yaml b/examples/cfd/lagrangian_mgn/conf/config_3d.yaml deleted file mode 100644 index 0cd1553137..0000000000 --- a/examples/cfd/lagrangian_mgn/conf/config_3d.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. -# SPDX-FileCopyrightText: All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -hydra: - job: - chdir: True - run: - dir: ./outputs/ - -# data configs -data_dir: /data/Water-3D -dim: 3 - -# model config -activation: "silu" - -# training configs -batch_size: 2 -epochs: 20 -num_training_samples: 1000 # 400 -num_training_time_steps: 300 # 600 - 5 (history) -lr: 1e-4 -lr_min: 1e-6 -lr_decay_rate: 0.999 # every 10 epoch decays to 35% -num_input_features: 30 # 3 (pos) + 3*5 (history of velocity) + 6 boundary features + 6 (node type) -num_output_features: 3 # 2 acceleration -num_edge_features: 4 # 2 displacement + 1 distance -processor_size: 8 -radius: 0.035 -dt: 0.005 - -# performance configs -use_apex: True -amp: False -jit: False -num_dataloader_workers: 4 # 4 -do_concat_trick: False -num_processor_checkpoint_segments: 0 -recompute_activation: False - -# wandb configs -wandb_mode: offline -watch_model: False -wandb_key: -wandb_project: "meshgraphnet" -wandb_entity: -wandb_name: -ckpt_path: "./checkpoints_3d" - -# test & visualization configs -num_test_samples: 1 -num_test_time_steps: 400 -frame_skip: 1 -frame_interval: 1 From c1f8769be7d3cb5b9b5be4e50abba94df806fe43 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Tue, 28 Jan 2025 12:22:34 -0800 Subject: [PATCH 4/8] Revert "Delete old configs." This reverts commit 5c13a1fafcf648e90f10573ae505cdc30ca91e27. 
--- .../cfd/lagrangian_mgn/conf/config_2d.yaml | 67 +++++++++++++++++++ .../cfd/lagrangian_mgn/conf/config_3d.yaml | 67 +++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 examples/cfd/lagrangian_mgn/conf/config_2d.yaml create mode 100644 examples/cfd/lagrangian_mgn/conf/config_3d.yaml diff --git a/examples/cfd/lagrangian_mgn/conf/config_2d.yaml b/examples/cfd/lagrangian_mgn/conf/config_2d.yaml new file mode 100644 index 0000000000..0107b10802 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/config_2d.yaml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +hydra: + job: + chdir: True + run: + dir: ./outputs/ + +# data configs +data_dir: /data/Water +dim: 2 + +# model config +activation: "silu" + +# training configs +batch_size: 20 +epochs: 20 +num_training_samples: 1000 # 400 +num_training_time_steps: 990 # 600 - 5 (history) +lr: 1e-4 +lr_min: 1e-6 +lr_decay_rate: 0.999 # every 10 epoch decays to 35% +num_input_features: 22 # 2 (pos) + 2*5 (history of velocity) + 4 boundary features + 6 (node type) +num_output_features: 2 # 2 acceleration +num_edge_features: 3 # 2 displacement + 1 distance +processor_size: 8 +radius: 0.015 +dt: 0.0025 + +# performance configs +use_apex: True +amp: False +jit: False +num_dataloader_workers: 10 # 4 +do_concat_trick: False +num_processor_checkpoint_segments: 0 +recompute_activation: False + +# wandb configs +wandb_mode: offline +watch_model: False +wandb_key: +wandb_project: "meshgraphnet" +wandb_entity: +wandb_name: +ckpt_path: "./checkpoints_2d" + +# test & visualization configs +num_test_samples: 1 +num_test_time_steps: 200 +frame_skip: 1 +frame_interval: 1 diff --git a/examples/cfd/lagrangian_mgn/conf/config_3d.yaml b/examples/cfd/lagrangian_mgn/conf/config_3d.yaml new file mode 100644 index 0000000000..0cd1553137 --- /dev/null +++ b/examples/cfd/lagrangian_mgn/conf/config_3d.yaml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 - 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +hydra: + job: + chdir: True + run: + dir: ./outputs/ + +# data configs +data_dir: /data/Water-3D +dim: 3 + +# model config +activation: "silu" + +# training configs +batch_size: 2 +epochs: 20 +num_training_samples: 1000 # 400 +num_training_time_steps: 300 # 600 - 5 (history) +lr: 1e-4 +lr_min: 1e-6 +lr_decay_rate: 0.999 # every 10 epoch decays to 35% +num_input_features: 30 # 3 (pos) + 3*5 (history of velocity) + 6 boundary features + 6 (node type) +num_output_features: 3 # 2 acceleration +num_edge_features: 4 # 2 displacement + 1 distance +processor_size: 8 +radius: 0.035 +dt: 0.005 + +# performance configs +use_apex: True +amp: False +jit: False +num_dataloader_workers: 4 # 4 +do_concat_trick: False +num_processor_checkpoint_segments: 0 +recompute_activation: False + +# wandb configs +wandb_mode: offline +watch_model: False +wandb_key: +wandb_project: "meshgraphnet" +wandb_entity: +wandb_name: +ckpt_path: "./checkpoints_3d" + +# test & visualization configs +num_test_samples: 1 +num_test_time_steps: 400 +frame_skip: 1 +frame_interval: 1 From a7532b434595f84846841912d67299e091c08563 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Tue, 4 Feb 2025 12:05:11 -0800 Subject: [PATCH 5/8] Address review comments. --- examples/cfd/lagrangian_mgn/README.md | 3 ++- examples/cfd/lagrangian_mgn/conf/config.yaml | 1 + .../conf/data/lagrangian_dataset.yaml | 2 +- examples/cfd/lagrangian_mgn/inference.py | 3 ++- examples/cfd/lagrangian_mgn/loggers.py | 22 +++++++++++++++++++ examples/cfd/lagrangian_mgn/train.py | 3 ++- 6 files changed, 30 insertions(+), 4 deletions(-) diff --git a/examples/cfd/lagrangian_mgn/README.md b/examples/cfd/lagrangian_mgn/README.md index 50027cd45e..4c2e8acd8b 100644 --- a/examples/cfd/lagrangian_mgn/README.md +++ b/examples/cfd/lagrangian_mgn/README.md @@ -55,7 +55,8 @@ as follows: - **Node features**: - position ($d$) - - historical velocity ($t \times d$) + - historical velocity ($t \times d$), + where the number of steps $t$ can be set using the `data.num_history` config parameter. - one-hot encoding of node type (e.g. 6), - wall feature ($2 \times d$) - **Edge features**: displacement ($d$), distance (1) diff --git a/examples/cfd/lagrangian_mgn/conf/config.yaml b/examples/cfd/lagrangian_mgn/conf/config.yaml index 9cf658dd97..c68311b4bf 100644 --- a/examples/cfd/lagrangian_mgn/conf/config.yaml +++ b/examples/cfd/lagrangian_mgn/conf/config.yaml @@ -36,6 +36,7 @@ resume_dir: ${output} # The dataset directory must be set either in command line or config. data: data_dir: ??? + num_history: 5 num_node_types: 6 train: split: train diff --git a/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml index 8b94f3bc5c..fcbd7c4814 100644 --- a/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml +++ b/examples/cfd/lagrangian_mgn/conf/data/lagrangian_dataset.yaml @@ -21,7 +21,7 @@ name: ${data.name} data_dir: ${data.data_dir} split: ??? num_samples: ???
-num_history: 5 +num_history: ${..num_history} num_steps: 600 num_node_types: ${..num_node_types} noise_std: 0.0003 diff --git a/examples/cfd/lagrangian_mgn/inference.py b/examples/cfd/lagrangian_mgn/inference.py index d8a119901f..17c8cd4f80 100644 --- a/examples/cfd/lagrangian_mgn/inference.py +++ b/examples/cfd/lagrangian_mgn/inference.py @@ -36,7 +36,7 @@ from modulus.datapipes.gnn.lagrangian_dataset import graph_update from modulus.launch.utils import load_checkpoint -from loggers import init_python_logging +from loggers import get_gpu_info, init_python_logging logger = logging.getLogger("lmgn") @@ -327,6 +327,7 @@ def plot_error(self, pred, target): def main(cfg: DictConfig) -> None: init_python_logging(cfg, base_filename="inference") logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + logger.info(get_gpu_info()) logger.info("Rollout started...") rollout = MGNRollout(cfg) diff --git a/examples/cfd/lagrangian_mgn/loggers.py b/examples/cfd/lagrangian_mgn/loggers.py index 5a768a441f..a3f7f18830 100644 --- a/examples/cfd/lagrangian_mgn/loggers.py +++ b/examples/cfd/lagrangian_mgn/loggers.py @@ -28,6 +28,7 @@ from torch import nn +import torch import wandb from modulus.distributed import DistributedManager @@ -79,6 +80,27 @@ def init_python_logging( logging.config.dictConfig(OmegaConf.to_container(pylog_cfg, resolve=True)) +def get_gpu_info() -> str: + """Returns information about available GPUs.""" + + if not torch.cuda.is_available(): + return "\nCUDA is not available." + + res = f"\n\nPyTorch CUDA Version: {torch.version.cuda}\nAvailable GPUs:" + for i in range(torch.cuda.device_count()): + name = torch.cuda.get_device_name(i) + props = torch.cuda.get_device_properties(i) + total_memory = props.total_memory / (1024**3) + res += ( + f"\n{torch.device(i)}: {name} (" + f"{total_memory:.0f} GiB, " + f"sm_{props.major}{props.minor})" + ) + + res += f"\nCurrent device: {torch.cuda.current_device()}\n" + return res + + def rank0(func): """Decorator that allows the function to be executed only in rank 0 process.""" diff --git a/examples/cfd/lagrangian_mgn/train.py b/examples/cfd/lagrangian_mgn/train.py index aadede45d0..e60cf90929 100644 --- a/examples/cfd/lagrangian_mgn/train.py +++ b/examples/cfd/lagrangian_mgn/train.py @@ -30,7 +30,7 @@ from modulus.distributed.manager import DistributedManager from modulus.launch.utils import load_checkpoint, save_checkpoint -from loggers import CompositeLogger, ExperimentLogger, init_python_logging +from loggers import CompositeLogger, ExperimentLogger, get_gpu_info, init_python_logging logger = logging.getLogger("lmgn") @@ -180,6 +180,7 @@ def main(cfg: DictConfig) -> None: init_python_logging(cfg, dist.rank) logger.info(f"Config summary:\n{OmegaConf.to_yaml(cfg, sort_keys=True)}") + logger.info(get_gpu_info()) # Initialize loggers. 
global elogger From 3b4f41d432b02329ecb6de230bc3db2b5648237a Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 5 Feb 2025 09:17:44 -0800 Subject: [PATCH 6/8] Merge branch 'main' into lagrangian-mgn --- CHANGELOG.md | 7 +- CONTRIBUTING.md | 9 + Dockerfile | 2 +- .../train_transolver_darcy.py | 3 +- .../xaeronet/surface/train.py | 2 +- .../xaeronet/volume/train.py | 2 +- examples/cfd/mhd_pino/train_mhd.py | 2 +- examples/cfd/mhd_pino/train_mhd_vec_pot.py | 2 +- .../cfd/mhd_pino/train_mhd_vec_pot_tfno.py | 2 +- examples/cfd/stokes_mgn/pi_fine_tuning.py | 2 +- examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py | 2 +- examples/cfd/stokes_mgn/train.py | 2 +- .../test_sequence.py | 2 +- .../cfd/vortex_shedding_mesh_reduced/train.py | 2 +- .../train_sequence.py | 2 +- examples/cfd/vortex_shedding_mgn/train.py | 2 +- examples/healthcare/bloodflow_1d_mgn/train.py | 2 +- .../flood_modeling/hydrographnet/README.md | 3 + examples/weather/graphcast/train_graphcast.py | 2 +- modulus/launch/logging/__init__.py | 2 - modulus/launch/logging/launch.py | 21 ++- pyproject.toml | 23 +-- test/datapipes/test_bsms.py | 3 +- test/datapipes/test_healpix.py | 65 ++++++-- test/datapipes/test_healpix_couple.py | 79 +++++++-- test/datapipes/test_lagrangian.py | 4 +- test/datapipes/test_mesh_datapipe.py | 5 +- test/datapipes/test_synthetic.py | 30 +++- test/metrics/test_metrics_cfd.py | 3 +- test/metrics/test_metrics_integral.py | 3 +- test/models/diffusion/test_preconditioning.py | 8 +- .../dlwp_healpix/test_healpix_blocks.py | 154 ++++++++++++++---- .../test_healpix_encoder_decoder.py | 77 +++++++-- .../dlwp_healpix/test_healpix_layers.py | 70 ++++++-- test/models/meshgraphnet/test_bsms_mgn.py | 12 +- .../meshgraphnet/test_meshgraphnet_snmg.py | 10 +- test/models/test_distributed_graph.py | 18 +- test/models/test_domino.py | 9 +- test/models/test_graph_partition.py | 53 ++++-- test/utils/corrdiff/test_generation_steps.py | 20 ++- test/utils/corrdiff/test_netcdf_writer.py | 33 +++- test/utils/corrdiff/test_time_range.py | 32 +++- .../generative/test_deterministic_sampler.py | 53 ++++-- test/utils/generative/test_format_time.py | 14 +- test/utils/generative/test_parse_int_list.py | 8 +- test/utils/generative/test_parse_time.py | 5 +- .../generative/test_stochastic_sampler.py | 45 ++++- test/utils/generative/test_tuple_product.py | 9 +- test/utils/test_mesh_utils.py | 22 ++- test/utils/test_sdf.py | 4 +- 50 files changed, 719 insertions(+), 227 deletions(-) create mode 100644 examples/weather/flood_modeling/hydrographnet/README.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d2c16d48d..3dc66017e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Enhancements and bug fixes to DoMINO model and training example - Enhancement to parameterize DoMINO model with inlet velocity - Moved non-dimensionaliztion out of domino datapipe to datapipe in domino example +- Updated utils in `modulus.launch.logging` to avoid unnecessary `wandb` and `mlflow` imports ### Deprecated @@ -28,11 +29,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Update pytests to skip when the required dependencies are not present + ### Security ### Dependencies -- Remove the numpy dependency upper bound. +- Remove the numpy dependency upper bound +- Moved pytz and nvtx to optional +- Update the base image for the Dockerfile - Introduce Multi-Storage Client (MSC) as an optional dependency. 
## [0.9.0] - 2024-12-04 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 12e065a702..e9526743d0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -214,6 +214,15 @@ The pipeline has following stages: test, you will have to review your changes and fix the issues. To run pytest locally you can simply run `pytest` inside the `test` folder. + While writing these tests, we encourage you to make use of the + [`@nfsdata_or_fail`](https://github.com/NVIDIA/modulus/blob/main/test/pytest_utils.py#L92) + and the [`@import_or_fail`](https://github.com/NVIDIA/modulus/blob/main/test/pytest_utils.py#L25) + decorators to appropriately skip your tests for developers and users who do not have your + test-specific datasets and dependencies, respectively. The CI has these datasets and + dependencies, so your tests will still be executed during CI. + This mechanism helps us provide a better developer and user experience + when working with the unit tests. + 6. `doctest` Checks if the examples in the docstrings run and produce desired outputs. It is highly recommended that you provide simple examples of your functions/classes diff --git a/Dockerfile b/Dockerfile index b7067a9e56..10b3c554f5 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:24.11-py3 +ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:25.01-py3 FROM ${BASE_CONTAINER} as builder ARG TARGETPLATFORM diff --git a/examples/cfd/darcy_transolver/train_transolver_darcy.py b/examples/cfd/darcy_transolver/train_transolver_darcy.py index 10525f5368..32bbdfdf1e 100644 --- a/examples/cfd/darcy_transolver/train_transolver_darcy.py +++ b/examples/cfd/darcy_transolver/train_transolver_darcy.py @@ -27,7 +27,8 @@ from modulus.distributed import DistributedManager from modulus.utils import StaticCaptureTraining, StaticCaptureEvaluateNoGrad from modulus.launch.utils import load_checkpoint, save_checkpoint -from modulus.launch.logging import PythonLogger, LaunchLogger, initialize_mlflow +from modulus.launch.logging import PythonLogger, LaunchLogger +from modulus.launch.logging.mlflow import initialize_mlflow from validator import GridValidator diff --git a/examples/cfd/external_aerodynamics/xaeronet/surface/train.py b/examples/cfd/external_aerodynamics/xaeronet/surface/train.py index 19ac0d0bb0..a077eaa81c 100644 --- a/examples/cfd/external_aerodynamics/xaeronet/surface/train.py +++ b/examples/cfd/external_aerodynamics/xaeronet/surface/train.py @@ -44,7 +44,7 @@ from omegaconf import DictConfig from modulus.distributed import DistributedManager -from modulus.launch.logging import initialize_wandb +from modulus.launch.logging.wandb import initialize_wandb from modulus.models.meshgraphnet import MeshGraphNet # Get the absolute path to the parent directory diff --git a/examples/cfd/external_aerodynamics/xaeronet/volume/train.py b/examples/cfd/external_aerodynamics/xaeronet/volume/train.py index 86a05579f6..9fa4e98697 100644 --- a/examples/cfd/external_aerodynamics/xaeronet/volume/train.py +++ b/examples/cfd/external_aerodynamics/xaeronet/volume/train.py @@ -35,7 +35,7 @@ import numpy as np import torch.optim as optim import matplotlib.pyplot as plt -from modulus.launch.logging import initialize_wandb +from modulus.launch.logging.wandb import initialize_wandb import json import wandb as wb import hydra diff --git a/examples/cfd/mhd_pino/train_mhd.py b/examples/cfd/mhd_pino/train_mhd.py index d3a0737788..3330c8523c 100644 ---
a/examples/cfd/mhd_pino/train_mhd.py +++ b/examples/cfd/mhd_pino/train_mhd.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHD, LossMHD_Modulus diff --git a/examples/cfd/mhd_pino/train_mhd_vec_pot.py b/examples/cfd/mhd_pino/train_mhd_vec_pot.py index 88e16ba846..d41a53cc7d 100644 --- a/examples/cfd/mhd_pino/train_mhd_vec_pot.py +++ b/examples/cfd/mhd_pino/train_mhd_vec_pot.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHDVecPot, LossMHDVecPot_Modulus diff --git a/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py b/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py index 602b97ea51..613190e1bc 100644 --- a/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py +++ b/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHDVecPot, LossMHDVecPot_Modulus diff --git a/examples/cfd/stokes_mgn/pi_fine_tuning.py b/examples/cfd/stokes_mgn/pi_fine_tuning.py index 645b1ac40e..ecdcd96388 100644 --- a/examples/cfd/stokes_mgn/pi_fine_tuning.py +++ b/examples/cfd/stokes_mgn/pi_fine_tuning.py @@ -43,8 +43,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.models.mlp.fully_connected import FullyConnected from modulus.sym.eq.pde import PDE from modulus.sym.eq.phy_informer import PhysicsInformer diff --git a/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py b/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py index e22ac61dac..de28a53c6b 100644 --- a/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py +++ b/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py @@ -43,8 +43,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.models.meshgraphnet import MeshGraphNet from modulus.sym.eq.pde import PDE from modulus.sym.eq.phy_informer import PhysicsInformer diff --git a/examples/cfd/stokes_mgn/train.py b/examples/cfd/stokes_mgn/train.py index 8a4331575b..080a021f8f 100644 --- a/examples/cfd/stokes_mgn/train.py +++ b/examples/cfd/stokes_mgn/train.py @@ -36,8 +36,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.meshgraphnet import MeshGraphNet diff --git a/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py b/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py index 7b3342bcaf..27618759eb 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py @@ -27,8 +27,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint from modulus.models.mesh_reduced.mesh_reduced import 
Mesh_Reduced from train_sequence import Sequence_Trainer diff --git a/examples/cfd/vortex_shedding_mesh_reduced/train.py b/examples/cfd/vortex_shedding_mesh_reduced/train.py index 673741a5f0..90d6c91338 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/train.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/train.py @@ -32,8 +32,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.mesh_reduced.mesh_reduced import Mesh_Reduced diff --git a/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py b/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py index 9358d8ac22..de8a5a6c70 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py @@ -33,8 +33,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.mesh_reduced.mesh_reduced import Mesh_Reduced from modulus.models.mesh_reduced.temporal_model import Sequence_Model diff --git a/examples/cfd/vortex_shedding_mgn/train.py b/examples/cfd/vortex_shedding_mgn/train.py index 39d3c740b7..ab4658307f 100644 --- a/examples/cfd/vortex_shedding_mgn/train.py +++ b/examples/cfd/vortex_shedding_mgn/train.py @@ -33,8 +33,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, - initialize_wandb, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.meshgraphnet import MeshGraphNet diff --git a/examples/healthcare/bloodflow_1d_mgn/train.py b/examples/healthcare/bloodflow_1d_mgn/train.py index 5a5f92c9e1..a0fdd42d05 100644 --- a/examples/healthcare/bloodflow_1d_mgn/train.py +++ b/examples/healthcare/bloodflow_1d_mgn/train.py @@ -31,9 +31,9 @@ from modulus.launch.logging import ( PythonLogger, - initialize_wandb, RankZeroLoggingWrapper, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint import json from omegaconf import DictConfig diff --git a/examples/weather/flood_modeling/hydrographnet/README.md b/examples/weather/flood_modeling/hydrographnet/README.md new file mode 100644 index 0000000000..34f6f89243 --- /dev/null +++ b/examples/weather/flood_modeling/hydrographnet/README.md @@ -0,0 +1,3 @@ +# HydroGraphNet + +This is a placeholder for the HydroGraphNet model. 
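The `launch.py` hunks below defer the `wandb` import into the code paths that actually use that backend, so `import modulus.launch.logging` no longer requires the optional packages. A minimal sketch of the deferred-import pattern being applied (the `log_metrics` helper is hypothetical, for illustration only):

```python
def log_metrics(metrics: dict, use_wandb: bool = False) -> None:
    """Log metrics, importing the optional wandb backend only when enabled."""
    if use_wandb:
        # Deferred import: wandb is only needed when this backend is active.
        import wandb

        if wandb.run is None:
            raise RuntimeError("WandB backend enabled but not initialized")
        wandb.log(metrics)
    else:
        # No experiment tracker configured; fall back to stdout.
        print(metrics)
```

This matches the CHANGELOG entry above about avoiding unnecessary `wandb` and `mlflow` imports in `modulus.launch.logging`.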
diff --git a/examples/weather/graphcast/train_graphcast.py b/examples/weather/graphcast/train_graphcast.py index ec928b80a6..912c8bdfa8 100644 --- a/examples/weather/graphcast/train_graphcast.py +++ b/examples/weather/graphcast/train_graphcast.py @@ -39,9 +39,9 @@ ) from modulus.launch.logging import ( PythonLogger, - initialize_wandb, RankZeroLoggingWrapper, ) +from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from train_utils import count_trainable_params, prepare_input diff --git a/modulus/launch/logging/__init__.py b/modulus/launch/logging/__init__.py index 50466a3678..5199f0de6f 100644 --- a/modulus/launch/logging/__init__.py +++ b/modulus/launch/logging/__init__.py @@ -16,5 +16,3 @@ from .console import PythonLogger, RankZeroLoggingWrapper from .launch import LaunchLogger -from .mlflow import initialize_mlflow -from .wandb import initialize_wandb diff --git a/modulus/launch/logging/launch.py b/modulus/launch/logging/launch.py index f49c54498e..ad8b8aebb2 100644 --- a/modulus/launch/logging/launch.py +++ b/modulus/launch/logging/launch.py @@ -23,12 +23,10 @@ import torch import torch.cuda.profiler as profiler -import wandb from modulus.distributed import DistributedManager, reduce_loss from .console import PythonLogger -from .wandb import alert class LaunchLogger(object): @@ -133,6 +131,8 @@ def __init__( # Set x axis metric to epoch for this namespace if self.wandb_backend: + import wandb + wandb.define_metric(name_space + "/mini_batch_*", step_metric="iter") wandb.define_metric(name_space + "/*", step_metric="epoch") @@ -284,6 +284,10 @@ def __exit__(self, exc_type, exc_value, exc_tb): and self.epoch % self.epoch_alert_freq == 0 ): if self.wandb_backend: + import wandb + + from .wandb import alert + # TODO: Make this a little more informative? 
alert( title=f"{sys.argv[0]} training progress report", @@ -321,6 +325,8 @@ def _log_backends( # WandB Logging if self.wandb_backend: + import wandb + # For WandB send step in as a metric # Step argument in lod function does not work with multiple log calls at # different intervals @@ -352,6 +358,8 @@ def log_figure( return if self.wandb_backend: + import wandb + wandb.log({artifact_file: figure}) if self.mlflow_backend: @@ -405,9 +413,12 @@ def initialize(use_wandb: bool = False, use_mlflow: bool = False): use_mlflow : bool, optional Use MLFlow logging, by default False """ - if wandb.run is None and use_wandb: - PythonLogger().warning("WandB not initialized, turning off") - use_wandb = False + if use_wandb: + import wandb + + if wandb.run is None: + PythonLogger().warning("WandB not initialized, turning off") + use_wandb = False if use_wandb: LaunchLogger.toggle_wandb(True) diff --git a/pyproject.toml b/pyproject.toml index 8a086d88af..e5efa2b700 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,21 +12,19 @@ readme = "README.md" requires-python = ">=3.10" license = {text = "Apache 2.0"} dependencies = [ - "torch>=2.0.0", - "numpy>=1.22.4", - "xarray>=2023.1.0", - "zarr>=2.14.2", + "certifi>=2023.7.22", "fsspec>=2023.1.0", - "s3fs>=2023.5.0", + "numpy>=1.22.4", "nvidia_dali_cuda120>=1.35.0", - "setuptools>=67.6.0", - "certifi>=2023.7.22", - "pytz>=2023.3", - "treelib>=1.2.5", - "tqdm>=4.60.0", - "nvtx>=0.2.8", "onnx>=1.14.0", + "s3fs>=2023.5.0", + "setuptools>=67.6.0", "timm>=0.9.12", + "torch>=2.0.0", + "tqdm>=4.60.0", + "treelib>=1.2.5", + "xarray>=2023.1.0", + "zarr>=2.14.2", ] classifiers = [ "Programming Language :: Python :: 3", @@ -94,6 +92,8 @@ all = [ "einops>=0.7.0", "pyspng>=0.1.0", "shapely>=2.0.6", + "pytz>=2023.3", + "nvtx>=0.2.8", "nvidia-modulus[launch]", "nvidia-modulus[dev]", "nvidia-modulus[makani]", @@ -144,3 +144,4 @@ Fengwu = "modulus.models.fengwu:Fengwu" SwinRNN = "modulus.models.swinvrnn:SwinRNN" EDMPrecondSR = "modulus.models.diffusion:EDMPrecondSR" UNet = "modulus.models.diffusion:UNet" + diff --git a/test/datapipes/test_bsms.py b/test/datapipes/test_bsms.py index e50ef379b8..9c9170f753 100644 --- a/test/datapipes/test_bsms.py +++ b/test/datapipes/test_bsms.py @@ -14,11 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import dgl import pytest import torch from pytest_utils import import_or_fail +dgl = pytest.importorskip("dgl") + @pytest.fixture def ahmed_data_dir(): diff --git a/test/datapipes/test_healpix.py b/test/datapipes/test_healpix.py index 67d0a9f8e9..cdeaacc32d 100644 --- a/test/datapipes/test_healpix.py +++ b/test/datapipes/test_healpix.py @@ -22,20 +22,14 @@ import numpy as np import pytest import xarray as xr -from omegaconf import DictConfig -from pytest_utils import nfsdata_or_fail +from pytest_utils import import_or_fail, nfsdata_or_fail from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from modulus.datapipes.healpix.data_modules import ( - TimeSeriesDataModule, - create_time_series_dataset_classic, - open_time_series_dataset_classic_on_the_fly, - open_time_series_dataset_classic_prebuilt, -) -from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset from modulus.distributed import DistributedManager +omegaconf = pytest.importorskip("omegaconf") + @pytest.fixture def data_dir(): @@ -77,7 +71,7 @@ def scaling_dict(): "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, "extra": {"mean": 1, "std": 0}, } - return DictConfig(scaling) + return omegaconf.DictConfig(scaling) @pytest.fixture @@ -95,11 +89,16 @@ def scaling_double_dict(): "z": {"mean": 0, "std": 2}, "extra": {"mean": 0, "std": 2}, } - return DictConfig(scaling) + return omegaconf.DictConfig(scaling) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_open_time_series_on_the_fly(create_path, pytestconfig): + from modulus.datapipes.healpix.data_modules import ( + open_time_series_dataset_classic_on_the_fly, + ) + variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -118,9 +117,14 @@ def test_open_time_series_on_the_fly(create_path, pytestconfig): assert ds_var.equals(base[test_var]) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_open_time_series(data_dir, dataset_name, pytestconfig): # check for failure of non-existant dataset + from modulus.datapipes.healpix.data_modules import ( + open_time_series_dataset_classic_prebuilt, + ) + with pytest.raises(FileNotFoundError, match=("Dataset doesn't appear to exist at")): open_time_series_dataset_classic_prebuilt("/null_path", dataset_name) @@ -128,8 +132,14 @@ def test_open_time_series(data_dir, dataset_name, pytestconfig): assert isinstance(ds, xr.Dataset) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_create_time_series(data_dir, dataset_name, create_path, pytestconfig): + + from modulus.datapipes.healpix.data_modules import ( + create_time_series_dataset_classic, + ) + variables = ["z500", "z1000"] constants = {"lsm": "lsm"} scaling = {"z500": {"log_epsilon": 2}} @@ -181,10 +191,14 @@ def test_create_time_series(data_dir, dataset_name, create_path, pytestconfig): delete_dataset(create_path, dataset_name) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_initialization( data_dir, dataset_name, scaling_dict, pytestconfig ): + + from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -213,7 +227,7 @@ def test_TimeSeriesDataset_initialization( ) # check for failure of invalid scaling variable on input - invalid_scaling = DictConfig( + invalid_scaling = omegaconf.DictConfig( { "bogosity": {"mean": 0, "std": 42}, } @@ -272,10 +286,13 @@ def test_TimeSeriesDataset_initialization( assert isinstance(timeseries_ds, TimeSeriesDataset) 
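The test-suite hunks in this merge all use the same two gating mechanisms for optional dependencies: a module-level `pytest.importorskip(...)` when the whole file needs a package, and the repo's `@import_or_fail(...)` decorator (whose tests take the `pytestconfig` fixture) when only individual tests do. A minimal, self-contained sketch of the module-level variant (the test itself is hypothetical):

```python
import pytest

# Skip every test in this module at collection time if the optional
# dependency is missing; otherwise bind it like a regular import.
omegaconf = pytest.importorskip("omegaconf")


def test_dictconfig_roundtrip():
    cfg = omegaconf.DictConfig({"mean": 0, "std": 2})
    assert omegaconf.OmegaConf.to_object(cfg) == {"mean": 0, "std": 2}
```

With `importorskip`, a missing package produces a skip report instead of a collection error, so users without the extra dependencies still get a clean test run, while CI, which installs everything, executes the tests normally.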
+@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_get_constants( data_dir, dataset_name, scaling_dict, pytestconfig ): + from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -294,8 +311,11 @@ def test_TimeSeriesDataset_get_constants( ) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_len(data_dir, dataset_name, scaling_dict, pytestconfig): + from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -333,10 +353,13 @@ def test_TimeSeriesDataset_len(data_dir, dataset_name, scaling_dict, pytestconfi assert len(timeseries_ds) == (len(zarr_ds.time.values) - 2) // 2 +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_get( data_dir, dataset_name, scaling_double_dict, pytestconfig ): + from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -438,10 +461,15 @@ def test_TimeSeriesDataset_get( assert len(inputs) == (len(timeseries_ds[0]) + 1) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_initialization( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + from modulus.datapipes.healpix.data_modules import ( + TimeSeriesDataModule, + ) + variables = ["z500", "z1000"] splits = { "train_date_start": "1959-01-01", @@ -513,16 +541,21 @@ def test_TimeSeriesDataModule_initialization( batch_size=1, prebuilt_dataset=True, scaling=scaling_double_dict, - splits=DictConfig(splits), + splits=omegaconf.DictConfig(splits), ) assert isinstance(timeseries_dm, TimeSeriesDataModule) DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_get_constants( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + from modulus.datapipes.healpix.data_modules import ( + TimeSeriesDataModule, + ) + variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -591,10 +624,16 @@ def test_TimeSeriesDataModule_get_constants( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_get_dataloaders( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + + from modulus.datapipes.healpix.data_modules import ( + TimeSeriesDataModule, + ) + variables = ["z500", "z1000"] splits = { "train_date_start": "1979-01-01", diff --git a/test/datapipes/test_healpix_couple.py b/test/datapipes/test_healpix_couple.py index 7ee24aacc8..871ef0b1a8 100644 --- a/test/datapipes/test_healpix_couple.py +++ b/test/datapipes/test_healpix_couple.py @@ -23,18 +23,14 @@ import pandas as pd import pytest import xarray as xr -from omegaconf import DictConfig, OmegaConf -from pytest_utils import nfsdata_or_fail +from pytest_utils import import_or_fail, nfsdata_or_fail from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler -from modulus.datapipes.healpix.coupledtimeseries_dataset import CoupledTimeSeriesDataset -from modulus.datapipes.healpix.couplers import ConstantCoupler, TrailingAverageCoupler -from modulus.datapipes.healpix.data_modules import ( - CoupledTimeSeriesDataModule, -) from modulus.distributed import DistributedManager +omegaconf = pytest.importorskip("omegaconf") + 
@pytest.fixture def data_dir(): @@ -75,7 +71,7 @@ def scaling_dict(): "z": {"mean": 0, "std": 1}, "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, } - return DictConfig(scaling) + return omegaconf.DictConfig(scaling) @pytest.fixture @@ -92,11 +88,17 @@ def scaling_double_dict(): "z": {"mean": 0, "std": 2}, "tp6": {"mean": 0, "std": 2, "log_epsilon": 1e-6}, } - return DictConfig(scaling) + return omegaconf.DictConfig(scaling) +@import_or_fail("omegaconf") @nfsdata_or_fail def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): + + from modulus.datapipes.healpix.couplers import ( + ConstantCoupler, + ) + variables = ["z500", "z1000"] input_times = ["0h"] input_time_dim = 1 @@ -132,7 +134,7 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): expected = expected.astype(int) assert np.array_equal(expected, coupler._coupled_offsets) - scaling_df = pd.DataFrame.from_dict(OmegaConf.to_object(scaling_dict)).T + scaling_df = pd.DataFrame.from_dict(omegaconf.OmegaConf.to_object(scaling_dict)).T scaling_df.loc["zeros"] = {"mean": 0.0, "std": 1.0} scaling_da = scaling_df.to_xarray().astype("float32") coupler.set_scaling(scaling_da) @@ -145,8 +147,14 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): + + from modulus.datapipes.healpix.couplers import ( + TrailingAverageCoupler, + ) + variables = ["z500", "z1000"] input_times = ["6h", "12h"] input_time_dim = 2 @@ -190,7 +198,7 @@ def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconf expected = expected.astype(int) assert np.array_equal(expected, coupler._coupled_offsets) - scaling_df = pd.DataFrame.from_dict(OmegaConf.to_object(scaling_dict)).T + scaling_df = pd.DataFrame.from_dict(omegaconf.OmegaConf.to_object(scaling_dict)).T scaling_df.loc["zeros"] = {"mean": 0.0, "std": 1.0} scaling_da = scaling_df.to_xarray().astype("float32") coupler.set_scaling(scaling_da) @@ -203,10 +211,16 @@ def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconf DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_initialization( data_dir, dataset_name, scaling_dict, pytestconfig ): + + from modulus.datapipes.healpix.coupledtimeseries_dataset import ( + CoupledTimeSeriesDataset, + ) + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -238,7 +252,7 @@ def test_CoupledTimeSeriesDataset_initialization( ) # check for failure of invalid scaling variable on input - invalid_scaling = DictConfig( + invalid_scaling = omegaconf.DictConfig( { "bogosity": {"mean": 0, "std": 42}, } @@ -351,10 +365,16 @@ def test_CoupledTimeSeriesDataset_initialization( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_get_constants( data_dir, dataset_name, scaling_dict, pytestconfig ): + + from modulus.datapipes.healpix.coupledtimeseries_dataset import ( + CoupledTimeSeriesDataset, + ) + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -393,10 +413,15 @@ def test_CoupledTimeSeriesDataset_get_constants( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_len( data_dir, dataset_name, scaling_dict, pytestconfig ): + from 
modulus.datapipes.healpix.coupledtimeseries_dataset import ( + CoupledTimeSeriesDataset, + ) + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -473,10 +498,15 @@ def test_CoupledTimeSeriesDataset_len( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_get( data_dir, dataset_name, scaling_double_dict, pytestconfig ): + from modulus.datapipes.healpix.coupledtimeseries_dataset import ( + CoupledTimeSeriesDataset, + ) + # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -608,10 +638,16 @@ def test_CoupledTimeSeriesDataset_get( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_initialization( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + + from modulus.datapipes.healpix.data_modules import ( + CoupledTimeSeriesDataModule, + ) + variables = ["z500", "z1000"] splits = { "train_date_start": "1959-01-01", @@ -702,17 +738,23 @@ def test_CoupledTimeSeriesDataModule_initialization( batch_size=1, prebuilt_dataset=True, scaling=scaling_double_dict, - splits=DictConfig(splits), + splits=omegaconf.DictConfig(splits), couplings=constant_coupler, ) assert isinstance(timeseries_dm, CoupledTimeSeriesDataModule) DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_constants( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + + from modulus.datapipes.healpix.data_modules import ( + CoupledTimeSeriesDataModule, + ) + variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -799,10 +841,16 @@ def test_CoupledTimeSeriesDataModule_get_constants( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_dataloaders( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + + from modulus.datapipes.healpix.data_modules import ( + CoupledTimeSeriesDataModule, + ) + variables = ["z500", "z1000"] splits = { "train_date_start": "1979-01-01", @@ -871,10 +919,15 @@ def test_CoupledTimeSeriesDataModule_get_dataloaders( DistributedManager.cleanup() +@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_coupled_vars( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): + from modulus.datapipes.healpix.data_modules import ( + CoupledTimeSeriesDataModule, + ) + variables = ["z500", "z1000"] constant_coupler = [ { diff --git a/test/datapipes/test_lagrangian.py b/test/datapipes/test_lagrangian.py index 9446fa08eb..4609d71279 100644 --- a/test/datapipes/test_lagrangian.py +++ b/test/datapipes/test_lagrangian.py @@ -14,13 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -import dgl import pytest import torch from pytest_utils import import_or_fail, nfsdata_or_fail from . 
import common +dgl = pytest.importorskip("dgl") + + Tensor = torch.Tensor diff --git a/test/datapipes/test_mesh_datapipe.py b/test/datapipes/test_mesh_datapipe.py index 39fa190682..2e01fbb6d6 100644 --- a/test/datapipes/test_mesh_datapipe.py +++ b/test/datapipes/test_mesh_datapipe.py @@ -21,7 +21,6 @@ from pytest_utils import import_or_fail # from pytest_utils import nfsdata_or_fail -from modulus.datapipes.cae import MeshDatapipe @pytest.fixture @@ -30,13 +29,15 @@ def cgns_data_dir(): return path -@import_or_fail(["vtk"]) +@import_or_fail(["vtk", "warp"]) @pytest.mark.parametrize("device", ["cuda", "cpu"]) def test_mesh_datapipe(device, tmp_path, pytestconfig): """Tests the MeshDatapipe class with VTP and VTU files.""" import vtk + from modulus.datapipes.cae import MeshDatapipe + def _create_random_vtp_vtu_mesh( num_points: int, num_triangles: int, dir: str ) -> tuple: diff --git a/test/datapipes/test_synthetic.py b/test/datapipes/test_synthetic.py index eb323923ca..f18c9b4551 100644 --- a/test/datapipes/test_synthetic.py +++ b/test/datapipes/test_synthetic.py @@ -16,15 +16,17 @@ import pytest - -from modulus.datapipes.climate import ( - SyntheticWeatherDataLoader, - SyntheticWeatherDataset, -) +from pytest_utils import import_or_fail +@import_or_fail("h5py") @pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_setup(device): +def test_dataloader_setup(device, pytestconfig): + from modulus.datapipes.climate import ( + SyntheticWeatherDataLoader, + SyntheticWeatherDataset, + ) + dataloader = SyntheticWeatherDataLoader( channels=[0, 1, 2, 3], num_samples_per_year=12, @@ -41,9 +43,15 @@ def test_dataloader_setup(device): assert isinstance(dataloader.dataset, SyntheticWeatherDataset) +@import_or_fail("h5py") @pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_iteration(device): +def test_dataloader_iteration(device, pytestconfig): """Test the iteration over batches in the DataLoader.""" + + from modulus.datapipes.climate import ( + SyntheticWeatherDataLoader, + ) + dataloader = SyntheticWeatherDataLoader( channels=[0, 1], num_samples_per_year=30, @@ -66,9 +74,15 @@ def test_dataloader_iteration(device): break # Only test one batch for quick testing +@import_or_fail("h5py") @pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_length(device): +def test_dataloader_length(device, pytestconfig): """Test the length of the DataLoader to ensure it is correct based on the dataset and batch size.""" + + from modulus.datapipes.climate import ( + SyntheticWeatherDataLoader, + ) + dataloader = SyntheticWeatherDataLoader( channels=[0, 1, 2], num_samples_per_year=30, diff --git a/test/metrics/test_metrics_cfd.py b/test/metrics/test_metrics_cfd.py index 566b20b958..68e7534ca7 100644 --- a/test/metrics/test_metrics_cfd.py +++ b/test/metrics/test_metrics_cfd.py @@ -16,7 +16,6 @@ import numpy as np import pytest -import pyvista as pv import torch from pytest_utils import import_or_fail @@ -27,6 +26,8 @@ dominant_freq_calc, ) +pv = pytest.importorskip("pyvista") + @pytest.fixture def generate_sphere(theta_res=100, phi_res=100): diff --git a/test/metrics/test_metrics_integral.py b/test/metrics/test_metrics_integral.py index faf3b36c45..f144bac29b 100644 --- a/test/metrics/test_metrics_integral.py +++ b/test/metrics/test_metrics_integral.py @@ -16,11 +16,12 @@ import numpy as np import pytest -import pyvista as pv from pytest_utils import import_or_fail from modulus.metrics.cae.integral import line_integral, surface_integral +pv = 
pytest.importorskip("pyvista") + @pytest.fixture def generate_circle(num_points=1000): diff --git a/test/models/diffusion/test_preconditioning.py b/test/models/diffusion/test_preconditioning.py index 30956d26dd..d713a05139 100644 --- a/test/models/diffusion/test_preconditioning.py +++ b/test/models/diffusion/test_preconditioning.py @@ -16,8 +16,8 @@ import pytest import torch +from pytest_utils import import_or_fail -from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.diffusion.preconditioning import ( EDMPrecond, EDMPrecondSR, @@ -58,7 +58,11 @@ def test_EDMPrecondSR_forward(scale_cond_input): assert output.shape == (b, c_target, x, y) -def test_EDMPrecondSR_serialization(tmp_path): +@import_or_fail("termcolor") +def test_EDMPrecondSR_serialization(tmp_path, pytestconfig): + + from modulus.launch.utils import load_checkpoint, save_checkpoint + module = EDMPrecondSR(8, 1, 1, 1, scale_cond_input=False) model_path = tmp_path / "output.mdlus" module.save(model_path.as_posix()) diff --git a/test/models/dlwp_healpix/test_healpix_blocks.py b/test/models/dlwp_healpix/test_healpix_blocks.py index e82f147f3c..f5e6388f41 100644 --- a/test/models/dlwp_healpix/test_healpix_blocks.py +++ b/test/models/dlwp_healpix/test_healpix_blocks.py @@ -23,18 +23,7 @@ import common import pytest import torch - -from modulus.models.dlwp_healpix_layers import ( - AvgPool, - BasicConvBlock, - ConvGRUBlock, - ConvNeXtBlock, - DoubleConvNeXtBlock, - Interpolate, - MaxPool, - SymmetricConvNeXtBlock, - TransposedConvUpsample, # -) +from pytest_utils import import_or_fail @pytest.fixture @@ -50,15 +39,27 @@ def generate_test_data(faces=12, channels=2, img_size=16, device="cpu"): return generate_test_data +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvGRUBlock_initialization(device, test_data): +def test_ConvGRUBlock_initialization(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvGRUBlock, + ) + in_channels = 2 conv_gru_func = ConvGRUBlock(in_channels=in_channels).to(device) assert isinstance(conv_gru_func, ConvGRUBlock) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvGRUBlock_forward(device, test_data): +def test_ConvGRUBlock_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvGRUBlock, + ) + in_channels = 2 tensor_size = 16 conv_gru_func = ConvGRUBlock(in_channels=in_channels).to(device) @@ -75,8 +76,14 @@ def test_ConvGRUBlock_forward(device, test_data): assert not common.compare_output(outvar_hist, outvar) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvNeXtBlock_initialization(device): +def test_ConvNeXtBlock_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvNeXtBlock, + ) + in_channels = 2 convnext_block = ConvNeXtBlock(in_channels=in_channels).to(device) assert isinstance(convnext_block, ConvNeXtBlock) @@ -91,8 +98,14 @@ def test_ConvNeXtBlock_initialization(device): assert isinstance(convnext_block, ConvNeXtBlock) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvNeXtBlock_forward(device, test_data): +def test_ConvNeXtBlock_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvNeXtBlock, + ) + in_channels = 2 out_channels = 1 tensor_size = 16 @@ -114,8 +127,14 @@ def test_ConvNeXtBlock_forward(device, test_data): assert 
outvar.shape == out_shape +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_DoubleConvNeXtBlock_initialization(device): +def test_DoubleConvNeXtBlock_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + DoubleConvNeXtBlock, + ) + in_channels = 2 out_channels = 1 latent_channels = 1 @@ -136,8 +155,14 @@ def test_DoubleConvNeXtBlock_initialization(device): assert isinstance(doubleconvnextblock, DoubleConvNeXtBlock) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_DoubleConvNeXtBlock_forward(device, test_data): +def test_DoubleConvNeXtBlock_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + DoubleConvNeXtBlock, + ) + in_channels = 2 out_channels = 1 latent_channels = 1 @@ -166,8 +191,14 @@ def test_DoubleConvNeXtBlock_forward(device, test_data): assert outvar.shape == out_shape +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_SymmetricConvNeXtBlock_initialization(device): +def test_SymmetricConvNeXtBlock_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + SymmetricConvNeXtBlock, + ) + in_channels = 2 latent_channels = 1 symmetric_convnextblock = SymmetricConvNeXtBlock( @@ -185,8 +216,14 @@ def test_SymmetricConvNeXtBlock_initialization(device): assert isinstance(symmetric_convnextblock, SymmetricConvNeXtBlock) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_SymmetricConvNeXtBlock_forward(device, test_data): +def test_SymmetricConvNeXtBlock_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + SymmetricConvNeXtBlock, + ) + in_channels = 2 latent_channels = 1 tensor_size = 16 @@ -207,8 +244,14 @@ def test_SymmetricConvNeXtBlock_forward(device, test_data): assert outvar.shape == out_shape +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_BasicConvBlock_initialization(device): +def test_BasicConvBlock_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, + ) + in_channels = 3 out_channels = 1 latent_channels = 2 @@ -228,8 +271,14 @@ def test_BasicConvBlock_initialization(device): assert isinstance(conv_block, BasicConvBlock) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_BasicConvBlock_forward(device, test_data): +def test_BasicConvBlock_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, + ) + in_channels = 3 out_channels = 1 tensor_size = 16 @@ -248,15 +297,26 @@ def test_BasicConvBlock_forward(device, test_data): assert outvar.shape == out_shape +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_MaxPool_initialization(device): +def test_MaxPool_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + MaxPool, + ) + pooling = 2 maxpool_block = MaxPool(pooling=pooling).to(device) assert isinstance(maxpool_block, MaxPool) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_MaxPool_forward(device, test_data): +def test_MaxPool_forward(device, test_data, pytestconfig): + from modulus.models.dlwp_healpix_layers import ( + MaxPool, + ) + pooling = 2 size = 16 channels = 4 @@ -270,15 +330,27 @@ def test_MaxPool_forward(device, test_data): assert 
common.compare_output(outvar, maxpool_block(invar)) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_AvgPool_initialization(device): +def test_AvgPool_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + AvgPool, + ) + pooling = 2 avgpool_block = AvgPool(pooling=pooling).to(device) assert isinstance(avgpool_block, AvgPool) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_AvgPool_forward(device, test_data): +def test_AvgPool_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + AvgPool, + ) + pooling = 2 size = 32 channels = 4 @@ -295,8 +367,13 @@ def test_AvgPool_forward(device, test_data): assert common.compare_output(outvar, avgpool_block(invar)) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_TransposedConvUpsample_initialization(device): +def test_TransposedConvUpsample_initialization(device, pytestconfig): + from modulus.models.dlwp_healpix_layers import ( + TransposedConvUpsample, # + ) + transposed_conv_upsample_block = TransposedConvUpsample().to(device) assert isinstance(transposed_conv_upsample_block, TransposedConvUpsample) @@ -306,8 +383,14 @@ def test_TransposedConvUpsample_initialization(device): assert isinstance(transposed_conv_upsample_block, TransposedConvUpsample) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_TransposedConvUpsample_forward(device, test_data): +def test_TransposedConvUpsample_forward(device, test_data, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + TransposedConvUpsample, + ) + in_channels = 2 out_channels = 1 size = 16 @@ -332,16 +415,27 @@ def test_TransposedConvUpsample_forward(device, test_data): assert outvar.shape == outsize +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_Interpolate_initialization(device): +def test_Interpolate_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + Interpolate, + ) + scale = 2 mode = "linear" interpolation_block = Interpolate(scale_factor=scale, mode=mode).to(device) assert isinstance(interpolation_block, Interpolate) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_Interpolate_forward(device): +def test_Interpolate_forward(device, pytestconfig): + from modulus.models.dlwp_healpix_layers import ( + Interpolate, + ) + scale = 2 mode = "linear" interpolation_block = Interpolate(scale_factor=scale, mode=mode).to(device) diff --git a/test/models/dlwp_healpix/test_healpix_encoder_decoder.py b/test/models/dlwp_healpix/test_healpix_encoder_decoder.py index ffa27576b5..430afc88f6 100644 --- a/test/models/dlwp_healpix/test_healpix_encoder_decoder.py +++ b/test/models/dlwp_healpix/test_healpix_encoder_decoder.py @@ -23,20 +23,19 @@ import common import pytest import torch - -from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, # for the output layer - ConvGRUBlock, # for the recurrent layer - ConvNeXtBlock, # for convolutional layer - MaxPool, # for downsampling - TransposedConvUpsample, # for upsampling - UNetDecoder, - UNetEncoder, -) +from pytest_utils import import_or_fail +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_initialize(device): +def test_UNetEncoder_initialize(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvNeXtBlock, # 
for convolutional layer + MaxPool, # for downsampling + UNetEncoder, + ) + channels = 2 n_channels = (16, 32, 64) @@ -72,8 +71,16 @@ def test_UNetEncoder_initialize(device): torch.cuda.empty_cache() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_forward(device): +def test_UNetEncoder_forward(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvNeXtBlock, # for convolutional layer + MaxPool, # for downsampling + UNetEncoder, + ) + channels = 2 hw_size = 16 b_size = 12 @@ -114,8 +121,16 @@ def test_UNetEncoder_forward(device): torch.cuda.empty_cache() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_reset(device): +def test_UNetEncoder_reset(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + ConvNeXtBlock, # for convolutional layer + MaxPool, # for downsampling + UNetEncoder, + ) + channels = 2 n_channels = (16, 32, 64) @@ -144,8 +159,18 @@ def test_UNetEncoder_reset(device): torch.cuda.empty_cache() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_initilization(device): +def test_UNetDecoder_initilization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, # for the output layer + ConvGRUBlock, # for the recurrent layer + ConvNeXtBlock, # for convolutional layer + TransposedConvUpsample, # for upsampling + UNetDecoder, + ) + in_channels = 2 out_channels = 1 n_channels = (64, 32, 16) @@ -203,8 +228,18 @@ def test_UNetDecoder_initilization(device): torch.cuda.empty_cache() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_forward(device): +def test_UNetDecoder_forward(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, # for the output layer + ConvGRUBlock, # for the recurrent layer + ConvNeXtBlock, # for convolutional layer + TransposedConvUpsample, # for upsampling + UNetDecoder, + ) + in_channels = 2 out_channels = 1 hw_size = 32 @@ -281,8 +316,18 @@ def test_UNetDecoder_forward(device): torch.cuda.empty_cache() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_reset(device): +def test_UNetDecoder_reset(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, # for the output layer + ConvGRUBlock, # for the recurrent layer + ConvNeXtBlock, # for convolutional layer + TransposedConvUpsample, # for upsampling + UNetDecoder, + ) + in_channels = 2 out_channels = 1 hw_size = 32 diff --git a/test/models/dlwp_healpix/test_healpix_layers.py b/test/models/dlwp_healpix/test_healpix_layers.py index fba2b3468b..d3a53d035f 100644 --- a/test/models/dlwp_healpix/test_healpix_layers.py +++ b/test/models/dlwp_healpix/test_healpix_layers.py @@ -24,13 +24,7 @@ import numpy as np import pytest import torch - -from modulus.models.dlwp_healpix_layers import ( - HEALPixFoldFaces, - HEALPixLayer, - HEALPixPadding, - HEALPixUnfoldFaces, -) +from pytest_utils import import_or_fail class MulX(torch.nn.Module): @@ -44,14 +38,26 @@ def forward(self, x): return x * self.multiplier +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixFoldFaces_initialization(device): +def test_HEALPixFoldFaces_initialization(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixFoldFaces, + ) + fold_func = HEALPixFoldFaces() 
assert isinstance(fold_func, HEALPixFoldFaces) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixFoldFaces_forward(device): +def test_HEALPixFoldFaces_forward(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixFoldFaces, + ) + fold_func = HEALPixFoldFaces() tensor_size = torch.randint(low=2, high=4, size=(5,)).tolist() @@ -66,14 +72,25 @@ def test_HEALPixFoldFaces_forward(device): assert fold_func(invar).stride() != outvar.stride() +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixUnfoldFaces_initialization(device): +def test_HEALPixUnfoldFaces_initialization(device, pytestconfig): + from modulus.models.dlwp_healpix_layers import ( + HEALPixUnfoldFaces, + ) + unfold_func = HEALPixUnfoldFaces() assert isinstance(unfold_func, HEALPixUnfoldFaces) +@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixUnfoldFaces_forward(device): +def test_HEALPixUnfoldFaces_forward(device, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixUnfoldFaces, + ) + num_faces = 12 unfold_func = HEALPixUnfoldFaces() @@ -98,14 +115,26 @@ def test_HEALPixUnfoldFaces_forward(device): ] +@import_or_fail("hydra") @pytest.mark.parametrize("device,padding", HEALPixPadding_testdata) -def test_HEALPixPadding_initialization(device, padding): +def test_HEALPixPadding_initialization(device, padding, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixPadding, + ) + pad_func = HEALPixPadding(padding) assert isinstance(pad_func, HEALPixPadding) +@import_or_fail("hydra") @pytest.mark.parametrize("device,padding", HEALPixPadding_testdata) -def test_HEALPixPadding_forward(device, padding): +def test_HEALPixPadding_forward(device, padding, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixPadding, + ) + num_faces = 12 # standard for healpix batch_size = 2 pad_func = HEALPixPadding(padding) @@ -144,14 +173,25 @@ def test_HEALPixPadding_forward(device, padding): ] +@import_or_fail("hydra") @pytest.mark.parametrize("device,multiplier", HEALPixLayer_testdata) -def test_HEALPixLayer_initialization(device, multiplier): +def test_HEALPixLayer_initialization(device, multiplier, pytestconfig): + from modulus.models.dlwp_healpix_layers import ( + HEALPixLayer, + ) + layer = HEALPixLayer(layer=MulX, multiplier=multiplier) assert isinstance(layer, HEALPixLayer) +@import_or_fail("hydra") @pytest.mark.parametrize("device,multiplier", HEALPixLayer_testdata) -def test_HEALPixLayer_forward(device, multiplier): +def test_HEALPixLayer_forward(device, multiplier, pytestconfig): + + from modulus.models.dlwp_healpix_layers import ( + HEALPixLayer, + ) + layer = HEALPixLayer(layer=MulX, multiplier=multiplier) kernel_size = 3 diff --git a/test/models/meshgraphnet/test_bsms_mgn.py b/test/models/meshgraphnet/test_bsms_mgn.py index d66369ef8a..78c96e5fb0 100644 --- a/test/models/meshgraphnet/test_bsms_mgn.py +++ b/test/models/meshgraphnet/test_bsms_mgn.py @@ -14,13 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import dgl import pytest import torch from models.common import validate_forward_accuracy -from pytest_utils import import_or_fail +from pytest_utils import import_or_fail, nfsdata_or_fail -from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet +dgl = pytest.importorskip("dgl") @pytest.fixture @@ -29,10 +28,11 @@ def ahmed_data_dir(): return path -@import_or_fail("sparse_dot_mkl") +@import_or_fail(["sparse_dot_mkl", "dgl"]) @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) def test_bsms_mgn_forward(pytestconfig, device): from modulus.datapipes.gnn.bsms import BistrideMultiLayerGraphDataset + from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet torch.manual_seed(1) @@ -89,10 +89,12 @@ def test_bsms_mgn_forward(pytestconfig, device): ) -@import_or_fail("sparse_dot_mkl") +@nfsdata_or_fail +@import_or_fail(["sparse_dot_mkl", "dgl"]) def test_bsms_mgn_ahmed(pytestconfig, ahmed_data_dir): from modulus.datapipes.gnn.ahmed_body_dataset import AhmedBodyDataset from modulus.datapipes.gnn.bsms import BistrideMultiLayerGraphDataset + from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet device = torch.device("cuda:0") diff --git a/test/models/meshgraphnet/test_meshgraphnet_snmg.py b/test/models/meshgraphnet/test_meshgraphnet_snmg.py index 9330fb0da1..f12aaea011 100644 --- a/test/models/meshgraphnet/test_meshgraphnet_snmg.py +++ b/test/models/meshgraphnet/test_meshgraphnet_snmg.py @@ -27,16 +27,16 @@ from pytest_utils import import_or_fail from modulus.distributed import DistributedManager, mark_module_as_shared -from modulus.models.gnn_layers import ( - partition_graph_by_coordinate_bbox, - partition_graph_nodewise, - partition_graph_with_id_mapping, -) torch.backends.cuda.matmul.allow_tf32 = False def run_test_distributed_meshgraphnet(rank, world_size, dtype, partition_scheme): + from modulus.models.gnn_layers import ( + partition_graph_by_coordinate_bbox, + partition_graph_nodewise, + partition_graph_with_id_mapping, + ) from modulus.models.gnn_layers.utils import CuGraphCSC from modulus.models.meshgraphnet.meshgraphnet import MeshGraphNet diff --git a/test/models/test_distributed_graph.py b/test/models/test_distributed_graph.py index b3006772a0..a0f672da7e 100644 --- a/test/models/test_distributed_graph.py +++ b/test/models/test_distributed_graph.py @@ -18,13 +18,9 @@ import pytest import torch +from pytest_utils import import_or_fail from modulus.distributed import DistributedManager -from modulus.models.gnn_layers import ( - DistributedGraph, - partition_graph_by_coordinate_bbox, -) -from modulus.models.graphcast.graph_cast_net import get_lat_lon_partition_separators def get_random_graph(device): @@ -131,6 +127,13 @@ def run_test_distributed_graph( partition_scheme: str, use_torchrun: bool = False, ): + + from modulus.models.gnn_layers import ( + DistributedGraph, + partition_graph_by_coordinate_bbox, + ) + from modulus.models.graphcast.graph_cast_net import get_lat_lon_partition_separators + if not use_torchrun: os.environ["RANK"] = f"{rank}" os.environ["WORLD_SIZE"] = f"{world_size}" @@ -338,9 +341,11 @@ def run_test_distributed_graph( del os.environ["MASTER_PORT"] +@import_or_fail("dgl") @pytest.mark.multigpu @pytest.mark.parametrize("partition_scheme", ["lat_lon_bbox", "default"]) -def test_distributed_graph(partition_scheme): +def test_distributed_graph(partition_scheme, pytestconfig): + num_gpus = torch.cuda.device_count() assert num_gpus >= 2, "Not enough GPUs available for test" world_size = 2 # num_gpus @@ -360,6 +365,7 @@ def 
test_distributed_graph(partition_scheme): if __name__ == "__main__": + # to be launched with torchrun DistributedManager.initialize() run_test_distributed_graph(-1, -1, "lat_lon_bbox", True) diff --git a/test/models/test_domino.py b/test/models/test_domino.py index ce3744bb04..6697d9891c 100644 --- a/test/models/test_domino.py +++ b/test/models/test_domino.py @@ -20,8 +20,7 @@ import pytest import torch - -from modulus.models.domino.model import DoMINO +from pytest_utils import import_or_fail # from . import common from .common.fwdaccuracy import save_output @@ -58,9 +57,13 @@ def validate_domino( return compare_output(output, output_target, rtol, atol) +@import_or_fail("warp") @pytest.mark.parametrize("device", ["cuda:0"]) -def test_domino_forward(device): +def test_domino_forward(device, pytestconfig): """Test domino forward pass""" + + from modulus.models.domino.model import DoMINO + torch.manual_seed(0) @dataclass diff --git a/test/models/test_graph_partition.py b/test/models/test_graph_partition.py index bd6567248f..675bbdeca5 100644 --- a/test/models/test_graph_partition.py +++ b/test/models/test_graph_partition.py @@ -16,13 +16,7 @@ import pytest import torch - -from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_by_coordinate_bbox, - partition_graph_nodewise, - partition_graph_with_id_mapping, -) +from pytest_utils import import_or_fail @pytest.fixture @@ -88,8 +82,15 @@ def assert_partitions_are_equal(a, b): assert torch.allclose(val_a, val_b), error_msg +@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_mapping(global_graph, device): +def test_gp_mapping(global_graph, device, pytestconfig): + + from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_with_id_mapping, + ) + offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 partition_rank = 0 @@ -134,8 +135,15 @@ def test_gp_mapping(global_graph, device): assert_partitions_are_equal(pg, pg_expected) +@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_nodewise(global_graph, device): +def test_gp_nodewise(global_graph, device, pytestconfig): + + from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_nodewise, + ) + offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 partition_rank = 0 @@ -175,8 +183,15 @@ def test_gp_nodewise(global_graph, device): assert_partitions_are_equal(pg, pg_expected) +@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_matrixdecomp(global_graph_square, device): +def test_gp_matrixdecomp(global_graph_square, device, pytestconfig): + + from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_nodewise, + ) + offsets, indices, num_src_nodes, num_dst_nodes = global_graph_square partition_size = 4 partition_rank = 0 @@ -212,8 +227,15 @@ def test_gp_matrixdecomp(global_graph_square, device): assert_partitions_are_equal(pg, pg_expected) +@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_coordinate_bbox(global_graph, device): +def test_gp_coordinate_bbox(global_graph, device, pytestconfig): + + from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_by_coordinate_bbox, + ) + offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 partition_rank = 0 @@ -279,8 +301,15 @@ def test_gp_coordinate_bbox(global_graph, device): assert_partitions_are_equal(pg, pg_expected) 
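The hunks above and below all apply one pattern: optional heavy imports (dgl, warp, hydra, cftime, ...) move from module scope into the test body, the test gains a `pytestconfig` argument, and an `@import_or_fail(...)` decorator gates execution. The decorator itself lives in `test/pytest_utils.py` and is not part of this patch; the following is only a minimal sketch of the idea, assuming a `--fail-on-missing-modules` command-line option (registered elsewhere in the test suite's conftest) that CI would enable so missing dependencies fail there rather than silently skip:

import importlib.util
from functools import wraps

import pytest


def import_or_fail(module_names):
    """Skip a test whose optional dependencies are absent; fail when strict."""
    if isinstance(module_names, str):
        module_names = [module_names]

    def decorator(test_func):
        @wraps(test_func)
        def wrapper(*args, **kwargs):
            # The decorated test must request the `pytestconfig` fixture so
            # the wrapper can inspect command-line options.
            pytestconfig = kwargs.get("pytestconfig")
            strict = bool(
                pytestconfig is not None
                and pytestconfig.getoption("--fail-on-missing-modules", default=False)
            )
            for name in module_names:
                # find_spec returns None when the module cannot be imported.
                if importlib.util.find_spec(name) is None:
                    msg = f"optional dependency '{name}' is not installed"
                    if strict:
                        pytest.fail(msg)
                    pytest.skip(msg)
            return test_func(*args, **kwargs)

        return wrapper

    return decorator

Usage then mirrors the hunks here: `@import_or_fail(["sparse_dot_mkl", "dgl"])` stacks with `@pytest.mark.parametrize`, and the gated modules are imported lazily inside the test body.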
+@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_coordinate_bbox_lat_long(global_graph, device): +def test_gp_coordinate_bbox_lat_long(global_graph, device, pytestconfig): + + from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_by_coordinate_bbox, + ) + offsets, indices, num_src_nodes, num_dst_nodes = global_graph src_lat = torch.FloatTensor([-75, -60, -45, -30, 30, 45, 60, 75]).view(-1, 1) dst_lat = torch.FloatTensor([-60, -30, 30, 30]).view(-1, 1) diff --git a/test/utils/corrdiff/test_generation_steps.py b/test/utils/corrdiff/test_generation_steps.py index 012b234e09..e50a3f47b0 100644 --- a/test/utils/corrdiff/test_generation_steps.py +++ b/test/utils/corrdiff/test_generation_steps.py @@ -18,14 +18,16 @@ import pytest import torch - -from modulus.models.diffusion import EDMPrecondSR, UNet -from modulus.utils.corrdiff import diffusion_step, regression_step -from modulus.utils.generative import deterministic_sampler, stochastic_sampler +from pytest_utils import import_or_fail +@import_or_fail("cftime") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_regression_step(device): +def test_regression_step(device, pytestconfig): + + from modulus.models.diffusion import UNet + from modulus.utils.corrdiff import regression_step + # define the net mock_unet = UNet( img_channels=2, @@ -47,8 +49,14 @@ def test_regression_step(device): assert output.shape == (2, 2, 16, 16), "Output shape mismatch" +@import_or_fail("cftime") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_diffusion_step(device): +def test_diffusion_step(device, pytestconfig): + + from modulus.models.diffusion import EDMPrecondSR + from modulus.utils.corrdiff import diffusion_step + from modulus.utils.generative import deterministic_sampler, stochastic_sampler + # Define the preconditioner mock_precond = EDMPrecondSR( img_resolution=[16, 16], diff --git a/test/utils/corrdiff/test_netcdf_writer.py b/test/utils/corrdiff/test_netcdf_writer.py index f775fdc9eb..b0f4b6c0d5 100644 --- a/test/utils/corrdiff/test_netcdf_writer.py +++ b/test/utils/corrdiff/test_netcdf_writer.py @@ -20,8 +20,7 @@ import numpy as np import pytest - -from modulus.utils.corrdiff import NetCDFWriter +from pytest_utils import import_or_fail @pytest.fixture @@ -43,7 +42,11 @@ def mock_ncfile(): return mock_file -def test_init(mock_ncfile): +@import_or_fail("cftime") +def test_init(mock_ncfile, pytestconfig): + + from modulus.utils.corrdiff import NetCDFWriter + lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -86,7 +89,11 @@ def test_init(mock_ncfile): ) -def test_write_input(mock_ncfile): +@import_or_fail("cftime") +def test_write_input(mock_ncfile, pytestconfig): + + from modulus.utils.corrdiff import NetCDFWriter + lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -106,7 +113,11 @@ def test_write_input(mock_ncfile): mock_ncfile["input"][channel_name].__setitem__.assert_called_with(time_index, val) -def test_write_truth(mock_ncfile): +@import_or_fail("cftime") +def test_write_truth(mock_ncfile, pytestconfig): + + from modulus.utils.corrdiff import NetCDFWriter + lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -126,7 +137,11 @@ def test_write_truth(mock_ncfile): mock_ncfile["truth"][channel_name].__setitem__.assert_called_with(time_index, val) -def test_write_prediction(mock_ncfile): 
+@import_or_fail("cftime") +def test_write_prediction(mock_ncfile, pytestconfig): + + from modulus.utils.corrdiff import NetCDFWriter + lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -149,7 +164,11 @@ def test_write_prediction(mock_ncfile): ) -def test_write_time(mock_ncfile): +@import_or_fail("cftime") +def test_write_time(mock_ncfile, pytestconfig): + + from modulus.utils.corrdiff import NetCDFWriter + lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] diff --git a/test/utils/corrdiff/test_time_range.py b/test/utils/corrdiff/test_time_range.py index 83e1913f47..5b4b6ba414 100644 --- a/test/utils/corrdiff/test_time_range.py +++ b/test/utils/corrdiff/test_time_range.py @@ -14,17 +14,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -from modulus.utils.corrdiff import get_time_from_range +from pytest_utils import import_or_fail -def test_default_interval(): +@import_or_fail("cftime") +def test_default_interval(pytestconfig): + + from modulus.utils.corrdiff import get_time_from_range + times_range = ["2024-01-01T00:00:00", "2024-01-01T01:00:00"] expected = ["2024-01-01T00:00:00", "2024-01-01T01:00:00"] result = get_time_from_range(times_range) assert result == expected -def test_hourly_interval(): +@import_or_fail("cftime") +def test_hourly_interval(pytestconfig): + + from modulus.utils.corrdiff import get_time_from_range + times_range = ["2024-01-01T00:00:00", "2024-01-01T03:00:00", 1] expected = [ "2024-01-01T00:00:00", @@ -36,21 +44,33 @@ def test_hourly_interval(): assert result == expected -def test_custom_interval(): +@import_or_fail("cftime") +def test_custom_interval(pytestconfig): + + from modulus.utils.corrdiff import get_time_from_range + times_range = ["2024-01-01T00:00:00", "2024-01-01T03:00:00", 2] expected = ["2024-01-01T00:00:00", "2024-01-01T02:00:00"] result = get_time_from_range(times_range) assert result == expected -def test_no_interval_provided(): +@import_or_fail("cftime") +def test_no_interval_provided(pytestconfig): + + from modulus.utils.corrdiff import get_time_from_range + times_range = ["2024-01-01T00:00:00", "2024-01-01T02:00:00"] expected = ["2024-01-01T00:00:00", "2024-01-01T01:00:00", "2024-01-01T02:00:00"] result = get_time_from_range(times_range) assert result == expected -def test_same_start_end_time(): +@import_or_fail("cftime") +def test_same_start_end_time(pytestconfig): + + from modulus.utils.corrdiff import get_time_from_range + times_range = ["2024-01-01T00:00:00", "2024-01-01T00:00:00"] expected = ["2024-01-01T00:00:00"] result = get_time_from_range(times_range) diff --git a/test/utils/generative/test_deterministic_sampler.py b/test/utils/generative/test_deterministic_sampler.py index 6cb886225c..d68733c005 100644 --- a/test/utils/generative/test_deterministic_sampler.py +++ b/test/utils/generative/test_deterministic_sampler.py @@ -17,8 +17,7 @@ import pytest import torch - -from modulus.utils.generative import deterministic_sampler +from pytest_utils import import_or_fail # Mock a minimal net class for testing @@ -41,7 +40,11 @@ def mock_net(): # Basic functionality test -def test_deterministic_sampler_output_type_and_shape(mock_net): +@import_or_fail("cftime") +def test_deterministic_sampler_output_type_and_shape(mock_net, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = 
deterministic_sampler(net=mock_net, latents=latents, img_lr=img_lr) @@ -50,8 +53,12 @@ def test_deterministic_sampler_output_type_and_shape(mock_net): # Test for parameter validation +@import_or_fail("cftime") @pytest.mark.parametrize("solver", ["invalid_solver", "euler", "heun"]) -def test_deterministic_sampler_solver_validation(mock_net, solver): +def test_deterministic_sampler_solver_validation(mock_net, solver, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + if solver == "invalid_solver": with pytest.raises(ValueError): deterministic_sampler( @@ -71,7 +78,11 @@ def test_deterministic_sampler_solver_validation(mock_net, solver): # Test for edge cases -def test_deterministic_sampler_edge_cases(mock_net): +@import_or_fail("cftime") +def test_deterministic_sampler_edge_cases(mock_net, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) # Test with extreme rho values, zero noise levels, etc. @@ -82,8 +93,12 @@ def test_deterministic_sampler_edge_cases(mock_net): # Test discretization +@import_or_fail("cftime") @pytest.mark.parametrize("discretization", ["vp", "ve", "iddpm", "edm"]) -def test_deterministic_sampler_discretization(mock_net, discretization): +def test_deterministic_sampler_discretization(mock_net, discretization, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -93,8 +108,12 @@ def test_deterministic_sampler_discretization(mock_net, discretization): # Test schedule +@import_or_fail("cftime") @pytest.mark.parametrize("schedule", ["vp", "ve", "linear"]) -def test_deterministic_sampler_schedule(mock_net, schedule): +def test_deterministic_sampler_schedule(mock_net, schedule, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -104,8 +123,12 @@ def test_deterministic_sampler_schedule(mock_net, schedule): # Test number of steps +@import_or_fail("cftime") @pytest.mark.parametrize("num_steps", [1, 5, 18]) -def test_deterministic_sampler_num_steps(mock_net, num_steps): +def test_deterministic_sampler_num_steps(mock_net, num_steps, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -115,8 +138,14 @@ def test_deterministic_sampler_num_steps(mock_net, num_steps): # Test sigma +@import_or_fail("cftime") @pytest.mark.parametrize("sigma_min, sigma_max", [(0.001, 0.01), (1.0, 1.5)]) -def test_deterministic_sampler_sigma_boundaries(mock_net, sigma_min, sigma_max): +def test_deterministic_sampler_sigma_boundaries( + mock_net, sigma_min, sigma_max, pytestconfig +): + + from modulus.utils.generative import deterministic_sampler + latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -130,8 +159,12 @@ def test_deterministic_sampler_sigma_boundaries(mock_net, sigma_min, sigma_max): # Test error handling +@import_or_fail("cftime") @pytest.mark.parametrize("scaling", ["invalid_scaling", "vp", "none"]) -def test_deterministic_sampler_scaling_validation(mock_net, scaling): +def test_deterministic_sampler_scaling_validation(mock_net, scaling, pytestconfig): + + from modulus.utils.generative import deterministic_sampler + 
latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) if scaling == "invalid_scaling": diff --git a/test/utils/generative/test_format_time.py b/test/utils/generative/test_format_time.py index 241c9876bc..e16b599770 100644 --- a/test/utils/generative/test_format_time.py +++ b/test/utils/generative/test_format_time.py @@ -15,11 +15,15 @@ # limitations under the License. -from modulus.utils.generative import format_time, format_time_brief +from pytest_utils import import_or_fail # Test format_time function -def test_format_time(): +@import_or_fail("cftime") +def test_format_time(pytestconfig): + + from modulus.utils.generative import format_time + assert format_time(59) == "59s" assert format_time(60) == "1m 00s" assert format_time(3599) == "59m 59s" @@ -31,7 +35,11 @@ def test_format_time(): # Test format_time_brief function -def test_format_time_brief(): +@import_or_fail("cftime") +def test_format_time_brief(pytestconfig): + + from modulus.utils.generative import format_time_brief + assert format_time_brief(59) == "59s" assert format_time_brief(60) == "1m 00s" assert format_time_brief(3600) == "1h 00m" diff --git a/test/utils/generative/test_parse_int_list.py b/test/utils/generative/test_parse_int_list.py index 5c11536976..d98040e4a9 100644 --- a/test/utils/generative/test_parse_int_list.py +++ b/test/utils/generative/test_parse_int_list.py @@ -14,10 +14,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -from modulus.utils.generative import parse_int_list +from pytest_utils import import_or_fail -def test_parse_int_list(): +@import_or_fail("cftime") +def test_parse_int_list(pytestconfig): + + from modulus.utils.generative import parse_int_list + # Test parsing a simple comma-separated list input_str = "1,2,5,7,10" expected_result = [1, 2, 5, 7, 10] diff --git a/test/utils/generative/test_parse_time.py b/test/utils/generative/test_parse_time.py index 11c1cae3b8..0ee8f21f25 100644 --- a/test/utils/generative/test_parse_time.py +++ b/test/utils/generative/test_parse_time.py @@ -20,8 +20,6 @@ import yaml from pytest_utils import import_or_fail -from modulus.utils.generative import convert_datetime_to_cftime - cftime = pytest.importorskip("cftime") # ruff: noqa: S101 # TODo remove exception @@ -38,6 +36,9 @@ def test_datetime_yaml(): @import_or_fail("cftime") def test_convert_to_cftime(pytestconfig): """test parse time""" + + from modulus.utils.generative import convert_datetime_to_cftime + dt = datetime.datetime(2011, 1, 1) expected = cftime.DatetimeGregorian(2011, 1, 1) assert convert_datetime_to_cftime(dt) == expected diff --git a/test/utils/generative/test_stochastic_sampler.py b/test/utils/generative/test_stochastic_sampler.py index 290d56406a..5f07686f71 100644 --- a/test/utils/generative/test_stochastic_sampler.py +++ b/test/utils/generative/test_stochastic_sampler.py @@ -17,10 +17,9 @@ from typing import Optional import torch +from pytest_utils import import_or_fail from torch import Tensor -from modulus.utils.generative import image_batching, image_fuse, stochastic_sampler - # Mock network class class MockNet: @@ -44,7 +43,11 @@ def __call__( # The test function for edm_sampler -def test_stochastic_sampler(): +@import_or_fail("cftime") +def test_stochastic_sampler(pytestconfig): + + from modulus.utils.generative import stochastic_sampler + net = MockNet() latents = torch.randn(2, 3, 448, 448) # Mock latents img_lr = torch.randn(2, 3, 112, 112) # Mock low-res image @@ -121,7 +124,11 @@ def 
test_stochastic_sampler(): ), "Churn output shape does not match expected shape" -def test_image_fuse_basic(): +@import_or_fail("cftime") +def test_image_fuse_basic(pytestconfig): + + from modulus.utils.generative import image_fuse + # Basic test: No overlap, no boundary, one patch batch_size = 1 img_shape_x = img_shape_y = 4 @@ -147,7 +154,11 @@ def test_image_fuse_basic(): ), "Output does not match expected output." -def test_image_fuse_with_boundary(): +@import_or_fail("cftime") +def test_image_fuse_with_boundary(pytestconfig): + + from modulus.utils.generative import image_fuse + # Test with boundary pixels batch_size = 1 img_shape_x = img_shape_y = 4 @@ -175,7 +186,11 @@ def test_image_fuse_with_boundary(): ), "Output with boundary does not match expected output." -def test_image_fuse_with_multiple_batches(): +@import_or_fail("cftime") +def test_image_fuse_with_multiple_batches(pytestconfig): + + from modulus.utils.generative import image_fuse + # Test with multiple batches batch_size = 2 img_shape_x = img_shape_y = 4 @@ -220,7 +235,11 @@ def test_image_fuse_with_multiple_batches(): ), "Output for multiple batches does not match expected output." -def test_image_batching_basic(): +@import_or_fail("cftime") +def test_image_batching_basic(pytestconfig): + + from modulus.utils.generative import image_batching + # Test with no overlap, no boundary, no input_interp batch_size = 1 img_shape_x = img_shape_y = 4 @@ -246,8 +265,12 @@ def test_image_batching_basic(): ), "Batched images do not match expected output." -def test_image_batching_with_boundary(): +@import_or_fail("cftime") +def test_image_batching_with_boundary(pytestconfig): # Test with boundary pixels, no overlap, no input_interp + + from modulus.utils.generative import image_batching + batch_size = 1 img_shape_x = img_shape_y = 4 patch_shape_x = patch_shape_y = 6 @@ -272,8 +295,12 @@ def test_image_batching_with_boundary(): ), "Batched images with boundary do not match expected output." -def test_image_batching_with_input_interp(): +@import_or_fail("cftime") +def test_image_batching_with_input_interp(pytestconfig): # Test with input_interp tensor + + from modulus.utils.generative import image_batching + batch_size = 1 img_shape_x = img_shape_y = 4 patch_shape_x = patch_shape_y = 4 diff --git a/test/utils/generative/test_tuple_product.py b/test/utils/generative/test_tuple_product.py index 70e4df5964..041054bc3c 100644 --- a/test/utils/generative/test_tuple_product.py +++ b/test/utils/generative/test_tuple_product.py @@ -14,12 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- -from modulus.utils.generative import tuple_product +from pytest_utils import import_or_fail # Test tuple_product function -def test_tuple_product(): +@import_or_fail("cftime") +def test_tuple_product(pytestconfig): + + from modulus.utils.generative import tuple_product + # Test with an empty tuple assert tuple_product(()) == 1 diff --git a/test/utils/test_mesh_utils.py b/test/utils/test_mesh_utils.py index 6aefb5a59f..8127b065cc 100644 --- a/test/utils/test_mesh_utils.py +++ b/test/utils/test_mesh_utils.py @@ -21,14 +21,8 @@ import numpy as np import pytest from pytest_utils import import_or_fail -from stl import mesh -from modulus.utils.mesh import ( - combine_vtp_files, - convert_tesselated_files_in_directory, - sdf_to_stl, -) -from modulus.utils.sdf import signed_distance_field +stl = pytest.importorskip("stl") @pytest.fixture @@ -48,12 +42,17 @@ def download_stl(tmp_path): return file_path -@import_or_fail(["vtk"]) +@import_or_fail(["vtk", "warp"]) def test_mesh_utils(tmp_path, pytestconfig): """Tests the utility for combining VTP files and converting tesselated files.""" import vtk + from modulus.utils.mesh import ( + combine_vtp_files, + convert_tesselated_files_in_directory, + ) + def _create_random_vtp_mesh(num_points: int, num_triangles: int, dir: str) -> tuple: """ Create a random VTP (VTK PolyData) mesh with triangles. @@ -181,6 +180,13 @@ def _create_random_obj_mesh(num_vertices: int, num_faces: int, dir: str) -> None @pytest.mark.parametrize("backend", ["warp", "skimage"]) def test_stl_gen(pytestconfig, backend, download_stl, tmp_path): + from stl import mesh + + from modulus.utils.mesh import ( + sdf_to_stl, + ) + from modulus.utils.sdf import signed_distance_field + bunny_mesh = mesh.Mesh.from_file(str(download_stl)) vertices = np.array(bunny_mesh.vectors, dtype=np.float64) diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py index 4e08673406..127b868336 100644 --- a/test/utils/test_sdf.py +++ b/test/utils/test_sdf.py @@ -19,8 +19,6 @@ import numpy as np from pytest_utils import import_or_fail -from modulus.utils.sdf import signed_distance_field - def tet_verts(flip_x=1): tet = np.array( @@ -71,6 +69,8 @@ def tet_verts(flip_x=1): @import_or_fail("warp") def test_sdf(pytestconfig): + from modulus.utils.sdf import signed_distance_field + tet = tet_verts() sdf_tet = signed_distance_field( From 99decdd5952e7fa2bb9cb892d3849d296566db0e Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 5 Feb 2025 09:28:53 -0800 Subject: [PATCH 7/8] Revert "Merge branch 'main' into lagrangian-mgn" This reverts commit 3b4f41d432b02329ecb6de230bc3db2b5648237a. 
--- CHANGELOG.md | 7 +- CONTRIBUTING.md | 9 - Dockerfile | 2 +- .../train_transolver_darcy.py | 3 +- .../xaeronet/surface/train.py | 2 +- .../xaeronet/volume/train.py | 2 +- examples/cfd/mhd_pino/train_mhd.py | 2 +- examples/cfd/mhd_pino/train_mhd_vec_pot.py | 2 +- .../cfd/mhd_pino/train_mhd_vec_pot_tfno.py | 2 +- examples/cfd/stokes_mgn/pi_fine_tuning.py | 2 +- examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py | 2 +- examples/cfd/stokes_mgn/train.py | 2 +- .../test_sequence.py | 2 +- .../cfd/vortex_shedding_mesh_reduced/train.py | 2 +- .../train_sequence.py | 2 +- examples/cfd/vortex_shedding_mgn/train.py | 2 +- examples/healthcare/bloodflow_1d_mgn/train.py | 2 +- .../flood_modeling/hydrographnet/README.md | 3 - examples/weather/graphcast/train_graphcast.py | 2 +- modulus/launch/logging/__init__.py | 2 + modulus/launch/logging/launch.py | 21 +-- pyproject.toml | 23 ++- test/datapipes/test_bsms.py | 3 +- test/datapipes/test_healpix.py | 65 ++------ test/datapipes/test_healpix_couple.py | 79 ++------- test/datapipes/test_lagrangian.py | 4 +- test/datapipes/test_mesh_datapipe.py | 5 +- test/datapipes/test_synthetic.py | 30 +--- test/metrics/test_metrics_cfd.py | 3 +- test/metrics/test_metrics_integral.py | 3 +- test/models/diffusion/test_preconditioning.py | 8 +- .../dlwp_healpix/test_healpix_blocks.py | 154 ++++-------------- .../test_healpix_encoder_decoder.py | 77 ++------- .../dlwp_healpix/test_healpix_layers.py | 70 ++------ test/models/meshgraphnet/test_bsms_mgn.py | 12 +- .../meshgraphnet/test_meshgraphnet_snmg.py | 10 +- test/models/test_distributed_graph.py | 18 +- test/models/test_domino.py | 9 +- test/models/test_graph_partition.py | 53 ++---- test/utils/corrdiff/test_generation_steps.py | 20 +-- test/utils/corrdiff/test_netcdf_writer.py | 33 +--- test/utils/corrdiff/test_time_range.py | 32 +--- .../generative/test_deterministic_sampler.py | 53 ++---- test/utils/generative/test_format_time.py | 14 +- test/utils/generative/test_parse_int_list.py | 8 +- test/utils/generative/test_parse_time.py | 5 +- .../generative/test_stochastic_sampler.py | 45 +---- test/utils/generative/test_tuple_product.py | 9 +- test/utils/test_mesh_utils.py | 22 +-- test/utils/test_sdf.py | 4 +- 50 files changed, 227 insertions(+), 719 deletions(-) delete mode 100644 examples/weather/flood_modeling/hydrographnet/README.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dc66017e7..6d2c16d48d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Enhancements and bug fixes to DoMINO model and training example - Enhancement to parameterize DoMINO model with inlet velocity - Moved non-dimensionaliztion out of domino datapipe to datapipe in domino example -- Updated utils in `modulus.launch.logging` to avoid unnecessary `wandb` and `mlflow` imports ### Deprecated @@ -29,15 +28,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -- Update pytests to skip when the required dependencies are not present - ### Security ### Dependencies -- Remove the numpy dependency upper bound -- Moved pytz and nvtx to optional -- Update the base image for the Dockerfile +- Remove the numpy dependency upper bound. - Introduce Multi-Storage Client (MSC) as an optional dependency. 
## [0.9.0] - 2024-12-04 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9526743d0..12e065a702 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -214,15 +214,6 @@ The pipeline has following stages: test, you will have to review your changes and fix the issues. To run pytest locally you can simply run `pytest` inside the `test` folder. - While writing these tests, we encourage you to make use of the - [`@nfs_data_or_fail`](https://github.com/NVIDIA/modulus/blob/main/test/pytest_utils.py#L92) - and the [`@import_of_fail`](https://github.com/NVIDIA/modulus/blob/main/test/pytest_utils.py#L25) - decorators to appropriately skip your tests for developers and users not having your - test specific datasets and dependencies respectively. The CI has these datasets and - dependencies so your tests will get executed during CI. - This mechanism helps us provide a better developer and user experience - when working with the unit tests. - 6. `doctest` Checks if the examples in the docstrings run and produce desired outputs. It is highly recommended that you provide simple examples of your functions/classes diff --git a/Dockerfile b/Dockerfile index 10b3c554f5..b7067a9e56 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:25.01-py3 +ARG BASE_CONTAINER=nvcr.io/nvidia/pytorch:24.11-py3 FROM ${BASE_CONTAINER} as builder ARG TARGETPLATFORM diff --git a/examples/cfd/darcy_transolver/train_transolver_darcy.py b/examples/cfd/darcy_transolver/train_transolver_darcy.py index 32bbdfdf1e..10525f5368 100644 --- a/examples/cfd/darcy_transolver/train_transolver_darcy.py +++ b/examples/cfd/darcy_transolver/train_transolver_darcy.py @@ -27,8 +27,7 @@ from modulus.distributed import DistributedManager from modulus.utils import StaticCaptureTraining, StaticCaptureEvaluateNoGrad from modulus.launch.utils import load_checkpoint, save_checkpoint -from modulus.launch.logging import PythonLogger, LaunchLogger -from modulus.launch.logging.mlflow import initialize_mlflow +from modulus.launch.logging import PythonLogger, LaunchLogger, initialize_mlflow from validator import GridValidator diff --git a/examples/cfd/external_aerodynamics/xaeronet/surface/train.py b/examples/cfd/external_aerodynamics/xaeronet/surface/train.py index a077eaa81c..19ac0d0bb0 100644 --- a/examples/cfd/external_aerodynamics/xaeronet/surface/train.py +++ b/examples/cfd/external_aerodynamics/xaeronet/surface/train.py @@ -44,7 +44,7 @@ from omegaconf import DictConfig from modulus.distributed import DistributedManager -from modulus.launch.logging.wandb import initialize_wandb +from modulus.launch.logging import initialize_wandb from modulus.models.meshgraphnet import MeshGraphNet # Get the absolute path to the parent directory diff --git a/examples/cfd/external_aerodynamics/xaeronet/volume/train.py b/examples/cfd/external_aerodynamics/xaeronet/volume/train.py index 9fa4e98697..86a05579f6 100644 --- a/examples/cfd/external_aerodynamics/xaeronet/volume/train.py +++ b/examples/cfd/external_aerodynamics/xaeronet/volume/train.py @@ -35,7 +35,7 @@ import numpy as np import torch.optim as optim import matplotlib.pyplot as plt -from modulus.launch.logging.wandb import initialize_wandb +from modulus.launch.logging import initialize_wandb import json import wandb as wb import hydra diff --git a/examples/cfd/mhd_pino/train_mhd.py b/examples/cfd/mhd_pino/train_mhd.py index 3330c8523c..d3a0737788 100644 --- 
a/examples/cfd/mhd_pino/train_mhd.py +++ b/examples/cfd/mhd_pino/train_mhd.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHD, LossMHD_Modulus diff --git a/examples/cfd/mhd_pino/train_mhd_vec_pot.py b/examples/cfd/mhd_pino/train_mhd_vec_pot.py index d41a53cc7d..88e16ba846 100644 --- a/examples/cfd/mhd_pino/train_mhd_vec_pot.py +++ b/examples/cfd/mhd_pino/train_mhd_vec_pot.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHDVecPot, LossMHDVecPot_Modulus diff --git a/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py b/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py index 613190e1bc..602b97ea51 100644 --- a/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py +++ b/examples/cfd/mhd_pino/train_mhd_vec_pot_tfno.py @@ -31,8 +31,8 @@ from modulus.launch.logging import ( PythonLogger, LaunchLogger, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.sym.hydra import to_absolute_path from losses import LossMHDVecPot, LossMHDVecPot_Modulus diff --git a/examples/cfd/stokes_mgn/pi_fine_tuning.py b/examples/cfd/stokes_mgn/pi_fine_tuning.py index ecdcd96388..645b1ac40e 100644 --- a/examples/cfd/stokes_mgn/pi_fine_tuning.py +++ b/examples/cfd/stokes_mgn/pi_fine_tuning.py @@ -43,8 +43,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.models.mlp.fully_connected import FullyConnected from modulus.sym.eq.pde import PDE from modulus.sym.eq.phy_informer import PhysicsInformer diff --git a/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py b/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py index de28a53c6b..e22ac61dac 100644 --- a/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py +++ b/examples/cfd/stokes_mgn/pi_fine_tuning_gnn.py @@ -43,8 +43,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.models.meshgraphnet import MeshGraphNet from modulus.sym.eq.pde import PDE from modulus.sym.eq.phy_informer import PhysicsInformer diff --git a/examples/cfd/stokes_mgn/train.py b/examples/cfd/stokes_mgn/train.py index 080a021f8f..8a4331575b 100644 --- a/examples/cfd/stokes_mgn/train.py +++ b/examples/cfd/stokes_mgn/train.py @@ -36,8 +36,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.meshgraphnet import MeshGraphNet diff --git a/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py b/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py index 27618759eb..7b3342bcaf 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/test_sequence.py @@ -27,8 +27,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint from modulus.models.mesh_reduced.mesh_reduced import 
Mesh_Reduced from train_sequence import Sequence_Trainer diff --git a/examples/cfd/vortex_shedding_mesh_reduced/train.py b/examples/cfd/vortex_shedding_mesh_reduced/train.py index 90d6c91338..673741a5f0 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/train.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/train.py @@ -32,8 +32,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.mesh_reduced.mesh_reduced import Mesh_Reduced diff --git a/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py b/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py index de8a5a6c70..9358d8ac22 100644 --- a/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py +++ b/examples/cfd/vortex_shedding_mesh_reduced/train_sequence.py @@ -33,8 +33,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.mesh_reduced.mesh_reduced import Mesh_Reduced from modulus.models.mesh_reduced.temporal_model import Sequence_Model diff --git a/examples/cfd/vortex_shedding_mgn/train.py b/examples/cfd/vortex_shedding_mgn/train.py index ab4658307f..39d3c740b7 100644 --- a/examples/cfd/vortex_shedding_mgn/train.py +++ b/examples/cfd/vortex_shedding_mgn/train.py @@ -33,8 +33,8 @@ from modulus.launch.logging import ( PythonLogger, RankZeroLoggingWrapper, + initialize_wandb, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.meshgraphnet import MeshGraphNet diff --git a/examples/healthcare/bloodflow_1d_mgn/train.py b/examples/healthcare/bloodflow_1d_mgn/train.py index a0fdd42d05..5a5f92c9e1 100644 --- a/examples/healthcare/bloodflow_1d_mgn/train.py +++ b/examples/healthcare/bloodflow_1d_mgn/train.py @@ -31,9 +31,9 @@ from modulus.launch.logging import ( PythonLogger, + initialize_wandb, RankZeroLoggingWrapper, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint import json from omegaconf import DictConfig diff --git a/examples/weather/flood_modeling/hydrographnet/README.md b/examples/weather/flood_modeling/hydrographnet/README.md deleted file mode 100644 index 34f6f89243..0000000000 --- a/examples/weather/flood_modeling/hydrographnet/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# HydroGraphNet - -This is a placeholder for the HydroGraphNet model. 
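The logging hunks below (train_graphcast.py and `modulus/launch/logging`) all trade a deferred import for an eager one: the refactor being reverted had moved `import wandb` into the function bodies that use it, so `modulus.launch.logging` could be imported without wandb installed, and the revert restores the module-level import. A minimal sketch of the two styles (illustrative, not the exact Modulus code):

# Eager style, restored by this revert: importing the logging module
# requires wandb to be installed.
import wandb


def log_figure_eager(figure, artifact_file):
    wandb.log({artifact_file: figure})


# Deferred style, as in the reverted refactor: the dependency is only
# needed when the wandb backend is actually exercised.
def log_figure_lazy(figure, artifact_file):
    import wandb  # local import keeps the dependency optional

    wandb.log({artifact_file: figure})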
diff --git a/examples/weather/graphcast/train_graphcast.py b/examples/weather/graphcast/train_graphcast.py index 912c8bdfa8..ec928b80a6 100644 --- a/examples/weather/graphcast/train_graphcast.py +++ b/examples/weather/graphcast/train_graphcast.py @@ -39,9 +39,9 @@ ) from modulus.launch.logging import ( PythonLogger, + initialize_wandb, RankZeroLoggingWrapper, ) -from modulus.launch.logging.wandb import initialize_wandb from modulus.launch.utils import load_checkpoint, save_checkpoint from train_utils import count_trainable_params, prepare_input diff --git a/modulus/launch/logging/__init__.py b/modulus/launch/logging/__init__.py index 5199f0de6f..50466a3678 100644 --- a/modulus/launch/logging/__init__.py +++ b/modulus/launch/logging/__init__.py @@ -16,3 +16,5 @@ from .console import PythonLogger, RankZeroLoggingWrapper from .launch import LaunchLogger +from .mlflow import initialize_mlflow +from .wandb import initialize_wandb diff --git a/modulus/launch/logging/launch.py b/modulus/launch/logging/launch.py index ad8b8aebb2..f49c54498e 100644 --- a/modulus/launch/logging/launch.py +++ b/modulus/launch/logging/launch.py @@ -23,10 +23,12 @@ import torch import torch.cuda.profiler as profiler +import wandb from modulus.distributed import DistributedManager, reduce_loss from .console import PythonLogger +from .wandb import alert class LaunchLogger(object): @@ -131,8 +133,6 @@ def __init__( # Set x axis metric to epoch for this namespace if self.wandb_backend: - import wandb - wandb.define_metric(name_space + "/mini_batch_*", step_metric="iter") wandb.define_metric(name_space + "/*", step_metric="epoch") @@ -284,10 +284,6 @@ def __exit__(self, exc_type, exc_value, exc_tb): and self.epoch % self.epoch_alert_freq == 0 ): if self.wandb_backend: - import wandb - - from .wandb import alert - # TODO: Make this a little more informative? 
alert( title=f"{sys.argv[0]} training progress report", @@ -325,8 +321,6 @@ def _log_backends( # WandB Logging if self.wandb_backend: - import wandb - # For WandB send step in as a metric # Step argument in lod function does not work with multiple log calls at # different intervals @@ -358,8 +352,6 @@ def log_figure( return if self.wandb_backend: - import wandb - wandb.log({artifact_file: figure}) if self.mlflow_backend: @@ -413,12 +405,9 @@ def initialize(use_wandb: bool = False, use_mlflow: bool = False): use_mlflow : bool, optional Use MLFlow logging, by default False """ - if use_wandb: - import wandb - - if wandb.run is None: - PythonLogger().warning("WandB not initialized, turning off") - use_wandb = False + if wandb.run is None and use_wandb: + PythonLogger().warning("WandB not initialized, turning off") + use_wandb = False if use_wandb: LaunchLogger.toggle_wandb(True) diff --git a/pyproject.toml b/pyproject.toml index e5efa2b700..8a086d88af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,19 +12,21 @@ readme = "README.md" requires-python = ">=3.10" license = {text = "Apache 2.0"} dependencies = [ - "certifi>=2023.7.22", - "fsspec>=2023.1.0", + "torch>=2.0.0", "numpy>=1.22.4", - "nvidia_dali_cuda120>=1.35.0", - "onnx>=1.14.0", + "xarray>=2023.1.0", + "zarr>=2.14.2", + "fsspec>=2023.1.0", "s3fs>=2023.5.0", + "nvidia_dali_cuda120>=1.35.0", "setuptools>=67.6.0", - "timm>=0.9.12", - "torch>=2.0.0", - "tqdm>=4.60.0", + "certifi>=2023.7.22", + "pytz>=2023.3", "treelib>=1.2.5", - "xarray>=2023.1.0", - "zarr>=2.14.2", + "tqdm>=4.60.0", + "nvtx>=0.2.8", + "onnx>=1.14.0", + "timm>=0.9.12", ] classifiers = [ "Programming Language :: Python :: 3", @@ -92,8 +94,6 @@ all = [ "einops>=0.7.0", "pyspng>=0.1.0", "shapely>=2.0.6", - "pytz>=2023.3", - "nvtx>=0.2.8", "nvidia-modulus[launch]", "nvidia-modulus[dev]", "nvidia-modulus[makani]", @@ -144,4 +144,3 @@ Fengwu = "modulus.models.fengwu:Fengwu" SwinRNN = "modulus.models.swinvrnn:SwinRNN" EDMPrecondSR = "modulus.models.diffusion:EDMPrecondSR" UNet = "modulus.models.diffusion:UNet" - diff --git a/test/datapipes/test_bsms.py b/test/datapipes/test_bsms.py index 9c9170f753..e50ef379b8 100644 --- a/test/datapipes/test_bsms.py +++ b/test/datapipes/test_bsms.py @@ -14,12 +14,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import dgl import pytest import torch from pytest_utils import import_or_fail -dgl = pytest.importorskip("dgl") - @pytest.fixture def ahmed_data_dir(): diff --git a/test/datapipes/test_healpix.py b/test/datapipes/test_healpix.py index cdeaacc32d..67d0a9f8e9 100644 --- a/test/datapipes/test_healpix.py +++ b/test/datapipes/test_healpix.py @@ -22,14 +22,20 @@ import numpy as np import pytest import xarray as xr -from pytest_utils import import_or_fail, nfsdata_or_fail +from omegaconf import DictConfig +from pytest_utils import nfsdata_or_fail from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from modulus.datapipes.healpix.data_modules import ( + TimeSeriesDataModule, + create_time_series_dataset_classic, + open_time_series_dataset_classic_on_the_fly, + open_time_series_dataset_classic_prebuilt, +) +from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset from modulus.distributed import DistributedManager -omegaconf = pytest.importorskip("omegaconf") - @pytest.fixture def data_dir(): @@ -71,7 +77,7 @@ def scaling_dict(): "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, "extra": {"mean": 1, "std": 0}, } - return omegaconf.DictConfig(scaling) + return DictConfig(scaling) @pytest.fixture @@ -89,16 +95,11 @@ def scaling_double_dict(): "z": {"mean": 0, "std": 2}, "extra": {"mean": 0, "std": 2}, } - return omegaconf.DictConfig(scaling) + return DictConfig(scaling) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_open_time_series_on_the_fly(create_path, pytestconfig): - from modulus.datapipes.healpix.data_modules import ( - open_time_series_dataset_classic_on_the_fly, - ) - variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -117,14 +118,9 @@ def test_open_time_series_on_the_fly(create_path, pytestconfig): assert ds_var.equals(base[test_var]) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_open_time_series(data_dir, dataset_name, pytestconfig): # check for failure of non-existant dataset - from modulus.datapipes.healpix.data_modules import ( - open_time_series_dataset_classic_prebuilt, - ) - with pytest.raises(FileNotFoundError, match=("Dataset doesn't appear to exist at")): open_time_series_dataset_classic_prebuilt("/null_path", dataset_name) @@ -132,14 +128,8 @@ def test_open_time_series(data_dir, dataset_name, pytestconfig): assert isinstance(ds, xr.Dataset) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_create_time_series(data_dir, dataset_name, create_path, pytestconfig): - - from modulus.datapipes.healpix.data_modules import ( - create_time_series_dataset_classic, - ) - variables = ["z500", "z1000"] constants = {"lsm": "lsm"} scaling = {"z500": {"log_epsilon": 2}} @@ -191,14 +181,10 @@ def test_create_time_series(data_dir, dataset_name, create_path, pytestconfig): delete_dataset(create_path, dataset_name) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_initialization( data_dir, dataset_name, scaling_dict, pytestconfig ): - - from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -227,7 +213,7 @@ def test_TimeSeriesDataset_initialization( ) # check for failure of invalid scaling variable on input - invalid_scaling = omegaconf.DictConfig( + invalid_scaling = DictConfig( { "bogosity": {"mean": 0, "std": 42}, } @@ -286,13 +272,10 @@ def test_TimeSeriesDataset_initialization( assert isinstance(timeseries_ds, TimeSeriesDataset) 
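This file's revert also replaces the module-level `omegaconf = pytest.importorskip("omegaconf")` gate with a direct `from omegaconf import DictConfig`. The two mechanisms differ in granularity: `importorskip` skips every test in the file at collection time when the package is missing, whereas the `@import_or_fail` decorator skips (or fails) per test. A small self-contained example of the collection-time form, using a hypothetical test:

import pytest

# Skips the entire module at collection time if omegaconf is not installed;
# otherwise returns the imported module.
omegaconf = pytest.importorskip("omegaconf")


def test_scaling_config_roundtrip():
    cfg = omegaconf.DictConfig({"z500": {"mean": 0.0, "std": 1.0}})
    assert cfg.z500.std == 1.0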
-@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_get_constants( data_dir, dataset_name, scaling_dict, pytestconfig ): - from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -311,11 +294,8 @@ def test_TimeSeriesDataset_get_constants( ) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_len(data_dir, dataset_name, scaling_dict, pytestconfig): - from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -353,13 +333,10 @@ def test_TimeSeriesDataset_len(data_dir, dataset_name, scaling_dict, pytestconfi assert len(timeseries_ds) == (len(zarr_ds.time.values) - 2) // 2 -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataset_get( data_dir, dataset_name, scaling_double_dict, pytestconfig ): - from modulus.datapipes.healpix.timeseries_dataset import TimeSeriesDataset - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -461,15 +438,10 @@ def test_TimeSeriesDataset_get( assert len(inputs) == (len(timeseries_ds[0]) + 1) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_initialization( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - from modulus.datapipes.healpix.data_modules import ( - TimeSeriesDataModule, - ) - variables = ["z500", "z1000"] splits = { "train_date_start": "1959-01-01", @@ -541,21 +513,16 @@ def test_TimeSeriesDataModule_initialization( batch_size=1, prebuilt_dataset=True, scaling=scaling_double_dict, - splits=omegaconf.DictConfig(splits), + splits=DictConfig(splits), ) assert isinstance(timeseries_dm, TimeSeriesDataModule) DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_get_constants( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - from modulus.datapipes.healpix.data_modules import ( - TimeSeriesDataModule, - ) - variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -624,16 +591,10 @@ def test_TimeSeriesDataModule_get_constants( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TimeSeriesDataModule_get_dataloaders( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - - from modulus.datapipes.healpix.data_modules import ( - TimeSeriesDataModule, - ) - variables = ["z500", "z1000"] splits = { "train_date_start": "1979-01-01", diff --git a/test/datapipes/test_healpix_couple.py b/test/datapipes/test_healpix_couple.py index 871ef0b1a8..7ee24aacc8 100644 --- a/test/datapipes/test_healpix_couple.py +++ b/test/datapipes/test_healpix_couple.py @@ -23,14 +23,18 @@ import pandas as pd import pytest import xarray as xr -from pytest_utils import import_or_fail, nfsdata_or_fail +from omegaconf import DictConfig, OmegaConf +from pytest_utils import nfsdata_or_fail from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from modulus.datapipes.healpix.coupledtimeseries_dataset import CoupledTimeSeriesDataset +from modulus.datapipes.healpix.couplers import ConstantCoupler, TrailingAverageCoupler +from modulus.datapipes.healpix.data_modules import ( + CoupledTimeSeriesDataModule, +) from modulus.distributed import DistributedManager -omegaconf = pytest.importorskip("omegaconf") - 
@pytest.fixture def data_dir(): @@ -71,7 +75,7 @@ def scaling_dict(): "z": {"mean": 0, "std": 1}, "tp6": {"mean": 1, "std": 0, "log_epsilon": 1e-6}, } - return omegaconf.DictConfig(scaling) + return DictConfig(scaling) @pytest.fixture @@ -88,17 +92,11 @@ def scaling_double_dict(): "z": {"mean": 0, "std": 2}, "tp6": {"mean": 0, "std": 2, "log_epsilon": 1e-6}, } - return omegaconf.DictConfig(scaling) + return DictConfig(scaling) -@import_or_fail("omegaconf") @nfsdata_or_fail def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): - - from modulus.datapipes.healpix.couplers import ( - ConstantCoupler, - ) - variables = ["z500", "z1000"] input_times = ["0h"] input_time_dim = 1 @@ -134,7 +132,7 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): expected = expected.astype(int) assert np.array_equal(expected, coupler._coupled_offsets) - scaling_df = pd.DataFrame.from_dict(omegaconf.OmegaConf.to_object(scaling_dict)).T + scaling_df = pd.DataFrame.from_dict(OmegaConf.to_object(scaling_dict)).T scaling_df.loc["zeros"] = {"mean": 0.0, "std": 1.0} scaling_da = scaling_df.to_xarray().astype("float32") coupler.set_scaling(scaling_da) @@ -147,14 +145,8 @@ def test_ConstantCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconfig): - - from modulus.datapipes.healpix.couplers import ( - TrailingAverageCoupler, - ) - variables = ["z500", "z1000"] input_times = ["6h", "12h"] input_time_dim = 2 @@ -198,7 +190,7 @@ def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconf expected = expected.astype(int) assert np.array_equal(expected, coupler._coupled_offsets) - scaling_df = pd.DataFrame.from_dict(omegaconf.OmegaConf.to_object(scaling_dict)).T + scaling_df = pd.DataFrame.from_dict(OmegaConf.to_object(scaling_dict)).T scaling_df.loc["zeros"] = {"mean": 0.0, "std": 1.0} scaling_da = scaling_df.to_xarray().astype("float32") coupler.set_scaling(scaling_da) @@ -211,16 +203,10 @@ def test_TrailingAverageCoupler(data_dir, dataset_name, scaling_dict, pytestconf DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_initialization( data_dir, dataset_name, scaling_dict, pytestconfig ): - - from modulus.datapipes.healpix.coupledtimeseries_dataset import ( - CoupledTimeSeriesDataset, - ) - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -252,7 +238,7 @@ def test_CoupledTimeSeriesDataset_initialization( ) # check for failure of invalid scaling variable on input - invalid_scaling = omegaconf.DictConfig( + invalid_scaling = DictConfig( { "bogosity": {"mean": 0, "std": 42}, } @@ -365,16 +351,10 @@ def test_CoupledTimeSeriesDataset_initialization( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_get_constants( data_dir, dataset_name, scaling_dict, pytestconfig ): - - from modulus.datapipes.healpix.coupledtimeseries_dataset import ( - CoupledTimeSeriesDataset, - ) - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -413,15 +393,10 @@ def test_CoupledTimeSeriesDataset_get_constants( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_len( data_dir, dataset_name, scaling_dict, pytestconfig ): - from 
modulus.datapipes.healpix.coupledtimeseries_dataset import ( - CoupledTimeSeriesDataset, - ) - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -498,15 +473,10 @@ def test_CoupledTimeSeriesDataset_len( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataset_get( data_dir, dataset_name, scaling_double_dict, pytestconfig ): - from modulus.datapipes.healpix.coupledtimeseries_dataset import ( - CoupledTimeSeriesDataset, - ) - # open our test dataset ds_path = Path(data_dir, dataset_name + ".zarr") zarr_ds = xr.open_zarr(ds_path) @@ -638,16 +608,10 @@ def test_CoupledTimeSeriesDataset_get( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_initialization( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - - from modulus.datapipes.healpix.data_modules import ( - CoupledTimeSeriesDataModule, - ) - variables = ["z500", "z1000"] splits = { "train_date_start": "1959-01-01", @@ -738,23 +702,17 @@ def test_CoupledTimeSeriesDataModule_initialization( batch_size=1, prebuilt_dataset=True, scaling=scaling_double_dict, - splits=omegaconf.DictConfig(splits), + splits=DictConfig(splits), couplings=constant_coupler, ) assert isinstance(timeseries_dm, CoupledTimeSeriesDataModule) DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_constants( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - - from modulus.datapipes.healpix.data_modules import ( - CoupledTimeSeriesDataModule, - ) - variables = ["z500", "z1000"] constants = {"lsm": "lsm"} @@ -841,16 +799,10 @@ def test_CoupledTimeSeriesDataModule_get_constants( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_dataloaders( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - - from modulus.datapipes.healpix.data_modules import ( - CoupledTimeSeriesDataModule, - ) - variables = ["z500", "z1000"] splits = { "train_date_start": "1979-01-01", @@ -919,15 +871,10 @@ def test_CoupledTimeSeriesDataModule_get_dataloaders( DistributedManager.cleanup() -@import_or_fail("omegaconf") @nfsdata_or_fail def test_CoupledTimeSeriesDataModule_get_coupled_vars( data_dir, create_path, dataset_name, scaling_double_dict, pytestconfig ): - from modulus.datapipes.healpix.data_modules import ( - CoupledTimeSeriesDataModule, - ) - variables = ["z500", "z1000"] constant_coupler = [ { diff --git a/test/datapipes/test_lagrangian.py b/test/datapipes/test_lagrangian.py index 4609d71279..9446fa08eb 100644 --- a/test/datapipes/test_lagrangian.py +++ b/test/datapipes/test_lagrangian.py @@ -14,15 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import dgl import pytest import torch from pytest_utils import import_or_fail, nfsdata_or_fail from . 
import common -dgl = pytest.importorskip("dgl") - - Tensor = torch.Tensor diff --git a/test/datapipes/test_mesh_datapipe.py b/test/datapipes/test_mesh_datapipe.py index 2e01fbb6d6..39fa190682 100644 --- a/test/datapipes/test_mesh_datapipe.py +++ b/test/datapipes/test_mesh_datapipe.py @@ -21,6 +21,7 @@ from pytest_utils import import_or_fail # from pytest_utils import nfsdata_or_fail +from modulus.datapipes.cae import MeshDatapipe @pytest.fixture @@ -29,15 +30,13 @@ def cgns_data_dir(): return path -@import_or_fail(["vtk", "warp"]) +@import_or_fail(["vtk"]) @pytest.mark.parametrize("device", ["cuda", "cpu"]) def test_mesh_datapipe(device, tmp_path, pytestconfig): """Tests the MeshDatapipe class with VTP and VTU files.""" import vtk - from modulus.datapipes.cae import MeshDatapipe - def _create_random_vtp_vtu_mesh( num_points: int, num_triangles: int, dir: str ) -> tuple: diff --git a/test/datapipes/test_synthetic.py b/test/datapipes/test_synthetic.py index f18c9b4551..eb323923ca 100644 --- a/test/datapipes/test_synthetic.py +++ b/test/datapipes/test_synthetic.py @@ -16,17 +16,15 @@ import pytest -from pytest_utils import import_or_fail +from modulus.datapipes.climate import ( + SyntheticWeatherDataLoader, + SyntheticWeatherDataset, +) -@import_or_fail("h5py") -@pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_setup(device, pytestconfig): - from modulus.datapipes.climate import ( - SyntheticWeatherDataLoader, - SyntheticWeatherDataset, - ) +@pytest.mark.parametrize("device", ["cuda", "cpu"]) +def test_dataloader_setup(device): dataloader = SyntheticWeatherDataLoader( channels=[0, 1, 2, 3], num_samples_per_year=12, @@ -43,15 +41,9 @@ def test_dataloader_setup(device, pytestconfig): assert isinstance(dataloader.dataset, SyntheticWeatherDataset) -@import_or_fail("h5py") @pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_iteration(device, pytestconfig): +def test_dataloader_iteration(device): """Test the iteration over batches in the DataLoader.""" - - from modulus.datapipes.climate import ( - SyntheticWeatherDataLoader, - ) - dataloader = SyntheticWeatherDataLoader( channels=[0, 1], num_samples_per_year=30, @@ -74,15 +66,9 @@ def test_dataloader_iteration(device, pytestconfig): break # Only test one batch for quick testing -@import_or_fail("h5py") @pytest.mark.parametrize("device", ["cuda", "cpu"]) -def test_dataloader_length(device, pytestconfig): +def test_dataloader_length(device): """Test the length of the DataLoader to ensure it is correct based on the dataset and batch size.""" - - from modulus.datapipes.climate import ( - SyntheticWeatherDataLoader, - ) - dataloader = SyntheticWeatherDataLoader( channels=[0, 1, 2], num_samples_per_year=30, diff --git a/test/metrics/test_metrics_cfd.py b/test/metrics/test_metrics_cfd.py index 68e7534ca7..566b20b958 100644 --- a/test/metrics/test_metrics_cfd.py +++ b/test/metrics/test_metrics_cfd.py @@ -16,6 +16,7 @@ import numpy as np import pytest +import pyvista as pv import torch from pytest_utils import import_or_fail @@ -26,8 +27,6 @@ dominant_freq_calc, ) -pv = pytest.importorskip("pyvista") - @pytest.fixture def generate_sphere(theta_res=100, phi_res=100): diff --git a/test/metrics/test_metrics_integral.py b/test/metrics/test_metrics_integral.py index f144bac29b..faf3b36c45 100644 --- a/test/metrics/test_metrics_integral.py +++ b/test/metrics/test_metrics_integral.py @@ -16,12 +16,11 @@ import numpy as np import pytest +import pyvista as pv from pytest_utils import import_or_fail from 
modulus.metrics.cae.integral import line_integral, surface_integral -pv = pytest.importorskip("pyvista") - @pytest.fixture def generate_circle(num_points=1000): diff --git a/test/models/diffusion/test_preconditioning.py b/test/models/diffusion/test_preconditioning.py index d713a05139..30956d26dd 100644 --- a/test/models/diffusion/test_preconditioning.py +++ b/test/models/diffusion/test_preconditioning.py @@ -16,8 +16,8 @@ import pytest import torch -from pytest_utils import import_or_fail +from modulus.launch.utils import load_checkpoint, save_checkpoint from modulus.models.diffusion.preconditioning import ( EDMPrecond, EDMPrecondSR, @@ -58,11 +58,7 @@ def test_EDMPrecondSR_forward(scale_cond_input): assert output.shape == (b, c_target, x, y) -@import_or_fail("termcolor") -def test_EDMPrecondSR_serialization(tmp_path, pytestconfig): - - from modulus.launch.utils import load_checkpoint, save_checkpoint - +def test_EDMPrecondSR_serialization(tmp_path): module = EDMPrecondSR(8, 1, 1, 1, scale_cond_input=False) model_path = tmp_path / "output.mdlus" module.save(model_path.as_posix()) diff --git a/test/models/dlwp_healpix/test_healpix_blocks.py b/test/models/dlwp_healpix/test_healpix_blocks.py index f5e6388f41..e82f147f3c 100644 --- a/test/models/dlwp_healpix/test_healpix_blocks.py +++ b/test/models/dlwp_healpix/test_healpix_blocks.py @@ -23,7 +23,18 @@ import common import pytest import torch -from pytest_utils import import_or_fail + +from modulus.models.dlwp_healpix_layers import ( + AvgPool, + BasicConvBlock, + ConvGRUBlock, + ConvNeXtBlock, + DoubleConvNeXtBlock, + Interpolate, + MaxPool, + SymmetricConvNeXtBlock, + TransposedConvUpsample, # +) @pytest.fixture @@ -39,27 +50,15 @@ def generate_test_data(faces=12, channels=2, img_size=16, device="cpu"): return generate_test_data -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvGRUBlock_initialization(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvGRUBlock, - ) - +def test_ConvGRUBlock_initialization(device, test_data): in_channels = 2 conv_gru_func = ConvGRUBlock(in_channels=in_channels).to(device) assert isinstance(conv_gru_func, ConvGRUBlock) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvGRUBlock_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvGRUBlock, - ) - +def test_ConvGRUBlock_forward(device, test_data): in_channels = 2 tensor_size = 16 conv_gru_func = ConvGRUBlock(in_channels=in_channels).to(device) @@ -76,14 +75,8 @@ def test_ConvGRUBlock_forward(device, test_data, pytestconfig): assert not common.compare_output(outvar_hist, outvar) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvNeXtBlock_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvNeXtBlock, - ) - +def test_ConvNeXtBlock_initialization(device): in_channels = 2 convnext_block = ConvNeXtBlock(in_channels=in_channels).to(device) assert isinstance(convnext_block, ConvNeXtBlock) @@ -98,14 +91,8 @@ def test_ConvNeXtBlock_initialization(device, pytestconfig): assert isinstance(convnext_block, ConvNeXtBlock) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_ConvNeXtBlock_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvNeXtBlock, - ) - +def test_ConvNeXtBlock_forward(device, test_data): in_channels = 2 
out_channels = 1 tensor_size = 16 @@ -127,14 +114,8 @@ def test_ConvNeXtBlock_forward(device, test_data, pytestconfig): assert outvar.shape == out_shape -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_DoubleConvNeXtBlock_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - DoubleConvNeXtBlock, - ) - +def test_DoubleConvNeXtBlock_initialization(device): in_channels = 2 out_channels = 1 latent_channels = 1 @@ -155,14 +136,8 @@ def test_DoubleConvNeXtBlock_initialization(device, pytestconfig): assert isinstance(doubleconvnextblock, DoubleConvNeXtBlock) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_DoubleConvNeXtBlock_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - DoubleConvNeXtBlock, - ) - +def test_DoubleConvNeXtBlock_forward(device, test_data): in_channels = 2 out_channels = 1 latent_channels = 1 @@ -191,14 +166,8 @@ def test_DoubleConvNeXtBlock_forward(device, test_data, pytestconfig): assert outvar.shape == out_shape -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_SymmetricConvNeXtBlock_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - SymmetricConvNeXtBlock, - ) - +def test_SymmetricConvNeXtBlock_initialization(device): in_channels = 2 latent_channels = 1 symmetric_convnextblock = SymmetricConvNeXtBlock( @@ -216,14 +185,8 @@ def test_SymmetricConvNeXtBlock_initialization(device, pytestconfig): assert isinstance(symmetric_convnextblock, SymmetricConvNeXtBlock) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_SymmetricConvNeXtBlock_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - SymmetricConvNeXtBlock, - ) - +def test_SymmetricConvNeXtBlock_forward(device, test_data): in_channels = 2 latent_channels = 1 tensor_size = 16 @@ -244,14 +207,8 @@ def test_SymmetricConvNeXtBlock_forward(device, test_data, pytestconfig): assert outvar.shape == out_shape -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_BasicConvBlock_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, - ) - +def test_BasicConvBlock_initialization(device): in_channels = 3 out_channels = 1 latent_channels = 2 @@ -271,14 +228,8 @@ def test_BasicConvBlock_initialization(device, pytestconfig): assert isinstance(conv_block, BasicConvBlock) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_BasicConvBlock_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, - ) - +def test_BasicConvBlock_forward(device, test_data): in_channels = 3 out_channels = 1 tensor_size = 16 @@ -297,26 +248,15 @@ def test_BasicConvBlock_forward(device, test_data, pytestconfig): assert outvar.shape == out_shape -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_MaxPool_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - MaxPool, - ) - +def test_MaxPool_initialization(device): pooling = 2 maxpool_block = MaxPool(pooling=pooling).to(device) assert isinstance(maxpool_block, MaxPool) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_MaxPool_forward(device, test_data, pytestconfig): - from 
modulus.models.dlwp_healpix_layers import ( - MaxPool, - ) - +def test_MaxPool_forward(device, test_data): pooling = 2 size = 16 channels = 4 @@ -330,27 +270,15 @@ def test_MaxPool_forward(device, test_data, pytestconfig): assert common.compare_output(outvar, maxpool_block(invar)) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_AvgPool_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - AvgPool, - ) - +def test_AvgPool_initialization(device): pooling = 2 avgpool_block = AvgPool(pooling=pooling).to(device) assert isinstance(avgpool_block, AvgPool) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_AvgPool_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - AvgPool, - ) - +def test_AvgPool_forward(device, test_data): pooling = 2 size = 32 channels = 4 @@ -367,13 +295,8 @@ def test_AvgPool_forward(device, test_data, pytestconfig): assert common.compare_output(outvar, avgpool_block(invar)) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_TransposedConvUpsample_initialization(device, pytestconfig): - from modulus.models.dlwp_healpix_layers import ( - TransposedConvUpsample, # - ) - +def test_TransposedConvUpsample_initialization(device): transposed_conv_upsample_block = TransposedConvUpsample().to(device) assert isinstance(transposed_conv_upsample_block, TransposedConvUpsample) @@ -383,14 +306,8 @@ def test_TransposedConvUpsample_initialization(device, pytestconfig): assert isinstance(transposed_conv_upsample_block, TransposedConvUpsample) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_TransposedConvUpsample_forward(device, test_data, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - TransposedConvUpsample, - ) - +def test_TransposedConvUpsample_forward(device, test_data): in_channels = 2 out_channels = 1 size = 16 @@ -415,27 +332,16 @@ def test_TransposedConvUpsample_forward(device, test_data, pytestconfig): assert outvar.shape == outsize -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_Interpolate_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - Interpolate, - ) - +def test_Interpolate_initialization(device): scale = 2 mode = "linear" interpolation_block = Interpolate(scale_factor=scale, mode=mode).to(device) assert isinstance(interpolation_block, Interpolate) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_Interpolate_forward(device, pytestconfig): - from modulus.models.dlwp_healpix_layers import ( - Interpolate, - ) - +def test_Interpolate_forward(device): scale = 2 mode = "linear" interpolation_block = Interpolate(scale_factor=scale, mode=mode).to(device) diff --git a/test/models/dlwp_healpix/test_healpix_encoder_decoder.py b/test/models/dlwp_healpix/test_healpix_encoder_decoder.py index 430afc88f6..ffa27576b5 100644 --- a/test/models/dlwp_healpix/test_healpix_encoder_decoder.py +++ b/test/models/dlwp_healpix/test_healpix_encoder_decoder.py @@ -23,19 +23,20 @@ import common import pytest import torch -from pytest_utils import import_or_fail +from modulus.models.dlwp_healpix_layers import ( + BasicConvBlock, # for the output layer + ConvGRUBlock, # for the recurrent layer + ConvNeXtBlock, # for convolutional layer + MaxPool, # for downsampling + TransposedConvUpsample, # for upsampling + UNetDecoder, 
+ UNetEncoder, +) -@import_or_fail("hydra") -@pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_initialize(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvNeXtBlock, # for convolutional layer - MaxPool, # for downsampling - UNetEncoder, - ) +@pytest.mark.parametrize("device", ["cuda:0", "cpu"]) +def test_UNetEncoder_initialize(device): channels = 2 n_channels = (16, 32, 64) @@ -71,16 +72,8 @@ def test_UNetEncoder_initialize(device, pytestconfig): torch.cuda.empty_cache() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_forward(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvNeXtBlock, # for convolutional layer - MaxPool, # for downsampling - UNetEncoder, - ) - +def test_UNetEncoder_forward(device): channels = 2 hw_size = 16 b_size = 12 @@ -121,16 +114,8 @@ def test_UNetEncoder_forward(device, pytestconfig): torch.cuda.empty_cache() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetEncoder_reset(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - ConvNeXtBlock, # for convolutional layer - MaxPool, # for downsampling - UNetEncoder, - ) - +def test_UNetEncoder_reset(device): channels = 2 n_channels = (16, 32, 64) @@ -159,18 +144,8 @@ def test_UNetEncoder_reset(device, pytestconfig): torch.cuda.empty_cache() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_initilization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, # for the output layer - ConvGRUBlock, # for the recurrent layer - ConvNeXtBlock, # for convolutional layer - TransposedConvUpsample, # for upsampling - UNetDecoder, - ) - +def test_UNetDecoder_initilization(device): in_channels = 2 out_channels = 1 n_channels = (64, 32, 16) @@ -228,18 +203,8 @@ def test_UNetDecoder_initilization(device, pytestconfig): torch.cuda.empty_cache() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_forward(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, # for the output layer - ConvGRUBlock, # for the recurrent layer - ConvNeXtBlock, # for convolutional layer - TransposedConvUpsample, # for upsampling - UNetDecoder, - ) - +def test_UNetDecoder_forward(device): in_channels = 2 out_channels = 1 hw_size = 32 @@ -316,18 +281,8 @@ def test_UNetDecoder_forward(device, pytestconfig): torch.cuda.empty_cache() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_UNetDecoder_reset(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - BasicConvBlock, # for the output layer - ConvGRUBlock, # for the recurrent layer - ConvNeXtBlock, # for convolutional layer - TransposedConvUpsample, # for upsampling - UNetDecoder, - ) - +def test_UNetDecoder_reset(device): in_channels = 2 out_channels = 1 hw_size = 32 diff --git a/test/models/dlwp_healpix/test_healpix_layers.py b/test/models/dlwp_healpix/test_healpix_layers.py index d3a53d035f..fba2b3468b 100644 --- a/test/models/dlwp_healpix/test_healpix_layers.py +++ b/test/models/dlwp_healpix/test_healpix_layers.py @@ -24,7 +24,13 @@ import numpy as np import pytest import torch -from pytest_utils import import_or_fail + +from modulus.models.dlwp_healpix_layers import ( + HEALPixFoldFaces, + HEALPixLayer, + HEALPixPadding, + HEALPixUnfoldFaces, +) class 
MulX(torch.nn.Module): @@ -38,26 +44,14 @@ def forward(self, x): return x * self.multiplier -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixFoldFaces_initialization(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixFoldFaces, - ) - +def test_HEALPixFoldFaces_initialization(device): fold_func = HEALPixFoldFaces() assert isinstance(fold_func, HEALPixFoldFaces) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixFoldFaces_forward(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixFoldFaces, - ) - +def test_HEALPixFoldFaces_forward(device): fold_func = HEALPixFoldFaces() tensor_size = torch.randint(low=2, high=4, size=(5,)).tolist() @@ -72,25 +66,14 @@ def test_HEALPixFoldFaces_forward(device, pytestconfig): assert fold_func(invar).stride() != outvar.stride() -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixUnfoldFaces_initialization(device, pytestconfig): - from modulus.models.dlwp_healpix_layers import ( - HEALPixUnfoldFaces, - ) - +def test_HEALPixUnfoldFaces_initialization(device): unfold_func = HEALPixUnfoldFaces() assert isinstance(unfold_func, HEALPixUnfoldFaces) -@import_or_fail("hydra") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_HEALPixUnfoldFaces_forward(device, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixUnfoldFaces, - ) - +def test_HEALPixUnfoldFaces_forward(device): num_faces = 12 unfold_func = HEALPixUnfoldFaces() @@ -115,26 +98,14 @@ def test_HEALPixUnfoldFaces_forward(device, pytestconfig): ] -@import_or_fail("hydra") @pytest.mark.parametrize("device,padding", HEALPixPadding_testdata) -def test_HEALPixPadding_initialization(device, padding, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixPadding, - ) - +def test_HEALPixPadding_initialization(device, padding): pad_func = HEALPixPadding(padding) assert isinstance(pad_func, HEALPixPadding) -@import_or_fail("hydra") @pytest.mark.parametrize("device,padding", HEALPixPadding_testdata) -def test_HEALPixPadding_forward(device, padding, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixPadding, - ) - +def test_HEALPixPadding_forward(device, padding): num_faces = 12 # standard for healpix batch_size = 2 pad_func = HEALPixPadding(padding) @@ -173,25 +144,14 @@ def test_HEALPixPadding_forward(device, padding, pytestconfig): ] -@import_or_fail("hydra") @pytest.mark.parametrize("device,multiplier", HEALPixLayer_testdata) -def test_HEALPixLayer_initialization(device, multiplier, pytestconfig): - from modulus.models.dlwp_healpix_layers import ( - HEALPixLayer, - ) - +def test_HEALPixLayer_initialization(device, multiplier): layer = HEALPixLayer(layer=MulX, multiplier=multiplier) assert isinstance(layer, HEALPixLayer) -@import_or_fail("hydra") @pytest.mark.parametrize("device,multiplier", HEALPixLayer_testdata) -def test_HEALPixLayer_forward(device, multiplier, pytestconfig): - - from modulus.models.dlwp_healpix_layers import ( - HEALPixLayer, - ) - +def test_HEALPixLayer_forward(device, multiplier): layer = HEALPixLayer(layer=MulX, multiplier=multiplier) kernel_size = 3 diff --git a/test/models/meshgraphnet/test_bsms_mgn.py b/test/models/meshgraphnet/test_bsms_mgn.py index 78c96e5fb0..d66369ef8a 100644 --- a/test/models/meshgraphnet/test_bsms_mgn.py +++ b/test/models/meshgraphnet/test_bsms_mgn.py @@ -14,12 +14,13 @@ # See the 
License for the specific language governing permissions and # limitations under the License. +import dgl import pytest import torch from models.common import validate_forward_accuracy -from pytest_utils import import_or_fail, nfsdata_or_fail +from pytest_utils import import_or_fail -dgl = pytest.importorskip("dgl") +from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet @pytest.fixture @@ -28,11 +29,10 @@ def ahmed_data_dir(): return path -@import_or_fail(["sparse_dot_mkl", "dgl"]) +@import_or_fail("sparse_dot_mkl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) def test_bsms_mgn_forward(pytestconfig, device): from modulus.datapipes.gnn.bsms import BistrideMultiLayerGraphDataset - from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet torch.manual_seed(1) @@ -89,12 +89,10 @@ def test_bsms_mgn_forward(pytestconfig, device): ) -@nfsdata_or_fail -@import_or_fail(["sparse_dot_mkl", "dgl"]) +@import_or_fail("sparse_dot_mkl") def test_bsms_mgn_ahmed(pytestconfig, ahmed_data_dir): from modulus.datapipes.gnn.ahmed_body_dataset import AhmedBodyDataset from modulus.datapipes.gnn.bsms import BistrideMultiLayerGraphDataset - from modulus.models.meshgraphnet.bsms_mgn import BiStrideMeshGraphNet device = torch.device("cuda:0") diff --git a/test/models/meshgraphnet/test_meshgraphnet_snmg.py b/test/models/meshgraphnet/test_meshgraphnet_snmg.py index f12aaea011..9330fb0da1 100644 --- a/test/models/meshgraphnet/test_meshgraphnet_snmg.py +++ b/test/models/meshgraphnet/test_meshgraphnet_snmg.py @@ -27,16 +27,16 @@ from pytest_utils import import_or_fail from modulus.distributed import DistributedManager, mark_module_as_shared +from modulus.models.gnn_layers import ( + partition_graph_by_coordinate_bbox, + partition_graph_nodewise, + partition_graph_with_id_mapping, +) torch.backends.cuda.matmul.allow_tf32 = False def run_test_distributed_meshgraphnet(rank, world_size, dtype, partition_scheme): - from modulus.models.gnn_layers import ( - partition_graph_by_coordinate_bbox, - partition_graph_nodewise, - partition_graph_with_id_mapping, - ) from modulus.models.gnn_layers.utils import CuGraphCSC from modulus.models.meshgraphnet.meshgraphnet import MeshGraphNet diff --git a/test/models/test_distributed_graph.py b/test/models/test_distributed_graph.py index a0f672da7e..b3006772a0 100644 --- a/test/models/test_distributed_graph.py +++ b/test/models/test_distributed_graph.py @@ -18,9 +18,13 @@ import pytest import torch -from pytest_utils import import_or_fail from modulus.distributed import DistributedManager +from modulus.models.gnn_layers import ( + DistributedGraph, + partition_graph_by_coordinate_bbox, +) +from modulus.models.graphcast.graph_cast_net import get_lat_lon_partition_separators def get_random_graph(device): @@ -127,13 +131,6 @@ def run_test_distributed_graph( partition_scheme: str, use_torchrun: bool = False, ): - - from modulus.models.gnn_layers import ( - DistributedGraph, - partition_graph_by_coordinate_bbox, - ) - from modulus.models.graphcast.graph_cast_net import get_lat_lon_partition_separators - if not use_torchrun: os.environ["RANK"] = f"{rank}" os.environ["WORLD_SIZE"] = f"{world_size}" @@ -341,11 +338,9 @@ def run_test_distributed_graph( del os.environ["MASTER_PORT"] -@import_or_fail("dgl") @pytest.mark.multigpu @pytest.mark.parametrize("partition_scheme", ["lat_lon_bbox", "default"]) -def test_distributed_graph(partition_scheme, pytestconfig): - +def test_distributed_graph(partition_scheme): num_gpus = torch.cuda.device_count() assert num_gpus >= 2, 
"Not enough GPUs available for test" world_size = 2 # num_gpus @@ -365,7 +360,6 @@ def test_distributed_graph(partition_scheme, pytestconfig): if __name__ == "__main__": - # to be launched with torchrun DistributedManager.initialize() run_test_distributed_graph(-1, -1, "lat_lon_bbox", True) diff --git a/test/models/test_domino.py b/test/models/test_domino.py index 6697d9891c..ce3744bb04 100644 --- a/test/models/test_domino.py +++ b/test/models/test_domino.py @@ -20,7 +20,8 @@ import pytest import torch -from pytest_utils import import_or_fail + +from modulus.models.domino.model import DoMINO # from . import common from .common.fwdaccuracy import save_output @@ -57,13 +58,9 @@ def validate_domino( return compare_output(output, output_target, rtol, atol) -@import_or_fail("warp") @pytest.mark.parametrize("device", ["cuda:0"]) -def test_domino_forward(device, pytestconfig): +def test_domino_forward(device): """Test domino forward pass""" - - from modulus.models.domino.model import DoMINO - torch.manual_seed(0) @dataclass diff --git a/test/models/test_graph_partition.py b/test/models/test_graph_partition.py index 675bbdeca5..bd6567248f 100644 --- a/test/models/test_graph_partition.py +++ b/test/models/test_graph_partition.py @@ -16,7 +16,13 @@ import pytest import torch -from pytest_utils import import_or_fail + +from modulus.models.gnn_layers import ( + GraphPartition, + partition_graph_by_coordinate_bbox, + partition_graph_nodewise, + partition_graph_with_id_mapping, +) @pytest.fixture @@ -82,15 +88,8 @@ def assert_partitions_are_equal(a, b): assert torch.allclose(val_a, val_b), error_msg -@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_mapping(global_graph, device, pytestconfig): - - from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_with_id_mapping, - ) - +def test_gp_mapping(global_graph, device): offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 partition_rank = 0 @@ -135,15 +134,8 @@ def test_gp_mapping(global_graph, device, pytestconfig): assert_partitions_are_equal(pg, pg_expected) -@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_nodewise(global_graph, device, pytestconfig): - - from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_nodewise, - ) - +def test_gp_nodewise(global_graph, device): offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 partition_rank = 0 @@ -183,15 +175,8 @@ def test_gp_nodewise(global_graph, device, pytestconfig): assert_partitions_are_equal(pg, pg_expected) -@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_matrixdecomp(global_graph_square, device, pytestconfig): - - from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_nodewise, - ) - +def test_gp_matrixdecomp(global_graph_square, device): offsets, indices, num_src_nodes, num_dst_nodes = global_graph_square partition_size = 4 partition_rank = 0 @@ -227,15 +212,8 @@ def test_gp_matrixdecomp(global_graph_square, device, pytestconfig): assert_partitions_are_equal(pg, pg_expected) -@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_coordinate_bbox(global_graph, device, pytestconfig): - - from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_by_coordinate_bbox, - ) - +def test_gp_coordinate_bbox(global_graph, device): offsets, indices, num_src_nodes, num_dst_nodes = global_graph partition_size = 4 
partition_rank = 0 @@ -301,15 +279,8 @@ def test_gp_coordinate_bbox(global_graph, device, pytestconfig): assert_partitions_are_equal(pg, pg_expected) -@import_or_fail("dgl") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_gp_coordinate_bbox_lat_long(global_graph, device, pytestconfig): - - from modulus.models.gnn_layers import ( - GraphPartition, - partition_graph_by_coordinate_bbox, - ) - +def test_gp_coordinate_bbox_lat_long(global_graph, device): offsets, indices, num_src_nodes, num_dst_nodes = global_graph src_lat = torch.FloatTensor([-75, -60, -45, -30, 30, 45, 60, 75]).view(-1, 1) dst_lat = torch.FloatTensor([-60, -30, 30, 30]).view(-1, 1) diff --git a/test/utils/corrdiff/test_generation_steps.py b/test/utils/corrdiff/test_generation_steps.py index e50a3f47b0..012b234e09 100644 --- a/test/utils/corrdiff/test_generation_steps.py +++ b/test/utils/corrdiff/test_generation_steps.py @@ -18,16 +18,14 @@ import pytest import torch -from pytest_utils import import_or_fail +from modulus.models.diffusion import EDMPrecondSR, UNet +from modulus.utils.corrdiff import diffusion_step, regression_step +from modulus.utils.generative import deterministic_sampler, stochastic_sampler -@import_or_fail("cftime") -@pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_regression_step(device, pytestconfig): - - from modulus.models.diffusion import UNet - from modulus.utils.corrdiff import regression_step +@pytest.mark.parametrize("device", ["cuda:0", "cpu"]) +def test_regression_step(device): # define the net mock_unet = UNet( img_channels=2, @@ -49,14 +47,8 @@ def test_regression_step(device, pytestconfig): assert output.shape == (2, 2, 16, 16), "Output shape mismatch" -@import_or_fail("cftime") @pytest.mark.parametrize("device", ["cuda:0", "cpu"]) -def test_diffusion_step(device, pytestconfig): - - from modulus.models.diffusion import EDMPrecondSR - from modulus.utils.corrdiff import diffusion_step - from modulus.utils.generative import deterministic_sampler, stochastic_sampler - +def test_diffusion_step(device): # Define the preconditioner mock_precond = EDMPrecondSR( img_resolution=[16, 16], diff --git a/test/utils/corrdiff/test_netcdf_writer.py b/test/utils/corrdiff/test_netcdf_writer.py index b0f4b6c0d5..f775fdc9eb 100644 --- a/test/utils/corrdiff/test_netcdf_writer.py +++ b/test/utils/corrdiff/test_netcdf_writer.py @@ -20,7 +20,8 @@ import numpy as np import pytest -from pytest_utils import import_or_fail + +from modulus.utils.corrdiff import NetCDFWriter @pytest.fixture @@ -42,11 +43,7 @@ def mock_ncfile(): return mock_file -@import_or_fail("cftime") -def test_init(mock_ncfile, pytestconfig): - - from modulus.utils.corrdiff import NetCDFWriter - +def test_init(mock_ncfile): lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -89,11 +86,7 @@ def test_init(mock_ncfile, pytestconfig): ) -@import_or_fail("cftime") -def test_write_input(mock_ncfile, pytestconfig): - - from modulus.utils.corrdiff import NetCDFWriter - +def test_write_input(mock_ncfile): lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -113,11 +106,7 @@ def test_write_input(mock_ncfile, pytestconfig): mock_ncfile["input"][channel_name].__setitem__.assert_called_with(time_index, val) -@import_or_fail("cftime") -def test_write_truth(mock_ncfile, pytestconfig): - - from modulus.utils.corrdiff import NetCDFWriter - +def test_write_truth(mock_ncfile): lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = 
np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -137,11 +126,7 @@ def test_write_truth(mock_ncfile, pytestconfig): mock_ncfile["truth"][channel_name].__setitem__.assert_called_with(time_index, val) -@import_or_fail("cftime") -def test_write_prediction(mock_ncfile, pytestconfig): - - from modulus.utils.corrdiff import NetCDFWriter - +def test_write_prediction(mock_ncfile): lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] @@ -164,11 +149,7 @@ def test_write_prediction(mock_ncfile, pytestconfig): ) -@import_or_fail("cftime") -def test_write_time(mock_ncfile, pytestconfig): - - from modulus.utils.corrdiff import NetCDFWriter - +def test_write_time(mock_ncfile): lat = np.array([[1.0, 2.0], [3.0, 4.0]]) lon = np.array([[5.0, 6.0], [7.0, 8.0]]) input_channels = [] diff --git a/test/utils/corrdiff/test_time_range.py b/test/utils/corrdiff/test_time_range.py index 5b4b6ba414..83e1913f47 100644 --- a/test/utils/corrdiff/test_time_range.py +++ b/test/utils/corrdiff/test_time_range.py @@ -14,25 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pytest_utils import import_or_fail +from modulus.utils.corrdiff import get_time_from_range -@import_or_fail("cftime") -def test_default_interval(pytestconfig): - - from modulus.utils.corrdiff import get_time_from_range - +def test_default_interval(): times_range = ["2024-01-01T00:00:00", "2024-01-01T01:00:00"] expected = ["2024-01-01T00:00:00", "2024-01-01T01:00:00"] result = get_time_from_range(times_range) assert result == expected -@import_or_fail("cftime") -def test_hourly_interval(pytestconfig): - - from modulus.utils.corrdiff import get_time_from_range - +def test_hourly_interval(): times_range = ["2024-01-01T00:00:00", "2024-01-01T03:00:00", 1] expected = [ "2024-01-01T00:00:00", @@ -44,33 +36,21 @@ def test_hourly_interval(pytestconfig): assert result == expected -@import_or_fail("cftime") -def test_custom_interval(pytestconfig): - - from modulus.utils.corrdiff import get_time_from_range - +def test_custom_interval(): times_range = ["2024-01-01T00:00:00", "2024-01-01T03:00:00", 2] expected = ["2024-01-01T00:00:00", "2024-01-01T02:00:00"] result = get_time_from_range(times_range) assert result == expected -@import_or_fail("cftime") -def test_no_interval_provided(pytestconfig): - - from modulus.utils.corrdiff import get_time_from_range - +def test_no_interval_provided(): times_range = ["2024-01-01T00:00:00", "2024-01-01T02:00:00"] expected = ["2024-01-01T00:00:00", "2024-01-01T01:00:00", "2024-01-01T02:00:00"] result = get_time_from_range(times_range) assert result == expected -@import_or_fail("cftime") -def test_same_start_end_time(pytestconfig): - - from modulus.utils.corrdiff import get_time_from_range - +def test_same_start_end_time(): times_range = ["2024-01-01T00:00:00", "2024-01-01T00:00:00"] expected = ["2024-01-01T00:00:00"] result = get_time_from_range(times_range) diff --git a/test/utils/generative/test_deterministic_sampler.py b/test/utils/generative/test_deterministic_sampler.py index d68733c005..6cb886225c 100644 --- a/test/utils/generative/test_deterministic_sampler.py +++ b/test/utils/generative/test_deterministic_sampler.py @@ -17,7 +17,8 @@ import pytest import torch -from pytest_utils import import_or_fail + +from modulus.utils.generative import deterministic_sampler # Mock a minimal net class for testing @@ -40,11 +41,7 @@ def mock_net(): # Basic functionality test -@import_or_fail("cftime") -def 
test_deterministic_sampler_output_type_and_shape(mock_net, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_output_type_and_shape(mock_net): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler(net=mock_net, latents=latents, img_lr=img_lr) @@ -53,12 +50,8 @@ def test_deterministic_sampler_output_type_and_shape(mock_net, pytestconfig): # Test for parameter validation -@import_or_fail("cftime") @pytest.mark.parametrize("solver", ["invalid_solver", "euler", "heun"]) -def test_deterministic_sampler_solver_validation(mock_net, solver, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_solver_validation(mock_net, solver): if solver == "invalid_solver": with pytest.raises(ValueError): deterministic_sampler( @@ -78,11 +71,7 @@ def test_deterministic_sampler_solver_validation(mock_net, solver, pytestconfig) # Test for edge cases -@import_or_fail("cftime") -def test_deterministic_sampler_edge_cases(mock_net, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_edge_cases(mock_net): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) # Test with extreme rho values, zero noise levels, etc. @@ -93,12 +82,8 @@ def test_deterministic_sampler_edge_cases(mock_net, pytestconfig): # Test discretization -@import_or_fail("cftime") @pytest.mark.parametrize("discretization", ["vp", "ve", "iddpm", "edm"]) -def test_deterministic_sampler_discretization(mock_net, discretization, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_discretization(mock_net, discretization): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -108,12 +93,8 @@ def test_deterministic_sampler_discretization(mock_net, discretization, pytestco # Test schedule -@import_or_fail("cftime") @pytest.mark.parametrize("schedule", ["vp", "ve", "linear"]) -def test_deterministic_sampler_schedule(mock_net, schedule, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_schedule(mock_net, schedule): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -123,12 +104,8 @@ def test_deterministic_sampler_schedule(mock_net, schedule, pytestconfig): # Test number of steps -@import_or_fail("cftime") @pytest.mark.parametrize("num_steps", [1, 5, 18]) -def test_deterministic_sampler_num_steps(mock_net, num_steps, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_num_steps(mock_net, num_steps): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -138,14 +115,8 @@ def test_deterministic_sampler_num_steps(mock_net, num_steps, pytestconfig): # Test sigma -@import_or_fail("cftime") @pytest.mark.parametrize("sigma_min, sigma_max", [(0.001, 0.01), (1.0, 1.5)]) -def test_deterministic_sampler_sigma_boundaries( - mock_net, sigma_min, sigma_max, pytestconfig -): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_sigma_boundaries(mock_net, sigma_min, sigma_max): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) output = deterministic_sampler( @@ -159,12 +130,8 @@ def test_deterministic_sampler_sigma_boundaries( # Test error 
handling -@import_or_fail("cftime") @pytest.mark.parametrize("scaling", ["invalid_scaling", "vp", "none"]) -def test_deterministic_sampler_scaling_validation(mock_net, scaling, pytestconfig): - - from modulus.utils.generative import deterministic_sampler - +def test_deterministic_sampler_scaling_validation(mock_net, scaling): latents = torch.randn(1, 3, 64, 64) img_lr = torch.randn(1, 3, 64, 64) if scaling == "invalid_scaling": diff --git a/test/utils/generative/test_format_time.py b/test/utils/generative/test_format_time.py index e16b599770..241c9876bc 100644 --- a/test/utils/generative/test_format_time.py +++ b/test/utils/generative/test_format_time.py @@ -15,15 +15,11 @@ # limitations under the License. -from pytest_utils import import_or_fail +from modulus.utils.generative import format_time, format_time_brief # Test format_time function -@import_or_fail("cftime") -def test_format_time(pytestconfig): - - from modulus.utils.generative import format_time - +def test_format_time(): assert format_time(59) == "59s" assert format_time(60) == "1m 00s" assert format_time(3599) == "59m 59s" @@ -35,11 +31,7 @@ def test_format_time(pytestconfig): # Test format_time_brief function -@import_or_fail("cftime") -def test_format_time_brief(pytestconfig): - - from modulus.utils.generative import format_time_brief - +def test_format_time_brief(): assert format_time_brief(59) == "59s" assert format_time_brief(60) == "1m 00s" assert format_time_brief(3600) == "1h 00m" diff --git a/test/utils/generative/test_parse_int_list.py b/test/utils/generative/test_parse_int_list.py index d98040e4a9..5c11536976 100644 --- a/test/utils/generative/test_parse_int_list.py +++ b/test/utils/generative/test_parse_int_list.py @@ -14,14 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from pytest_utils import import_or_fail +from modulus.utils.generative import parse_int_list -@import_or_fail("cftime") -def test_parse_int_list(pytestconfig): - - from modulus.utils.generative import parse_int_list - +def test_parse_int_list(): # Test parsing a simple comma-separated list input_str = "1,2,5,7,10" expected_result = [1, 2, 5, 7, 10] diff --git a/test/utils/generative/test_parse_time.py b/test/utils/generative/test_parse_time.py index 0ee8f21f25..11c1cae3b8 100644 --- a/test/utils/generative/test_parse_time.py +++ b/test/utils/generative/test_parse_time.py @@ -20,6 +20,8 @@ import yaml from pytest_utils import import_or_fail +from modulus.utils.generative import convert_datetime_to_cftime + cftime = pytest.importorskip("cftime") # ruff: noqa: S101 # TODo remove exception @@ -36,9 +38,6 @@ def test_datetime_yaml(): @import_or_fail("cftime") def test_convert_to_cftime(pytestconfig): """test parse time""" - - from modulus.utils.generative import convert_datetime_to_cftime - dt = datetime.datetime(2011, 1, 1) expected = cftime.DatetimeGregorian(2011, 1, 1) assert convert_datetime_to_cftime(dt) == expected diff --git a/test/utils/generative/test_stochastic_sampler.py b/test/utils/generative/test_stochastic_sampler.py index 5f07686f71..290d56406a 100644 --- a/test/utils/generative/test_stochastic_sampler.py +++ b/test/utils/generative/test_stochastic_sampler.py @@ -17,9 +17,10 @@ from typing import Optional import torch -from pytest_utils import import_or_fail from torch import Tensor +from modulus.utils.generative import image_batching, image_fuse, stochastic_sampler + # Mock network class class MockNet: @@ -43,11 +44,7 @@ def __call__( # The test function for edm_sampler -@import_or_fail("cftime") -def test_stochastic_sampler(pytestconfig): - - from modulus.utils.generative import stochastic_sampler - +def test_stochastic_sampler(): net = MockNet() latents = torch.randn(2, 3, 448, 448) # Mock latents img_lr = torch.randn(2, 3, 112, 112) # Mock low-res image @@ -124,11 +121,7 @@ def test_stochastic_sampler(pytestconfig): ), "Churn output shape does not match expected shape" -@import_or_fail("cftime") -def test_image_fuse_basic(pytestconfig): - - from modulus.utils.generative import image_fuse - +def test_image_fuse_basic(): # Basic test: No overlap, no boundary, one patch batch_size = 1 img_shape_x = img_shape_y = 4 @@ -154,11 +147,7 @@ def test_image_fuse_basic(pytestconfig): ), "Output does not match expected output." -@import_or_fail("cftime") -def test_image_fuse_with_boundary(pytestconfig): - - from modulus.utils.generative import image_fuse - +def test_image_fuse_with_boundary(): # Test with boundary pixels batch_size = 1 img_shape_x = img_shape_y = 4 @@ -186,11 +175,7 @@ def test_image_fuse_with_boundary(pytestconfig): ), "Output with boundary does not match expected output." -@import_or_fail("cftime") -def test_image_fuse_with_multiple_batches(pytestconfig): - - from modulus.utils.generative import image_fuse - +def test_image_fuse_with_multiple_batches(): # Test with multiple batches batch_size = 2 img_shape_x = img_shape_y = 4 @@ -235,11 +220,7 @@ def test_image_fuse_with_multiple_batches(pytestconfig): ), "Output for multiple batches does not match expected output." 
-@import_or_fail("cftime") -def test_image_batching_basic(pytestconfig): - - from modulus.utils.generative import image_batching - +def test_image_batching_basic(): # Test with no overlap, no boundary, no input_interp batch_size = 1 img_shape_x = img_shape_y = 4 @@ -265,12 +246,8 @@ def test_image_batching_basic(pytestconfig): ), "Batched images do not match expected output." -@import_or_fail("cftime") -def test_image_batching_with_boundary(pytestconfig): +def test_image_batching_with_boundary(): # Test with boundary pixels, no overlap, no input_interp - - from modulus.utils.generative import image_batching - batch_size = 1 img_shape_x = img_shape_y = 4 patch_shape_x = patch_shape_y = 6 @@ -295,12 +272,8 @@ def test_image_batching_with_boundary(pytestconfig): ), "Batched images with boundary do not match expected output." -@import_or_fail("cftime") -def test_image_batching_with_input_interp(pytestconfig): +def test_image_batching_with_input_interp(): # Test with input_interp tensor - - from modulus.utils.generative import image_batching - batch_size = 1 img_shape_x = img_shape_y = 4 patch_shape_x = patch_shape_y = 4 diff --git a/test/utils/generative/test_tuple_product.py b/test/utils/generative/test_tuple_product.py index 041054bc3c..70e4df5964 100644 --- a/test/utils/generative/test_tuple_product.py +++ b/test/utils/generative/test_tuple_product.py @@ -14,15 +14,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -from pytest_utils import import_or_fail +from modulus.utils.generative import tuple_product -# Test tuple_product function -@import_or_fail("cftime") -def test_tuple_product(pytestconfig): - - from modulus.utils.generative import tuple_product +# Test tuple_product function +def test_tuple_product(): # Test with an empty tuple assert tuple_product(()) == 1 diff --git a/test/utils/test_mesh_utils.py b/test/utils/test_mesh_utils.py index 8127b065cc..6aefb5a59f 100644 --- a/test/utils/test_mesh_utils.py +++ b/test/utils/test_mesh_utils.py @@ -21,8 +21,14 @@ import numpy as np import pytest from pytest_utils import import_or_fail +from stl import mesh -stl = pytest.importorskip("stl") +from modulus.utils.mesh import ( + combine_vtp_files, + convert_tesselated_files_in_directory, + sdf_to_stl, +) +from modulus.utils.sdf import signed_distance_field @pytest.fixture @@ -42,17 +48,12 @@ def download_stl(tmp_path): return file_path -@import_or_fail(["vtk", "warp"]) +@import_or_fail(["vtk"]) def test_mesh_utils(tmp_path, pytestconfig): """Tests the utility for combining VTP files and converting tesselated files.""" import vtk - from modulus.utils.mesh import ( - combine_vtp_files, - convert_tesselated_files_in_directory, - ) - def _create_random_vtp_mesh(num_points: int, num_triangles: int, dir: str) -> tuple: """ Create a random VTP (VTK PolyData) mesh with triangles. 
@@ -180,13 +181,6 @@ def _create_random_obj_mesh(num_vertices: int, num_faces: int, dir: str) -> None @pytest.mark.parametrize("backend", ["warp", "skimage"]) def test_stl_gen(pytestconfig, backend, download_stl, tmp_path): - from stl import mesh - - from modulus.utils.mesh import ( - sdf_to_stl, - ) - from modulus.utils.sdf import signed_distance_field - bunny_mesh = mesh.Mesh.from_file(str(download_stl)) vertices = np.array(bunny_mesh.vectors, dtype=np.float64) diff --git a/test/utils/test_sdf.py b/test/utils/test_sdf.py index 127b868336..4e08673406 100644 --- a/test/utils/test_sdf.py +++ b/test/utils/test_sdf.py @@ -19,6 +19,8 @@ import numpy as np from pytest_utils import import_or_fail +from modulus.utils.sdf import signed_distance_field + def tet_verts(flip_x=1): tet = np.array( @@ -69,8 +71,6 @@ def tet_verts(flip_x=1): @import_or_fail("warp") def test_sdf(pytestconfig): - from modulus.utils.sdf import signed_distance_field - tet = tet_verts() sdf_tet = signed_distance_field( From 2b284f9353d072ffad37962c3441ddbe76f06687 Mon Sep 17 00:00:00 2001 From: Alexey Kamenev Date: Wed, 5 Feb 2025 09:38:01 -0800 Subject: [PATCH 8/8] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dc66017e7..836ae6e08e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Enhancement to parameterize DoMINO model with inlet velocity - Moved non-dimensionaliztion out of domino datapipe to datapipe in domino example - Updated utils in `modulus.launch.logging` to avoid unnecessary `wandb` and `mlflow` imports +- Moved to experiment-based Hydra config in Lagrangian-MGN example ### Deprecated
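
Note on the optional-dependency pattern these test diffs converge on: modules that are hard requirements of a test file (dgl, pyvista, omegaconf, and similar) move from `pytest.importorskip(...)` and function-local imports to plain top-level imports, while the `@import_or_fail` decorator is kept only for packages that remain genuinely optional (e.g. `sparse_dot_mkl`, `warp`). A minimal sketch of the resulting shape, assuming the repo-local `pytest_utils.import_or_fail` helper seen in the hunks above; the test body and the `sparse_dot_mkl` usage are illustrative, not taken from this patch:

    import dgl  # hard requirement: imported at module level, no importorskip guard
    import pytest
    from pytest_utils import import_or_fail  # repo-local test helper used throughout

    @import_or_fail("sparse_dot_mkl")  # still guards a genuinely optional package
    @pytest.mark.parametrize("device", ["cuda:0", "cpu"])
    def test_example_forward(device, pytestconfig):
        # Decorated tests in this repo accept `pytestconfig`, which the helper uses.
        import sparse_dot_mkl  # safe here: the decorator verified availability

        assert dgl is not None and sparse_dot_mkl is not None

The sketch keeps the `pytestconfig` parameter because every `@import_or_fail`-decorated test in the hunks above accepts it; the helper's exact skip/fail semantics are an assumption beyond what the diffs show.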