Skip to content

Commit

Permalink
fix(sdk): import kubernetes.client & make type conversion in swagger.json.
Browse files Browse the repository at this point in the history

Signed-off-by: Electronic-Waste <[email protected]>
  • Loading branch information
Electronic-Waste committed Feb 28, 2025
1 parent 0b765d6 commit c3773b3
Show file tree
Hide file tree
Showing 9 changed files with 18 additions and 14 deletions.
3 changes: 2 additions & 1 deletion hack/python-sdk/gen-sdk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,5 +66,6 @@ else
fi

# Kubeflow models must have Kubernetes models to perform serialization.
printf "\n# Import JobSet models for the serialization. It imports the Kubernetes models.\n" >>${SDK_OUTPUT_PATH}/kubeflow/trainer/models/__init__.py
printf "\n# Import Kubernetes and JobSet models for the serialization. \n" >>${SDK_OUTPUT_PATH}/kubeflow/trainer/models/__init__.py
printf "from kubernetes.client import *\n" >>${SDK_OUTPUT_PATH}/kubeflow/trainer/models/__init__.py
printf "from jobset.models import *\n" >>${SDK_OUTPUT_PATH}/kubeflow/trainer/models/__init__.py
2 changes: 2 additions & 0 deletions hack/python-sdk/swagger_config.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
{
"packageName": "kubeflow.trainer",
"typeMappings": {
"K8sIoApiAutoscalingV2MetricSpec": "V2MetricSpec",
"K8sIoApimachineryPkgUtilIntstrIntOrString": "Union[int, str]",
"V1Time": "datetime"
}
}
2 changes: 1 addition & 1 deletion sdk/docs/TrainerV1alpha1TorchElasticPolicy.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**max_nodes** | **int** | Upper limit for the number of nodes to which training job can scale up. | [optional]
**max_restarts** | **int** | How many times the training job can be restarted. This value is inserted into the &#x60;--max-restarts&#x60; argument of the &#x60;torchrun&#x60; CLI and the &#x60;.spec.failurePolicy.maxRestarts&#x60; parameter of the training Job. | [optional]
**metrics** | [**list[K8sIoApiAutoscalingV2MetricSpec]**](K8sIoApiAutoscalingV2MetricSpec.md) | Specification which are used to calculate the desired number of nodes. See the individual metric source types for more information about how each type of metric must respond. The HPA will be created to perform auto-scaling. | [optional]
**metrics** | [**list[V2MetricSpec]**](K8sIoApiAutoscalingV2MetricSpec.md) | Specification which are used to calculate the desired number of nodes. See the individual metric source types for more information about how each type of metric must respond. The HPA will be created to perform auto-scaling. | [optional]
**min_nodes** | **int** | Lower limit for the number of nodes to which training job can scale down. | [optional]

[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
Expand Down
2 changes: 1 addition & 1 deletion sdk/docs/TrainerV1alpha1TorchMLPolicySource.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ TorchMLPolicySource represents a PyTorch runtime configuration.
Name | Type | Description | Notes
------------ | ------------- | ------------- | -------------
**elastic_policy** | [**TrainerV1alpha1TorchElasticPolicy**](TrainerV1alpha1TorchElasticPolicy.md) | | [optional]
**num_proc_per_node** | [**K8sIoApimachineryPkgUtilIntstrIntOrString**](K8sIoApimachineryPkgUtilIntstrIntOrString.md) | | [optional]
**num_proc_per_node** | [**Union[int, str]**](K8sIoApimachineryPkgUtilIntstrIntOrString.md) | | [optional]

[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)

Expand Down
2 changes: 1 addition & 1 deletion sdk/docs/TrainerV1alpha1Trainer.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Name | Type | Description | Notes
**env** | [**list[V1EnvVar]**](V1EnvVar.md) | List of environment variables to set in the training container. These values will be merged with the TrainingRuntime&#39;s trainer environments. | [optional]
**image** | **str** | Docker image for the training container. | [optional]
**num_nodes** | **int** | Number of training nodes. | [optional]
**num_proc_per_node** | [**K8sIoApimachineryPkgUtilIntstrIntOrString**](K8sIoApimachineryPkgUtilIntstrIntOrString.md) | | [optional]
**num_proc_per_node** | [**Union[int, str]**](K8sIoApimachineryPkgUtilIntstrIntOrString.md) | | [optional]
**resources_per_node** | [**V1ResourceRequirements**](V1ResourceRequirements.md) | | [optional]

[[Back to Model list]](../README.md#documentation-for-models) [[Back to API list]](../README.md#documentation-for-api-endpoints) [[Back to README]](../README.md)
Expand Down
3 changes: 2 additions & 1 deletion sdk/kubeflow/trainer/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,6 @@
from kubeflow.trainer.models.trainer_v1alpha1_training_runtime_list import TrainerV1alpha1TrainingRuntimeList
from kubeflow.trainer.models.trainer_v1alpha1_training_runtime_spec import TrainerV1alpha1TrainingRuntimeSpec

# Import JobSet models for the serialization. It imports the Kubernetes models.
# Import Kubernetes and JobSet models for the serialization.
from kubernetes.client import *
from jobset.models import *
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class TrainerV1alpha1TorchElasticPolicy(object):
openapi_types = {
'max_nodes': 'int',
'max_restarts': 'int',
'metrics': 'list[K8sIoApiAutoscalingV2MetricSpec]',
'metrics': 'list[V2MetricSpec]',
'min_nodes': 'int'
}

Expand Down Expand Up @@ -120,7 +120,7 @@ def metrics(self):
Specification which are used to calculate the desired number of nodes. See the individual metric source types for more information about how each type of metric must respond. The HPA will be created to perform auto-scaling. # noqa: E501
:return: The metrics of this TrainerV1alpha1TorchElasticPolicy. # noqa: E501
:rtype: list[K8sIoApiAutoscalingV2MetricSpec]
:rtype: list[V2MetricSpec]
"""
return self._metrics

Expand All @@ -131,7 +131,7 @@ def metrics(self, metrics):
Specification which are used to calculate the desired number of nodes. See the individual metric source types for more information about how each type of metric must respond. The HPA will be created to perform auto-scaling. # noqa: E501
:param metrics: The metrics of this TrainerV1alpha1TorchElasticPolicy. # noqa: E501
:type: list[K8sIoApiAutoscalingV2MetricSpec]
:type: list[V2MetricSpec]
"""

self._metrics = metrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class TrainerV1alpha1TorchMLPolicySource(object):
"""
openapi_types = {
'elastic_policy': 'TrainerV1alpha1TorchElasticPolicy',
'num_proc_per_node': 'K8sIoApimachineryPkgUtilIntstrIntOrString'
'num_proc_per_node': 'Union[int, str]'
}

attribute_map = {
Expand Down Expand Up @@ -84,7 +84,7 @@ def num_proc_per_node(self):
:return: The num_proc_per_node of this TrainerV1alpha1TorchMLPolicySource. # noqa: E501
:rtype: K8sIoApimachineryPkgUtilIntstrIntOrString
:rtype: Union[int, str]
"""
return self._num_proc_per_node

Expand All @@ -94,7 +94,7 @@ def num_proc_per_node(self, num_proc_per_node):
:param num_proc_per_node: The num_proc_per_node of this TrainerV1alpha1TorchMLPolicySource. # noqa: E501
:type: K8sIoApimachineryPkgUtilIntstrIntOrString
:type: Union[int, str]
"""

self._num_proc_per_node = num_proc_per_node
Expand Down
6 changes: 3 additions & 3 deletions sdk/kubeflow/trainer/models/trainer_v1alpha1_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class TrainerV1alpha1Trainer(object):
'env': 'list[V1EnvVar]',
'image': 'str',
'num_nodes': 'int',
'num_proc_per_node': 'K8sIoApimachineryPkgUtilIntstrIntOrString',
'num_proc_per_node': 'Union[int, str]',
'resources_per_node': 'V1ResourceRequirements'
}

Expand Down Expand Up @@ -203,7 +203,7 @@ def num_proc_per_node(self):
:return: The num_proc_per_node of this TrainerV1alpha1Trainer. # noqa: E501
:rtype: K8sIoApimachineryPkgUtilIntstrIntOrString
:rtype: Union[int, str]
"""
return self._num_proc_per_node

Expand All @@ -213,7 +213,7 @@ def num_proc_per_node(self, num_proc_per_node):
:param num_proc_per_node: The num_proc_per_node of this TrainerV1alpha1Trainer. # noqa: E501
:type: K8sIoApimachineryPkgUtilIntstrIntOrString
:type: Union[int, str]
"""

self._num_proc_per_node = num_proc_per_node
Expand Down

0 comments on commit c3773b3

Please sign in to comment.