Skip to content

Commit

Permalink
Add max_num_seqs to deployment script
Browse files: browse the repository at this point in the history
  • Loading branch information
cthiriet committed Apr 26, 2024
1 parent 93f83b6 commit cf09882
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions sagemaker/deploy.py
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,7 @@ def deploy(
trust_remote_code = get_value(config_data, None, "trust_remote_code")
loras = get_value(config_data, None, "loras")
max_lora_rank = get_value(config_data, None, "max_lora_rank")
max_num_seqs = get_value(config_data, None, "max_num_seqs")

has_loras = loras is not None and len(loras) > 0

@@ -148,6 +149,9 @@ def deploy(
if max_model_len is not None:
container_env["MAX_MODEL_LEN"] = str(max_model_len)

if max_num_seqs is not None:
container_env["MAX_NUM_SEQS"] = str(max_num_seqs)

if trust_remote_code is not None:
container_env["TRUST_REMOTE_CODE"] = str(trust_remote_code).lower()

@@ -190,6 +194,13 @@ def deploy(

print(json.dumps(primary_container, indent=4))

# Ask for confirmation
print("\nDo you want to continue? (yes/no)")
response = input()
if response != "yes":
print("Exiting...")
return

# create model
sm_client = boto3.client(service_name="sagemaker", region_name=region)
create_model_response = sm_client.create_model(
Expand Down

0 comments on commit cf09882

Please sign in to comment.