Skip to content

Commit

Permalink
Merge pull request #564 from DagsHub/feature/delete-dataset
Browse files Browse the repository at this point in the history
Feature: Add a "ds.delete_dataset()" function to remove a dataset entry
  • Loading branch information
kbolashev authored Dec 12, 2024
2 parents 80133fe + fecd870 commit a44e707
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 0 deletions.
11 changes: 11 additions & 0 deletions dagshub/data_engine/client/data_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,17 @@ def save_dataset(self, datasource: "Datasource", name: str):
)
return self._exec(q, params)

def delete_dataset(self, dataset_id: Union[str, int]):
    """
    Removes a dataset. This doesn't remove the underlying source.

    Args:
        dataset_id: ID of the dataset to delete. Must not be ``None``.

    Returns:
        Whatever ``self._exec`` returns for the ``deleteDataset`` mutation.

    Raises:
        ValueError: If ``dataset_id`` is ``None``.
    """
    # Validate with an explicit raise instead of ``assert``: assertions are
    # stripped when Python runs with ``-O``, which would let a null ID slip
    # through to the backend. Checking first also avoids building the query
    # for a request that can never be sent.
    if dataset_id is None:
        raise ValueError("dataset_id must not be None")

    q = GqlMutations.delete_dataset()
    params = GqlMutations.delete_dataset_params(dataset_id=dataset_id)
    return self._exec(q, params)

def get_datasets(self, id: Optional[Union[str, int]], name: Optional[str]) -> List[DatasetResult]:
"""
Retrieve a list of datasets based on optional filtering criteria.
Expand Down
18 changes: 18 additions & 0 deletions dagshub/data_engine/client/gql_mutations.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,21 @@ def save_dataset_params(datasource_id: Union[int, str], name: str, query_input:
"name": name,
"filter": query_input,
}

@staticmethod
@functools.lru_cache()
def delete_dataset():
    """
    Build the GraphQL document for the ``deleteDataset`` mutation.

    The mutation declares a single ``$id`` variable of type ``ID!``, passes it
    as the ``id`` argument of ``deleteDataset`` and selects the ``id`` field
    of the result. The function takes no arguments, so ``lru_cache`` means the
    document is generated on the first call and reused afterwards.

    Returns:
        The generated query, as produced by ``GqlQuery.generate()``.
    """
    builder = GqlQuery()
    # Operation header: declares the variables the mutation accepts.
    builder = builder.operation("mutation", name="deleteDataset", input={"$id": "ID!"})
    # Mutation call: forwards the declared variable as the ``id`` argument.
    builder = builder.query("deleteDataset", input={"id": "$id"})
    # Selection set requested from the mutation result.
    builder = builder.fields(["id"])
    return builder.generate()

@staticmethod
def delete_dataset_params(dataset_id: Union[int, str]):
    """
    Build the variables mapping for the ``deleteDataset`` mutation.

    Args:
        dataset_id: ID of the dataset to delete.

    Returns:
        A dict with a single ``"id"`` key holding ``dataset_id``.
    """
    variables = dict(id=dataset_id)
    return variables
25 changes: 25 additions & 0 deletions dagshub/data_engine/model/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,31 @@ def delete_source(self, force: bool = False):
return
self.source.client.delete_datasource(self)

def delete_dataset(self, force: bool = False):
    """
    Delete the dataset this object was created from
    (e.g. via :func:`.datasets.get_dataset()`).

    Only the dataset entry and its query are removed; the underlying
    datasource and its metadata are left in place.

    Args:
        force: Skip the confirmation prompt

    Raises:
        ValueError: If this datasource object wasn't created from a dataset.
    """
    dataset = self.assigned_dataset
    if dataset is None:
        raise ValueError("This datasource was not created from a dataset")

    # The confirmation text is assembled up front, whether or not it ends up being shown.
    confirmation_text = (
        f'You are about to delete dataset "{dataset.dataset_name}" for repo "{self.source.repo}"\n'
        f'The datasource "{self.source.name}" will still exist, but the dataset entry will be removed'
    )

    # Ask the user only when not forced; a falsy answer aborts the deletion.
    if not force and not prompt_user(confirmation_text):
        print("Deletion cancelled")
        return

    self.source.client.delete_dataset(dataset.dataset_id)

def scan_source(self, options: Optional[List[ScanOption]] = None):
"""
This function fires a call to the backend to rescan the datapoints.
Expand Down

0 comments on commit a44e707

Please sign in to comment.