src.internal.data_connector.clearml_dataset module#

ClearML Dataset Connector. An interface for interacting with ClearML datasets.

class src.internal.data_connector.clearml_dataset.ClearMLDataset[source]#

Bases: DatasetConnector

ClearML Dataset Connector.

_abc_impl = <_abc._abc_data object>#
add_files(path: Union[str, Path], recursive: bool = True) None[source]#

Add files to a dataset.

Parameters:
  • path (Union[str, Path]) – Local/remote path to data you want to add

  • recursive (bool, optional) – Whether to recursively add sub-files/folders. Defaults to True.

Raises:

NotImplementedError – If dataset connector does not implement this method.

property artifacts: List[Artifact]#

Get a list of artifacts in the dataset.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

List of artifacts in the dataset

Return type:

List[Artifact]

classmethod create(name: Optional[str] = None, version: Optional[str] = None, project: Optional[str] = None, tags: Optional[Sequence[str]] = None, default_remote: Optional[str] = None, description: Optional[str] = None) ClearMLDataset[source]#

Create a dataset from scratch. Note that the user will have to add files after the dataset has been created.

Parameters:
  • name (str) – Name of the dataset

  • version (str, optional) – Version to give dataset. Defaults to “latest”.

Returns:

Created dataset

Return type:

DatasetConnector

delete() None[source]#

Delete dataset.

Raises:

NotImplementedError – If dataset connector does not implement this method.

download(path: Union[str, Path], overwrite: bool = True) str[source]#

Download a mutable copy of the entire dataset.

Parameters:
  • path (Union[str, Path]) – Target folder to download dataset to

  • overwrite (bool, optional) – Whether existing files in the target folder should be removed. Defaults to True.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

File path to downloaded dataset

Return type:

str

property file_entries: Dict#

Get a dictionary of files in the dataset.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

Dictionary of files in the dataset

Return type:

Dict

classmethod get(id: Optional[str] = None, project: Optional[str] = None, name: Optional[str] = None, version: Optional[str] = None) ClearMLDataset[source]#

Get an existing dataset, but do not download contents of dataset.

This method should return a properly initialized DatasetConnector with the .dataset attribute set.

Returns:

Retrieved dataset

Return type:

DatasetConnector

static list_datasets(project: Optional[str] = None, partial_name: Optional[str] = None, tags: Optional[Sequence[str]] = None, ids: Optional[Sequence[str]] = None) List[Dict][source]#
Obtain a list of all datasets, based on what is available to the dataset connector.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

List of dictionaries containing dataset metadata

Return type:

List[Dict]

property name: str#

Get the name of the dataset.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

Name of the dataset

Return type:

str

property project: str#

Get the project associated with the dataset.

Raises:

NotImplementedError – If dataset connector does not implement this method.

Returns:

Project associated with the dataset

Return type:

str

remove_files(path: Union[str, Path], recursive: bool = True) None[source]#

Remove files from a dataset.

Parameters:
  • path (Union[str, Path]) – Local/remote path to data you want to remove

  • recursive (bool, optional) – Whether to recursively remove sub-files/folders. Defaults to True.

Raises:

NotImplementedError – If dataset connector does not implement this method.

property tags: List[str]#

Get the tags associated with the dataset.

Raises:

AttributeError – If dataset has not been initialized.

Returns:

List of tags associated with the dataset.

Return type:

List[str]

upload(remote: Optional[str] = None) None[source]#

Push changes to remote.

Parameters:

remote (Optional[str]) – URL to push files to. If None, will use any pre-defined URL in the dataset. Defaults to None.

Raises:
  • ValueError – If remote is not defined in the arguments and the dataset has no default remote.

  • NotImplementedError – If dataset connector does not implement this method.