Preparing your data for labelling#

This example shows you how to go from your raw data to a project that has your label structure (ontology) defined and your dataset attached, and that is ready to be labelled.

Imports#

12 from pathlib import Path
13
14 from encord import Dataset, EncordUserClient, Project
15 from encord.orm.dataset import CreateDatasetResponse, StorageLocation
16 from encord.project_ontology.classification_type import ClassificationType
17 from encord.project_ontology.object_type import ObjectShape
18 from encord.utilities.project_user import ProjectUserRole

Authenticating#

Note

To interact with Encord, you first need to authenticate a client. See the authentication documentation for more details.

30 # Authentication: adapt the following line to your private key path
31 private_key_path = Path.home() / ".ssh" / "id_ed25519"
32
33 with private_key_path.open() as f:
34     private_key = f.read()
35
36 user_client = EncordUserClient.create_with_ssh_private_key(private_key)

1. Creating and populating the dataset#

This section shows how to create a dataset and add both videos and images to the dataset.

44 # Create the dataset
45 dataset_response: CreateDatasetResponse = user_client.create_dataset(
46     "Example Title", StorageLocation.CORD_STORAGE
47 )
48 dataset_hash = dataset_response.dataset_hash
49
50 # Add data to the dataset
51 dataset: Dataset = user_client.get_dataset(dataset_hash)
52
53 image_files = sorted(
54     [
55         p.as_posix()
56         for p in Path("path/to/images").iterdir()
57         if p.suffix in {".jpg", ".png"}
58     ]
59 )
60 dataset.create_image_group(image_files)
61
62 video_files = [
63     p.as_posix()
64     for p in Path("path/to/videos").iterdir()
65     if p.suffix in {".mp4", ".webm"}
66 ]
67
68 for v in video_files:
69     dataset.upload_video(v)

2. Listing available data in the dataset#

75 for data_row in dataset.data_rows:
76     print(
77         f"data-hash: '{data_row.uid}', "
78         f"data-type: {data_row.data_type}, "
79         f"title: '{data_row.title}'"
80     )

The code produces output similar to the following:

data-hash: '<data_hash>', data-type: DataType.IMG_GROUP, title: 'image-group-68dd3'
data-hash: '<data_hash>', data-type: DataType.VIDEO, title: 'video1.mp4'

3. Creating a project with an ontology#

 95 # == Creating a project containing the dataset created above == #
 96 project_hash = user_client.create_project(
 97     project_title="The title of the project",
 98     dataset_hashes=[dataset_hash],
 99     project_description="A description of what this project is all about.",
100 )
101
102 # == Adding objects and classifications to the project ontology == #
103 project: Project = user_client.get_project(project_hash)
104
105 # Objects
106 project.add_object(name="Dog (polygon)", shape=ObjectShape.POLYGON)
107 project.add_object(name="Snake (polyline)", shape=ObjectShape.POLYLINE)
108 project.add_object(name="Tiger (bounding_box)", shape=ObjectShape.BOUNDING_BOX)
109 project.add_object(name="Ant (key-point)", shape=ObjectShape.KEY_POINT)
110
111 # Classifications
112 project.add_classification(
113     name="Has Animal (radio)",
114     classification_type=ClassificationType.RADIO,
115     required=True,
116     options=["yes", "no"],
117 )
118 project.add_classification(
119     name="Other objects (checklist)",
120     classification_type=ClassificationType.CHECKLIST,
121     required=False,
122     options=["person", "car", "leash"],
123 )
124 project.add_classification(
125     name="Description (text)",
126     classification_type=ClassificationType.TEXT,
127     required=False,
128     # Note no `options` defined for text classifications.
129 )

4. Adding your team to the project#

To allow annotators, reviewers and team managers to access your project, they need to be added to the project using the email addresses of their Encord accounts. Add each type of member with a separate call to the project client:

138 project.add_users(
139     ["annotator1@your.domain", "annotator2@your.domain"],
140     user_role=ProjectUserRole.ANNOTATOR,
141 )
142 project.add_users(
143     ["reviewer1@your.domain", "reviewer2@your.domain"],
144     user_role=ProjectUserRole.REVIEWER,
145 )
146 project.add_users(
147     ["annotator_reviewer@your.domain"],
148     user_role=ProjectUserRole.ANNOTATOR_REVIEWER,
149 )
150 project.add_users(
151     ["team_manager@your.domain"],
152     user_role=ProjectUserRole.TEAM_MANAGER,
153 )

At this point, your data is ready to be annotated with the project-specific information defined in the project ontology.

Gallery generated by Sphinx-Gallery