Converting polygons to bitmasks
The following script converts polygon labels from the source project into Bitmask labels in the target project. The conversion applies to images, videos, image groups, and image sequences.
For this conversion to be effective, it is crucial that both the source and target projects are linked to the same dataset. The script is agnostic to which workflow stage the source or target data units are in.
To install the required Python dependencies, run the following command in your terminal:
    python -m pip install encord numpy opencv-python tqdm typer typing-extensions
This script uses Typer, which lets you pass arguments on the command line rather than editing them directly into the script.
To execute the script in your terminal, use the following command:
    python script-name.py
Replace script-name.py with the actual name of your script and respond to the prompted questions during execution.
For non-interactive usage, provide the following parameters:
    python script-name.py --keyfile <ssh_key_path> --source-project-hash <source_project_hash> --target-project-hash <target_project_hash>
Replace the placeholders with the appropriate values:
- script-name.py with the name of your script.
- <ssh_key_path> with the path to your private SSH key associated with Encord.
- <source_project_hash> with the hash of the source project containing the polygon labels.
- <target_project_hash> with the hash of the target project that will contain the bitmask labels.
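For example (the key path and project hashes below are illustrative only):
    python convert_polygons_to_bitmasks.py --keyfile ~/.ssh/id_ed25519 --source-project-hash 11111111-aaaa-1111-aaaa-111111111111 --target-project-hash 22222222-bbbb-2222-bbbb-222222222222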
This script is intended for a one-time operation only. Running it more than once will result in duplicate labels.
import logging
import time
from pathlib import Path
from typing import cast
import cv2
import numpy as np
import typer
from encord import EncordUserClient, Project
from encord.objects import LabelRowV2, Object, Shape
from encord.objects.coordinates import BitmaskCoordinates, PolygonCoordinates
from tqdm import tqdm, trange
from typing_extensions import Annotated
MAX_NUM_EXECUTION_ATTEMPTS = 10


def try_execute(func, kwargs=None):
    """Call ``func``, retrying on failure up to ``MAX_NUM_EXECUTION_ATTEMPTS`` times.

    Args:
        func: Callable to invoke.
        kwargs: Optional mapping of keyword arguments for ``func``. When it is
            ``None`` or empty, ``func`` is called without arguments.

    Returns:
        Whatever ``func`` returns on its first successful call.

    Raises:
        Exception: If every attempt raised. The error of the last attempt is
            chained as ``__cause__`` so the root failure is not lost.
    """
    last_error = None
    for n in range(MAX_NUM_EXECUTION_ATTEMPTS):
        try:
            if kwargs:
                return func(**kwargs)
            return func()
        except Exception as e:
            last_error = e
            # Only announce a retry and wait when another attempt actually
            # follows; sleeping after the final failure just delays the error.
            if n + 1 < MAX_NUM_EXECUTION_ATTEMPTS:
                logging.warning(
                    f"Handling {e} when executing {func} with args {kwargs}.\n" f" Trying again, attempt number {n + 1}."
                )
                time.sleep(0.5 * MAX_NUM_EXECUTION_ATTEMPTS)
    raise Exception("Reached maximum number of execution attempts.") from last_error
def initialize_label_rows(project: Project, batch_size: int = 200, include_unlabeled: bool = False) -> list[LabelRowV2]:
    """Initialise the labels of a project's label rows in batches.

    Args:
        project: Project whose label rows are listed and initialised.
        batch_size: Number of label rows handled per bundle execution.
        include_unlabeled: When ``False``, rows whose ``label_hash`` is
            ``None`` are left out.

    Returns:
        The selected label rows, after ``initialise_labels`` has been
        requested for each of them.
    """
    selected = []
    for row in project.list_label_rows_v2():
        if include_unlabeled or row.label_hash is not None:
            selected.append(row)

    offsets = trange(
        0,
        len(selected),
        batch_size,
        desc=f"Initializing label rows [{project.title}]",
    )
    for offset in offsets:
        # One bundle per batch, executed through the retry helper.
        bundle = project.create_bundle()
        batch = selected[offset : offset + batch_size]
        for row in batch:
            row.initialise_labels(bundle=bundle)
        try_execute(bundle.execute)

    return selected
def populate_ontology_of_target_project(
    client: EncordUserClient, source: Project, target: Project
) -> dict[str, Object]:
    """Ensure the target ontology has a bitmask object for every source polygon object.

    For each polygon object in the source ontology, a bitmask object with the
    same name is looked up in the target ontology and added when absent. The
    target ontology is saved afterwards.

    Args:
        client: User client used to fetch the target ontology.
        source: Project whose ontology supplies the polygon objects.
        target: Project whose ontology receives the bitmask objects.

    Returns:
        Mapping from source polygon object name to the corresponding bitmask
        object of the target ontology.
    """
    source_structure = source.ontology_structure
    target_ontology = client.get_ontology(target.ontology_hash)

    lookup: dict[str, Object] = {}
    for source_obj in source_structure.objects:
        if source_obj.shape != Shape.POLYGON:
            continue

        # First bitmask object in the target ontology carrying the same name, if any.
        counterpart = next(
            (
                candidate
                for candidate in target_ontology.structure.objects
                if candidate.shape == Shape.BITMASK and candidate.name == source_obj.name
            ),
            None,
        )
        if counterpart is None:
            counterpart = target_ontology.structure.add_object(source_obj.name, Shape.BITMASK)
        lookup[source_obj.name] = counterpart

    target_ontology.save()
    return lookup
def _polygon_to_bitmask(coords: PolygonCoordinates, width: int, height: int) -> BitmaskCoordinates:
    """Rasterise a polygon into a ``height`` x ``width`` bitmask.

    The polygon vertices are scaled by ``width`` and ``height`` before being
    filled, i.e. they are treated as fractions of the frame size.
    """
    mask = np.zeros((height, width), dtype=np.uint8)
    np_polygon = np.array([[p.x * width, p.y * height] for p in coords.values]).astype(np.int32)
    cv2.fillPoly(mask, [np_polygon], 1)
    return BitmaskCoordinates(mask.astype(bool))


def convert_labels(
    keyfile: Annotated[
        Path,
        typer.Option(
            help="Path to private ssh key associated with Encord",
            prompt="Where is your key-file stored?",
        ),
    ],
    source_project_hash: Annotated[
        str,
        typer.Option(
            help="Hash of the project from which annotations and classifications will be copied",
            prompt="What's the project hash of the SOURCE project?",
        ),
    ],
    target_project_hash: Annotated[
        str,
        typer.Option(
            help="Hash of the project where the bitmasks will be added",
            prompt="What's the project hash of the TARGET project?",
        ),
    ],
):
    """Copy polygon labels of the source project into the target project as bitmasks.

    Label rows of the two projects are paired by data hash. Every polygon
    object instance of a source row is rasterised frame by frame and added to
    the matching target row. Changed target rows are saved in bundles of 200.
    """
    keyfile = keyfile.expanduser().resolve()
    user_client = EncordUserClient.create_with_ssh_private_key(keyfile.read_text())

    target_project = user_client.get_project(target_project_hash)
    source_project = user_client.get_project(source_project_hash)

    ontology_lookup = populate_ontology_of_target_project(user_client, source_project, target_project)
    # Pick up the bitmask objects that may just have been added to the ontology.
    target_project.refetch_ontology()

    source_project_label_rows = initialize_label_rows(source_project)
    target_project_label_rows = initialize_label_rows(
        target_project,
        include_unlabeled=True,
    )
    source_project_label_rows_by_data_hash = {lr.data_hash: lr for lr in source_project_label_rows}

    bundle = target_project.create_bundle()
    bundle_size = 0
    for target_lr in tqdm(target_project_label_rows, desc="Migrating labels"):
        source_lr = source_project_label_rows_by_data_hash.get(target_lr.data_hash)
        if source_lr is None:
            continue

        should_save = False
        for obj in source_lr.get_object_instances():
            if obj.ontology_item.shape != Shape.POLYGON or obj.object_name not in ontology_lookup:
                continue

            new_instance = ontology_lookup[obj.object_name].create_instance()
            has_annotations = False
            for annotation in obj.get_annotations():
                has_annotations = True
                coords = cast(PolygonCoordinates, annotation.coordinates)
                frame_view = source_lr.get_frame_view(frame=annotation.frame)
                target_coordinates = _polygon_to_bitmask(coords, frame_view.width, frame_view.height)
                new_instance.set_for_frames(coordinates=target_coordinates, frames=annotation.frame)

            # Instances without any annotation are not added to the target row.
            if has_annotations:
                should_save = True
                target_lr.add_object_instance(new_instance)

        if not should_save:
            continue

        # The row is saved through the bundle only; an additional direct
        # target_lr.save() would send the same row a second time.
        target_lr.save(bundle=bundle)
        bundle_size += 1
        if bundle_size >= 200:
            try_execute(bundle.execute)
            bundle = target_project.create_bundle()
            bundle_size = 0

    # Flush the rows queued since the last full bundle.
    if bundle_size > 0:
        try_execute(bundle.execute)
    print("Done!")


if __name__ == "__main__":
    typer.run(convert_labels)
Converting polygons to bounding boxes
The following script converts polygon labels from the source Project into bounding box labels in the target Project. The conversion applies to images, videos, image groups, and image sequences.
For this conversion to be effective, it’s crucial that both the source and target Projects are linked to the same Dataset. The script is agnostic to which Workflow stage the source or target data units are in.
To install the required Python dependencies, run the following command in your terminal:
    python -m pip install encord tqdm typer typing-extensions
This script uses Typer, which lets you pass arguments on the command line rather than editing them directly into the script.
To execute the script in your terminal, use the following command:
    python script-name.py
Replace script-name.py with the actual name of your script and respond to the prompted questions during execution.
For non-interactive usage, provide the following parameters:
    python script-name.py --keyfile <ssh_key_path> --source-project-hash <source_project_hash> --target-project-hash <target_project_hash>
Replace the placeholders with the appropriate values:
- script-name.py with the name of your script.
- <ssh_key_path> with the path to your private SSH key associated with Encord.
- <source_project_hash> with the hash of the source project containing the polygon labels.
- <target_project_hash> with the hash of the target project that will contain the bounding box labels.
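For example (the key path and project hashes below are illustrative only):
    python convert_polygons_to_bounding_boxes.py --keyfile ~/.ssh/id_ed25519 --source-project-hash 11111111-aaaa-1111-aaaa-111111111111 --target-project-hash 22222222-bbbb-2222-bbbb-222222222222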
This script is intended for a one-time operation only. Running it more than once will result in duplicate labels.
import logging
import time
from pathlib import Path
from typing import cast
import typer
from encord import EncordUserClient, Project
from encord.objects import LabelRowV2, Object, Shape
from encord.objects.coordinates import BoundingBoxCoordinates, PolygonCoordinates
from tqdm import tqdm, trange
from typing_extensions import Annotated
MAX_NUM_EXECUTION_ATTEMPTS = 10


def try_execute(func, kwargs=None):
    """Call ``func``, retrying on failure up to ``MAX_NUM_EXECUTION_ATTEMPTS`` times.

    Args:
        func: Callable to invoke.
        kwargs: Optional mapping of keyword arguments for ``func``. When it is
            ``None`` or empty, ``func`` is called without arguments.

    Returns:
        Whatever ``func`` returns on its first successful call.

    Raises:
        Exception: If every attempt raised. The error of the last attempt is
            chained as ``__cause__`` so the root failure is not lost.
    """
    last_error = None
    for n in range(MAX_NUM_EXECUTION_ATTEMPTS):
        try:
            if kwargs:
                return func(**kwargs)
            return func()
        except Exception as e:
            last_error = e
            # Only announce a retry and wait when another attempt actually
            # follows; sleeping after the final failure just delays the error.
            if n + 1 < MAX_NUM_EXECUTION_ATTEMPTS:
                logging.warning(
                    f"Handling {e} when executing {func} with args {kwargs}.\n" f" Trying again, attempt number {n + 1}."
                )
                time.sleep(0.5 * MAX_NUM_EXECUTION_ATTEMPTS)
    raise Exception("Reached maximum number of execution attempts.") from last_error
def initialize_label_rows(project: Project, batch_size: int = 200, include_unlabeled: bool = False) -> list[LabelRowV2]:
    """Initialise the labels of a project's label rows in batches.

    Args:
        project: Project whose label rows are listed and initialised.
        batch_size: Number of label rows handled per bundle execution.
        include_unlabeled: When ``False``, rows whose ``label_hash`` is
            ``None`` are left out.

    Returns:
        The selected label rows, after ``initialise_labels`` has been
        requested for each of them.
    """
    selected = []
    for row in project.list_label_rows_v2():
        if include_unlabeled or row.label_hash is not None:
            selected.append(row)

    offsets = trange(
        0,
        len(selected),
        batch_size,
        desc=f"Initializing label rows [{project.title}]",
    )
    for offset in offsets:
        # One bundle per batch, executed through the retry helper.
        bundle = project.create_bundle()
        batch = selected[offset : offset + batch_size]
        for row in batch:
            row.initialise_labels(bundle=bundle)
        try_execute(bundle.execute)

    return selected
def populate_ontology_of_target_project(
    client: EncordUserClient, source: Project, target: Project
) -> dict[str, Object]:
    """Ensure the target ontology has a bounding-box object for every source polygon object.

    For each polygon object in the source ontology, a bounding-box object with
    the same name is looked up in the target ontology and added when absent.
    The target ontology is saved afterwards.

    Args:
        client: User client used to fetch the target ontology.
        source: Project whose ontology supplies the polygon objects.
        target: Project whose ontology receives the bounding-box objects.

    Returns:
        Mapping from source polygon object name to the corresponding
        bounding-box object of the target ontology.
    """
    source_structure = source.ontology_structure
    target_ontology = client.get_ontology(target.ontology_hash)

    lookup: dict[str, Object] = {}
    for source_obj in source_structure.objects:
        if source_obj.shape != Shape.POLYGON:
            continue

        # First bounding-box object in the target ontology carrying the same name, if any.
        counterpart = next(
            (
                candidate
                for candidate in target_ontology.structure.objects
                if candidate.shape == Shape.BOUNDING_BOX and candidate.name == source_obj.name
            ),
            None,
        )
        if counterpart is None:
            counterpart = target_ontology.structure.add_object(source_obj.name, Shape.BOUNDING_BOX)
        lookup[source_obj.name] = counterpart

    target_ontology.save()
    return lookup
def _polygon_to_bounding_box(coords: PolygonCoordinates) -> BoundingBoxCoordinates:
    """Return the axis-aligned box spanning the min/max x and y of the polygon's vertices."""
    xs = [point.x for point in coords.values]
    ys = [point.y for point in coords.values]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)
    return BoundingBoxCoordinates(
        height=max_y - min_y,
        width=max_x - min_x,
        top_left_x=min_x,
        top_left_y=min_y,
    )


def convert_labels(
    keyfile: Annotated[
        Path,
        typer.Option(
            help="Path to private SSH key associated with Encord",
            prompt="Where is your SSH-key file stored?",
        ),
    ],
    source_project_hash: Annotated[
        str,
        typer.Option(
            help="Hash of the project from which annotations and classifications will be copied",
            prompt="What's the project hash of the SOURCE project?",
        ),
    ],
    target_project_hash: Annotated[
        str,
        typer.Option(
            help="Hash of the project where the bounding boxes will be added",
            prompt="What's the project hash of the TARGET project?",
        ),
    ],
):
    """Copy polygon labels of the source project into the target project as bounding boxes.

    Label rows of the two projects are paired by data hash. Every polygon
    object instance of a source row is converted frame by frame into its
    enclosing box and added to the matching target row. Changed target rows
    are saved in bundles of 200.
    """
    keyfile = keyfile.expanduser().resolve()
    user_client = EncordUserClient.create_with_ssh_private_key(keyfile.read_text())

    target_project = user_client.get_project(target_project_hash)
    source_project = user_client.get_project(source_project_hash)

    ontology_lookup = populate_ontology_of_target_project(user_client, source_project, target_project)
    # Pick up the bounding-box objects that may just have been added to the ontology.
    target_project.refetch_ontology()

    source_project_label_rows = initialize_label_rows(source_project)
    target_project_label_rows = initialize_label_rows(
        target_project,
        include_unlabeled=True,
    )
    source_project_label_rows_by_data_hash = {lr.data_hash: lr for lr in source_project_label_rows}

    bundle = target_project.create_bundle()
    bundle_size = 0
    for target_lr in tqdm(target_project_label_rows, desc="Migrating labels"):
        source_lr = source_project_label_rows_by_data_hash.get(target_lr.data_hash)
        if source_lr is None:
            continue

        should_save = False
        for obj in source_lr.get_object_instances():
            if obj.ontology_item.shape != Shape.POLYGON or obj.object_name not in ontology_lookup:
                continue

            new_instance = ontology_lookup[obj.object_name].create_instance()
            has_annotations = False
            for annotation in obj.get_annotations():
                has_annotations = True
                coords = cast(PolygonCoordinates, annotation.coordinates)
                target_coordinates = _polygon_to_bounding_box(coords)
                new_instance.set_for_frames(coordinates=target_coordinates, frames=annotation.frame)

            # Instances without any annotation are not added to the target row.
            if has_annotations:
                should_save = True
                target_lr.add_object_instance(new_instance)

        if not should_save:
            continue

        # The row is saved through the bundle only; an additional direct
        # target_lr.save() would send the same row a second time.
        target_lr.save(bundle=bundle)
        bundle_size += 1
        if bundle_size >= 200:
            try_execute(bundle.execute)
            bundle = target_project.create_bundle()
            bundle_size = 0

    # Flush the rows queued since the last full bundle.
    if bundle_size > 0:
        try_execute(bundle.execute)
    print("Done!")


if __name__ == "__main__":
    typer.run(convert_labels)