Replace the following placeholders and settings in the script:

- `<private_key_path>` with the path to your private key.
- `<source_project_id>` with the Project ID of your source Project.
- `<target_project_id>` with the Project ID of your target Project.
- `OVERWRITE_STRATEGY` with one of:
  - `skip`: Ignores tasks with existing labels.
  - `add`: Adds all labels from the source Project to the target Project for all tasks.
  - `overwrite`: Replaces labels in the target Project if they exist in the same data unit.
- `COPY_METHOD` to specify the basis for copying, `DataHash` or `DataTitle`.

import argparse
from enum import Enum
from encord import EncordUserClient
from tqdm import tqdm # Make sure tqdm is installed: pip install tqdm
class CopyMethod(Enum):
    """Key used to pair a target label row with its source label row."""

    # Pair rows whose data hash is identical in both projects.
    DataHash = 0
    # Pair rows whose data title is identical in both projects.
    DataTitle = 1
def get_empty_labels_flag(label_row):
    """Return True when a label row has no object or classification instances.

    Args:
        label_row: An object exposing ``get_object_instances()`` and
            ``get_classification_instances()``; its labels are expected to
            be loaded already.

    Returns:
        bool: True if both calls return an empty (falsy) collection.
    """
    return not (
        label_row.get_object_instances()
        or label_row.get_classification_instances()
    )
def copy_labels_to_target(source_label_row, target_label_row):
    """Copy every object and classification instance from source to target.

    Each instance is duplicated with its own ``copy()`` method before being
    added, so the target receives new instances rather than the source's.
    Instances already present on the target are left in place. The target
    label row is saved once after all instances have been added.

    Args:
        source_label_row: Label row to read instances from.
        target_label_row: Label row that receives the copies and is saved.
    """
    # Add object instances
    for obj in source_label_row.get_object_instances():
        target_label_row.add_object_instance(obj.copy())

    # Add classification instances
    for cl in source_label_row.get_classification_instances():
        target_label_row.add_classification_instance(cl.copy())

    # Save the changes to the target label row
    target_label_row.save()
def main(
    keyfile: str,
    source_project_hash: str,
    target_project_hash: str,
    overwrite_strategy: str,
    copy_method: CopyMethod,
):
    """Copy labels from a source Encord project into a target project.

    Every label row of the target project is paired with exactly one label
    row of the source project, either by data title or by data hash. Target
    rows with no match, or with more than one match, are reported and
    skipped.

    Args:
        keyfile: Path to the SSH private key used to authenticate.
        source_project_hash: Project hash of the project to copy labels from.
        target_project_hash: Project hash of the project to copy labels into.
        overwrite_strategy: How to treat target rows that already have labels:
            ``"skip"`` leaves them untouched, ``"add"`` adds the source labels
            next to the existing ones, ``"overwrite"`` replaces them.
        copy_method: ``CopyMethod.DataTitle`` to pair rows by data title; any
            other value pairs them by data hash.

    Raises:
        ValueError: If ``overwrite_strategy`` is not one of the three options.
        Exception: If the projects do not share an ontology, or the user
            declines one of the confirmation prompts.
    """
    # Validate the cheap, local input first so a typo fails before any
    # network round trip or interactive prompt.
    if overwrite_strategy not in ("skip", "add", "overwrite"):
        raise ValueError(
            f"Invalid overwrite strategy: '{overwrite_strategy}'. "
            f"Possible options are: 'skip', 'add', or 'overwrite'."
        )

    user_client = EncordUserClient.create_with_ssh_private_key(
        ssh_private_key_path=keyfile
    )
    source_project = user_client.get_project(source_project_hash)
    target_project = user_client.get_project(target_project_hash)

    # --- Initial Project and Ontology Checks ---
    if source_project.ontology_hash != target_project.ontology_hash:
        raise Exception("Projects must share the same Ontology.")

    answer = input(f"Target project title is '{target_project.title}'. Continue (y/n)? ")
    if answer.strip().lower() != "y":
        raise Exception("Aborting: User cancelled project confirmation.")

    # --- Overwrite Confirmation if Applicable ---
    should_overwrite_target_labels = overwrite_strategy == "overwrite"
    if should_overwrite_target_labels:
        answer = input(
            "WARNING: 'overwrite' will remove all current labels in the target label row. Continue (y/n)? "
        )
        if answer.strip().lower() != "y":
            raise Exception("Aborting: User cancelled overwrite confirmation.")

    # --- Index the source project once ---
    # Listing the source rows a single time and grouping them by the matching
    # key avoids one filtered query per target row.
    match_by_title = copy_method == CopyMethod.DataTitle
    source_rows_by_key = {}
    for source_label_row in source_project.list_label_rows_v2():
        key = source_label_row.data_title if match_by_title else source_label_row.data_hash
        source_rows_by_key.setdefault(key, []).append(source_label_row)

    # --- Label Copying Logic ---
    print("\nStarting label copy process...")
    for target_label_row in tqdm(target_project.list_label_rows_v2(), desc="Processing Label Rows"):
        key = target_label_row.data_title if match_by_title else target_label_row.data_hash
        matches = source_rows_by_key.get(key, [])

        if not matches:
            print(f"  Warning: No matching source label row found for '{target_label_row.data_title}' (method: {copy_method.name}). Skipping.")
            continue
        if len(matches) > 1:
            print(f"  Warning: Multiple matches found for '{target_label_row.data_title}' (method: {copy_method.name}). Skipping this target label row.")
            continue
        source_label_row_match = matches[0]

        # Initialise the target exactly once per row. Initialising it a
        # second time (as a separate "clear" step) is avoided because the SDK
        # is expected to reject re-initialisation without an explicit
        # overwrite flag.
        if should_overwrite_target_labels:
            # Loading the row with empty include-filters brings in none of
            # the existing labels, so saving afterwards replaces them.
            target_label_row.initialise_labels(
                include_object_feature_hashes=set(),
                include_classification_feature_hashes=set(),
            )
            print(f"  Overwriting labels for: {target_label_row.data_title}")
        else:
            target_label_row.initialise_labels()
            if get_empty_labels_flag(target_label_row):
                # Target is empty, so always copy
                print(f"  Copying labels to empty target: {target_label_row.data_title}")
            elif overwrite_strategy == "skip":
                print(f"  Skipping target data unit '{target_label_row.data_title}' (already has labels).")
                continue
            else:
                # "add": new instances are added next to the existing ones.
                print(f"  Adding labels to existing target: {target_label_row.data_title}")

        # Load the source labels only once a copy is certain to happen.
        source_label_row_match.initialise_labels()
        copy_labels_to_target(source_label_row_match, target_label_row)

    print("\nLabel copy process finished.")
if __name__ == "__main__":
    # Script entry point: edit the values below, then run this file directly.

    # --- CONFIGURE YOUR VARIABLES HERE ---
    # Replace with the actual path to your Encord SSH private key
    KEYFILE_PATH = "<private_key_path>"

    # Replace with your source project hash
    SOURCE_PROJECT_HASH = "<source_project_id>"

    # Replace with your target project hash
    TARGET_PROJECT_HASH = "<target_project_id>"

    # Choose your strategy for existing labels in the target project:
    # "skip": Do not copy if target has labels.
    # "add": Add the source labels next to the labels the target already has.
    # "overwrite": Clear target labels before copying.
    OVERWRITE_STRATEGY = "add"

    # Choose how to match label rows between projects:
    # CopyMethod.DataHash: Matches by data hash (recommended for exact dataset matches).
    # CopyMethod.DataTitle: Matches by data title (use if data hashes differ but titles are unique).
    COPY_METHOD = CopyMethod.DataHash
    # --- END CONFIGURATION ---

    main(
        keyfile=KEYFILE_PATH,
        source_project_hash=SOURCE_PROJECT_HASH,
        target_project_hash=TARGET_PROJECT_HASH,
        overwrite_strategy=OVERWRITE_STRATEGY,
        copy_method=COPY_METHOD,
    )
Was this page helpful?