Skip to main content

Cloud-Synced Folders

Cloud-synced folders automatically stay in sync with connected cloud storage, regularly updating to ensure all new and existing files are available in Encord without manual intervention. Cloud-synced Folders require that you have at least one integration set up.
  • Cloud-synced folders support images, videos, PDFs, text, HTML, and audio files.
Image groups, image sequences, and DICOM series are not currently supported by Cloud-synced folders.
  • A single Cloud-synced folder can contain a maximum of 10 million files.

Create a Cloud-synced Folder

Create Cloud-synced Folder
from uuid import UUID
from encord import EncordUserClient
from encord.orm.storage import CloudSyncedFolderParams

# --- Settings: replace every value below with your own ---
SSH_PATH = "/Users/chris-encord/ssh-private-key.txt"  # File path to your private access key
CLOUD_SYNCED_FOLDER_NAME = "SDK Cloud-Synced Folder"  # Meaningful name for the Cloud-synced Folder
CLOUD_SYNCED_FOLDER_DESCRIPTION = "A folder to store my files"  # Meaningful description for the folder
INTEGRATION_UUID = "3b6299c3-f8c8-4755-ae26-d9144b215920"  # Unique id of your cloud integration
REMOTE_URL = "gs://my-gcp-bucket/"  # Storage/file path in your cloud storage
FOLDER_METADATA = {"my": "folder_metadata"}  # Custom metadata attached to the folder

# Authenticate with Encord using the private key file
user_client = EncordUserClient.create_with_ssh_private_key(
    domain="https://api.encord.com",
    ssh_private_key_path=SSH_PATH,
)

# Create the storage folder; the cloud-sync parameters tie it to the
# integration and remote URL configured above
storage_folder = user_client.create_storage_folder(
    name=CLOUD_SYNCED_FOLDER_NAME,
    description=CLOUD_SYNCED_FOLDER_DESCRIPTION,
    client_metadata=FOLDER_METADATA,
    cloud_synced_folder_params=CloudSyncedFolderParams(
        integration_uuid=UUID(INTEGRATION_UUID),
        remote_url=REMOTE_URL,
    ),
)

Sync Cloud-synced Folder with Cloud Storage

The following code syncs a Cloud-synced Folder with the cloud storage bucket. The timeout_seconds value passed to sync_private_data_with_cloud_synced_folder_get_result can be adjusted to your needs.
Sync Cloud-synced Folder

from uuid import UUID

from encord import EncordUserClient
from encord.orm.storage import SyncPrivateDataWithCloudSyncedFolderStatus

# User input
SSH_PATH = "/Users/chris-encord/ssh-private-key.txt"  # Specify the file path to your access key
CLOUD_SYNCED_FOLDER_UUID = UUID("7270fb4a-fc8a-4336-b8dd-5b548d27889d")

# Short alias for the status enum used in the comparisons below
SyncStatus = SyncPrivateDataWithCloudSyncedFolderStatus


def has_reported_errors(result):
    """Return True when any of the *_error counters on `result` is above zero."""
    error_counters = (
        result.scan_pages_processing_error,
        result.upload_jobs_error,
        result.upload_jobs_units_error,
    )
    return any(counter > 0 for counter in error_counters)


def print_progress_summary(result):
    """Print the listing-page, upload-job, and file-unit counters read from `result`."""
    print("Progress summary:")
    print(
        f"  Bucket listing pages: "
        f"pending={result.scan_pages_processing_pending}, "
        f"done={result.scan_pages_processing_done}, "
        f"error={result.scan_pages_processing_error}, "
        f"cancelled={result.scan_pages_processing_cancelled}"
    )
    print(
        f"  Upload jobs: "
        f"pending={result.upload_jobs_pending}, "
        f"done={result.upload_jobs_done}, "
        f"error={result.upload_jobs_error}"
    )
    print(
        f"  File units: "
        f"pending={result.upload_jobs_units_pending}, "
        f"done={result.upload_jobs_units_done}, "
        f"error={result.upload_jobs_units_error}, "
        f"cancelled={result.upload_jobs_units_cancelled}"
    )


# Authenticate with Encord
user_client = EncordUserClient.create_with_ssh_private_key(
    domain="https://api.encord.com",
    ssh_private_key_path=SSH_PATH,
)

# Look up the Cloud-synced Folder by its UUID
storage_folder = user_client.get_storage_folder(CLOUD_SYNCED_FOLDER_UUID)
print(f"Using cloud-synced folder uuid={storage_folder.uuid}")

# Kick off the sync job and keep its UUID so the result can be requested
sync_job_uuid = storage_folder.sync_private_data_with_cloud_synced_folder_start()
print(f"Started sync job: {sync_job_uuid}")

# Request the job result, waiting up to the given timeout
result = storage_folder.sync_private_data_with_cloud_synced_folder_get_result(
    sync_job_uuid,
    timeout_seconds=300,  # adjust as needed
)
status = result.status
print(f"Sync job finished with status: {status}")

# Report the outcome for each possible status
if status == SyncStatus.DONE:
    print("Sync completed (server finished the job).")
    print_progress_summary(result)

    # A DONE job can still contain failed parts, so check the error counters
    if has_reported_errors(result):
        print("Sync finished, but some parts failed. Inspect the *_error counters above.")
    else:
        print("Sync finished successfully with no reported errors.")

elif status == SyncStatus.PENDING:
    print("Sync is still in progress. Try polling again later.")

elif status == SyncStatus.ERROR:
    print("Sync failed (critical error).")

elif status == SyncStatus.CANCELLED:
    print("Sync was cancelled.")

else:
    print(f"Unexpected status: {status!r}")

Create New Standard Folder

The following script creates a new folder in the root directory of Data > Files & Folders. Ensure that you:
  • Replace <private_key_path> with the path to your private key.
  • Replace <folder_name> with the name you want to give your folder. We recommend using unique folder names.
  • Replace A folder to store my files with a meaningful description for your folder.
  • Replace my: folder_metadata with any metadata you want to add to the folder. Remove the line if you do not want to add any metadata to the folder.
from encord import EncordUserClient

# Authenticate with Encord; substitute the path to your private key
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path="<private_key_path>",
)

# Properties of the folder to create
folder_name = "<folder_name>"
folder_description = "A folder to store my files"
folder_metadata = {"my": "folder_metadata"}

# Create the storage folder with the properties defined above
storage_folder = user_client.create_storage_folder(
    name=folder_name,
    description=folder_description,
    client_metadata=folder_metadata,
)

Move Files

Use the following script to move various types of files to a different folder in Data > Files & Folders. Ensure that you:
  • Replace <private_key_path> with the path to your private key.
  • Replace <data_unit_name> with the name of the file you want to move to a new folder.
  • Replace <start_folder_name> with the name of the folder you want to move the file from.
  • Replace <target_folder_name> with the name of the folder you want to move the file to.
from encord import EncordUserClient
from encord.storage import StorageItemType, FoldersSortBy

# Instantiate Encord client by substituting the path to your private key
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path="<private_key_path>"
)

# Names used to look up the source folder, the destination folder, and the file
START_FOLDER_NAME = "<start_folder_name>"
TARGET_FOLDER_NAME = "<target_folder_name>"
DATA_UNIT_NAME = "<data_unit_name>"


def find_first_folder(name):
    """Return the first storage folder found when searching for `name`.

    Args:
        name: Folder name passed as the search term.

    Returns:
        The first folder yielded by the search (assumes only one match).

    Raises:
        LookupError: If the search yields no folder.
    """
    folder = next(
        user_client.find_storage_folders(
            search=name,
            dataset_synced=None,
            order=FoldersSortBy.NAME,
            desc=False,
            page_size=1000,
        ),
        None,
    )
    if folder is None:
        raise LookupError(f"No folder found with name {name!r}")
    return folder


# Retrieve the folder the file currently lives in and the folder to move it to
start_folder = find_first_folder(START_FOLDER_NAME)
target_folder = find_first_folder(TARGET_FOLDER_NAME)

# Search the start folder for the data unit by name. list_items returns the
# matches as an iterable, so take the first one (assumes only one match).
# Only image items are searched; extend item_types to match other file types.
matching_items = start_folder.list_items(
    search=DATA_UNIT_NAME,
    is_in_dataset=None,
    item_types=[StorageItemType.IMAGE],
    order=FoldersSortBy.NAME,
    desc=False,
    get_signed_urls=False,
    page_size=1000,
)
item = next(iter(matching_items), None)
if item is None:
    raise LookupError(
        f"No item named {DATA_UNIT_NAME!r} found in folder {START_FOLDER_NAME!r}"
    )

# Move the found data unit to the retrieved target folder
print(f"Moving item {item.uuid} to folder {target_folder.uuid}")
item.move_to_folder(target_folder.uuid)

Move Folders

You can move folders between different parent folders in Data > Files & Folders, including moving them to the root (no parent). The following script demonstrates how a folder with the name Folder Name 3 is moved between 2 different target folders. The script must be modified to suit your needs. Ensure that you:
  • Replace <private_key_path> with the path to your private key.
  • Replace Folder Name 1 with the name of a target folder.
  • Replace Folder Name 2 with the name of another target folder.
  • Replace Folder Name 3 with the name of the folder that is moved.
Folders
from encord import EncordUserClient
from encord.storage import FoldersSortBy

# Authenticate with Encord; substitute the path to your private key
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path="<private_key_path>",
)


def first_folder_named(name):
    """Return the first folder yielded by a name search.

    Raises StopIteration when the search yields nothing.
    """
    matches = user_client.find_storage_folders(
        search=name,
        dataset_synced=False,
        order=FoldersSortBy.NAME,
        desc=False,
        page_size=1000,
    )
    return next(matches)


# Look up the two target folders and the folder that gets moved
folder_1 = first_folder_named("Folder Name 1")
folder_2 = first_folder_named("Folder Name 2")
folder_3 = first_folder_named("Folder Name 3")

# Move folder_3 through each destination in turn
destinations = (
    folder_1.uuid,  # first under folder_1
    folder_2.uuid,  # then under folder_2
    None,           # finally to the root level (None means no parent)
)
for destination in destinations:
    folder_3.move_to_folder(destination)

Update URL for Cloud Storage Files

Use the following script to update the cloud storage path of your files in Encord when, for example:
  • You move files in cloud storage
  • You update your folder structure in cloud storage
  • You update URLs to use multi-region access points
from encord.utilities.storage.cloud_data_migration import update_storage_item_cloud_info
from uuid import UUID
from encord import EncordUserClient

# User input
SSH_PATH = "/Users/chris-encord/ssh-private-key.txt"  # File path to your access key for authentication
FILE_ID = "11984de7-3690-4f6e-a317-8ee1ef4a926b"  # File ID of the file to update
NEW_URL_01 = "https://storage.cloud.google.com/my-gcp-bucket/Move_Data_Unit/my-video-01.mp4"  # New URL for the file

# Authentication
user_client = EncordUserClient.create_with_ssh_private_key(
    # For US platform users use "https://api.us.encord.com"
    domain="https://api.encord.com",
    ssh_private_key_path=SSH_PATH,
)

# Point the storage item at the URL the file has after it was moved.
# skip_missing=True skips items that are missing.
update_storage_item_cloud_info(
    user_client=user_client,
    item=UUID(FILE_ID),  # or the URL of the item
    new_url=NEW_URL_01,
    verify_access=True,
    skip_missing=True,
)

print("Successfully updated storage item with new URL.")

Change Cloud Storage Provider

Using the SDK you can change from one cloud storage provider (for example AWS) to another cloud storage provider (for example GCP) while maintaining data integrity.
Change Cloud Storage Provider

from encord import EncordUserClient
from uuid import UUID
from encord.http.bundle import Bundle
from encord.storage import StorageItem
from encord.utilities.storage.cloud_data_migration import update_storage_item_cloud_info

# --- Migration settings: replace every value below with your own ---

# File path to the private key used for authentication
SSH_PATH = "/Users/chris-encord/ssh-private-key.txt"

# Integration ID of the source cloud storage integration
EXISTING_INTEGRATION_ID = "00000000-0000-0000-0000-000000000000"

# Integration ID of the destination cloud storage integration
NEW_INTEGRATION_ID = "00000000-0000-0000-0000-000000000000"

# ID of the folder where the data resides
STORAGE_FOLDER_HASH = "00000000-0000-0000-0000-000000000000"

# Storage path of the source cloud storage bucket and folder
OLD_BUCKET_NAME_AND_FOLDER = "s3://aws-bucket-001/folder-001/"

# Storage path of the destination cloud storage bucket and folder
NEW_BUCKET_NAME_AND_FOLDER = "gs://gcp-bucket-01/"


# Create the user client from the access key
encord_client = EncordUserClient.create_with_ssh_private_key(
    # For US platform users use "https://api.us.encord.com"
    domain="https://api.encord.com",
    ssh_private_key_path=SSH_PATH,
)

# Fetch the folder that holds the data
storage_folder = encord_client.get_storage_folder(STORAGE_FOLDER_HASH)


# Map a URL in the old bucket/folder to its counterpart in the new one
def create_new_url(existing_url: str) -> str:
    """Return `existing_url` with the old bucket path replaced by the new bucket path.

    Every occurrence of OLD_BUCKET_NAME_AND_FOLDER in the URL is replaced with
    NEW_BUCKET_NAME_AND_FOLDER; a URL that does not contain the old path is
    returned unchanged.
    """
    old_path, new_path = OLD_BUCKET_NAME_AND_FOLDER, NEW_BUCKET_NAME_AND_FOLDER
    migrated_url = existing_url.replace(old_path, new_path)
    return migrated_url

# Collect the UUID of every item currently in the folder
item_uuids = [folder_item.uuid for folder_item in storage_folder.list_items()]

# Snapshot the items before the change; each one must use the existing integration
items_before = encord_client.get_storage_items(item_uuids, sign_url=False)
assert all(
    str(entry.integration_hash) == EXISTING_INTEGRATION_ID
    for entry in items_before
)

# Queue one update per item, passing the shared bundle to every call
with Bundle() as bundle:
    for entry in items_before:
        update_storage_item_cloud_info(
            encord_client,
            entry,
            from_cloud_integration=EXISTING_INTEGRATION_ID,
            new_cloud_integration=NEW_INTEGRATION_ID,
            new_url=create_new_url(entry.url),
            verify_access=True,
            bundle=bundle,
        )

# Fetch the same items again and compare against the snapshot
items_after = encord_client.get_storage_items(item_uuids, sign_url=False)

assert len(items_after) == len(items_before)
assert all(
    str(entry.integration_hash) == NEW_INTEGRATION_ID
    for entry in items_after
)

# Print each item's URL and integration to verify the migration worked
for folder_item in storage_folder.list_items():
    print(f"url: {folder_item.url}")
    print(f"integration: {folder_item.integration_hash}")


Update Folder Properties

Use the following script to update the name and description of an existing folder in Data > Files & Folders. Ensure that you:
  • Replace <private_key_path> with the path to your private key.
  • Replace Cat Videos with the name of the folder you want to update properties for.
  • Replace Cat videos and images with the new name you want to give the folder.
  • Replace Images and videos of cats with the description you want to give the folder.
Use the list_storage_folders and find_storage_folders methods to search for specific folders.
If you are updating many folders at once, pass a Bundle to folder.update to batch the changes into a single server call. See Bulk Action Best Practices and keep bundle sizes under 1000 operations.
from encord import EncordUserClient
from encord.orm.storage import FoldersSortBy

# Authenticate with Encord using your SSH private key
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path="<private_key_path>",
)

# Folder to look for, and the properties it should end up with
folder_name_to_find = "Cat Videos"
new_name = "Cat videos and images"
new_description = "Images and videos of cats"

# Search by name and take the first match (None when nothing matches)
matching_folders = user_client.find_storage_folders(
    search=folder_name_to_find,
    dataset_synced=None,
    order=FoldersSortBy.NAME,
    desc=False,
    page_size=1000,
)
folder = next(matching_folders, None)

if folder is not None:
    # Apply the new name and description to the folder
    folder.update(name=new_name, description=new_description)
    print(f"Folder '{folder_name_to_find}' updated to new name '{new_name}' and description '{new_description}'")
else:
    print(f"No folder found with name {folder_name_to_find}")

Delete Folders

Use the following script to delete a specific folder from Data > Files & Folders. Ensure that you:
  • Replace <private_key_path> with the path to your private key.
  • Replace Specific Folder Name with the name of the folder you want to delete.
from encord import EncordUserClient
from encord.storage import FoldersSortBy

# Instantiate Encord client by substituting the path to your private key
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path="<private_key_path>"
)

# Define the search criteria for the folder
folder_search_criteria = "Specific Folder Name"

# Retrieve the target folder: take the first search match, or None when the
# search yields nothing. Using a default here (instead of catching
# StopIteration around the whole script) keeps a StopIteration raised by any
# later call from being misreported as "no folder found".
folder_to_delete = next(
    user_client.find_storage_folders(
        search=folder_search_criteria,
        dataset_synced=None,
        order=FoldersSortBy.NAME,
        desc=False,
        page_size=1000,
    ),
    None,
)

if folder_to_delete is None:
    print("No folder found with the specified criteria.")
else:
    print(f"Deleting folder {folder_to_delete.uuid}")

    # Delete the folder and verify it cannot be accessed anymore
    folder_to_delete.delete()
    print("Folder deleted successfully.")

    try:
        # Try to refetch the folder to verify it's been deleted
        user_client.get_storage_folder(folder_to_delete.uuid)
    except Exception as e:
        # NOTE(review): any exception is treated as proof of deletion, including
        # unrelated failures; narrow this to the SDK's "not found" error once confirmed.
        print(f"Verification successful: {str(e)}")
    else:
        # The refetch succeeded, so the folder still exists
        print("Error: Folder still accessible after deletion.")