Import Text Regions to HTML

Text region labels are added one at a time. Each label covers the span between a start node and an end node, which together define a range.


# Import dependencies
from typing import List
from encord import EncordUserClient
from encord.objects import LabelRowV2, Object, ObjectInstance, OntologyStructure
from encord.objects.coordinates import HtmlCoordinates
from encord.objects.html_node import HtmlRange, HtmlNode

SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"

# Instantiate Encord client
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify the project
project = user_client.get_project(PROJECT_ID)

label_row = project.list_label_rows_v2(
    data_title_eq="<file-name-of-html-file>"
)[0]

label_row.initialise_labels()

range_html = HtmlRange(
    start=HtmlNode(
       xpath="<full-XPath>",
        offset=<location-of-text-or-object>
    ),
    end=HtmlNode(
       xpath="<full-XPath>",
        offset=<location-of-text-or-object>
    )
)

html_object: Object = project.ontology_structure.get_child_by_title(
        title="<text-region-name>", type_=Object
    )

html_object_instance: ObjectInstance = html_object.create_instance()

# `frames=0` is an optional. It indicates that the text file is a single data unit.

html_object_instance.set_for_frames(frames=0, coordinates=HtmlCoordinates(range=[range_html]))

label_row.add_object_instance(html_object_instance)
label_row.save()

Import Classifications to HTML

This Classification example uses nested attributes, with the following Ontology structure:

  • Accurate?
    • Yes
    • No
      • Correction (text field to provide edits for the correction)
create_instance must be called with range_only=True for text documents, including HTML documents.

# Import dependencies
from typing import List
from pathlib import Path
from encord import EncordUserClient, Project
from encord.objects import LabelRowV2, Classification, Option, OntologyStructure

SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"

# Read the private key from disk and create the user client from its content
private_key_content = Path(SSH_PATH).read_text()
user_client: EncordUserClient = EncordUserClient.create_with_ssh_private_key(private_key_content)

# Open the Project the classification is added to
project: Project = user_client.get_project(PROJECT_ID)

# Find the data unit by file title and take the first match
matching_label_rows = project.list_label_rows_v2(data_title_eq="<file-name-for-html-file>.html")
label_row = matching_label_rows[0]

# Download the existing labels
label_row.initialise_labels()

# Get the Ontology structure attached to the label row
ontology_structure: OntologyStructure = label_row.ontology_structure

# The radio button classification below is assumed to exist in the Ontology.
radio_ontology_classification: Classification = ontology_structure.get_child_by_title(
    title="<classification-name>",
    type_=Classification,
)

# Pick the radio option to use as the answer
radio_classification_option = radio_ontology_classification.get_child_by_title(
    title="<option-name>",
    type_=Option,
)

# Create the classification instance. `range_only=True` is required for HTML documents
radio_classification_instance = radio_ontology_classification.create_instance(range_only=True)

# Answer the classification with the chosen option
radio_classification_instance.set_answer(radio_classification_option)

# Place the classification instance on frame 0
radio_classification_instance.set_for_frames(frames=0)

# Attach the instance to the label row, then save the labels
label_row.add_classification_instance(radio_classification_instance)
label_row.save()

Export Labels for HTML Files


# Import dependencies
from encord import EncordUserClient
import json

SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"

# Instantiate client. Replace <file-path-to-ssh-private-key> with the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify Project. Replace <project-unique-id> with the ID of the Project you want to export labels for.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to export labels for. Replace <file-name-of-html-file> with the name of your HTML file.
# The first label row whose title matches is used.
specific_label_row = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)[0]

# Download label information for the specific data unit
specific_label_row.initialise_labels()

# Print the labels as JSON
print(json.dumps(specific_label_row.to_encord_dict()))

Remove Labels from HTML Files


from encord import EncordUserClient
import json

SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"
OBJECT_HASH = "<label-unique-id>"

# Instantiate client. Replace <file-path-to-ssh-private-key> with the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify Project. Replace <project-unique-id> with the ID of the Project you want to remove labels from.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to remove labels from. Replace <file-name-of-html-file> with the name of your HTML file.
# The first label row whose title matches is used.
specific_label_row = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)[0]

# Download the existing labels so the object instances can be inspected
specific_label_row.initialise_labels()

# Find the object instance to remove. Replace <label-unique-id> with the object hash of the label.
object_to_remove = next(
    (
        object_instance
        for object_instance in specific_label_row.get_object_instances()
        if object_instance.object_hash == OBJECT_HASH
    ),
    None,
)

# Stop with a clear error instead of passing None to remove_object
if object_to_remove is None:
    raise ValueError(
        f"No object instance with hash {OBJECT_HASH!r} found in {DATA_UNIT_NAME!r}"
    )

# Remove the label and save the change
specific_label_row.remove_object(object_to_remove)

specific_label_row.save()