Labels
HTML Files and Labels
Get Started
- Global and US Encord Platforms
- 1. Prerequisites and Installation
- 2. Register Cloud Data
- 3. Set Up Your Project and Team
- Export Labels
General
Index
Projects
Labels
- Working with Labels
- Delete Labels/Classifications
- Label / Activity logs
- Bitmasks
- Audio Labels and Classifications
- HTML Files and Labels
- Text Files and Labels
- PDF Labels and Classifications
- Import Labels/Annotations
- Import Labels/Annotations to Consensus Branches
- Import COCO Labels/Annotations
- Copy labels between Projects
Datasets
Labels
HTML Files and Labels
Learn how labeling HTML files works using the Encord SDK.
Import Text Regions to HTML
Only one text region object label can be added at a time. It spans from `start` to `end`, which are passed together using `range`.
# Import dependencies
from typing import List
from encord import EncordUserClient
from encord.objects import LabelRowV2, Object, ObjectInstance, OntologyStructure
from encord.objects.coordinates import HtmlCoordinates
from encord.objects.html_node import HtmlRange, HtmlNode
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
# Instantiate Encord client
user_client = EncordUserClient.create_with_ssh_private_key(
ssh_private_key_path=SSH_PATH
)
# Specify the project
project = user_client.get_project(PROJECT_ID)
label_row = project.list_label_rows_v2(
data_title_eq="<file-name-of-html-file>"
)[0]
label_row.initialise_labels()
range_html = HtmlRange(
start=HtmlNode(
xpath="<full-XPath>",
offset=<location-of-text-or-object>
),
end=HtmlNode(
xpath="<full-XPath>",
offset=<location-of-text-or-object>
)
)
html_object: Object = project.ontology_structure.get_child_by_title(
title="<text-region-name>", type_=Object
)
html_object_instance: ObjectInstance = html_object.create_instance()
# `frames=0` is an optional. It indicates that the text file is a single data unit.
html_object_instance.set_for_frames(frames=0, coordinates=HtmlCoordinates(range=[range_html]))
label_row.add_object_instance(html_object_instance)
label_row.save()
Import Classifications to HTML
The example for the Classification uses nested attributes with the Ontology structure as follows:
- Accurate?
- Yes
- No
- Correction (text field to provide edits for the correction)
`create_instance` must be called with `range_only=True` for text documents. This includes HTML documents.
# Import dependencies
from typing import List
from pathlib import Path
from encord import EncordUserClient, Project
from encord.objects import LabelRowV2, Classification, Option, OntologyStructure
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"

# Read the SSH private key from disk and create the user client from its contents
private_key_text = Path(SSH_PATH).read_text()
user_client: EncordUserClient = EncordUserClient.create_with_ssh_private_key(private_key_text)

# Project that the classification is added to
project: Project = user_client.get_project(PROJECT_ID)

# Take the first label row whose data title matches the HTML file to classify
matching_label_rows = project.list_label_rows_v2(data_title_eq="<file-name-for-html-file>.html")
label_row = matching_label_rows[0]

# Download the existing labels
label_row.initialise_labels()

# The radio button classification below is assumed to exist in the Ontology
ontology_structure: OntologyStructure = label_row.ontology_structure
radio_classification: Classification = ontology_structure.get_child_by_title(
    title="<classification-name>",
    type_=Classification,
)
selected_option = radio_classification.get_child_by_title(title="<option-name>", type_=Option)

# `range_only=True` is required for HTML documents
classification_instance = radio_classification.create_instance(range_only=True)

# Answer the classification, then select the frames where it is present
classification_instance.set_answer(selected_option)
classification_instance.set_for_frames(frames=0)

# Attach the classification to the label row and save the labels
label_row.add_classification_instance(classification_instance)
label_row.save()
Export Labels for HTML Files
# Import dependencies
from encord import EncordUserClient
import json
# Replace the <...> placeholders below with your own values before running.
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"

# Instantiate the client. SSH_PATH is the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify the Project. PROJECT_ID identifies the Project you want to export labels for.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to export labels for. DATA_UNIT_NAME is the title of
# that data unit; the first label row with a matching title is used.
specific_label_row = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)[0]

# Download label information for the specific data unit
specific_label_row.initialise_labels()

# Print the labels as JSON
print(json.dumps(specific_label_row.to_encord_dict()))
Remove Labels from HTML Files
from encord import EncordUserClient
import json
# Replace the <...> placeholders in this script with your own values before running.
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"

# Instantiate the client. SSH_PATH is the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify the Project that contains the label you want to remove.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to remove the label from. The first label row whose
# title matches DATA_UNIT_NAME is used.
specific_label_row = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)[0]

# Download the existing labels so the object instances can be inspected
specific_label_row.initialise_labels()

# Find the object instance whose hash matches the label to remove
object_to_remove = next(
    (
        object_instance
        for object_instance in specific_label_row.get_object_instances()
        if object_instance.object_hash == '<label-unique-id>'
    ),
    None,
)

# Stop with a clear error instead of passing None to remove_object when nothing matched
if object_to_remove is None:
    raise ValueError(
        f"No object instance with the requested hash was found in {DATA_UNIT_NAME!r}"
    )

# Remove the label and save the change
specific_label_row.remove_object(object_to_remove)
specific_label_row.save()
Was this page helpful?