Labels
Text Files and Labels
Get Started
- Global and US Encord Platforms
- 1. Prerequisites and Installation
- 2. Register Cloud Data
- 3. Set Up Your Project and Team
- Export Labels
General
Index
Projects
Labels
- Working with Labels
- Delete Labels/Classifications
- Label / Activity logs
- Bitmasks
- Audio Labels and Classifications
- HTML Files and Labels
- Text Files and Labels
- PDF Labels and Classifications
- Import Labels/Annotations
- Import Labels/Annotations to Consensus Branches
- Import COCO Labels/Annotations
- Copy labels between Projects
Datasets
Labels
Text Files and Labels
Learn how labeling Text files works using the Encord SDK.
Import Text Regions to Text Files
A single text region object label can be added at a character range between `start` and `end` using `range`.
# Import dependencies
from typing import List
from encord import EncordUserClient
from encord.objects import LabelRowV2, Object, ObjectInstance, OntologyStructure
from encord.objects.frames import Range
from encord.objects.coordinates import TextCoordinates
# Placeholders: path to your SSH private key file and the target Project's unique ID.
SSH_PATH = "<file-path-to-your-ssh-key>"
PROJECT_ID = "<project-unique-id>"

# Authenticate using the SSH private key stored at SSH_PATH
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH,
)

# Specify the project
project = user_client.get_project(PROJECT_ID)

# Example mapping of data unit titles to their respective text ranges.
# Each Range is a character range (start/end) within the text file.
data_units_to_update = {
    "<my-text-file-01>": [Range(start=5000, end=5050)],
    "<your-text-file-01>": [Range(start=3000, end=3050), Range(start=14000, end=14050)],
    "<their-text-file-01>": [Range(start=13100, end=13250), Range(start=24100, end=24550)],
}

# Find the ontology object for your Text Region
text_object: Object = project.ontology_structure.get_child_by_title(
    title="<name-of-your-text-region>", type_=Object
)
# Defensive check.
# NOTE(review): get_child_by_title may raise instead of returning None — confirm against the SDK.
if text_object is None:
    raise ValueError("Ontology object for 'Text Region' not found.")

# Iterate over each data unit and apply updates
for data_title, text_ranges in data_units_to_update.items():
    # Get the corresponding label row
    label_rows = project.list_label_rows_v2(data_title_eq=data_title)
    if not label_rows:
        print(f"Skipping: No label row found for {data_title}")
        continue

    label_row = label_rows[0]
    # Download the existing labels before modifying them
    label_row.initialise_labels()

    # Add one object instance per text range
    for text_range in text_ranges:
        text_object_instance: ObjectInstance = text_object.create_instance()
        text_object_instance.set_for_frames(
            frames=0,  # Indicates a single data unit (text file)
            coordinates=TextCoordinates(range=[text_range]),
        )
        label_row.add_object_instance(text_object_instance)
        print(f"Added text range {text_range} to {data_title}")

    # Save the label row once, after adding all instances for this data unit
    label_row.save()
    print(f"Saved label row for {data_title}")

print("Updates applied to all specified data units!")
Import Classifications to Text Files
The example for the Classification uses nested attributes with the Ontology structure as follows:
- Accurate?
- Yes
- No
- Correction (text field to provide edits for the correction)
`create_instance` must use `range_only=True` for text documents. This includes HTML documents.
# Import dependencies
from typing import List
from pathlib import Path
from encord import EncordUserClient, Project
from encord.objects.frames import Range
from encord.objects import LabelRowV2, Classification, Option, OntologyStructure
# Placeholders: path to your SSH private key file and the target Project's unique ID.
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"

# Create user client using the contents of the SSH private key file
user_client: EncordUserClient = EncordUserClient.create_with_ssh_private_key(
    Path(SSH_PATH).read_text()
)

# Get the project the classification is added to
project: Project = user_client.get_project(PROJECT_ID)

# Specify the data unit to apply the classification to
label_rows = project.list_label_rows_v2(
    data_title_eq="<file-name-for-text-file>.html"
)
# Fail with a clear message instead of an IndexError when the title matches nothing
if not label_rows:
    raise ValueError("No label row found for the specified data unit title.")
label_row = label_rows[0]

# Download the existing labels
label_row.initialise_labels()

# Get the Ontology structure
ontology_structure: OntologyStructure = label_row.ontology_structure

# Assume that the following radio button classification exists in the Ontology.
radio_ontology_classification: Classification = (
    ontology_structure.get_child_by_title(
        title="<classification-name>", type_=Classification
    )
)

# Look up the option (answer) to select for the classification
radio_classification_option = radio_ontology_classification.get_child_by_title(
    title="<option-name>",
    type_=Option,
)

# Create classification instance. `range_only=True` is required for HTML documents
radio_classification_instance = radio_ontology_classification.create_instance(range_only=True)

# Set the answer of the classification instance
radio_classification_instance.set_answer(radio_classification_option)

# Select the frames where the classification instance is present
radio_classification_instance.set_for_frames(frames=0)

# Add it to the label row
label_row.add_classification_instance(radio_classification_instance)

# Save labels
label_row.save()
Export Labels for Text Files
# Import dependencies
from encord import EncordUserClient
import json
# Placeholders: replace each value before running.
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"

# Instantiate client. SSH_PATH is the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify Project. PROJECT_ID is the unique ID of the Project you want to export labels for.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to export labels for. DATA_UNIT_NAME is the title of that data unit.
label_rows = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)
# Fail with a clear message instead of an IndexError when the title matches nothing
if not label_rows:
    raise ValueError(f"No label row found for {DATA_UNIT_NAME}")
specific_label_row = label_rows[0]

# Download label information for the specific data unit
specific_label_row.initialise_labels()

# Print the labels as JSON
print(json.dumps(specific_label_row.to_encord_dict()))
Remove Labels from Text Files
from encord import EncordUserClient
import json
# Placeholders: replace each value before running.
SSH_PATH = "<file-path-to-ssh-private-key>"
PROJECT_ID = "<project-unique-id>"
DATA_UNIT_NAME = "<file-name-of-html-file>"

# Instantiate client. SSH_PATH is the path to the file containing your private key.
user_client = EncordUserClient.create_with_ssh_private_key(
    ssh_private_key_path=SSH_PATH
)

# Specify Project. PROJECT_ID is the unique ID of the Project you want to remove labels from.
project = user_client.get_project(PROJECT_ID)

# Specify the data unit you want to remove labels from. DATA_UNIT_NAME is the title of that data unit.
label_rows = project.list_label_rows_v2(
    data_title_eq=DATA_UNIT_NAME
)
# Fail with a clear message instead of an IndexError when the title matches nothing
if not label_rows:
    raise ValueError(f"No label row found for {DATA_UNIT_NAME}")
specific_label_row = label_rows[0]

# Download the existing labels so the object instances can be inspected
specific_label_row.initialise_labels()

# Find the object instance whose hash matches the label to remove
object_to_remove = None
for object_instance in specific_label_row.get_object_instances():
    if object_instance.object_hash == '<label-unique-id>':
        object_to_remove = object_instance
        break

# Only remove and save when a matching instance was found; passing None to
# remove_object would otherwise fail with an unhelpful error.
if object_to_remove is None:
    print(f"No object instance with the given hash found in {DATA_UNIT_NAME}")
else:
    specific_label_row.remove_object(object_to_remove)
    specific_label_row.save()
Was this page helpful?