Skip to main content

Datasets

Creating a dataset#

You can use the API to create a dataset. First you need to create a public-private key pair for Encord.

You also need to decide where your data will be hosted so that you can select the appropriate dataset type.


const crypto = require('crypto');const sshpk = require('sshpk');
/**
 * Builds the `Authorization` header value for a request body.
 *
 * The body is hashed with SHA-256, the digest is signed by a signer obtained
 * from the parsed OpenSSH private key (`createSign('sha512')`), and the result
 * is returned as `<public key hex>:<signature hex>`. Each hex string is taken
 * from the first part (`parts[0].data`) of the corresponding sshpk object.
 *
 * @param {string} data - Serialized request body to sign.
 * @param {string} privateKey - Private key text in OpenSSH format.
 * @returns {string} Header value of the form `publicKeyHex:signatureHex`.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // Sign the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return [publicKeyHex, signatureHex].join(':');
};
// Create a dataset: send a "dataset" POST query to the user-level endpoint,
// authorised with a header derived from the request body and the private key.
const axios = require('axios');

// Values in angle brackets are placeholders to be replaced before running.
const datasetPayload = {
    "title": '<Dataset title>',
    "type": '<0: CORD Storage, 1: AWS, 2: GCP, 3: AZURE>',
    "description": '<Dataset description>'
};

const requestBody = JSON.stringify({
    "query_type": "dataset",
    "query_method": "POST",
    "values": {
        "uid": null,
        "payload": datasetPayload
    }
});

const requestConfig = {
    method: 'post',
    url: 'https://api.encord.com/public/user',
    headers: {
        'Content-Type': 'application/json',
        // Computed from the exact string that is sent as the request body.
        'Authorization': generateAuthHeader(requestBody, '<Private key>'),
        'Accept': 'application/json'
    },
    data: requestBody
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

Creating a dataset API key#

Via the API you can create a dataset API key. The API key is required to interact with the dataset. You also need to provide the dataset_hash, which uniquely identifies a dataset. The ResourceID of a dataset is the same as its dataset_hash. This capability is available only to the Admin of a dataset.

const crypto = require('crypto');const sshpk = require('sshpk');
/**
 * Builds the `Authorization` header value for a request body.
 *
 * The body is hashed with SHA-256, the digest is signed by a signer obtained
 * from the parsed OpenSSH private key (`createSign('sha512')`), and the result
 * is returned as `<public key hex>:<signature hex>`. Each hex string is taken
 * from the first part (`parts[0].data`) of the corresponding sshpk object.
 *
 * @param {string} data - Serialized request body to sign.
 * @param {string} privateKey - Private key text in OpenSSH format.
 * @returns {string} Header value of the form `publicKeyHex:signatureHex`.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // Sign the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return [publicKeyHex, signatureHex].join(':');
};

// Create an API key for a dataset: send a "datasetapikey" POST query to the
// user-level endpoint, authorised with a header derived from the request body
// and the private key.
const axios = require('axios');

// Values in angle brackets are placeholders to be replaced before running.
const apiKeyPayload = {
    "dataset_hash": '<dataset_id>',
    "title": '<Dataset title>',
    "scopes": '["dataset.read", "dataset.write"]'
};

const requestBody = JSON.stringify({
    "query_type": "datasetapikey",
    "query_method": "POST",
    "values": {
        "uid": null,
        "payload": apiKeyPayload
    }
});

const requestConfig = {
    method: 'post',
    url: 'https://api.encord.com/public/user',
    headers: {
        'Content-Type': 'application/json',
        // Computed from the exact string that is sent as the request body.
        'Authorization': generateAuthHeader(requestBody, '<Private key>'),
        'Accept': 'application/json'
    },
    data: requestBody
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

Fetching dataset API keys#

Via the API you can get all API keys for an existing dataset. You need to provide the dataset_hash, which uniquely identifies a dataset. The ResourceID of a dataset is the same as its dataset_hash. This capability is available only to the Admin of a dataset.

Equivalently using NodeJS with Axios:

const crypto = require('crypto');const sshpk = require('sshpk');
/**
 * Builds the `Authorization` header value for a request body.
 *
 * The body is hashed with SHA-256, the digest is signed by a signer obtained
 * from the parsed OpenSSH private key (`createSign('sha512')`), and the result
 * is returned as `<public key hex>:<signature hex>`. Each hex string is taken
 * from the first part (`parts[0].data`) of the corresponding sshpk object.
 *
 * @param {string} data - Serialized request body to sign.
 * @param {string} privateKey - Private key text in OpenSSH format.
 * @returns {string} Header value of the form `publicKeyHex:signatureHex`.
 */
const generateAuthHeader = (data, privateKey) => {
    const parsedKey = sshpk.parsePrivateKey(privateKey, 'openssh');

    // Sign the SHA-256 digest of the body rather than the body itself.
    const bodyDigest = crypto.createHash('sha256').update(data).digest();
    const signer = parsedKey.createSign('sha512');
    signer.update(bodyDigest);
    const signatureHex = signer.sign().parts[0].data.toString('hex');

    const publicKeyHex = parsedKey.toPublic().parts[0].data.toString('hex');

    return [publicKeyHex, signatureHex].join(':');
};

// Fetch the API keys of a dataset: send a "datasetapikey" GET query to the
// user-level endpoint, authorised with a header derived from the request body
// and the private key.
const axios = require('axios');

// '<dataset_id>' is a placeholder to be replaced before running.
const lookupPayload = {
    "dataset_hash": '<dataset_id>'
};

const requestBody = JSON.stringify({
    "query_type": "datasetapikey",
    "query_method": "GET",
    "values": {
        "uid": null,
        "payload": lookupPayload
    }
});

const requestConfig = {
    // The HTTP verb is POST; the logical operation is carried in "query_method".
    method: 'post',
    url: 'https://api.encord.com/public/user',
    headers: {
        'Content-Type': 'application/json',
        // Computed from the exact string that is sent as the request body.
        'Authorization': generateAuthHeader(requestBody, '<Private key>'),
        'Accept': 'application/json'
    },
    data: requestBody
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

Fetching dataset information#

Fetch information associated with a given dataset.


// Fetch dataset information: send a "dataset" GET query to the resource-level
// endpoint, identifying the dataset via the ResourceID header and authorising
// with the dataset API key.
const axios = require('axios');

const requestBody = JSON.stringify({
    "query_type": "dataset",
    "query_method": "GET",
    "values": {
        "uid": null,
        "payload": null
    }
});

// Values in angle brackets are placeholders to be replaced before running.
const requestHeaders = {
    'Content-Type': 'application/json',
    'ResourceID': '<dataset_id>',
    'Authorization': '<dataset_api_key>',
    'Accept': 'application/json'
};

const requestConfig = {
    // The HTTP verb is POST; the logical operation is carried in "query_method".
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: requestHeaders,
    data: requestBody
};

axios(requestConfig)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });

Adding data#

Adding data to Encord-hosted storage#

Uploading videos#

To upload a video to a dataset using Encord storage, run the uploadVideo function with the file path of the desired video as input.

var axios = require('axios');var fs = require('fs');var path = require('path');
/**
 * Uploads a local video file to a dataset.
 *
 * First requests a signed URL for the file, then hands the file and the
 * signed-URL reply to `uploadToSignedUrl`, which performs the upload and
 * registers the video.
 *
 * Any error raised along the way is logged with `console.log('Error', e)` and
 * not rethrown, so the returned promise never rejects.
 *
 * @param {string} filePath - Path of the video file on disk.
 * @param {string} datasetId - Dataset identifier, forwarded to the helpers.
 * @param {string} datasetApiKey - Dataset API key, forwarded to the helpers.
 * @returns {Promise<*>} The value resolved by `uploadToSignedUrl`, or
 *     `undefined` if an error was caught and logged.
 */
const uploadVideo = async (filePath, datasetId, datasetApiKey) => {
    try {
        // GET signed url
        const signedVideoUrl = await getSignedVideoUrl(filePath, datasetId, datasetApiKey);
        const signedUrlData = signedVideoUrl.response;
        const { signed_url } = signedUrlData;

        // Upload to signed url.
        // The upload must be awaited: otherwise this function resolves before
        // the upload has finished, and a failed upload becomes an unhandled
        // rejection instead of reaching the catch block below.
        return await uploadToSignedUrl(filePath, signed_url, signedUrlData, datasetId, datasetApiKey);
    } catch (e) {
        console.log('Error', e);
    }
};
/**
 * Requests a signed URL for a video by sending a "signedvideourl" GET query.
 *
 * @param {string} fileName - Path or name of the video; only its base name is sent.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const getSignedVideoUrl = async (fileName, datasetId, datasetApiKey) => {
    const query = {
        "query_type": "signedvideourl",
        "query_method": "GET",
        "values": {
            // Directory components are stripped; the uid is the bare file name.
            "uid": path.basename(fileName),
            "payload": null
        }
    };

    const { data: replyBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            'ResourceID': datasetId,
            'Authorization': datasetApiKey,
            'Accept': 'application/json'
        },
        data: JSON.stringify(query)
    });

    return replyBody;
};
/**
 * Uploads a file to a signed URL and then registers it as a video.
 *
 * Step 1 PUTs the file contents to `signedUrl` as `application/octet-stream`.
 * Step 2 sends a "video" PUT query whose uid is `signedUrlData.data_hash` and
 * whose payload is the whole `signedUrlData` object.
 *
 * @param {string} filePath - Path of the file to read and upload.
 * @param {string} signedUrl - URL that receives the file contents.
 * @param {Object} signedUrlData - Signed-URL reply; must carry `data_hash`.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the registration response.
 */
const uploadToSignedUrl = async (filePath, signedUrl, signedUrlData, datasetId, datasetApiKey) => {
    // Step 1: push the raw bytes to the signed URL. The size limits are lifted
    // so that axios does not reject large request bodies.
    await axios({
        method: 'put',
        url: signedUrl,
        headers: {
            'Content-Type': 'application/octet-stream',
        },
        data: fs.readFileSync(filePath),
        maxContentLength: Infinity,
        maxBodyLength: Infinity
    });

    // Step 2: register the uploaded file with the API.
    const registration = JSON.stringify({
        "query_type": "video",
        "query_method": "PUT",
        "values": {
            "uid": signedUrlData.data_hash,
            "payload": signedUrlData
        }
    });

    const { data: registrationReply } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            'ResourceID': datasetId,
            'Authorization': datasetApiKey,
            'Accept': 'application/json'
        },
        data: registration
    });

    return registrationReply;
};

The following code uploads example_video.mp4 from the desktop.

// Example invocation: upload one video file from the desktop.
// The identifier and key below are sample values.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';

const videoPath = '/Users/name/Desktop/example_video.mp4';

uploadVideo(videoPath, datasetId, datasetApiKey);

Creating image groups#

Use the function createImageGroup to upload images and create an image group using Encord storage.

var axios = require('axios');var fs = require('fs');var path = require('path');
/**
 * Uploads a set of images and creates an image group from them.
 *
 * The flow is: request signed URLs for the base names of the files, upload
 * each file via `uploadToSignedUrlList`, then create the group from the
 * returned data hashes via `createImageGroupApiCall`.
 *
 * @param {string[]} filePaths - Paths of the image files on disk.
 * @param {string} datasetId - Dataset identifier, forwarded to the helpers.
 * @param {string} datasetApiKey - Dataset API key, forwarded to the helpers.
 * @returns {Promise<*>} The value resolved by `createImageGroupApiCall`. The
 *     promise rejects if any step fails.
 */
const createImageGroup = async (filePaths, datasetId, datasetApiKey) => {
    const shortNames = filePaths.map((filePath) => path.basename(filePath));

    const signedImagesReply = await signedImagesUrl(
        shortNames,
        datasetId,
        datasetApiKey
    );

    const dataHashes = await uploadToSignedUrlList(
        filePaths,
        signedImagesReply.response,
        datasetId,
        datasetApiKey
    );

    // Return the promise instead of leaving it floating, so the caller can
    // wait for the group to be created and observe a failure of this step.
    return createImageGroupApiCall(dataHashes, datasetId, datasetApiKey);
};
/**
 * Requests signed URLs for a list of images by sending a "signedimagesurl"
 * GET query whose uid is the list of file names.
 *
 * @param {string[]} shortNames - File names to request signed URLs for.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const signedImagesUrl = async (shortNames, datasetId, datasetApiKey) => {
    const query = {
        "query_type": "signedimagesurl",
        "query_method": "GET",
        "values": {
            "uid": shortNames,
            "payload": null
        }
    };

    const { data: replyBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            'ResourceID': datasetId,
            'Authorization': datasetApiKey,
            'Accept': 'application/json'
        },
        data: JSON.stringify(query)
    });

    return replyBody;
};

/**
 * Uploads each file to its signed URL and registers it as an image.
 *
 * `filePaths[i]` is paired with `signedUrls[i]`. A file is processed only when
 * its base name equals the `title` of the paired entry; otherwise it is
 * skipped with a warning and contributes no data hash. For each processed
 * file the contents are PUT to `signed_url`, then an "image" PUT query is sent
 * with the entry's `data_hash` as uid and the whole entry as payload.
 *
 * @param {string[]} filePaths - Paths of the image files on disk.
 * @param {Object[]} signedUrls - Entries with `signed_url`, `title` and
 *     `data_hash`, in the same order as `filePaths`.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<Array>} The `response.data_hash` of each registration
 *     reply, in processing order.
 */
const uploadToSignedUrlList = async (filePaths,
                                     signedUrls,
                                     datasetId,
                                     datasetApiKey) => {
    const dataHashes = [];

    for (let index = 0; index < filePaths.length; index++) {
        const filePath = filePaths[index];
        const fileName = path.basename(filePath);

        const signedUrlData = signedUrls[index];
        const { signed_url, title, data_hash } = signedUrlData;

        if (fileName !== title) {
            // Make the skip visible: a silently dropped file would otherwise
            // only show up later as a missing image.
            console.warn(`Skipping ${filePath}: signed URL entry at index ${index} is for "${title}"`);
            continue;
        }

        // Read the file only once it is known that it will be uploaded.
        const fileToUpload = fs.readFileSync(filePath);

        await axios({
            method: 'put',
            url: signed_url,
            headers: {
                'Content-Type': 'application/octet-stream',
            },
            data: fileToUpload,
            // Lift the size limits so axios does not reject large bodies.
            maxContentLength: Infinity,
            maxBodyLength: Infinity
        });

        const data = JSON.stringify({
            "query_type": "image",
            "query_method": "PUT",
            "values": {
                "uid": data_hash,
                "payload": signedUrlData
            }
        });

        const config = {
            method: 'post',
            url: 'https://api.encord.com/public',
            headers: {
                'Content-Type': 'application/json',
                'ResourceID': datasetId,
                'Authorization': datasetApiKey,
                'Accept': 'application/json'
            },
            data: data
        };

        const cordStorageReply = await axios(config);
        dataHashes.push(cordStorageReply.data.response.data_hash);
    }

    return dataHashes;
};
/**
 * Creates an image group by sending an "imagegroup" POST query whose uid is
 * the list of data hashes and whose payload is an empty object.
 *
 * @param {Array} dataHashes - Data hashes of the images to group.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The `data` property of the axios response.
 */
const createImageGroupApiCall = async (dataHashes, datasetId, datasetApiKey) => {
    const query = {
        "query_type": "imagegroup",
        "query_method": "POST",
        "values": {
            "uid": dataHashes,
            "payload": {}
        }
    };

    const { data: replyBody } = await axios({
        method: 'post',
        url: 'https://api.encord.com/public',
        headers: {
            'Content-Type': 'application/json',
            'ResourceID': datasetId,
            'Authorization': datasetApiKey,
            'Accept': 'application/json'
        },
        data: JSON.stringify(query)
    });

    return replyBody;
};

The following code uploads an image group consisting of three images.

// Example invocation: build an image group from three local image files.
// The identifier and key below are sample values.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';

const imagePaths = [
    '/Users/name/Desktop/Image_Group_Folder/image_one.jpeg',
    '/Users/name/Desktop/Image_Group_Folder/image_two.jpeg',
    '/Users/name/Desktop/Image_Group_Folder/image_three.jpg'
];

createImageGroup(imagePaths, datasetId, datasetApiKey);

Adding data from private cloud#

  1. Use the API to retrieve a list of available Cloud Integrations

// List the available cloud integrations: send a "cloudintegration" GET query
// to the resource-level endpoint, authorised with the dataset API key.
const axios = require('axios');

const requestBody = JSON.stringify({
    "query_type": "cloudintegration",
    "query_method": "GET",
    "values": {
        "uid": null,
        "payload": null
    }
});

// Values in angle brackets are placeholders to be replaced before running.
const requestHeaders = {
    'Content-Type': 'application/json',
    'ResourceID': '<dataset_id>',
    'Authorization': '<dataset_api_key>',
    'Accept': 'application/json'
};

const requestConfig = {
    // The HTTP verb is POST; the logical operation is carried in "query_method".
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: requestHeaders,
    data: requestBody
};

const printReply = (response) => {
    console.log(JSON.stringify(response.data));
};

const printError = (error) => {
    console.log(error);
};

axios(requestConfig)
    .then(printReply)
    .catch(printError);
  2. Grab the id of the integration of your choice and call the API to add the data, supplied as a JSON file in the format specified in the private cloud section of the datasets documentation.

// Add data from a private cloud integration: send a "datasetdata" POST query
// whose payload carries the integration id and the parsed contents of a local
// JSON file listing the files to add.
const axios = require('axios');
const fs = require('fs');
// NOTE(review): the original required 'form-datasets', which looks like a
// garbled form of the 'form-data' package name and would make this script
// throw on load. The binding is not used below — confirm whether the require
// is needed at all.
const formData = require('form-data');

// Values in angle brackets are placeholders to be replaced before running.
const privateCloudJsonFile = JSON.parse(fs.readFileSync('<Path to your JSON>'));

const data = JSON.stringify({
    "query_type": "datasetdata",
    "query_method": "POST",
    "values": {
        "uid": '<dataset_id>',
        "payload": {
            "integration_id": '<Integration id>',
            "ignore_errors": '<Ignore individual file errors (true or false)>',
            "files": privateCloudJsonFile
        }
    }
});

const config = {
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
        'Content-Type': 'application/json',
        'ResourceID': '<dataset_id>',
        'Authorization': '<dataset_api_key>',
        'Accept': 'application/json'
    },
    data: data
};

axios(config)
    .then((response) => {
        console.log(JSON.stringify(response.data));
    })
    .catch((error) => {
        console.log(error);
    });