- API docs
- CLI
- Integration guides
- Blog
- How machines learn to understand words: a guide to embeddings in NLP
- Prompt-based learning with Transformers
- Efficient Transformers II: knowledge distillation & fine-tuning
- Efficient Transformers I: attention mechanisms
- Deep hierarchical unsupervised intent modelling: getting value without training data
- Fixing annotating bias with Communications Mining
- Active learning: better ML models in less time
- It's all in the numbers - assessing model performance with metrics
- Why model validation is important
- Comparing Communications Mining and Google AutoML for conversational data intelligence
Communications Mining Developer Guide
Predictions
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict
Permissions required: View labels, View sources
BILLABLE OPERATION
You will be charged 1 AI unit per comment provided in the request body.
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "from": "alice@company.com", "sent_at": "2020-01-09T16:34:45Z", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Figures Request" }, "to": [ "bob@organisation.org" ] } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": { "string:City": "London" } }, { "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." }, "from": "bob@organisation.org", "sent_at": "2020-01-09T16:44:45Z", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "Re: Figures Request" }, "to": [ "alice@company.com" ] } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": { "string:City": "Bucharest" } } ], "threshold": 0.25 }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "from": "alice@company.com", "sent_at": "2020-01-09T16:34:45Z", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Figures Request" }, "to": [ "bob@organisation.org" ] } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": { "string:City": "London" } }, { "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." }, "from": "bob@organisation.org", "sent_at": "2020-01-09T16:44:45Z", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "Re: Figures Request" }, "to": [ "alice@company.com" ] } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": { "string:City": "Bucharest" } } ], "threshold": 0.25 }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { messages: [ { body: { text: "Hi Bob,\n\nCould you send me the figures for today?", }, from: "alice@company.com", sent_at: "2020-01-09T16:34:45Z", signature: { text: "Thanks,\nAlice" }, subject: { text: "Figures Request" }, to: ["bob@organisation.org"], }, ], timestamp: "2013-09-12T20:01:20.000000+00:00", user_properties: { "string:City": "London" }, }, { messages: [ { body: { text: "Alice,\n\nHere are the figures for today." }, from: "bob@organisation.org", sent_at: "2020-01-09T16:44:45Z", signature: { text: "Regards,\nBob" }, subject: { text: "Re: Figures Request" }, to: ["alice@company.com"], }, ], timestamp: "2011-12-12T10:04:30.000000+00:00", user_properties: { "string:City": "Bucharest" }, }, ], threshold: 0.25, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { messages: [ { body: { text: "Hi Bob,\n\nCould you send me the figures for today?", }, from: "alice@company.com", sent_at: "2020-01-09T16:34:45Z", signature: { text: "Thanks,\nAlice" }, subject: { text: "Figures Request" }, to: ["bob@organisation.org"], }, ], timestamp: "2013-09-12T20:01:20.000000+00:00", user_properties: { "string:City": "London" }, }, { messages: [ { body: { text: "Alice,\n\nHere are the figures for today." }, from: "bob@organisation.org", sent_at: "2020-01-09T16:44:45Z", signature: { text: "Regards,\nBob" }, subject: { text: "Re: Figures Request" }, to: ["alice@company.com"], }, ], timestamp: "2011-12-12T10:04:30.000000+00:00", user_properties: { "string:City": "Bucharest" }, }, ], threshold: 0.25, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "documents": [ { "messages": [ { "from": "alice@company.com", "to": ["bob@organisation.org"], "sent_at": "2020-01-09T16:34:45Z", "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "subject": {"text": "Figures Request"}, "signature": {"text": "Thanks,\nAlice"}, } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": {"string:City": "London"}, }, { "messages": [ { "from": "bob@organisation.org", "to": ["alice@company.com"], "sent_at": "2020-01-09T16:44:45Z", "body": { "text": "Alice,\n\nHere are the figures for today." }, "subject": {"text": "Re: Figures Request"}, "signature": {"text": "Regards,\nBob"}, } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "documents": [ { "messages": [ { "from": "alice@company.com", "to": ["bob@organisation.org"], "sent_at": "2020-01-09T16:34:45Z", "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "subject": {"text": "Figures Request"}, "signature": {"text": "Thanks,\nAlice"}, } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": {"string:City": "London"}, }, { "messages": [ { "from": "bob@organisation.org", "to": ["alice@company.com"], "sent_at": "2020-01-09T16:44:45Z", "body": { "text": "Alice,\n\nHere are the figures for today." }, "subject": {"text": "Re: Figures Request"}, "signature": {"text": "Regards,\nBob"}, } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
live
or staging
to query the current Live or Staging model version.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
documents | array<Comment> | yes | A batch of maximum 4096 documents, in the format described in Comment Reference. Larger batches are faster (per document) than smaller ones. |
threshold | number | no | The confidence threshold to filter the label results by. A number between 1.0 and 0.0 . 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used.
|
labels | array<Label> | no | A list of requested labels to be returned with optionally label-specific thresholds. |
Label
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] .
|
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
NAME | TYPE | DESCRIPTION |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. See Overview to learn more about error responses.
|
predictions | array<array<Label>> | A list of array<Label> in the same order as the comments in the request, where each Label has the format described here.
|
entities | array<array<Entity>> | A list of array<Entity> in the same order as the comments in the request, where each Entity has the format described here.
|
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here.
|
model | Model | Information about the model that was used to make the predictions, in the format described here. |
latest
instead of a pinned model version.
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails
Permissions required: View labels, View sources
BILLABLE OPERATION
You will be charged 1 AI unit per raw email provided in the request body.
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "raw_email": { "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, "headers": { "parsed": { "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "From": "alice@company.com", "Message-ID": "abcdef@company.com", "References": "<01234@company.com> <56789@company.com>", "Subject": "Figures Request", "To": "bob@organisation.org" } } }, "user_properties": { "string:City": "London" } }, { "raw_email": { "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" } }, "user_properties": { "string:City": "Bucharest" } } ], "include_comments": false, "threshold": 0.25, "transform_tag": "generic.0.CONVKER5" }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "raw_email": { "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, "headers": { "parsed": { "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "From": "alice@company.com", "Message-ID": "abcdef@company.com", "References": "<01234@company.com> <56789@company.com>", "Subject": "Figures Request", "To": "bob@organisation.org" } } }, "user_properties": { "string:City": "London" } }, { "raw_email": { "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" } }, "user_properties": { "string:City": "Bucharest" } } ], "include_comments": false, "threshold": 0.25, "transform_tag": "generic.0.CONVKER5" }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { raw_email: { body: { plain: "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice", }, headers: { parsed: { Date: "Thu, 09 Jan 2020 16:34:45 +0000", From: "alice@company.com", "Message-ID": "abcdef@company.com", References: "<01234@company.com> <56789@company.com>", Subject: "Figures Request", To: "bob@organisation.org", }, }, }, user_properties: { "string:City": "London" }, }, { raw_email: { body: { html: "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>", }, headers: { raw: "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com", }, }, user_properties: { "string:City": "Bucharest" }, }, ], include_comments: false, threshold: 0.25, transform_tag: "generic.0.CONVKER5", }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { raw_email: { body: { plain: "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice", }, headers: { parsed: { Date: "Thu, 09 Jan 2020 16:34:45 +0000", From: "alice@company.com", "Message-ID": "abcdef@company.com", References: "<01234@company.com> <56789@company.com>", Subject: "Figures Request", To: "bob@organisation.org", }, }, }, user_properties: { "string:City": "London" }, }, { raw_email: { body: { html: "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>", }, headers: { raw: "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com", }, }, user_properties: { "string:City": "Bucharest" }, }, ], include_comments: false, threshold: 0.25, transform_tag: "generic.0.CONVKER5", }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "transform_tag": "generic.0.CONVKER5", "documents": [ { "raw_email": { "headers": { "parsed": { "Message-ID": "abcdef@company.com", "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "Subject": "Figures Request", "From": "alice@company.com", "To": "bob@organisation.org", "References": "<01234@company.com> <56789@company.com>", } }, "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, }, "user_properties": {"string:City": "London"}, }, { "raw_email": { "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" }, "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, }, "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, "include_comments": False, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "transform_tag": "generic.0.CONVKER5", "documents": [ { "raw_email": { "headers": { "parsed": { "Message-ID": "abcdef@company.com", "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "Subject": "Figures Request", "From": "alice@company.com", "To": "bob@organisation.org", "References": "<01234@company.com> <56789@company.com>", } }, "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, }, "user_properties": {"string:City": "London"}, }, { "raw_email": { "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" }, "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, }, "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, "include_comments": False, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
live
or staging
to query the current Live or Staging model version.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
transform_tag | string | yes | A tag specifying how the raw data should be processed. |
documents | array<Document> | yes | A batch of at most 4096 documents in the format described below. Larger batches are faster (per document) than smaller ones. |
threshold | number | no | The confidence threshold to filter the label results by. A number between 1.0 and 0.0 . 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used.
|
labels | array<Label> | no | A list of requested labels to be returned with optionally label-specific thresholds. |
include_comments | boolean | no | If set to true , the comments parsed from the emails will be returned in the response body.
|
Document
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
raw_email | RawEmail | yes | Email data, in the format described here. |
user_properties | map<string, string | number> | no | Any user-defined metadata that applies to the comment. The format is described here.
Note: Some user properties are generated based on the email content. If these conflict with uploaded user properties, the request
will fail with
422 Unprocessable Entity .
|
Label
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] .
|
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
NAME | TYPE | DESCRIPTION |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. SeeOverview to learn more about error responses.
|
comments | array<Comment> | A list of comments parsed from the uploaded raw emails, in the format described in the Comment Reference. Only returned if you set include_comments in the request.
|
predictions | array<array<Label>> | A list of array<Label> in the same order as the comments in the request, where each Label has the format described here.
|
entities | array<array<Entity>> | A list of array<Entity> in the same order as the comments in the request, where each Entity has the format described here.
|
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here.
|
model | Model | Information about the model that was used to make the predictions, in the format described here. |
For large requests, this endpoint may take longer to respond. You should increase your client timeout.
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments
Permissions required: View labels, View sources
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "threshold": 0.25, "uids": [ "18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002" ] }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "threshold": 0.25, "uids": [ "18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002" ] }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { threshold: 0.25, uids: ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { threshold: 0.25, uids: ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "uids": ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "uids": ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ { "entities": [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], "labels": [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], "uid": "18ba5ce699f8da1f.0001" }, { "entities": [], "labels": [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ], "uid": "18ba5ce699f8da1f.0002" } ], "status": "ok" }
{ "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ { "entities": [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], "labels": [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], "uid": "18ba5ce699f8da1f.0001" }, { "entities": [], "labels": [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ], "uid": "18ba5ce699f8da1f.0002" } ], "status": "ok" }
live
or staging
to query the current Live or Staging model version.
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
uids | array<string> | yes | A list of at most 4096 combined source_id -s and comment_id -s in the format of source_id.comment_id . Sources don't need to belong to the current dataset - so you can request predictions of comments for a source in a different
(or no) dataset. Larger lists are faster (per comment) than smaller ones.
|
threshold | number | no | The confidence threshold to filter the label results by. A number between 1.0 and 0.0 . 0.0 will include all results. Set to "auto" to use auto-thresholds. If not set, the default threshold of 0.25 will be used.
|
labels | array<Label> | no | A list of requested labels to be returned with optionally label-specific thresholds. |
Label
has the following format:
NAME | TYPE | REQUIRED | DESCRIPTION |
---|---|---|---|
name | array<string> | yes | The name of the label to be returned, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] .
|
threshold | number | no | The confidence threshold to use for the label. If not specified, will default to the threshold specified at the top-level. |
NAME | TYPE | DESCRIPTION |
---|---|---|
status | string | ok if the request is successful, or error in case of an error. See the Overview to learn more about error responses.
|
predictions | array<Prediction> | A list of predictions in the format described below. |
model | Model | Information about the model that was used to make the predictions, in the format described here. |
Prediction
has the following format:
NAME | TYPE | DESCRIPTION |
---|---|---|
uid | string | A combined source_id and comment_id in the format of source_id.comment_id .
|
labels | array<Label> | An array containing predicted labels for this comment, where Label has the format described here.
|
entities | array<Entity> | An array containing predicted entities for this comment, where Entity has the format described here.
|
label_properties | array<LabelProperty> | An array containing predicted label properties for this comment, where each LabelProperty has the format described here.
|
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/validation
Permissions required: View labels, View sources
- Bash
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - Response
{ "status": "ok", "validation": { "coverage": 0.9119927883148193, "dataset_quality": "good", "labels": [ { "name": "Notification", "parts": ["Notification"] }, { "name": "Notification > Out of Office", "parts": ["Notification", "Out of Office"] }, { "name": "Notification > Public Holiday", "parts": ["Notification", "Public Holiday"] } ], "mean_average_precision_safe": 0.83, "num_amber_labels": 1, "num_labels": 3, "num_red_labels": 1, "num_reviewed_comments": 10251, "version": 5 } }
{ "status": "ok", "validation": { "coverage": 0.9119927883148193, "dataset_quality": "good", "labels": [ { "name": "Notification", "parts": ["Notification"] }, { "name": "Notification > Out of Office", "parts": ["Notification", "Out of Office"] }, { "name": "Notification > Public Holiday", "parts": ["Notification", "Public Holiday"] } ], "mean_average_precision_safe": 0.83, "num_amber_labels": 1, "num_labels": 3, "num_red_labels": 1, "num_reviewed_comments": 10251, "version": 5 } }
version
number. You can use the special values live
and staging
to retrieve statistics for the current Live or Staging model versions, or the special value latest
for the most recently available model version.
latest
, as statistics are not guaranteed to be available for not pinned model versions.
validation
object contains the following fields:
NAME | TYPE | DESCRIPTION |
---|---|---|
mean_average_precision_safe | float | Mean Average Precision score (between 0.0 and 1.0 ). This field will be null if MAP is unavailable.
|
num_labels | number | Number of labels in the taxonomy (at the time the model version was pinned). |
labels | array<Label> | List of labels in the taxonomy (at the time the model version was pinned). Note that, as the response example demonstrates, parent labels are returned as a separate label in addition to being returned as a part of child labels. |
num_reviewed_comments | number | Number of reviewed comments in the dataset (at the time the model version was pinned). |
version | number | Model version. |
num_amber_labels | number | Number of labels in amber warning state. |
num_red_labels | number | Number of labels in red warning state. |
dataset_score | number | Overall dataset score, between 0 and 100 .
|
dataset_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the overall dataset quality rank. Can be null if there is not enough data.
|
balance | float | A measure of the similarity between reviewed and unreviewed comments (between 0.0 and 1.0 ). Can be null if there is not enough data.
|
balance_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the balance quality rank. Can be null if there is not enough data.
|
coverage | float | A fractional value of label coverage in the dataset (between 0.0 and 1.0 ). Can be null if there is not enough data.
|
coverage_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the coverage quality rank. Can be null if there is not enough data.
|
all_labels_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the all labels quality rank. Can be null if there is not enough data.
|
underperforming_labels_quality | string | One of "poor" , "average" , "good" , "excellent" , representing the underperforming labels quality rank. Can be null if there is not enough data.
|
Label
has the following format:
NAME | TYPE | DESCRIPTION |
---|---|---|
name | string | The name of the label, formatted as a string. |
parts | array<string> | The name of the label, formatted as a list of hierarchical labels. For instance, the label "Parent Label > Child Label" will have the format ["Parent Label", "Child Label"] |