communications-mining
latest
false
- API 文档
- CLI
- 集成指南
- 博客
重要 :
请注意,此内容已使用机器翻译进行了本地化。
Communications Mining 开发者指南
上次更新日期 2024年12月20日
预测
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict
所需权限:查看标签、查看来源
重要提示:
可计费操作
我们将按请求正文中提供的每条注释向您收取 1 个 AI Unit。
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "from": "alice@company.com", "sent_at": "2020-01-09T16:34:45Z", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Figures Request" }, "to": [ "bob@organisation.org" ] } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": { "string:City": "London" } }, { "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." }, "from": "bob@organisation.org", "sent_at": "2020-01-09T16:44:45Z", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "Re: Figures Request" }, "to": [ "alice@company.com" ] } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": { "string:City": "Bucharest" } } ], "threshold": 0.25 }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "messages": [ { "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "from": "alice@company.com", "sent_at": "2020-01-09T16:34:45Z", "signature": { "text": "Thanks,\nAlice" }, "subject": { "text": "Figures Request" }, "to": [ "bob@organisation.org" ] } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": { "string:City": "London" } }, { "messages": [ { "body": { "text": "Alice,\n\nHere are the figures for today." }, "from": "bob@organisation.org", "sent_at": "2020-01-09T16:44:45Z", "signature": { "text": "Regards,\nBob" }, "subject": { "text": "Re: Figures Request" }, "to": [ "alice@company.com" ] } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": { "string:City": "Bucharest" } } ], "threshold": 0.25 }' - 节点
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { messages: [ { body: { text: "Hi Bob,\n\nCould you send me the figures for today?", }, from: "alice@company.com", sent_at: "2020-01-09T16:34:45Z", signature: { text: "Thanks,\nAlice" }, subject: { text: "Figures Request" }, to: ["bob@organisation.org"], }, ], timestamp: "2013-09-12T20:01:20.000000+00:00", user_properties: { "string:City": "London" }, }, { messages: [ { body: { text: "Alice,\n\nHere are the figures for today." }, from: "bob@organisation.org", sent_at: "2020-01-09T16:44:45Z", signature: { text: "Regards,\nBob" }, subject: { text: "Re: Figures Request" }, to: ["alice@company.com"], }, ], timestamp: "2011-12-12T10:04:30.000000+00:00", user_properties: { "string:City": "Bucharest" }, }, ], threshold: 0.25, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { messages: [ { body: { text: "Hi Bob,\n\nCould you send me the figures for today?", }, from: "alice@company.com", sent_at: "2020-01-09T16:34:45Z", signature: { text: "Thanks,\nAlice" }, subject: { text: "Figures Request" }, to: ["bob@organisation.org"], }, ], timestamp: "2013-09-12T20:01:20.000000+00:00", user_properties: { "string:City": "London" }, }, { messages: [ { body: { text: "Alice,\n\nHere are the figures for today." }, from: "bob@organisation.org", sent_at: "2020-01-09T16:44:45Z", signature: { text: "Regards,\nBob" }, subject: { text: "Re: Figures Request" }, to: ["alice@company.com"], }, ], timestamp: "2011-12-12T10:04:30.000000+00:00", user_properties: { "string:City": "Bucharest" }, }, ], threshold: 0.25, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "documents": [ { "messages": [ { "from": "alice@company.com", "to": ["bob@organisation.org"], "sent_at": "2020-01-09T16:34:45Z", "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "subject": {"text": "Figures Request"}, "signature": {"text": "Thanks,\nAlice"}, } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": {"string:City": "London"}, }, { "messages": [ { "from": "bob@organisation.org", "to": ["alice@company.com"], "sent_at": "2020-01-09T16:44:45Z", "body": { "text": "Alice,\n\nHere are the figures for today." }, "subject": {"text": "Re: Figures Request"}, "signature": {"text": "Regards,\nBob"}, } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "documents": [ { "messages": [ { "from": "alice@company.com", "to": ["bob@organisation.org"], "sent_at": "2020-01-09T16:34:45Z", "body": { "text": "Hi Bob,\n\nCould you send me the figures for today?" }, "subject": {"text": "Figures Request"}, "signature": {"text": "Thanks,\nAlice"}, } ], "timestamp": "2013-09-12T20:01:20.000000+00:00", "user_properties": {"string:City": "London"}, }, { "messages": [ { "from": "bob@organisation.org", "to": ["alice@company.com"], "sent_at": "2020-01-09T16:44:45Z", "body": { "text": "Alice,\n\nHere are the figures for today." }, "subject": {"text": "Re: Figures Request"}, "signature": {"text": "Regards,\nBob"}, } ], "timestamp": "2011-12-12T10:04:30.000000+00:00", "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - 响应
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
您必须在请求中提供要查询以进行预测的模型版本。 您可以使用整数版本号或者特殊值
live
或staging
来查询当前的“实时”或“临时”模型版本。
请求格式
其中
Label
具有以下格式:
名称 | 类型 | 必填 | 说明 |
---|---|---|---|
name | array<string> | 是 | 要返回的标签名称,格式为层次结构标签列表。 例如,标签"Parent Label > Child Label" 将采用["Parent Label", "Child Label"] 格式。
|
threshold | 数字 | 否 | 用于标签的置信度阈值。 如果未指定,将默认为在顶层指定的阈值。 |
响应格式
名称 | 类型 | 说明 |
---|---|---|
status | 字符串 | 如果请求成功,则为 ok;如果发生错误,则为 error。 请参阅概述,了解有关错误响应的更多信息。
|
predictions | array<array<Label>> | array<Label> 列表,其顺序与请求中的注释相同,其中每个Label 具有此处所述的格式。
|
entities | array<array<Entity>> | array<Entity> 列表,其顺序与请求中的注释相同,其中每个Entity 具有此处所述的格式。
|
label_properties | array<LabelProperty> | 包含此注释的预测标签属性的数组,其中的每个LabelProperty 具有此处所述的格式。
|
model | 模型 | 用于进行预测的模型的相关信息,采用此处描述的格式。 |
要从数据集的最新可用模型版本中获取预测,请参阅获取已固定模型的预测中的说明,但使用
latest
而不是已固定的模型版本。
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails
所需权限:查看标签、查看来源
重要提示:
可计费操作
对于请求正文中提供的每封原始电子邮件,您需要支付 1 个 AI Unit。
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "raw_email": { "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, "headers": { "parsed": { "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "From": "alice@company.com", "Message-ID": "abcdef@company.com", "References": "<01234@company.com> <56789@company.com>", "Subject": "Figures Request", "To": "bob@organisation.org" } } }, "user_properties": { "string:City": "London" } }, { "raw_email": { "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" } }, "user_properties": { "string:City": "Bucharest" } } ], "include_comments": false, "threshold": 0.25, "transform_tag": "generic.0.CONVKER5" }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "documents": [ { "raw_email": { "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, "headers": { "parsed": { "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "From": "alice@company.com", "Message-ID": "abcdef@company.com", "References": "<01234@company.com> <56789@company.com>", "Subject": "Figures Request", "To": "bob@organisation.org" } } }, "user_properties": { "string:City": "London" } }, { "raw_email": { "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" } }, "user_properties": { "string:City": "Bucharest" } } ], "include_comments": false, "threshold": 0.25, "transform_tag": "generic.0.CONVKER5" }' - 节点
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { raw_email: { body: { plain: "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice", }, headers: { parsed: { Date: "Thu, 09 Jan 2020 16:34:45 +0000", From: "alice@company.com", "Message-ID": "abcdef@company.com", References: "<01234@company.com> <56789@company.com>", Subject: "Figures Request", To: "bob@organisation.org", }, }, }, user_properties: { "string:City": "London" }, }, { raw_email: { body: { html: "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>", }, headers: { raw: "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com", }, }, user_properties: { "string:City": "Bucharest" }, }, ], include_comments: false, threshold: 0.25, transform_tag: "generic.0.CONVKER5", }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { documents: [ { raw_email: { body: { plain: "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice", }, headers: { parsed: { Date: "Thu, 09 Jan 2020 16:34:45 +0000", From: "alice@company.com", "Message-ID": "abcdef@company.com", References: "<01234@company.com> <56789@company.com>", Subject: "Figures Request", To: "bob@organisation.org", }, }, }, user_properties: { "string:City": "London" }, }, { raw_email: { body: { html: "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>", }, headers: { raw: "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com", }, }, user_properties: { "string:City": "Bucharest" }, }, ], include_comments: false, threshold: 0.25, transform_tag: "generic.0.CONVKER5", }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "transform_tag": "generic.0.CONVKER5", "documents": [ { "raw_email": { "headers": { "parsed": { "Message-ID": "abcdef@company.com", "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "Subject": "Figures Request", "From": "alice@company.com", "To": "bob@organisation.org", "References": "<01234@company.com> <56789@company.com>", } }, "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, }, "user_properties": {"string:City": "London"}, }, { "raw_email": { "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" }, "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, }, "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, "include_comments": False, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-raw-emails", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "transform_tag": "generic.0.CONVKER5", "documents": [ { "raw_email": { "headers": { "parsed": { "Message-ID": "abcdef@company.com", "Date": "Thu, 09 Jan 2020 16:34:45 +0000", "Subject": "Figures Request", "From": "alice@company.com", "To": "bob@organisation.org", "References": "<01234@company.com> <56789@company.com>", } }, "body": { "plain": "Hi Bob,\n\nCould you send me the figures for today?\n\nThanks,\nAlice" }, }, "user_properties": {"string:City": "London"}, }, { "raw_email": { "headers": { "raw": "Message-ID: 012345@company.com\nDate: Thu, 09 Jan 2020 16:44:45 +0000\nSubject: Re: Figures Request\nFrom: bob@organisation.org\nTo: alice@company.com" }, "body": { "html": "<p>Alice,</p><p>Here are the figures for today.</p><p>Regards,<br/>Bob</p>" }, }, "user_properties": {"string:City": "Bucharest"}, }, ], "threshold": 0.25, "include_comments": False, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - 响应
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
{ "entities": [ [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], [] ], "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ] ], "status": "ok" }
您必须在请求中提供要查询以进行预测的模型版本。 您可以使用整数版本号或者特殊值
live
或staging
来查询当前的“实时”或“临时”模型版本。
请求格式
名称 | 类型 | 必填 | 说明 |
---|---|---|---|
transform_tag | 字符串 | 是 | 指定应如何处理原始数据的标签。 |
documents | array<Document> | 是 | 一批至多 4096 个文档,其格式如下所述。 按每个文档计算,较大批次的处理速度比较小批次更快。 |
threshold | 数字 | 否 | 用于筛选标签结果的置信度阈值。 介于0.0 和1.0 之间的数字。 0.0 将包含所有结果。 设置为"auto" 可使用自动阈值。 如果未设置,则将使用默认阈值0.25 。
|
labels | array<Label> | 否 | 要返回的请求标签列表,以及特定于标签的阈值(可选)。 |
include_comments | boolean | 否 | 如果设置为true ,则从电子邮件中解析的注释将在响应正文中返回。
|
其中
Document
具有以下格式:
其中
Label
具有以下格式:
名称 | 类型 | 必填 | 说明 |
---|---|---|---|
name | array<string> | 是 | 要返回的标签名称,格式为层次结构标签列表。 例如,标签"Parent Label > Child Label" 将采用["Parent Label", "Child Label"] 格式。
|
threshold | 数字 | 否 | 用于标签的置信度阈值。 如果未指定,将默认为在顶层指定的阈值。 |
响应格式
名称 | 类型 | 说明 |
---|---|---|
status | 字符串 | 如果请求成功,则为 ok;如果发生错误,则为 error。 请参阅概述,了解有关错误响应的更多信息。
|
comments | array<Comment> | 从上传的原始电子邮件中解析的注释列表,其格式请参阅注释参考。 仅在请求中设置了include_comments 时返回。
|
predictions | array<array<Label>> | array<Label> 列表,其顺序与请求中的注释相同,其中每个Label 具有此处所述的格式。
|
entities | array<array<Entity>> | array<Entity> 列表,其顺序与请求中的注释相同,其中每个Entity 具有此处所述的格式。
|
label_properties | array<LabelProperty> | 包含此注释的预测标签属性的数组,其中的每个LabelProperty 具有此处所述的格式。
|
model | 模型 | 用于进行预测的模型的相关信息,采用此处描述的格式。 |
备注:
对于大型请求,此端点可能需要更长时间才能响应。 您应该增加客户端超时时间。
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments
所需权限:查看标签、查看来源
- Bash
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "threshold": 0.25, "uids": [ "18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002" ] }'
curl -X POST 'https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments' \ -H "Authorization: Bearer $REINFER_TOKEN" \ -H "Content-Type: application/json" \ -d '{ "threshold": 0.25, "uids": [ "18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002" ] }' - Node
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { threshold: 0.25, uids: ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.post( { url: "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, json: true, body: { threshold: 0.25, uids: ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "uids": ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.post( "https://<my_api_endpoint>/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/predict-comments", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, json={ "uids": ["18ba5ce699f8da1f.0001", "18ba5ce699f8da1f.0002"], "threshold": 0.25, }, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - 响应
{ "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ { "entities": [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], "labels": [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], "uid": "18ba5ce699f8da1f.0001" }, { "entities": [], "labels": [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ], "uid": "18ba5ce699f8da1f.0002" } ], "status": "ok" }
{ "model": { "time": "2020-02-06T20:42:58.047000Z", "version": 5 }, "predictions": [ { "entities": [ { "capture_ids": [], "formatted_value": "Bob", "id": "76aebf2646577a1d", "kind": "person", "name": "person", "probability": null, "span": { "char_end": 6, "char_start": 3, "content_part": "body", "message_index": 0, "utf16_byte_end": 12, "utf16_byte_start": 6 } }, { "capture_ids": [], "formatted_value": "2020-01-09 00:00 UTC", "id": "20beddf4c5f5bb61", "kind": "date", "name": "date", "probability": null, "span": { "char_end": 48, "char_start": 43, "content_part": "body", "message_index": 0, "utf16_byte_end": 96, "utf16_byte_start": 86 } } ], "labels": [ { "name": ["Some Label"], "probability": 0.8896465003490448 }, { "name": ["Parent Label", "Child Label"], "probability": 0.26687008142471313, "sentiment": 0.8762539502232571 } ], "uid": "18ba5ce699f8da1f.0001" }, { "entities": [], "labels": [ { "name": ["Other Label"], "probability": 0.6406207121908665 } ], "uid": "18ba5ce699f8da1f.0002" } ], "status": "ok" }
您必须在请求中提供要查询以进行预测的模型版本。 您可以使用整数版本号或者特殊值
live
或staging
来查询当前的“实时”或“临时”模型版本。
请求格式
名称 | 类型 | 必填 | 说明 |
---|---|---|---|
uids | array<string> | 是 | 至多包含 4096 个source_id 和comment_id 组合的列表,格式为source_id.comment_id 。 来源不必属于当前数据集,因此您可以请求对其他数据集中(或不属于任何数据集)的来源的注释进行预测。 按每条注释计算,较大列表的处理速度比较小列表更快。
|
threshold | 数字 | 否 | 用于筛选标签结果的置信度阈值。 介于0.0 和1.0 之间的数字。 0.0 将包含所有结果。 设置为"auto" 可使用自动阈值。 如果未设置,则将使用默认阈值0.25 。
|
labels | array<Label> | 否 | 要返回的请求标签列表,以及特定于标签的阈值(可选)。 |
其中
Label
具有以下格式:
名称 | 类型 | 必填 | 说明 |
---|---|---|---|
name | array<string> | 是 | 要返回的标签名称,格式为层次结构标签列表。 例如,标签"Parent Label > Child Label" 将采用["Parent Label", "Child Label"] 格式。
|
threshold | 数字 | 否 | 用于标签的置信度阈值。 如果未指定,将默认为在顶层指定的阈值。 |
响应格式
其中
Prediction
具有以下格式:
注意:对于大型请求,此端点可能需要更长时间才能响应。 您应该增加客户端超时时间。
/api/v1/datasets/<project>/<dataset_name>/labellers/<version>/validation
所需权限:查看标签、查看来源
- Bash
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation' \ -H "Authorization: Bearer $REINFER_TOKEN"
curl -X GET 'https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation' \ -H "Authorization: Bearer $REINFER_TOKEN" - Node
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } );
const request = require("request"); request.get( { url: "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers: { Authorization: "Bearer " + process.env.REINFER_TOKEN, }, }, function (error, response, json) { // digest response console.log(JSON.stringify(json, null, 2)); } ); - Python
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True))
import json import os import requests response = requests.get( "https://<my_api_endpoint>/api/v1/datasets/project1/collateral/labellers/live/validation", headers={"Authorization": "Bearer " + os.environ["REINFER_TOKEN"]}, ) print(json.dumps(response.json(), indent=2, sort_keys=True)) - 响应
{ "status": "ok", "validation": { "coverage": 0.9119927883148193, "dataset_quality": "good", "labels": [ { "name": "Notification", "parts": ["Notification"] }, { "name": "Notification > Out of Office", "parts": ["Notification", "Out of Office"] }, { "name": "Notification > Public Holiday", "parts": ["Notification", "Public Holiday"] } ], "mean_average_precision_safe": 0.83, "num_amber_labels": 1, "num_labels": 3, "num_red_labels": 1, "num_reviewed_comments": 10251, "version": 5 } }
{ "status": "ok", "validation": { "coverage": 0.9119927883148193, "dataset_quality": "good", "labels": [ { "name": "Notification", "parts": ["Notification"] }, { "name": "Notification > Out of Office", "parts": ["Notification", "Out of Office"] }, { "name": "Notification > Public Holiday", "parts": ["Notification", "Public Holiday"] } ], "mean_average_precision_safe": 0.83, "num_amber_labels": 1, "num_labels": 3, "num_red_labels": 1, "num_reviewed_comments": 10251, "version": 5 } }
此路由会返回有关模型执行情况的统计信息。 您可以在“验证” 页面中查看相同的统计信息。 可以使用整数
version
编号来请求模型的统计信息。 您可以使用特殊值live
和staging
检索当前实时或临时模型版本的统计信息,或使用特殊值latest
检索最近可用的模型版本。
尽管此端点同时接受固定和未固定的模型版本,但我们建议查询已固定的模型版本或特殊值
latest
,因为无法保证未固定的模型版本始终可用。
响应
validation
对象包含以下字段:
名称 | 类型 | 说明 |
---|---|---|
mean_average_precision_safe | float | 平均精度均值 (MAP) 分数(介于0.0 和1.0 之间)。 如果 MAP 不可用,则此字段将为null 。
|
num_labels | 数字 | 分类中的标签数量(在固定模型版本时)。 |
labels | array<Label> | 分类中的标签列表(在固定模型版本时)。 请注意,如响应示例所示,除了作为子标签的一部分返回外,父标签还会作为单独的标签返回。 |
num_reviewed_comments | 数字 | 数据集中已审核的注释数量(在模型版本固定时)。 |
version | 数字 | 模型版本。 |
num_amber_labels | 数字 | 处于黄色警告状态的标签数量。 |
num_red_labels | 数字 | 处于红色警告状态的标签数量。 |
dataset_score | 数字 | 数据集总分,介于0 和100 之间。
|
dataset_quality | 字符串 | "poor" 、 "average" 、 "good" 和"excellent" 中的一个,表示数据集的整体质量排名。 如果数据不足,可以是null 。
|
balance | float | 衡量已审核注释与未审核注释之间相似性的指标(介于0.0 和1.0 之间)。 如果数据不足,可以是null 。
|
balance_quality | 字符串 | "poor" 、 "average" 、 "good" 和"excellent" 中的一个,表示平衡度的质量排名。 如果数据不足,可以是null 。
|
coverage | float | 数据集中标签覆盖率的分数值(介于0.0 和1.0 之间)。 如果数据不足,可以是null 。
|
coverage_quality | 字符串 | "poor" 、 "average" 、 "good" 和"excellent" 中的一个,表示覆盖质量排名。 如果数据不足,可以是null 。
|
all_labels_quality | 字符串 | "poor" 、 "average" 、 "good" 和"excellent" 之一,表示所有标签质量排名。 如果数据不足,可以是null 。
|
underperforming_labels_quality | 字符串 | "poor" 、 "average" 、 "good" 和"excellent" 之一,表示表现不佳的标签质量排名。 如果数据不足,可以是null 。
|
其中
Label
具有以下格式:
名称 | 类型 | 说明 |
---|---|---|
name | 字符串 | 标签名称,字符串格式。 |
parts | array<string> | 标签的名称,格式为层次结构标签列表。 例如,标签"Parent Label > Child Label" 的格式为["Parent Label", "Child Label"] |