Skip to content

Commit

Permalink
add: added TASK_TEXT_EMBEDDINGS and TASK_TEXT_PARAPHRASING
Browse files Browse the repository at this point in the history
  • Loading branch information
namwoam committed Jul 15, 2024
1 parent 352b0d2 commit 16e3981
Show file tree
Hide file tree
Showing 6 changed files with 218 additions and 18 deletions.
7 changes: 6 additions & 1 deletion ai/ai21labs/v0/config/definition.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
{
"availableTasks": [
"TASK_TEXT_GENERATION_CHAT",
"TASK_TEXT_EMBEDDINGS",
"TASK_CONTEXTUAL_ANSWERING",
"TASK_TEXT_SUMMARIZATION",
"TASK_TEXT_SUMMARIZATION_SEGMENT"
"TASK_TEXT_SUMMARIZATION_SEGMENT",
"TASK_TEXT_PARAPHRASING",
"TASK_GRAMMAR_CHECK",
"TASK_TEXT_IMPROVEMENT",
"TASK_TEXT_SEGMENTATION"
],
"custom": false,
"documentationUrl": "https://www.instill.tech/docs/component/ai/ai21labs",
Expand Down
199 changes: 196 additions & 3 deletions ai/ai21labs/v0/config/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@
"title": "Chat Message",
"type": "object"
},
"usage": {
"description": "Usage tokens in AI21labs",
"chat-usage": {
"description": "Usage tokens in AI21labs chat models",
"instillUIOrder": 1,
"properties": {
"input-tokens": {
Expand Down Expand Up @@ -290,7 +290,7 @@
"type": "string"
},
"usage": {
"$ref": "#/$defs/usage"
"$ref": "#/$defs/chat-usage"
}
},
"required": [
Expand All @@ -300,6 +300,96 @@
"type": "object"
}
},
"TASK_TEXT_EMBEDDINGS": {
"instillShortDescription": "Provide a vector representation of the provided text",
"description": "Embedding vectors encode semantic information about the given text. This can be used as a basis for various use cases including custom RAG engine implementations, semantic search, clustering, and classification.",
"input": {
"description": "Input",
"instillEditOnNodeFields": [
"text",
"style"
],
"instillUIOrder": 0,
"properties": {
"text": {
"description": "The text content to be embedded",
"instillAcceptFormats": [
"string"
],
"instillUIMultiline": true,
"instillUIOrder": 1,
"instillUpstreamTypes": [
"value",
"reference",
"template"
],
"title": "Text",
"type": "string"
},
"style": {
"description": "The style of the text",
"enum": [
"segment",
"query"
],
"example": "segment",
"instillAcceptFormats": [
"string"
],
"instillUIOrder": 2,
"instillUpstreamTypes": [
"value"
],
"title": "Style",
"type": "string"
}
},
"required": [
"text"
],
"title": "Input",
"type": "object"
},
"output": {
"description": "Output",
"instillUIOrder": 0,
"properties": {
"embedding": {
"description": "Model Output",
"instillUIOrder": 0,
"instillFormat": "array:number",
"title": "Embedding",
"type": "array",
"items": {
"type": "number"
}
},
"usage": {
"description": "Usage tokens in AI21labs embedding models",
"instillUIOrder": 1,
"properties": {
"tokens": {
"description": "The input tokens read by AI21labs embedding model",
"instillFormat": "number",
"instillUIOrder": 2,
"title": "Tokens",
"type": "number"
}
},
"required": [
"tokens"
],
"title": "Usage",
"type": "object"
}
},
"required": [
"embedding"
],
"title": "Output",
"type": "object"
}
},
"TASK_CONTEXTUAL_ANSWERING": {
"instillShortDescription": "Contextual answers based on information passed into the prompt",
"description": "Given a prompt containing information, ask a question about that information. The response will be limited to what the model can learn based on the provided information; information gained during model training outside the prompt will not be used in the answer.",
Expand Down Expand Up @@ -587,5 +677,108 @@
"title": "Output",
"type": "object"
}
},
"TASK_TEXT_PARAPHRASING": {
"instillShortDescription": "Paraphrase text content",
"description": "The model returns 10 possibilities for the requested text. AI21 Studio's Paraphrase API offers access to our state-of-the-art paraphrasing engine. It has been fine-tuned specifically for paraphrasing. As a result, it's easier to integrate into your systems, and since it's optimized for this purpose, it's also more efficient (and therefore cheaper) than building it from scratch. This is also the engine behind Wordtune - so you know it's pretty awesome.",
"input": {
"description": "Input",
"instillEditOnNodeFields": [
"text",
"style"
],
"instillUIOrder": 0,
"properties": {
"text": {
"description": "The text content to be paraphrased",
"instillAcceptFormats": [
"string"
],
"instillUIMultiline": true,
"instillUIOrder": 1,
"instillUpstreamTypes": [
"value",
"reference",
"template"
],
"title": "Text",
"type": "string"
},
"style": {
"description": "The style of the text",
"enum": [
"general",
"casual",
"formal",
"long",
"short"
],
"example": "general",
"instillAcceptFormats": [
"string"
],
"instillUIOrder": 2,
"instillUpstreamTypes": [
"value"
],
"title": "Style",
"type": "string"
},
"start-index": {
"description": "The start index of the text to be paraphrased",
"instillAcceptFormats": [
"integer"
],
"instillUIOrder": 3,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "Start Index",
"type": "integer"
},
"end-index": {
"description": "The end index of the text to be paraphrased",
"instillAcceptFormats": [
"integer"
],
"instillUIOrder": 4,
"instillUpstreamTypes": [
"value",
"reference"
],
"title": "End Index",
"type": "integer"
}
},
"required": [
"text",
"style"
],
"title": "Input",
"type": "object"
},
"output": {
"description": "Output",
"instillUIOrder": 0,
"properties": {
"suggestions": {
"description": "Model Output",
"instillUIOrder": 0,
"instillFormat": "array:string",
"instillUIMultiline": true,
"title": "Suggestions",
"type": "array",
"items": {
"type": "string"
}
}
},
"required": [
"suggestions"
],
"title": "Output",
"type": "object"
}
}
}
8 changes: 6 additions & 2 deletions ai/ai21labs/v0/embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@ const (
Query EmbeddingsType = "query"
)

// EmbeddingResult is one element of EmbeddingsResponse.Results; it carries
// the vector decoded from the "embedding" JSON key.
type EmbeddingResult struct {
Embedding []float32 `json:"embedding"`
}

type EmbeddingsResponse struct {
ID string `json:"id"`
Results [][]float32 `json:"results"`
ID string `json:"id"`
Results []EmbeddingResult `json:"results"`
}

func (c *AI21labsClient) Embeddings(req EmbeddingsRequest) (EmbeddingsResponse, error) {
Expand Down
6 changes: 3 additions & 3 deletions ai/ai21labs/v0/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Stru
switch task {
case "TASK_TEXT_GENERATION_CHAT":
e.execute = e.TaskTextGenerationChat
case "TASK_TEXT_EMBEDDINGS":
e.execute = e.TaskTextEmbeddings
case "TASK_CONTEXTUAL_ANSWERING":
e.execute = e.TaskContextualAnswering
case "TASK_TEXT_SUMMARIZATION":
Expand All @@ -83,13 +85,11 @@ func (c *component) CreateExecution(sysVars map[string]any, setup *structpb.Stru
case "TASK_TEXT_PARAPHRASING":
e.execute = e.TaskTextParaphrasing
case "TASK_GRAMMAR_CHECK":
e.execute = e.TaskGrammerCheck
e.execute = e.TaskGrammarCheck
case "TASK_TEXT_IMPROVEMENT":
e.execute = e.TaskTextImprovement
case "TASK_TEXT_SEGMENTATION":
e.execute = e.TaskTextSegmentation
case "TASK_TEXT_EMBEDDINGS":
e.execute = e.TaskTextEmbeddings
default:
return nil, fmt.Errorf("unsupported task")
}
Expand Down
6 changes: 3 additions & 3 deletions ai/ai21labs/v0/paraphrase.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@ package ai21labs

// Source: https://docs.ai21.com/reference/paraphrase-ref (retrieved 2024-07-21)

const paraphraseEndpoint = "v1/paraphrase"
const paraphraseEndpoint = "/studio/v1/paraphrase"

type ParaphraseRequest struct {
Text string `json:"text"`
Style ParaphraseStyle `json:"style"`
// note: these JSON keys are camelCase (startIndex, endIndex), which conflicts with the naming convention used elsewhere
StartIndex int `json:"startIndex"`
EndIndex int `json:"endIndex"`
StartIndex int `json:"startIndex,omitempty"`
EndIndex int `json:"endIndex,omitempty"`
}

// ParaphraseStyle is the string type used for ParaphraseRequest.Style.
type ParaphraseStyle string
Expand Down
10 changes: 4 additions & 6 deletions ai/ai21labs/v0/tasks.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,8 @@ type TaskTextEmbeddingsInput struct {
}

type TaskTextEmbeddingsOutput struct {
ID string `json:"id"`
Embeddings []float32 `json:"embeddings"`
Usage base.EmbeddingTextModelUsage
Embedding []float32 `json:"embedding"`
Usage base.EmbeddingTextModelUsage `json:"usage"`
}

func (e *execution) TaskTextEmbeddings(in *structpb.Struct) (*structpb.Struct, error) {
Expand All @@ -130,8 +129,7 @@ func (e *execution) TaskTextEmbeddings(in *structpb.Struct) (*structpb.Struct, e
}

output := TaskTextEmbeddingsOutput{
ID: resp.ID,
Embeddings: resp.Results[0],
Embedding: resp.Results[0].Embedding,
Usage: base.EmbeddingTextModelUsage{
Tokens: len(input.Text) / 2, // IMPORTANT: this is a rough estimate, but the embedding API does not return token counts for now (2024-07-21)
},
Expand Down Expand Up @@ -346,7 +344,7 @@ type TaskGrammarCheckOutput struct {
Types []string `json:"types"`
}

func (e *execution) TaskGrammerCheck(in *structpb.Struct) (*structpb.Struct, error) {
func (e *execution) TaskGrammarCheck(in *structpb.Struct) (*structpb.Struct, error) {
input := TaskGrammarCheckInput{}
if err := base.ConvertFromStructpb(in, &input); err != nil {
return nil, err
Expand Down

0 comments on commit 16e3981

Please sign in to comment.