diff --git a/packages/vmind/__tests__/browser/src/constants/mockData.ts b/packages/vmind/__tests__/browser/src/constants/mockData.ts index f461afbc..749d8ca5 100644 --- a/packages/vmind/__tests__/browser/src/constants/mockData.ts +++ b/packages/vmind/__tests__/browser/src/constants/mockData.ts @@ -3625,15 +3625,14 @@ Xiaomi,0.1,4087,131345 * 雷达图 */ export const mockUserInput14 = { - csv: `key,value + csv: `dimension,performance Strength,5 Speed,5 Shooting,3 Endurance,5 Precision,5 -Growth,5 - `, - input: '帮我展示个人在不同方面的绩效,他是否是六边形战士' +Growth,5`, + input: '帮我展示这个人在不同方面的绩效,他是否是六边形战士' }; /** diff --git a/packages/vmind/bundler.config.js b/packages/vmind/bundler.config.js index 29936f5e..31e0da1b 100644 --- a/packages/vmind/bundler.config.js +++ b/packages/vmind/bundler.config.js @@ -4,11 +4,10 @@ const json = require('@rollup/plugin-json'); module.exports = { - formats: ['cjs', 'es', 'umd'], + formats: ['cjs', 'es'], outputDir: { es: 'esm', cjs: 'cjs', - umd: 'build' }, name: 'VMind', umdOutputFilename: 'index', diff --git a/packages/vmind/src/chart-to-video/index.ts b/packages/vmind/src/chart-to-video/index.ts index b46a18da..4d4457a2 100644 --- a/packages/vmind/src/chart-to-video/index.ts +++ b/packages/vmind/src/chart-to-video/index.ts @@ -7,7 +7,8 @@ export async function _chatToVideoWasm( propsSpec: any, propsTime: TimeType, outName = 'out', - outerPackages: OuterPackages + outerPackages: OuterPackages, + mode?: 'node' | 'desktop-browser' ) { const { ManualTicker, defaultTimeline, VChart, fetchFile, FFmpeg, createCanvas } = outerPackages; @@ -31,7 +32,7 @@ export async function _chatToVideoWasm( const canvas = createCanvas(width, height); const vchart = new VChart(spec, { renderCanvas: canvas, - mode: 'desktop-browser', + mode: 'node', dpr: 1, disableDirtyBounds: true, ticker: defaultTicker, @@ -74,28 +75,33 @@ export async function _chatToVideoWasm( vchart.getStage().render(); const num = `0000${i}`.slice(-3); - const size = { width: canvas.width, height: canvas.height }; - const blob = await new Promise((resolve, reject) => { - canvas.toBlob((blob: any) => { - if (blob) { - const info = { - data: blob, - format: 'PNG', - size - }; - console.log(`BBB--------${info}`); - resolve(info); - } else { - console.log('no blob'); - reject('no blob'); - } - }, `image/png`); - }); + if (mode === 'node') { + const buffer = (canvas as any).toBuffer(); + FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, buffer); + } else { + const size = { width: canvas.width, height: canvas.height }; + const blob = await new Promise((resolve, reject) => { + canvas.toBlob((blob: any) => { + if (blob) { + const info = { + data: blob, + format: 'PNG', + size + }; + console.log(`BBB--------${info}`); + resolve(info); + } else { + console.log('no blob'); + reject('no blob'); + } + }, `image/png`); + }); + FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, await fetchFile((blob as any).data)); + } // defaultTicker.mode = 'raf' // const imageData = ctx.getImageData(0, 0, ctx.canvas.width, ctx.canvas.height); // console.log(new Uint8Array(imageData.data.buffer)) - FFmpeg.FS('writeFile', `vchart${idx}.${num}.png`, await fetchFile((blob as any).data)); } vchart.release(); diff --git a/packages/vmind/src/common/dataProcess/index.ts b/packages/vmind/src/common/dataProcess/index.ts index 610f3f83..c6d8d75e 100644 --- a/packages/vmind/src/common/dataProcess/index.ts +++ b/packages/vmind/src/common/dataProcess/index.ts @@ -15,8 +15,6 @@ export const parseCSVWithVChart = (csvString: string) => { return dataView; }; -export const 
getDataView = (dataset: DataItem[]) => {}; - export const getDataset = (csvString: string): { dataset: DataItem[]; columns: string[] } => { //get dataset from csv string const dataView = parseCSVWithVChart(csvString); diff --git a/packages/vmind/src/common/dataProcess/utils.ts b/packages/vmind/src/common/dataProcess/utils.ts index 6cb9717b..0d50fe3d 100644 --- a/packages/vmind/src/common/dataProcess/utils.ts +++ b/packages/vmind/src/common/dataProcess/utils.ts @@ -1,6 +1,7 @@ import { sampleSize, isNumber, isInteger } from 'lodash'; import { DataItem, DataType, ROLE, SimpleFieldInfo } from '../../typings'; import dayjs from 'dayjs'; +import { uniqArray } from '@visactor/vutils'; export const readTopNLine = (csvFile: string, n: number) => { // get top n lines of a csv file let res = ''; @@ -29,6 +30,10 @@ function validateDate(date: any) { return dayjs(date, 'YYYY-MM-DD').isValid() || dayjs(date, 'MM-DD').isValid(); } +export function removeEmptyLines(str: string) { + return str.replace(/\n\s*\n/g, '\n'); +} + export const detectFieldType = (dataset: DataItem[], column: string): SimpleFieldInfo => { let fieldType: DataType | undefined = undefined; //detect field type based on rules @@ -36,6 +41,7 @@ export const detectFieldType = (dataset: DataItem[], column: string): SimpleFiel //date=>string //int=>float=>string //detect field type from strict to loose + dataset.every(data => { const value = data[column]; const numberValue = Number(value); @@ -88,10 +94,19 @@ export const detectFieldType = (dataset: DataItem[], column: string): SimpleFiel return true; } }); + const role = [DataType.STRING, DataType.DATE].includes(fieldType) ? ROLE.DIMENSION : ROLE.MEASURE; + + //calculate domain of the column + const domain: (string | number)[] = dataset.map(d => (role === ROLE.DIMENSION ? d[column] : Number(d[column]))); + return { fieldName: column, type: fieldType, - role: [DataType.STRING, DataType.DATE].includes(fieldType) ? ROLE.DIMENSION : ROLE.MEASURE + role, + domain: + role === ROLE.DIMENSION + ? 
(uniqArray(domain) as string[]).slice(0, 20) + : [Math.min(...(domain as number[])), Math.max(...(domain as number[]))] }; }; export const getFieldInfoFromDataset = (dataset: DataItem[], columns: string[]): SimpleFieldInfo[] => { diff --git a/packages/vmind/src/core/VMind.ts b/packages/vmind/src/core/VMind.ts index eca79848..cd1e94e0 100644 --- a/packages/vmind/src/core/VMind.ts +++ b/packages/vmind/src/core/VMind.ts @@ -96,18 +96,18 @@ class VMind { return { fieldInfo: [], dataset }; } - async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages) { + async exportVideo(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') { const { VChart, FFmpeg, fetchFile, ManualTicker } = outerPackages; const outName = `out`; - await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages); + await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages, mode); const data = FFmpeg.FS('readFile', `${outName}.mp4`); return data.buffer; } - async exportGIF(spec: any, time: TimeType, outerPackages: OuterPackages) { + async exportGIF(spec: any, time: TimeType, outerPackages: OuterPackages, mode?: 'node' | 'desktop-browser') { const { VChart, FFmpeg, fetchFile } = outerPackages; const outName = `out`; - await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages); + await _chatToVideoWasm(this._FPS, spec, time, outName, outerPackages, mode); // 调色板 await FFmpeg.run('-i', `${outName}.mp4`, '-filter_complex', '[0:v] palettegen', 'palette.png'); await FFmpeg.run( diff --git a/packages/vmind/src/gpt/chart-generation/utils.ts b/packages/vmind/src/gpt/chart-generation/utils.ts index 17dd2f53..4448b17d 100644 --- a/packages/vmind/src/gpt/chart-generation/utils.ts +++ b/packages/vmind/src/gpt/chart-generation/utils.ts @@ -39,6 +39,16 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase const { x, y } = cell; let chartType = chartTypeOutter; + + // patch the "axis" field to x + if (cell.axis && (!cell.x || !cell.y)) { + if (!cell.x) { + cell.x = cell.axis; + } else if (!cell.y) { + cell.y = cell.axis; + } + } + // y轴字段有多个时,处理方式: // 1. 图表类型为: 箱型图, 图表类型不做矫正 // 2. 图表类型为: 柱状图 或 折线图, 图表类型矫正为双轴图 @@ -77,13 +87,20 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase const { lower_whisker, lowerWhisker, - lowerBox, min, lower, + lowerBox, + lower_box, q1, + lower_quartile, + lowerQuartile, + midline, median, q3, upperBox, + upper_box, + upper_quartile, + upperQuartile, upper_whisker, upperWhisker, max, @@ -98,15 +115,22 @@ export const patchChartTypeAndCell = (chartTypeOutter: string, cell: any, datase lowerWhisker, min, lower, - q1, lowerBox, + lower_box, + q1, + lower_quartile, + lowerQuartile, + midline, median, - upperBox, q3, + upperBox, + upper_box, + upper_quartile, + upperQuartile, upper_whisker, + upperWhisker, max, - upper, - upperWhisker + upper ].filter(Boolean) } }; diff --git a/packages/vmind/src/gpt/dataProcess/prompts.ts b/packages/vmind/src/gpt/dataProcess/prompts.ts index f0ea14a6..c95be605 100644 --- a/packages/vmind/src/gpt/dataProcess/prompts.ts +++ b/packages/vmind/src/gpt/dataProcess/prompts.ts @@ -195,78 +195,51 @@ Response: export const getQueryDatasetPrompt = ( showThoughts: boolean -) => `You are an expert in data analysis. Here is a raw dataset named dataSource. User will tell you his command and column information of DataSource. 
You need to generate a standard SQL query to select useful fields from dataSource according to the template following the Steps and Description. Return the JSON object only. -# Note -1. You are running on a simple SQL engine, so the advanced features, such as RANK() OVER, TOP, JOIN, UNION, etc., are not supported. Please follow the SQL template and Description strictly. -2. Don't guess the specific data content in your SQL. Don't use conditional statement. -3. If you think the fields in dataSource cannot meet user requirements, do not further generate new fields. Just ignore user's command and use these fields. +) => `You are an expert in data analysis. Here is a raw dataset named dataSource. User will tell you his command and column information of dataSource. Your task is to generate SimQuery and fieldInfo according to SimQuery Instruction. Response one JSON object only. +# SimQuery Instruction +- SimQuery is a simplified SQL-like language. Supported keywords in SimQuery: ["SELECT", "FROM", "WHERE", "GROUP BY", "HAVING", "ORDER BY", "LIMIT"]. +- A SimQuery query looks like this: "SELECT columnA, SUM(columnB) as sum_b FROM dataSource WHERE columnA = value1 GROUP BY columnA HAVING sum_b>0 ORDER BY sum_b LIMIT 10". +- Columns in SELECT can only be original columns or aggregated columns. Supported aggregation methods in SimQuery: ["MAX()", "MIN()", "SUM()", "COUNT()", "AVG()"]. +- The "WHERE" and "HAVING" in SimQuery can only use original columns or aggregated columns in dataSource. Supported Operators in SimQuery:[ ">", ">=", "<", "<=", "=", "!=", "in", "not in", "is null", "is not null", "between", "not between", "like", "not like"]. Don't use non-existent columns. +- Don't use unsupported keywords such as CASE WHEN...ELSE...END or PERCENTILE_CONT. Don't use unsupported aggregation methods on columns. Don't use unsupported operators. Unsupported keywords, methods and operators will cause system crash. If current keywords and methods can't meet your needs, just simple select the column without any process. +- Make your SimQuery as simple as possible. -# SQL template: -SELECT xxx FROM xxx (WHERE xxx) GROUP BY xxx (HAVING xxx) (ORDER BY xxx) (LIMIT xxx). - +You need to follow the steps below. # Steps -1. Just use user's command to select useful fields directly. Ignore other parts of user's command. -2. Select useful dimension fields from dataSource. Use the original dimension field without any process. -3. Aggregate the measure fields. Supported aggregation function: MAX(), MIN(), SUM(), COUNT(), AVG(). Note: don't aggregate measures using functions that are not supported such as PERCENTILE_CONT(). Don't use conditional statement. -4. Group the data using dimension fields and fill it in GROUP BY. -5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your SQL if necessary. - - -# Description -1. The part in brackets is optional. xxx in the SQL template can only be original columns or aggregated columns. Select Data only from one table. Don't use unsupported features such as RANK(), TOP, UNION, etc. -2. Make your SQL as simple as possible. Strictly follow the SQL template to generate SQL. Don't use JOIN, UNION, subquery or other feature that is not in the SQL template. Don't process fields in ways other than supported aggregation functions. -3. Please don't change or translate the field names in your SQL statement. -4. Don't ignore GROUP BY in your SQL. +1. Extract the part related to the data from the user's instruction. Ignore other parts that is not related to the data. +2. 
Select useful dimension and measure columns from dataSource. You can only use columns in Column Information and do not assume non-existent columns. If the existing columns can't meet user's command, just select the most related columns in Column Information. +3. Use the original dimension columns without any process. Aggregate the measure columns using aggregation methods supported in SimQuery. Don't use unsupported methods. If current keywords and methods can't meet your needs, just simple select the column without any process. +4. Group the data using dimension columns. +5. You can also use WHERE, HAVING, ORDER BY, LIMIT in your SimQuery if necessary. Use the supported operators to finish the WHERE and HAVING of SimQuery. You can only use binary expression such as columnA = value1, sum_b > 0. You can only use dimension values appearing in the domain of dimension columns in your expression. +Let's think step by step. -Response in JSON format without any additional words. Your JSON object must contain sql and fieldInfo. +Response one JSON object without any additional words. Your JSON object must contain SimQuery and fieldInfo. -Make your SQL as simple as possible. - -Response in the following JSON format: +Response in the following format: \`\`\` { -sql: string; //your sql statement. Note that it's a string in a JSON object so it must be in one line without any \\n. -fieldInfo: { - fieldName: string; //name of the field. - description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail. -}[]; //array of the information about the fields in your sql. Describing its aggregation method and other information of the fields. + ${showThoughts ? 'THOUGHTS: string //your thoughts' : ''} + SimQuery: string; //your SimQuery query. Note that it's a string in a JSON object so it must be in one line without any \\n. + fieldInfo: { + fieldName: string; //name of the field. + description?: string; //description of the field. If it is an aggregated field, please describe how it is generated in detail. + }[]; //array of the information about the fields in your SimQuery. Describing its aggregation method and other information of the fields. } \`\`\` #Examples: -User's Command: 帮我展示个人在不同方面的绩效,他是否是六边形战士 -Column Information: [{"fieldName":"key","type":"string","role":"dimension"},{"fieldName":"value","type":"int","role":"measure"}] - -Response: -\`\`\` - -{ - "sql": "SELECT key, SUM(value) AS performance FROM dataSource GROUP BY key", - "fieldInfo": [ - { - "fieldName": "key", - "description": "The identifier of the person." - }, - { - "fieldName": "performance", - "description": "An aggregated field representing the performance of the person in different aspects. It is generated by aggregating the 'value' field." - } - ] -} -\`\`\` ----------------------------------- - User's Command: Show me the change of the GDP rankings of each country. Column Information: [{"fieldName":"country","type":"string","role":"dimension"},{"fieldName":"continent","type":"string","role":"dimension"},{"fieldName":"GDP","type":"float","role":"measure"},{"fieldName":"year","type":"int","role":"measure"}] Response: \`\`\` { - "sql": "SELECT country, year, SUM(GDP) AS total_GDP FROM dataSource GROUP BY country, year ORDER BY year, total_GDP DESC", + ${showThoughts ? 
'"THOUGHTS": string //your thoughts' : ''} + "SimQuery": "SELECT country, year, SUM(GDP) AS total_GDP FROM dataSource GROUP BY country, year ORDER BY year, total_GDP DESC", "fieldInfo": [ { "fieldName": "country", @@ -291,7 +264,8 @@ Column Information: [{"fieldName":"城市","type":"string","role":"dimension"},{ Response: \`\`\` { - "sql": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as sum_2022_GDP FROM dataSource ORDER BY sum_2022_GDP DESC LIMIT 5", + ${showThoughts ? '"THOUGHTS": string //your thoughts' : ''} + "SimQuery": "SELECT 城市, SUM(\`2022年GDP(亿元)\`) as sum_2022_GDP FROM dataSource ORDER BY sum_2022_GDP DESC LIMIT 5", "fieldInfo": [ { "fieldName": "城市", @@ -312,7 +286,8 @@ Column Information: [{"fieldName":"时间","type":"string","role":"dimension"},{ Response: \`\`\` { - "sql": "SELECT \`时间\`, SUM(\`男_DASH_早餐\`) AS breakfast_amount_man, SUM(\`女_DASH_早餐\`) AS breakfast_amount_woman FROM dataSource GROUP BY \`时间\`", + ${showThoughts ? '"THOUGHTS": string //your thoughts' : ''} + "SimQuery": "SELECT \`时间\`, SUM(\`男_DASH_早餐\`) AS breakfast_amount_man, SUM(\`女_DASH_早餐\`) AS breakfast_amount_woman FROM dataSource GROUP BY \`时间\`", "fieldInfo": [ { "fieldName": "gender", @@ -332,6 +307,7 @@ You only need to return the JSON in your response directly to the user. Finish your tasks in one-step. # Constraints: -1. Write your SQL statement in one line without any \\n. -2. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript. +1. Write your SimQuery statement in one line without any \\n. +2. Please don't change or translate the field names in your SimQuery statement. Don't miss the GROUP BY in your query. +3. Response the JSON object directly without any other contents. Make sure it can be directly parsed by JSON.parse() in JavaScript. `; diff --git a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts b/packages/vmind/src/gpt/dataProcess/query/astPipes.ts index 5ce984ec..d6011311 100644 --- a/packages/vmind/src/gpt/dataProcess/query/astPipes.ts +++ b/packages/vmind/src/gpt/dataProcess/query/astPipes.ts @@ -156,21 +156,23 @@ export const select: ASTParserPipe = (query: Partial, context: ASTParserC return { ...query, select: { - columns: (columns ?? []).map(column => { - const result: any = {}; - const { as, expr } = column; - if (checkIsColumnNode(expr, columnAlias, fieldInfo)) { - result.column = getOriginalString(expr.column ?? expr.value, replaceMap); - } else if (expr.type === 'aggr_func') { - const aggrFuncConf: any = parseAggrFunc(expr, columnAlias, fieldInfo, replaceMap); - result.column = aggrFuncConf.column; - result.aggregate = aggrFuncConf.aggregate; - } - if (as) { - result.alias = getOriginalString(as, replaceMap); - } - return result; - }), + columns: (columns ?? []) + .map(column => { + const result: any = {}; + const { as, expr } = column; + if (checkIsColumnNode(expr, columnAlias, fieldInfo)) { + result.column = getOriginalString(expr.column ?? 
expr.value, replaceMap); + } else if (expr.type === 'aggr_func') { + const aggrFuncConf: any = parseAggrFunc(expr, columnAlias, fieldInfo, replaceMap); + result.column = aggrFuncConf.column; + result.aggregate = aggrFuncConf.aggregate; + } + if (as) { + result.alias = getOriginalString(as, replaceMap); + } + return result; + }) + .filter(c => c.column), distinct: Boolean(distinct) } }; @@ -208,6 +210,19 @@ export const orderBy: any = (query: Partial, context: ASTParserContext) = result.column = orderConfig.column; result.aggregate = orderConfig.aggregate; } + //query in calculator package does not support alias reference in other parts outside select. + //check if the order by column is a derived column using aggregation methods in select + //if so, replace the column with the original name and aggregation method. + if (!result.aggregate && !fieldInfo.find(field => field.fieldName === result.column)) { + //result.column is a derived field. replace with the original field + const originalColumn: any = query.select.columns.find( + column => column.alias === result.column || (column as any).column === result.column + ); + if (originalColumn) { + result.column = originalColumn.column ?? originalColumn.alias; + result.aggregate = originalColumn.aggregate; + } + } return { type: type ? toFirstUpperCase(type) : OrderType.Asc, ...result diff --git a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts index 8079212f..07f2c466 100644 --- a/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts +++ b/packages/vmind/src/gpt/dataProcess/query/queryDataset.ts @@ -1,11 +1,18 @@ import { DataItem, ILLMOptions, SimpleFieldInfo } from '../../../typings'; import NodeSQLParser from 'node-sql-parser'; -import { mergeMap, parseRespondField, patchQueryInput, preprocessSQL, replaceOperator } from './utils'; +import { + mergeMap, + parseGPTQueryResponse, + parseRespondField, + patchQueryInput, + preprocessSQL, + replaceOperator +} from './utils'; import { parseSqlAST } from './parseSqlAST'; import { isArray } from 'lodash'; import { DataQueryResponse, SQLAst } from './type'; import { Query, query } from '@visactor/calculator'; -import { parseGPTResponse, requestGPT } from '../../utils'; +import { parseGPTResponse as parseGPTResponseAsJSON, requestGPT } from '../../utils'; import { getQueryDatasetPrompt } from '../prompts'; /** @@ -23,8 +30,8 @@ export const queryDatasetWithGPT = async ( ) => { const { validFieldInfo, replaceMap: operatorReplaceMap } = replaceOperator(fieldInfo); const patchedInput = patchQueryInput(userInput); - const { sql, fieldInfo: responseFieldInfo } = await getQuerySQL(patchedInput, validFieldInfo, options); - const { validStr, replaceMap: preprocessReplaceMap } = preprocessSQL(sql, fieldInfo); + const { SimQuery, fieldInfo: responseFieldInfo } = await getQuerySQL(patchedInput, validFieldInfo, options); + const { validStr, replaceMap: preprocessReplaceMap } = preprocessSQL(SimQuery, fieldInfo); const replaceMap = mergeMap(preprocessReplaceMap, operatorReplaceMap); const parser = new NodeSQLParser.Parser(); @@ -34,6 +41,9 @@ export const queryDatasetWithGPT = async ( const dataset = query(queryObject as Query); const fieldInfoNew = parseRespondField(responseFieldInfo, dataset, replaceMap); + if (dataset.length === 0) { + console.warn('empty dataset after query!'); + } return { dataset: dataset.length === 0 ? sourceDataset : dataset, fieldInfo: dataset.length === 0 ? 
fieldInfo : fieldInfoNew @@ -49,8 +59,15 @@ const getQuerySQL = async (userInput: string, fieldInfo: SimpleFieldInfo[], opti const queryDatasetMessage = `User's Command: ${userInput}\nColumn Information: ${JSON.stringify(fieldInfo)}`; const requestFunc = options.customRequestFunc?.dataQuery ?? requestGPT; - const QueryDatasetPrompt = getQueryDatasetPrompt(options.showThoughts); + const QueryDatasetPrompt = getQueryDatasetPrompt(options.showThoughts ?? true); const dataProcessRes = await requestFunc(QueryDatasetPrompt, queryDatasetMessage, options); - const dataQueryResponse: DataQueryResponse = parseGPTResponse(dataProcessRes); + const dataQueryResponse: DataQueryResponse = parseGPTResponseAsJSON(dataProcessRes); + const { SimQuery, fieldInfo: responseFiledInfo } = dataQueryResponse; + if (!SimQuery || !responseFiledInfo) { + //try to parse the response with another format + const choices = dataProcessRes.choices; + const content = choices[0].message.content; + return parseGPTQueryResponse(content); + } return dataQueryResponse; }; diff --git a/packages/vmind/src/gpt/dataProcess/query/type.ts b/packages/vmind/src/gpt/dataProcess/query/type.ts index 0c740097..486ecb2d 100644 --- a/packages/vmind/src/gpt/dataProcess/query/type.ts +++ b/packages/vmind/src/gpt/dataProcess/query/type.ts @@ -14,6 +14,6 @@ export type ASTParserContext = { export type DataQueryResponse = { THOUGHT?: string; - sql: string; + SimQuery: string; fieldInfo: { fieldName: string; description?: string }[]; }; diff --git a/packages/vmind/src/gpt/dataProcess/query/utils.ts b/packages/vmind/src/gpt/dataProcess/query/utils.ts index 2c5a7f5e..3e928e4a 100644 --- a/packages/vmind/src/gpt/dataProcess/query/utils.ts +++ b/packages/vmind/src/gpt/dataProcess/query/utils.ts @@ -1,4 +1,6 @@ -import { isString } from 'lodash'; +import { isArray, isString } from 'lodash'; +import JSON5 from 'json5'; + import { Query } from '@visactor/calculator'; import { detectFieldType } from '../../../common/dataProcess/utils'; import { DataItem, SimpleFieldInfo } from '../../../typings'; @@ -226,6 +228,27 @@ export const mergeMap = (map1: Map, map2: Map) = export const patchQueryInput = (userInput: string) => { return ( userInput + - " Don't use JOIN, UNION or subquery. Don't use Rank(), PERCENTILE_CONT, CASE WHEN. Don't use conditional statement." + " Don't use unsupported keywords and methods in the SELECT of SimQuery. Don't use non-existent columns and dimension values in the WHERE of SimQuery." 
); }; + +export const parseGPTQueryResponse = (response: string) => { + const SimQuery = response.match(/SimQuery:\n?```(.*?)```/s)[1]; + const fieldInfoStr = response.match(/fieldInfo:\n?```(.*?)```/s)[1]; + let fieldInfo = []; + try { + const tempFieldInfo = JSON5.parse(fieldInfoStr); + if (isArray(tempFieldInfo)) { + fieldInfo = tempFieldInfo; + } else { + fieldInfo = tempFieldInfo.fieldInfo; + } + } catch (e) { + //fieldInfoStr is not a json string; try to wrap it with [] + fieldInfo = JSON5.parse(`[${fieldInfoStr}]`); + } + return { + SimQuery, + fieldInfo + }; +}; diff --git a/packages/vmind/src/typings/index.ts b/packages/vmind/src/typings/index.ts index c40bfbd4..6e94e2ac 100644 --- a/packages/vmind/src/typings/index.ts +++ b/packages/vmind/src/typings/index.ts @@ -11,9 +11,9 @@ export interface ILLMOptions { temperature?: number; showThoughts?: boolean; customRequestFunc?: { - chartAdvisor: RequestFunc; - dataProcess: RequestFunc; - dataQuery: RequestFunc; + chartAdvisor?: RequestFunc; + dataProcess?: RequestFunc; + dataQuery?: RequestFunc; }; [key: string]: any; } @@ -25,6 +25,7 @@ export type SimpleFieldInfo = { description?: string; //additional description of the field. This will help the model have a more comprehensive understanding of this field, improving the quality of chart generation. type: DataType; role: ROLE; + domain?: (string | number)[]; }; export type GPTDataProcessResult = { fieldInfo: SimpleFieldInfo[];
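
Note on the new `mode` parameter: `exportVideo` and `exportGIF` now accept an optional `mode?: 'node' | 'desktop-browser'` that is forwarded to `_chatToVideoWasm`; in `'node'` mode each rendered frame is written to ffmpeg's in-memory FS via `canvas.toBuffer()` instead of the browser-only `canvas.toBlob()` path. The sketch below distills that node-side frame loop in isolation. It is a minimal sketch, not VMind's actual pipeline: `@ffmpeg/ffmpeg` (0.11-style `FS`/`run` API) and `canvas` (node-canvas) match the kind of packages this patch expects callers to inject through `OuterPackages`, but the fill logic stands in for `vchart.getStage().render()`, and the final `ffmpeg.run` arguments are illustrative, since the video-assembly command is not part of this diff.

```ts
import { createFFmpeg } from '@ffmpeg/ffmpeg';
import { createCanvas } from 'canvas';

// Write a handful of PNG frames the way the 'node' branch does, then assemble them.
async function writeFramesNodeMode(frameCount: number, width = 640, height = 360) {
  const ffmpeg = createFFmpeg({ log: false });
  await ffmpeg.load();

  const canvas = createCanvas(width, height);
  const ctx = canvas.getContext('2d');

  for (let i = 0; i < frameCount; i++) {
    // Stand-in for advancing the ManualTicker and calling vchart.getStage().render().
    ctx.fillStyle = `rgb(40, ${(i * 16) % 256}, 160)`;
    ctx.fillRect(0, 0, width, height);

    // Same zero-padded naming scheme as the patch: vchart<idx>.<frame>.png
    const num = `0000${i}`.slice(-3);
    // 'node' mode: PNG bytes come straight from node-canvas, no toBlob/fetchFile round trip.
    ffmpeg.FS('writeFile', `vchart0.${num}.png`, canvas.toBuffer('image/png'));
  }

  // Illustrative assembly step; VMind's real ffmpeg arguments are outside this diff.
  await ffmpeg.run('-framerate', '30', '-i', 'vchart0.%03d.png', '-pix_fmt', 'yuv420p', 'out.mp4');
  return ffmpeg.FS('readFile', 'out.mp4'); // Uint8Array, analogous to what exportVideo reads back
}
```

From the caller's side, the only visible change is the extra argument, e.g. `await vmind.exportVideo(spec, time, outerPackages, 'node')` in a Node script, versus omitting it (or passing `'desktop-browser'`) in the browser, where the existing `toBlob` + `fetchFile` path still applies.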