From 528c272dc60f9ef320971eccdd0cf1e0e225b45e Mon Sep 17 00:00:00 2001
From: wraps
Date: Thu, 23 Nov 2023 20:18:35 +0100
Subject: [PATCH 1/3] ✨ feat: Add maxLines and maxTokens settings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json                     | 8 ++++++++
 src/prompts/autocomplete.ts      | 7 ++++---
 src/prompts/provider.ts          | 4 ++++
 src/test/suite/extension.test.ts | 6 +++++-
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/package.json b/package.json
index e256865..78491c8 100644
--- a/package.json
+++ b/package.json
@@ -41,6 +41,14 @@
         "default": "http://127.0.0.1:11434/",
         "description": "Ollama Server Endpoint"
       },
+      "inference.maxLines": {
+        "type": "number",
+        "default": 16
+      },
+      "inference.maxTokens": {
+        "type": "number",
+        "default": 256
+      },
       "inference.model": {
         "type": "string",
         "enum": [
diff --git a/src/prompts/autocomplete.ts b/src/prompts/autocomplete.ts
index 51dd30c..204c81e 100644
--- a/src/prompts/autocomplete.ts
+++ b/src/prompts/autocomplete.ts
@@ -8,6 +8,8 @@ export async function autocomplete(args: {
     model: string,
     prefix: string,
     suffix: string,
+    maxLines: number,
+    maxTokens: number,
     canceled?: () => boolean,
 }): Promise<string> {
@@ -17,7 +19,7 @@ export async function autocomplete(args: {
         prompt: adaptPrompt({ prefix: args.prefix, suffix: args.suffix, model: args.model }),
         raw: true,
         options: {
-            num_predict: 256
+            num_predict: args.maxTokens
         }
     };
@@ -75,9 +77,8 @@ export async function autocomplete(args: {
         // Update total lines
         totalLines += countSymbol(tokens.response, '\n');

-        // Break if too many lines and on top level
-        if (totalLines > 16 && blockStack.length === 0) {
+        if (totalLines > args.maxLines && blockStack.length === 0) {
             info('Too many lines, breaking.');
             break;
         }
diff --git a/src/prompts/provider.ts b/src/prompts/provider.ts
index b33845c..5576bf2 100644
--- a/src/prompts/provider.ts
+++ b/src/prompts/provider.ts
@@ -65,6 +65,8 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
         let config = vscode.workspace.getConfiguration('inference');
         let endpoint = config.get('endpoint') as string;
         let model = config.get('model') as string;
+        let maxLines = config.get('maxLines') as number;
+        let maxTokens = config.get('maxTokens') as number;
         if (endpoint.endsWith('/')) {
             endpoint = endpoint.slice(0, endpoint.length - 1);
         }
@@ -98,6 +100,8 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
                     suffix: prepared.suffix,
                     endpoint: endpoint,
                     model: model,
+                    maxLines: maxLines,
+                    maxTokens: maxTokens,
                     canceled: () => token.isCancellationRequested,
                 });
                 info(`AI completion completed: ${res}`);
diff --git a/src/test/suite/extension.test.ts b/src/test/suite/extension.test.ts
index 8d08c72..ead3148 100644
--- a/src/test/suite/extension.test.ts
+++ b/src/test/suite/extension.test.ts
@@ -17,12 +17,16 @@ suite('Extension Test Suite', () => {
     test('should perform autocomplete', async () => {
         let endpoint = 'http://127.0.0.1:11434';
         let model = 'codellama:7b-code-q4_K_S'; // Lightweight llm for tests
+        let maxLines = 16;
+        let maxTokens = 256;
         let prompt = 'fun main(): ';
         let result = await autocomplete({
             endpoint,
             model,
             prefix: prompt,
-            suffix: ''
+            suffix: '',
+            maxLines,
+            maxTokens
         });
         console.warn(result);
     });
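Patch 1 threads the new `maxLines` cap into the streaming loop of `src/prompts/autocomplete.ts`, where generation stops only once the line budget is exceeded *and* no bracketed block is still open. A minimal sketch of that capping behavior, assuming a simple bracket-matching `blockStack` (the stream handling below is illustrative, not the extension's exact code):

```typescript
// Illustrative sketch of the maxLines cap parameterized by patch 1.
// `blockStack` tracks unclosed brackets so the loop only breaks at top
// level, never in the middle of an open function body or object literal.
function capAtMaxLines(chunks: string[], maxLines: number): string {
    let out = '';
    let totalLines = 0;
    const blockStack: string[] = [];
    for (const chunk of chunks) {
        out += chunk;
        for (const ch of chunk) {
            if ('([{'.includes(ch)) {
                blockStack.push(ch);
            } else if (')]}'.includes(ch)) {
                blockStack.pop();
            }
        }
        totalLines += (chunk.match(/\n/g) ?? []).length;
        // Same condition the patch makes configurable:
        if (totalLines > maxLines && blockStack.length === 0) {
            break;
        }
    }
    return out;
}
```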
From 049b552af9d0f212c3d0d92976158281521dba2f Mon Sep 17 00:00:00 2001
From: wraps
Date: Thu, 23 Nov 2023 20:55:36 +0100
Subject: [PATCH 2/3] 📝 docs(package.json): Add descriptions for maxLines and
 maxTokens
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index 78491c8..091cd54 100644
--- a/package.json
+++ b/package.json
@@ -43,11 +43,13 @@
       },
       "inference.maxLines": {
         "type": "number",
-        "default": 16
+        "default": 16,
+        "description": "Max number of lines to keep."
       },
       "inference.maxTokens": {
         "type": "number",
-        "default": 256
+        "default": 256,
+        "description": "Max number of new tokens to be generated."
       },
       "inference.model": {
         "type": "string",
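With the schema entries documented, the `as number` casts in `src/prompts/provider.ts` could alternatively use `WorkspaceConfiguration.get`'s typed overload, which falls back to a given value when the setting is unset. A possible variant, not part of these patches:

```typescript
import * as vscode from 'vscode';

// Hypothetical alternative to the `as number` casts in provider.ts.
// VS Code already applies the package.json defaults, so the fallback
// values here only guard against a missing schema entry.
const config = vscode.workspace.getConfiguration('inference');
const maxLines = config.get<number>('maxLines', 16);
const maxTokens = config.get<number>('maxTokens', 256);
```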
From f7afdc865d2759730a39e5fafaae3074c38f702c Mon Sep 17 00:00:00 2001
From: wraps
Date: Thu, 23 Nov 2023 21:59:50 +0100
Subject: [PATCH 3/3] ✨ feat: Implement 'temperature' setting for model
 creativity control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 package.json                     | 5 +++++
 src/prompts/autocomplete.ts      | 4 +++-
 src/prompts/provider.ts          | 2 ++
 src/test/suite/extension.test.ts | 4 +++-
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/package.json b/package.json
index 091cd54..e7a03d5 100644
--- a/package.json
+++ b/package.json
@@ -51,6 +51,11 @@
         "default": 256,
         "description": "Max number of new tokens to be generated."
       },
+      "inference.temperature": {
+        "type": "number",
+        "default": 0.2,
+        "description": "Temperature of the model. Increasing the temperature will make the model answer more creatively."
+      },
       "inference.model": {
         "type": "string",
         "enum": [
diff --git a/src/prompts/autocomplete.ts b/src/prompts/autocomplete.ts
index 204c81e..2379218 100644
--- a/src/prompts/autocomplete.ts
+++ b/src/prompts/autocomplete.ts
@@ -10,6 +10,7 @@ export async function autocomplete(args: {
     suffix: string,
     maxLines: number,
     maxTokens: number,
+    temperature: number,
     canceled?: () => boolean,
 }): Promise<string> {
@@ -19,7 +20,8 @@ export async function autocomplete(args: {
         prompt: adaptPrompt({ prefix: args.prefix, suffix: args.suffix, model: args.model }),
         raw: true,
         options: {
-            num_predict: args.maxTokens
+            num_predict: args.maxTokens,
+            temperature: args.temperature
         }
     };
diff --git a/src/prompts/provider.ts b/src/prompts/provider.ts
index 5576bf2..b38b440 100644
--- a/src/prompts/provider.ts
+++ b/src/prompts/provider.ts
@@ -67,6 +67,7 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
         let model = config.get('model') as string;
         let maxLines = config.get('maxLines') as number;
         let maxTokens = config.get('maxTokens') as number;
+        let temperature = config.get('temperature') as number;
         if (endpoint.endsWith('/')) {
             endpoint = endpoint.slice(0, endpoint.length - 1);
         }
@@ -102,6 +103,8 @@ export class PromptProvider implements vscode.InlineCompletionItemProvider {
                     model: model,
                     maxLines: maxLines,
                     maxTokens: maxTokens,
+                    temperature,
                     canceled: () => token.isCancellationRequested,
                 });
                 info(`AI completion completed: ${res}`);
diff --git a/src/test/suite/extension.test.ts b/src/test/suite/extension.test.ts
index ead3148..dc380bd 100644
--- a/src/test/suite/extension.test.ts
+++ b/src/test/suite/extension.test.ts
@@ -19,6 +19,7 @@ suite('Extension Test Suite', () => {
         let model = 'codellama:7b-code-q4_K_S'; // Lightweight llm for tests
         let maxLines = 16;
         let maxTokens = 256;
+        let temperature = 0.2;
         let prompt = 'fun main(): ';
         let result = await autocomplete({
             endpoint,
@@ -26,7 +27,8 @@ suite('Extension Test Suite', () => {
             prefix: prompt,
             suffix: '',
             maxLines,
-            maxTokens
+            maxTokens,
+            temperature
         });
         console.warn(result);
     });
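With all three patches applied, every generation knob is user-configurable. The sketch below shows roughly what `autocomplete()` ends up posting to Ollama's `/api/generate` endpoint under the default settings; the field names follow the Ollama REST API, while the infill prompt shown is only a stand-in for whatever `adaptPrompt()` builds for the selected model:

```typescript
// Sketch of the request produced after patch 3, using the defaults from
// package.json. The prompt string is illustrative; adaptPrompt() builds
// the real infill prompt per model.
const body = {
    model: 'codellama:7b-code-q4_K_S',
    prompt: '<PRE> fun main(): <SUF> <MID>', // stand-in infill prompt
    raw: true,
    options: {
        num_predict: 256,  // inference.maxTokens
        temperature: 0.2,  // inference.temperature
    },
};

const res = await fetch('http://127.0.0.1:11434/api/generate', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
});
```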