From 0d3a6902bd6de9575e3f7f9445d1cce61ea1b326 Mon Sep 17 00:00:00 2001 From: N3N Date: Sun, 5 May 2024 19:26:01 -0700 Subject: [PATCH] add category and move difficulty --- .github/workflows/pylint.yml | 26 ++++---- README.md | 71 +++++++++++++++++----- prompt.pml | 112 ++++++++++++++++++----------------- src/promptml/grammar.lark | 6 +- src/promptml/parser.py | 47 +++++++++------ tests/test_parser.py | 13 ++-- 6 files changed, 169 insertions(+), 106 deletions(-) diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 4e16568..974260b 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,16 +7,16 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - pip install -r requirements.txt - - name: Analysing the code with pylint - run: | - pylint $(git ls-files 'src/*.py') + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install -r requirements.txt + - name: Analysing the code with pylint + run: | + pylint $(git ls-files 'src/*.py') --fail-under=9 diff --git a/README.md b/README.md index 22154a9..34bd06e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # PromptML (Prompt Markup Language) + ![](./promptml.jpeg) A simple, yet elegant markup language for defining AI Prompts as Code (APaC). Built to be used by AI agents to automatically prompt for other AI systems. @@ -8,28 +9,34 @@ The architecture is shown as below. A `PromptML` prompt can be version controlle ![prompt-ml architecture](./prompt-github.png) ## Why PromptML ? + PromptML is built to provide a way for prompt engineers to define the AI prompts in a deterministic way. 
This is a Domain Specific Language (DSL) which defines characteristics of a prompt including context, objective, instructions and it's metadata. A regular prompt is an amalgamation of all these aspects into one entity. PromptML splits it into multiple sections and makes the information explicit. The language grammar can be found here: [grammar.lark](./src/promptml/grammar.lark) - ## How PromptML looks ? + The language is simple. You start blocks with `@` section annotation. A section ends with `@end` marker. Comments are started with `#` key. The prompt files ends with `.pml` extension. ```pml @prompt + # Add task context @context - # Add prompt context @end + + # Add task objective @objective # This is the final question or ask @end + + # Add one or more instructions to execute the prompt @instructions @step - # Add one or more instructions to execute the prompt @end @end + + # Add one or more examples @examples @example @input @@ -40,22 +47,31 @@ The language is simple. You start blocks with `@` section annotation. A section @end @end @end + + # Add task constraints @constraints - # Add prompt constraints + @length min: 1 max: 10 @end + @end + + # Add prompt category + @category @end + + # Add custom metadata @metadata - # Add prompt metadata here @end @end ``` -See [prompt.pml](./prompt.pml) to see an example. +See [prompt.pml](./prompt.pml) for the complete syntax. ## Design + Regular text prompts are very abstract in nature. Natural languages are very flexible but provides least reliability. How to provide context for an AI system and ask something ? Shouldn't we specify that explicitly. PromptML is an attempt to make contents of a prompt explicit with a simple language. ## Core tenets of PromptML + Below are the qualities PromptML brings to prompt engineering domain: 1. Standardization instead of fragmentation @@ -63,8 +79,8 @@ Below are the qualities PromptML brings to prompt engineering domain: 3. Enabling version control-ability 4. 
Promoting verbosity for better results - ## Why not use XML, YAML, or JSON for PromptML ? + First, XML, JSON, and YAML are not DSL languages. They are data formats that can represent any form of data. Second, generative AI needs a strict, yet flexible data language with fixed constraints which evolve along with the domain. PromptML is built exactly to solve those two issues. @@ -74,10 +90,13 @@ Language grammar is influenced by XML & Ruby, so if you know any one of them, yo ## Usage 1. Install Python requirements + ```bash pip install -r requirements.txt ``` + 2. import the parser and parse a promptML file + ```py from promptml.parser import PromptParser @@ -108,17 +127,18 @@ promptml_code = ''' @end @end + @category + Prompt Management + @end + @constraints - @length min: 1 max: 10 + @length min: 1 max: 10 @end @end @metadata - @domain - Web Development - @end - @difficulty - Advaned - @end + top_p: 0.9 + n: 1 + team: promptml @end @end ''' @@ -130,17 +150,38 @@ print(prompt) # Output: { # 'context': 'This is the context section.', # 'objective': 'This is the objective section.', +# 'category': 'Prompt Management', # 'instructions': ['Step 1'], # 'examples': [ # {'input': 'Input example 1', 'output': 'Output example 1'} # ], # 'constraints': {'length': {'min': 1, 'max': 10}}, -# 'metadata': {'domain': 'Web Development', 'difficulty': 'Advanced'} +# 'metadata': {'top_p': 0.9, 'n': 1, 'team': 'promptml'} # } +``` + +## Defining variables + +You can define variables in the promptML file and use them in the prompt `context` and `objective`. The variables are defined in the `@vars` section and referenced using `$var` syntax in either `context` or `objective` sections. + +```pml +@vars + name = "John Doe" +@end + +@prompt + @context + You are a name changing expert. + @end + @objective + You have to change the name: $name to an ancient name. + @end +@end ``` ## TODO + We are currently working on: 1. 
Supporting more annotations (Ex: temperature, top_p) diff --git a/prompt.pml b/prompt.pml index 8631961..770dc28 100644 --- a/prompt.pml +++ b/prompt.pml @@ -1,81 +1,87 @@ +# Define prompt variables +@vars + role = 'highly skilled and experienced software developer' +@end + # Define prompt @prompt # Context is used to provide background information or context for the task @context - You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. + You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. @end # Objective is used to define the main goal or objective of the task @objective - Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. + Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. @end # Instructions are used to provide detailed steps or guidelines for completing the task @instructions - # steps can be used to break down the task into smaller parts - @step - Identify the key features and requirements of the web application based on the provided context. - @end - @step - Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. - @end - @step - Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. - @end - @step - Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. 
- @end - @step - Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. - @end - @step - Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. - @end + # steps can be used to break down the task into smaller parts + @step + Identify the key features and requirements of the web application based on the provided context. + @end + @step + Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. + @end + @step + Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. + @end + @step + Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. + @end + @step + Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. + @end + @step + Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. + @end @end # Examples are used to provide sample inputs and outputs for the task @examples - @example - @input - Design the core architecture and components for a large-scale e-commerce web application. - @end - @output - For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... + @example + @input + Design the core architecture and components for a large-scale e-commerce web application. + @end + @output + For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... + @end @end - @end - @example - @input - Outline main components for a large-scale e-commerce web application. 
+ @example + @input + Outline main components for a large-scale e-commerce web application. + @end + @output + Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... + @end @end - @output - Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... - @end - @end @end # Constraints are used to specify any limitations or restrictions for the task @constraints - @length - min: 1000 - max: 3000 - @end - @tone - Professional and technical - @end + @length + min: 1000 + max: 3000 + @end + @tone + Professional and technical + @end + @difficulty + Advanced + @end + @end + + # categories are used to classify the task into different categories + @category + Software Engineering @end # Metadata includes information such as domain, difficulty, custom props, etc. @metadata - domain: 'Software Engineering' - difficulty: 'Advanced' - top_p: 0.6 - temperature: 0.5 - n: 1 - method: 'greedy' + top_p: 0.6 + temperature: 0.5 + n: 1 + internal: 'true' @end @end - -# Define prompt variables -@vars - role = 'highly skilled and experienced software developer' -@end diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark index 69d640b..b222340 100644 --- a/src/promptml/grammar.lark +++ b/src/promptml/grammar.lark @@ -14,6 +14,7 @@ section: context | instructions | examples | constraints + | category | metadata context: "@context" text "@end" @@ -28,10 +29,13 @@ input: "@input" text "@end" output: "@output" text "@end" constraints: "@constraints" constraint* "@end" -constraint: length | tone +constraint: length | tone | difficulty +difficulty: "@difficulty" text "@end" length: "@length" "min:" INT "max:" INT "@end" tone: "@tone" text "@end" +category: "@category" text "@end" + metadata: "@metadata" prop* "@end" prop: PROP_NAME ":" (NUMBER | 
STRING ) PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ diff --git a/src/promptml/parser.py b/src/promptml/parser.py index e7f29a3..0ba1d29 100644 --- a/src/promptml/parser.py +++ b/src/promptml/parser.py @@ -8,7 +8,7 @@ Example usage: dsl_code = ''' - code... + ... ''' parser = PromptParser(dsl_code) @@ -23,30 +23,41 @@ class PromptMLTransformer(Transformer): """ - A class for transforming the parsed PromptML code into a structured format. + A class for transforming the parsed PromptML tree into a Python dictionary. """ def start(self, items): """ Extract the start section content.""" + prompt = {} + vars_ = {} + for item in items: + if item["type"] == "vars": + vars_ = item["data"] + elif item["type"] == "prompt": + prompt = item["data"] - # Variables are in child 1, replace context with variables $x to x -> value using regex - prompt = items[0] - context = prompt["context"] + # context is not a Python keyword; the trailing underscore only keeps this local distinct from the context() method + context_ = prompt["context"] objective = prompt["objective"] - vars_ = items[1] + # Replace variables in context and objective with values for k,v in vars_.items(): - context = re.sub(r'\$' + k, v, context) - objective = re.sub(r'\$' + k, v, objective) + context_ = context_.replace(r'$' + k, v.replace("'", '').replace('"', '')) + objective = objective.replace(r'$' + k, v.replace("'", '').replace('"', '')) - prompt["context"] = context + prompt["context"] = context_ prompt["objective"] = objective + return prompt def block(self, items): """ Extract the block content.""" return items[0] + def category(self, items): + """ Extract the category content.""" + return {"category": items[0].strip()} + def prompt(self, items): """ Extract the prompt content.""" sections = {} @@ -57,7 +68,7 @@ def prompt(self, items): else: sections.update(child) - return sections + return {"type": "prompt", "data": sections} def context(self, items): """ Extract the context section content.""" @@ -105,6 +116,10 @@ def tone(self, items): """ Extract the 
tone constraint content.""" return {"tone": items[0].strip()} + def difficulty(self, items): + """ Extract the difficulty constraint content.""" + return {"difficulty": items[0].strip()} + def var_block(self, items): """ Extract the variable block content.""" var_map = {} @@ -114,7 +129,7 @@ def var_block(self, items): var_value = item.children[1].strip() var_map[var_symbol] = var_value - return var_map + return {"type": "vars", "data": var_map} def metadata(self, items): """ @@ -146,18 +161,11 @@ def metadata(self, items): return {"metadata": metadata} - def domain(self, items): - """ Extract the domain metadata content.""" - return {"domain": items[0]} - - def difficulty(self, items): - """ Extract the difficulty metadata content.""" - return {"difficulty": items[0]} - def text(self, items): """ Extract the text content.""" return items[0] + class PromptParser: """A class for parsing prompt markup language code and extract information. """ @@ -203,6 +211,7 @@ def deserialize_json(self, serialized_data): """ self.prompt = json.loads(serialized_data) + class PromptParserFromFile(PromptParser): """ A subclass of PromptParser that reads DSL code from a file. 
diff --git a/tests/test_parser.py b/tests/test_parser.py index 72dcc88..a913f77 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,11 +1,11 @@ -""" Test cases for prompt parser +""" Test cases for prompt parser """ from src.promptml.parser import PromptParserFromFile from unittest import TestCase class TestPromptParser(TestCase): - # read prompt from prompt.aiml file + # read prompt from prompt.pml file def setUp(self): self.prompt_parser = PromptParserFromFile('prompt.pml') @@ -57,14 +57,17 @@ def test_parse(self): "min": 1000, "max": 3000 }, - "tone": "Professional and technical" + "tone": "Professional and technical", + "difficulty": "Advanced" } ) self.assertEqual( res["metadata"], { - "domain": "Software Engineering, Web Development", - "difficulty": "Advanced" + "top_p": 0.6, + "temperature": 0.5, + "n": 1, + "internal": "true" } )