diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml index 4e16568..974260b 100644 --- a/.github/workflows/pylint.yml +++ b/.github/workflows/pylint.yml @@ -7,16 +7,16 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pylint - pip install -r requirements.txt - - name: Analysing the code with pylint - run: | - pylint $(git ls-files 'src/*.py') + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + pip install -r requirements.txt + - name: Analysing the code with pylint + run: | + pylint $(git ls-files 'src/*.py') --fail-under=9 diff --git a/README.md b/README.md index 8e73217..34bd06e 100644 --- a/README.md +++ b/README.md @@ -1,33 +1,42 @@ # PromptML (Prompt Markup Language) -A simple, yet elegant markup language for defining AI Prompts as Code (APaC). Built to be used by AI agents to automatically prompt for other AI systems + +![](./promptml.jpeg) + +A simple, yet elegant markup language for defining AI Prompts as Code (APaC). Built to be used by AI agents to automatically prompt for other AI systems. The architecture is shown as below. A `PromptML` prompt can be version controlled like any other code file. Using promptml parser package, one can easily generate a natural language prompt, and execute it against a LLM. See examples for using promptml library package: [open examples](./examples/) ![prompt-ml architecture](./prompt-github.png) ## Why PromptML ? + PromptML is built to provide a way for prompt engineers to define the AI prompts in a deterministic way. 
This is a Domain Specific Language (DSL) which defines characteristics of a prompt including context, objective, instructions and it's metadata. A regular prompt is an amalgamation of all these aspects into one entity. PromptML splits it into multiple sections and makes the information explicit. The language grammar can be found here: [grammar.lark](./src/promptml/grammar.lark) - ## How PromptML looks ? + The language is simple. You start blocks with `@` section annotation. A section ends with `@end` marker. Comments are started with `#` key. The prompt files ends with `.pml` extension. ```pml @prompt + # Add task context @context - # Add prompt context @end + + # Add task objective @objective # This is the final question or ask @end + + # Add one or more instructions to execute the prompt @instructions @step - # Add one or more instructions to execute the prompt @end @end + + # Add one or more examples @examples @example @input @@ -38,22 +47,31 @@ The language is simple. You start blocks with `@` section annotation. A section @end @end @end + + # Add task constraints @constraints - # Add prompt constraints + @length min: 1 max: 10 @end @end + + # Add prompt category + @category + @end + + # Add custom metadata @metadata - # Add prompt metadata here @end @end ``` -See [prompt.pml](./prompt.pml) to see an example. +See [prompt.pml](./prompt.pml) for the complete syntax. ## Design + Regular text prompts are very abstract in nature. Natural languages are very flexible but provides least reliability. How to provide context for an AI system and ask something ? Shouldn't we specify that explicitly. PromptML is an attempt to make contents of a prompt explicit with a simple language. ## Core tenets of PromptML + Below are the qualities PromptML brings to prompt engineering domain: 1. Standardization instead of fragmentation @@ -61,8 +79,8 @@ Below are the qualities PromptML brings to prompt engineering domain: 3. Enabling version control-ability 4. 
Promoting verbosity for better results - ## Why not use XML, YAML, or JSON for PromptML ? + First, XML, JSON, and YAML are not DSL languages. They are data formats that can represent any form of data. Second, generative AI needs a strict, yet flexible data language with fixed constraints which evolve along with the domain. PromptML is built exactly to solve those two issues. @@ -72,10 +90,13 @@ Language grammar is influenced by XML & Ruby, so if you know any one of them, yo ## Usage 1. Install Python requirements + ```bash pip install -r requirements.txt ``` + 2. import the parser and parse a promptML file + ```py from promptml.parser import PromptParser @@ -106,17 +127,18 @@ promptml_code = ''' @end @end + @category + Prompt Management + @end + @constraints - @length min: 1 max: 10 + @length min: 1 max: 10 @end @end @metadata - @domain - Web Development - @end - @difficulty - Advaned - @end + top_p: 0.9 + n: 1 + team: promptml @end @end ''' @@ -128,17 +150,38 @@ print(prompt) # Output: { # 'context': 'This is the context section.', # 'objective': 'This is the objective section.', +# 'category': 'Prompt Management', # 'instructions': ['Step 1'], # 'examples': [ # {'input': 'Input example 1', 'output': 'Output example 1'} # ], # 'constraints': {'length': {'min': 1, 'max': 10}}, -# 'metadata': {'domain': 'Web Development', 'difficulty': 'Advanced'} +# 'metadata': {'top_p': 0.9, 'n': 1, 'team': 'promptml'} # } +``` + +## Defining variables + +You can define variables in the promptML file and use them in the prompt `context` and `objective`. The variables are defined in the `@vars` section and referenced using `$var` syntax in either `context` or `objective` sections. + +```pml +@vars + name = "John Doe" +@end + +@prompt + @context + You are a name changing expert. + @end + @objective + You have to change the name: $name to an ancient name. + @end +@end ``` ## TODO + We are currently working on: 1. 
Supporting more annotations (Ex: temperature, top_p) diff --git a/prompt.pml b/prompt.pml index d84c416..770dc28 100644 --- a/prompt.pml +++ b/prompt.pml @@ -1,72 +1,87 @@ -# Prompt Description for a task +# Define prompt variables +@vars + role = 'highly skilled and experienced software developer' +@end + +# Define prompt @prompt # Context is used to provide background information or context for the task @context - You are a highly skilled and experienced software developer with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. + You are a $role with expertise in various programming languages and frameworks. You have been tasked with creating a new web application for a social media platform. @end # Objective is used to define the main goal or objective of the task @objective - Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. + Design and implement the core architecture and components for a scalable and efficient web application that can handle a large number of concurrent users while providing a seamless and responsive user experience. @end # Instructions are used to provide detailed steps or guidelines for completing the task @instructions - # steps can be used to break down the task into smaller parts - @step - Identify the key features and requirements of the web application based on the provided context. - @end - @step - Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. - @end - @step - Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. 
- @end - @step - Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. - @end - @step - Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. - @end - @step - Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. - @end + # steps can be used to break down the task into smaller parts + @step + Identify the key features and requirements of the web application based on the provided context. + @end + @step + Propose a suitable architecture (e.g., monolithic, microservices, etc.) and justify your choice. + @end + @step + Outline the essential components or modules of the application, such as user authentication, data storage, real-time communication, and so on. + @end + @step + Discuss the potential technologies, frameworks, and tools you would use to implement each component, highlighting their strengths and trade-offs. + @end + @step + Address scalability and performance concerns, including techniques for load balancing, caching, and database optimization. + @end + @step + Describe how you would ensure the security and privacy of user data, including authentication, authorization, and data encryption. + @end @end # Examples are used to provide sample inputs and outputs for the task @examples - @example - @input - Design the core architecture and components for a large-scale e-commerce web application. + @example + @input + Design the core architecture and components for a large-scale e-commerce web application. + @end + @output + For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... 
+ @end @end - @output - For a large-scale e-commerce web application, a microservices architecture would be suitable due to its inherent scalability and flexibility... + @example + @input + Outline main components for a large-scale e-commerce web application. + @end + @output + Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... + @end @end - @end - @example - @input - Outline main components for a large-scale e-commerce web application. - @end - @output - Product Catalog, User Management, Order Processing, Payment Gateway, Search Engine, Recommendation Engine are the main components of a large-scale e-commerce web application... - @end - @end @end # Constraints are used to specify any limitations or restrictions for the task @constraints - @length - min: 1000 - max: 3000 - @end - @tone - Professional and technical - @end + @length + min: 1000 + max: 3000 + @end + @tone + Professional and technical + @end + @difficulty + Advanced + @end + @end + + # Category is used to classify the task + @category + Software Engineering @end - # Metadata includes information such as domain, difficulty, skills, and tags + # Metadata holds custom key: value properties such as top_p, temperature, n, etc. 
@metadata - @domain Software Engineering, Web Development @end - @difficulty Advanced @end + top_p: 0.6 + temperature: 0.5 + n: 1 + internal: 'true' @end @end diff --git a/promptml.jpeg b/promptml.jpeg new file mode 100644 index 0000000..21d1a8d Binary files /dev/null and b/promptml.jpeg differ diff --git a/src/promptml/grammar.lark b/src/promptml/grammar.lark index 6911aa6..b222340 100644 --- a/src/promptml/grammar.lark +++ b/src/promptml/grammar.lark @@ -1,38 +1,54 @@ -prompt: "@prompt" sections "@end" +# PromptML Grammar # -sections: section+ +start: block+ +block: prompt | var_block + +var_block: "@vars" assignment* "@end" +assignment: VAR_NAME "=" (NUMBER | STRING | FLOAT) +VAR_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ + +prompt: "@prompt" section* "@end" section: context | objective | instructions | examples | constraints + | category | metadata context: "@context" text "@end" objective: "@objective" text "@end" -instructions: "@instructions" instruction+ "@end" +instructions: "@instructions" instruction* "@end" instruction: "@step" text "@end" -examples: "@examples" example+ "@end" +examples: "@examples" example* "@end" example: "@example" input output "@end" input: "@input" text "@end" output: "@output" text "@end" -constraints: "@constraints" constraint+ "@end" -constraint: length | tone +constraints: "@constraints" constraint* "@end" +constraint: length | tone | difficulty +difficulty: "@difficulty" text "@end" length: "@length" "min:" INT "max:" INT "@end" tone: "@tone" text "@end" -metadata: "@metadata" meta+ "@end" -meta: domain | difficulty -domain: "@domain" text "@end" -difficulty: "@difficulty" text "@end" +category: "@category" text "@end" + +metadata: "@metadata" prop* "@end" +prop: PROP_NAME ":" (NUMBER | STRING ) +PROP_NAME: /[a-zA-Z_][a-zA-Z0-9_]*/ -text: /[^@]+/ +# Token Definitions # +STRING: /'[^']*'/ | /"[^"]*"/ +text: /[^@]+/ +# Ignored Tokens # %import common.WS +%import common.NUMBER +%import common.STRING +%import common.FLOAT %ignore /\#.*/ // 
Ignore comments %ignore WS %import common.INT diff --git a/src/promptml/parser.py b/src/promptml/parser.py index 680e710..0ba1d29 100644 --- a/src/promptml/parser.py +++ b/src/promptml/parser.py @@ -1,87 +1,74 @@ """ This module provides a PromptParser class for parsing DSL code and extracting prompt information. -The PromptParser class can parse DSL code and extract sections such as context, +The PromptParser class can parse DSL code and extract sections such as context, objective, instructions, examples, constraints, and metadata from the code. It uses regular expressions to search for specific patterns in the DSL code and extract the corresponding content. Example usage: dsl_code = ''' - @prompt - @context - This is the context section. - @end - - @objective - This is the objective section. - @end - - @instructions - These are the instructions. - @end - - @examples - @example - @input - Input example 1 - @end - @output - Output example 1 - @end - @end - @end - - @constraints - @length min: 1 max: 10 - @end - - @metadata - @domain - Domain example - @end - @difficulty - Difficulty example - @end - @end + ... ''' parser = PromptParser(dsl_code) prompt = parser.parse() - - print(prompt) - # Output: { - # 'context': 'This is the context section.', - # 'objective': 'This is the objective section.', - # 'instructions': 'These are the instructions.', - # 'examples': [ - # {'input': 'Input example 1', 'output': 'Output example 1'} - # ], - # 'constraints': {'length': {'min': 1, 'max': 10}}, - # 'metadata': {'domain': 'Domain example', 'difficulty': 'Difficulty example'} - # } """ import json import os - +import re from lark import Lark, Transformer class PromptMLTransformer(Transformer): """ - A class for transforming the parsed PromptML code into a structured format. + A class for transforming the parsed PromptML tree into a Python dictionary. 
""" + + def start(self, items): + """ Extract the start section content.""" + prompt = {} + vars_ = {} + for item in items: + if item["type"] == "vars": + vars_ = item["data"] + elif item["type"] == "prompt": + prompt = item["data"] + + # context seems to be a keyword in Python, so we'll use context_ instead + context_ = prompt["context"] + objective = prompt["objective"] + + # Replace variables in context and objective with values + for k,v in vars_.items(): + context_ = context_.replace(r'$' + k, v.replace("'", '').replace('"', '')) + objective = objective.replace(r'$' + k, v.replace("'", '').replace('"', '')) + + prompt["context"] = context_ + prompt["objective"] = objective + + return prompt + + def block(self, items): + """ Extract the block content.""" + return items[0] + + def category(self, items): + """ Extract the category content.""" + return {"category": items[0].strip()} + def prompt(self, items): """ Extract the prompt content.""" sections = {} - tree = items[0] - for child in tree.children: - if child.data == "section": + for child in items: + if hasattr(child, "data") and child.data == "section": data = child.children[0] sections.update(data) + else: + sections.update(child) - return sections + return {"type": "prompt", "data": sections} def context(self, items): """ Extract the context section content.""" @@ -129,29 +116,56 @@ def tone(self, items): """ Extract the tone constraint content.""" return {"tone": items[0].strip()} + def difficulty(self, items): + """ Extract the difficulty constraint content.""" + return {"difficulty": items[0].strip()} + + def var_block(self, items): + """ Extract the variable block content.""" + var_map = {} + + for item in items: + var_symbol = item.children[0].strip() + var_value = item.children[1].strip() + var_map[var_symbol] = var_value + + return {"type": "vars", "data": var_map} + def metadata(self, items): - """ Extract the metadata section content.""" + """ + Extracts the metadata section content. 
+ + Args: + items (list): A list of items representing the metadata section content. + + Returns: + dict: A dictionary containing the extracted metadata section content. + """ metadata = {} - for item in items: - child = item.children[0] - for k,v in child.items(): - metadata[k] = v.strip() + for item in items: + key = item.children[0].strip() + if key: + prop_type = item.children[1].type + value = item.children[1].strip() - return {"metadata": metadata} + if prop_type == "NUMBER": + try: + value = int(value) + except ValueError: + value = float(value) + elif prop_type == "STRING": + value = value.strip("\"").strip("\'") - def domain(self, items): - """ Extract the domain metadata content.""" - return {"domain": items[0]} + metadata[key] = value - def difficulty(self, items): - """ Extract the difficulty metadata content.""" - return {"difficulty": items[0]} + return {"metadata": metadata} def text(self, items): """ Extract the text content.""" return items[0] + class PromptParser: """A class for parsing prompt markup language code and extract information. """ @@ -167,7 +181,7 @@ def __init__(self, code: str): self.code = code self.prompt = {} - self.parser = Lark(promptml_grammar, start="prompt") + self.parser = Lark(promptml_grammar) def parse(self): """ @@ -197,11 +211,19 @@ def deserialize_json(self, serialized_data): """ self.prompt = json.loads(serialized_data) + class PromptParserFromFile(PromptParser): """ A subclass of PromptParser that reads DSL code from a file. """ - def __init__(self, file_path): + def __init__(self, file_path: str): + """ + Initializes the PromptParserFromFile object by reading the DSL code from the specified file path + and passing it to the parent class constructor. + + Args: + file_path (str): The path to the DSL code file. 
+ """ with open(file_path, 'r', encoding='utf-8') as f: dsl_code = f.read() super().__init__(dsl_code) diff --git a/tests/test_parser.py b/tests/test_parser.py index 72dcc88..a913f77 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,11 +1,11 @@ -""" Test cases for prompt parser +""" Test cases for prompt parser """ from src.promptml.parser import PromptParserFromFile from unittest import TestCase class TestPromptParser(TestCase): - # read prompt from prompt.aiml file + # read prompt from prompt.pml file def setUp(self): self.prompt_parser = PromptParserFromFile('prompt.pml') @@ -57,14 +57,17 @@ def test_parse(self): "min": 1000, "max": 3000 }, - "tone": "Professional and technical" + "tone": "Professional and technical", + "difficulty": "Advanced" } ) self.assertEqual( res["metadata"], { - "domain": "Software Engineering, Web Development", - "difficulty": "Advanced" + "top_p": 0.6, + "temperature": 0.5, + "n": 1, + "internal": "true" } )