Merge pull request #46 from lallouslab/idxtool

LouisShark · Dec 12, 2023 · c022623 · c022623
2 parents 4b83569 + acb6cf7
commit c022623
Show file tree

Hide file tree

Showing 13 changed files with 784 additions and 271 deletions.
diff --git a/.gitignore b/.gitignore
@@ -375,9 +375,14 @@ TSWLatexianTemp*
 # xwatermark package
 *.xwm
 
+# *.pyc files
+*.pyc
+
 # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
 # option is specified. Footnotes are then stored in a file with suffix Notes.bib.
 # Uncomment the next line to have this generated file ignored.
 #*Notes.bib
 
 .DS_Store
+
+__pycache__/
diff --git a/.scripts/README.md b/.scripts/README.md
@@ -0,0 +1,72 @@
+# idxtool
+
+`idxtool` is a GPT indexing and searching tool for the CSP (ChatGPT System Prompt) repo.
+
+## Command line
+
+```
+usage: idxtool.py [-h] [--update-logo UPDATE_LOGO] [--toc [TOC]]
+                  [--update-description UPDATE_DESCRIPTION]
+                  [--find-gptfile FIND_GPTFILE] [--find-gpttoc FIND_GPTTOC]
+                  [--parse-gptfile PARSE_GPTFILE] [--rename RENAME]
+
+idxtool: A GPT indexing and searching tool for the CSP repo
+
+options:
+  -h, --help            show this help message and exit
+  --update-logo UPDATE_LOGO
+                        Update the logos of the GPT file
+  --toc [TOC]           Rebuild the table of contents (TOC.md) file
+  --update-description UPDATE_DESCRIPTION
+                        Update the descriptions of the GPT file
+  --find-gptfile FIND_GPTFILE
+                        Find a GPT by its ID or name
+  --find-gpttoc FIND_GPTTOC
+                        Searches the TOC.md file for the given gptid or free
+                        style string
+  --parse-gptfile PARSE_GPTFILE
+                        Parses a GPT file name
+  --rename RENAME       Rename the file name to include its GPT ID
+```
+
+## Features
+
+- Update Logos: Use `--update-logo [filename]` to update the logos of the GPT file.
+- Rebuild TOC: Use `--toc` to rebuild the table of contents (TOC.md) file.
+- Update Descriptions: Use `--update-description [filename]` to update the descriptions of the GPT file.
+- Find GPT File: Use `--find-gptfile [gptid or gpt name in quotes]` to find a GPT by its ID or name.
+- Find GPT in TOC: Use `--find-gpttoc [gptid or string]` to search the TOC.md file for a given GPT ID or free-form string.
+- Rename GPT: Use `--rename [filename]` to rename the file so that its name includes its GPT ID.
+- Help: Use `--help` to display the help message and usage instructions.
+
+## Usage
+
+To use the tool, run the following command in your terminal with the appropriate arguments:
+
+```bash
+python idxtool.py [arguments]
+```
+
+Replace `[arguments]` with one of the feature commands listed above.
+
+## Example
+
+To update the logos of a GPT file named `example_gpt.json`, run:
+
+```bash
+python idxtool.py --update-logo example_gpt.json
+```
+
+## Installation
+
+No additional installation is required. Ensure that Python 3.8 or later is installed on your system to run the tool (the scripts use assignment expressions, `:=`, which older versions do not support).
+
+## Contributing
+
+Contributions to `idxtool` are welcome. Please submit pull requests or issues to the CSP repo for review.
+
+## License
+
+This tool is open-sourced under the GNU General Public License (GPL). Under this license, you are free to use, modify, and redistribute this software, provided that all copies and derivative works are also licensed under the GPL.
+
+For more details, see the [GPLv3 License](https://www.gnu.org/licenses/gpl-3.0.html).
diff --git a/.scripts/gptparser.py b/.scripts/gptparser.py
@@ -0,0 +1,145 @@
+"""
+GPT parsing module.
+
+The GPT markdown files have to adhere to a very specific format described in the README.md file in the root of the CSP project.
+"""
+
+import os, re
+from collections import namedtuple
+from typing import Union, Tuple, Generator
+
+# Common prefix of a GPT link; GptMarkdownFile.id() reads the identifier that
+# follows this prefix in the 'url' field.
+GPT_BASE_URL = 'https://chat.openai.com/g/g-'
+# Length of the prefix, computed once and used to slice the identifier off a URL.
+GPT_BASE_URL_L = len(GPT_BASE_URL)
+# Word that introduces every field line of a GPT markdown file ("GPT <field>: ...").
+FIELD_PREFIX = 'GPT'
+
+# Captures the text between a "[" and "]" pair that sits immediately before a
+# trailing ".md" (case-insensitive); GptMarkdownFile.get('version') applies it
+# to the file name.
+GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)
+
+# order:   sort position of the field when a file is rendered back to text.
+# display: label written after FIELD_PREFIX when a file is rendered back to text.
+# NOTE(review): the namedtuple typename is 'FieldInfo' while the bound name is
+# GptFieldInfo, so repr() shows 'FieldInfo(...)' - confirm whether that is intended.
+GptFieldInfo = namedtuple('FieldInfo', ['order', 'display'])
+# id:   the part of the URL tail before its first '-'.
+# name: the remainder of the URL tail after that '-' ('' when there is none).
+GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])
+
+# Description of the fields supported by GPT markdown files.
+SUPPORTED_FIELDS = {
+    'url':              GptFieldInfo(0, 'URL'),
+    'title':            GptFieldInfo(1, 'Title'),
+    'description':      GptFieldInfo(2, 'Description'),
+    'logo':             GptFieldInfo(3, 'Logo'),
+    'instructions':     GptFieldInfo(4, 'Instructions'),
+    'actions':          GptFieldInfo(5, 'Actions'),
+    'kb_files_list':    GptFieldInfo(6, 'KB Files List'),
+    'extras':           GptFieldInfo(7, 'Extras')
+}
+"""
+Dictionary of the fields supported by GPT markdown files:
+- The key should always be in lower case
+- The GPT markdown file will have the form: {FIELD_PREFIX} {key}: {value}
+"""
+
+class GptMarkdownFile:
+    """
+    A class to represent a GPT markdown file.
+
+    The content is held in ``fields``, a dict that maps lower-case field keys
+    (the keys of ``SUPPORTED_FIELDS``) to their text, together with the name of
+    the file the content belongs to.
+    """
+    def __init__(self, fields=None, filename: str = '') -> None:
+        """
+        :param fields: dict mapping field keys to their text. The dict is stored
+                       as-is (not copied); a new empty dict is created when omitted.
+        :param filename: str, name or path of the markdown file.
+        """
+        # A literal `{}` default would be created once and shared by every
+        # instance built without `fields`, so build a fresh dict per instance.
+        self.fields = {} if fields is None else fields
+        self.filename = filename
+
+    def get(self, key: str, strip: bool = True) -> Union[str, None]:
+        """
+        Return the value of the field with the specified key.
+
+        The pseudo-field 'version' is not stored in ``fields``: it is the text
+        found between square brackets right before the '.md' extension of the
+        file name, or '' when the file name has no such part.
+
+        :param key: str, key of the field (case-insensitive).
+        :param strip: bool, strip surrounding whitespace from the value.
+        :return: str, value of the field, or None when the field is absent.
+        """
+        key = key.lower()
+        if key == 'version':
+            m = GPT_FILE_VERSION_RE.search(self.filename)
+            return m.group(1) if m else ''
+
+        v = self.fields.get(key)
+        if v is None:
+            # Absent field: report None instead of failing on None.strip().
+            return None
+        return v.strip() if strip else v
+
+    def id(self) -> Union['GptIdentifier', None]:
+        """
+        Return the GPT identifier.
+
+        The identifier is read from the first line of the 'url' field, after
+        the GPT base URL. The text before the first '-' is the id and the rest
+        is the name ('' when there is no '-').
+
+        :return: GptIdentifier object, or None when the 'url' field is empty
+                 or does not start with the GPT base URL.
+        """
+        url = self.fields.get('url')
+        if url and url.startswith(GPT_BASE_URL):
+            # Only the first line of the field carries the link.
+            ident = url[GPT_BASE_URL_L:].split('\n')[0]
+            i = ident.find('-')
+            if i != -1:
+                return GptIdentifier(ident[:i], ident[i+1:].strip())
+            else:
+                return GptIdentifier(ident, '')
+        return None
+
+    def __str__(self) -> str:
+        """
+        Render the non-empty fields as text, one '{FIELD_PREFIX} {display}: {value}'
+        entry per field, in the order given by SUPPORTED_FIELDS and joined by CRLF.
+
+        :raises KeyError: if ``fields`` holds a key that is not in SUPPORTED_FIELDS.
+        """
+        sorted_fields = sorted(self.fields.items(), key=lambda x: SUPPORTED_FIELDS[x[0]].order)
+        # Check if the field value contains the start marker of the markdown block and add a blank line before it
+        field_strings = []
+        for key, value in sorted_fields:
+            if value:
+                # Only replace the first occurrence of ```markdown
+                modified_value = value.replace("```markdown", "\r\n```markdown", 1)
+                field_string = f"{FIELD_PREFIX} {SUPPORTED_FIELDS[key].display}: {modified_value}"
+                field_strings.append(field_string)
+        return "\r\n".join(field_strings)
+
+    @staticmethod
+    def parse(file_path: str) -> Tuple[bool, Union['GptMarkdownFile', str]]:
+        """
+        Parse a markdown file into a GptMarkdownFile object.
+
+        A line that starts with '{FIELD_PREFIX} {key}:' (case-insensitive, leading
+        whitespace allowed) opens a field; the rest of that line, stripped, and
+        every following line up to the next field line belong to it. Lines that
+        appear before the first field line are ignored.
+
+        :param file_path: str, path to the markdown file.
+        :return: (True, GptMarkdownFile) if successful, otherwise (False, error message).
+        """
+        if not os.path.exists(file_path):
+            return (False, f"File '{file_path}' does not exist.")
+
+        fields = {key.lower(): [] for key in SUPPORTED_FIELDS.keys()}
+        # Raw f-string so that `\s` reaches the regex engine untouched instead
+        # of being an invalid string escape.
+        # NOTE(review): the alternation lists the dictionary keys (e.g.
+        # 'kb_files_list'), while __str__ writes the display names (e.g.
+        # 'KB Files List'); a rendered multi-word field is therefore not
+        # recognised here - confirm which form the files are meant to use.
+        field_re = re.compile(rf"^\s*{FIELD_PREFIX}\s+({'|'.join(fields.keys())}):", re.IGNORECASE)
+
+        with open(file_path, 'r', encoding='utf-8') as file:
+            current_field = None
+            for line in file:
+                if m := field_re.match(line):
+                    current_field = m.group(1).lower()
+                    # Keep only the value that follows the 'GPT <field>:' marker.
+                    line = line[len(m.group(0)):].strip()
+
+                if current_field:
+                    # Guards against a SUPPORTED_FIELDS key that is not lower case.
+                    if current_field not in SUPPORTED_FIELDS:
+                        return (False, f"Field '{current_field}' is not supported.")
+
+                    fields[current_field].append(line)
+
+        gpt = GptMarkdownFile(
+            {key: ''.join(value) for key, value in fields.items()},
+            filename=file_path)
+        return (True, gpt)
+
+    def save(self, file_path: str) -> Tuple[bool, Union[str, None]]:
+        """
+        Save the GptMarkdownFile object to a markdown file.
+
+        :param file_path: str, path to the markdown file.
+        :return: (True, None) if successful, otherwise (False, error message).
+        """
+        try:
+            with open(file_path, 'w', encoding='utf-8') as file:
+                file.write(str(self))
+            return (True, None)
+        except Exception as e:
+            # Errors are reported through the return value rather than raised.
+            return (False, f"Failed to save file '{file_path}': {e}")
+
+
+def get_prompts_path() -> str:
+    """Return the absolute path of the 'prompts/gpts' directory that sits one level above this script's directory."""
+    script_dir = os.path.dirname(__file__)
+    relative_path = os.path.join(script_dir, '..', 'prompts', 'gpts')
+    return os.path.abspath(relative_path)
+
+def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
+    """
+    Enumerate all the GPT files in the prompts directory.
+
+    Every directory entry whose extension is exactly '.md' is parsed. Yields
+    (True, GptMarkdownFile) for a file that parsed and (False, error message)
+    for one that did not.
+    """
+    base_dir = get_prompts_path()
+    for entry in os.listdir(base_dir):
+        if os.path.splitext(entry)[1] == '.md':
+            full_path = os.path.join(base_dir, entry)
+            ok, result = GptMarkdownFile.parse(full_path)
+            if not ok:
+                # On failure `result` holds the parser's error message.
+                yield (False, f"Failed to parse '{full_path}': {result}")
+            else:
+                yield (True, result)