Skip to content

Commit

Permalink
Merge pull request #46 from lallouslab/idxtool
Browse files Browse the repository at this point in the history
  • Loading branch information
LouisShark authored Dec 12, 2023
2 parents 4b83569 + acb6cf7 commit c022623
Show file tree
Hide file tree
Showing 13 changed files with 784 additions and 271 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -375,9 +375,14 @@ TSWLatexianTemp*
# xwatermark package
*.xwm

# *.pyc files
*.pyc

# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

.DS_Store

__pycache__/
72 changes: 72 additions & 0 deletions .scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# idxtool

`idxtool` is a GPT indexing and searching tool for the CSP (ChatGPT System Prompt) repo.

## Command line

```
usage: idxtool.py [-h] [--update-logo UPDATE_LOGO] [--toc [TOC]]
[--update-description UPDATE_DESCRIPTION]
[--find-gptfile FIND_GPTFILE] [--find-gpttoc FIND_GPTTOC]
[--parse-gptfile PARSE_GPTFILE] [--rename RENAME]
idxtool: A GPT indexing and searching tool for the CSP repo
options:
-h, --help show this help message and exit
--update-logo UPDATE_LOGO
Update the logos of the GPT file
--toc [TOC] Rebuild the table of contents (TOC.md) file
--update-description UPDATE_DESCRIPTION
Update the descriptions of the GPT file
--find-gptfile FIND_GPTFILE
Find a GPT by its ID or name
--find-gpttoc FIND_GPTTOC
Searches the TOC.md file for the given gptid or free
style string
--parse-gptfile PARSE_GPTFILE
Parses a GPT file name
--rename RENAME Rename the file name to include its GPT ID
```

## Features

- Update Logos: Use `--update-logo [filename]` to update the logos of the GPT file.
- Rebuild TOC: Use `--toc` to rebuild the table of contents (TOC.md) file.
- Update Descriptions: Use `--update-description [filename]` to update the descriptions of the GPT file.
- Find GPT File: Use `--find-gptfile [gptid or gpt name in quotes]` to find a GPT by its ID or name.
- Find GPT in TOC: Use `--find-gpttoc [gptid or string]` to search the TOC.md file for a given gptid or free style string.
- Rename GPT: Use `--rename [filename]` to rename the file name to include its GPT ID.
- Help: Use `--help` to display the help message and usage instructions.

## Usage

To use the tool, run the following command in your terminal with the appropriate arguments:

```bash
python idxtool.py [arguments]
```

Replace `[arguments]` with one of the feature commands listed above.

## Example

To update the logos of a GPT file named `example_gpt.json`, run:

```bash
python idxtool.py --update-logo example_gpt.json
```

## Installation

No additional installation is required. Ensure that you have Python installed on your system to run the tool.

## Contributing

Contributions to `idxtool` are welcome. Please submit pull requests or issues to the CSP repo for review.

## License

This tool is open-sourced under the GNU General Public License (GPL). Under this license, you are free to use, modify, and redistribute this software, provided that all copies and derivative works are also licensed under the GPL.

For more details, see the [GPLv3 License](https://www.gnu.org/licenses/gpl-3.0.html).
145 changes: 145 additions & 0 deletions .scripts/gptparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
GPT parsing module.
The GPT markdown files have to adhere to a very specific format described in the README.md file in the root of the CSP project.
"""

import os, re
from collections import namedtuple
from typing import Union, Tuple, Generator

# Common prefix of a GPT URL; the text following it is treated as the GPT identifier.
GPT_BASE_URL = 'https://chat.openai.com/g/g-'
GPT_BASE_URL_L = len(GPT_BASE_URL)
# Leading word of every field header line in a GPT markdown file.
FIELD_PREFIX = 'GPT'

# Captures the text inside a trailing "[...]" that sits right before the ".md"
# extension of a file name; that text is reported as the file 'version'.
GPT_FILE_VERSION_RE = re.compile(r'\[([^]]*)\]\.md$', re.IGNORECASE)

# The typename matches the variable name so that repr() and pickling refer to
# a name that actually exists in this module.
GptFieldInfo = namedtuple('GptFieldInfo', ['order', 'display'])
GptIdentifier = namedtuple('GptIdentifier', ['id', 'name'])

# Description of the fields supported by GPT markdown files:
# - The key should always be in lower case.
# - `order` is the position of the field when a file is serialized.
# - `display` is the field label emitted in the header when a file is serialized.
# - A field in the GPT markdown file has the form: {FIELD_PREFIX} {field name}: {value}
SUPPORTED_FIELDS = {
    'url': GptFieldInfo(0, 'URL'),
    'title': GptFieldInfo(1, 'Title'),
    'description': GptFieldInfo(2, 'Description'),
    'logo': GptFieldInfo(3, 'Logo'),
    'instructions': GptFieldInfo(4, 'Instructions'),
    'actions': GptFieldInfo(5, 'Actions'),
    'kb_files_list': GptFieldInfo(6, 'KB Files List'),
    'extras': GptFieldInfo(7, 'Extras'),
}

class GptMarkdownFile:
    """
    A class to represent a GPT markdown file.

    `fields` maps lower-case field keys (see SUPPORTED_FIELDS) to their text,
    and `filename` is the path the file was parsed from (may be empty).
    """
    def __init__(self, fields: Union[dict, None] = None, filename: str = '') -> None:
        # Create a fresh dict per instance: a `{}` default would be shared by
        # every instance constructed without an explicit `fields` argument.
        self.fields = {} if fields is None else fields
        self.filename = filename

    def get(self, key: str, strip: bool = True) -> Union[str, None]:
        """
        Return the value of the field with the specified key.

        The pseudo-field 'version' is derived from the file name instead of
        the stored fields.

        :param key: str, key of the field (case-insensitive).
        :param strip: bool, strip surrounding whitespace from the value.
        :return: str, value of the field; None when the field is absent.
                 For 'version', an empty string when the file name carries none.
        """
        key = key.lower()
        if key == 'version':
            m = GPT_FILE_VERSION_RE.search(self.filename)
            return m.group(1) if m else ''

        value = self.fields.get(key)
        if value is None:
            # Absent field: report None rather than failing on None.strip().
            return None
        return value.strip() if strip else value

    def id(self) -> Union['GptIdentifier', None]:
        """
        Return the GPT identifier extracted from the 'url' field.

        :return: GptIdentifier(id, name), with an empty name when the URL has
                 no '-' after the id; None when the URL is missing or does
                 not start with GPT_BASE_URL.
        """
        url = self.fields.get('url')
        if not url or not url.startswith(GPT_BASE_URL):
            return None

        # Only the first line of the field holds the URL itself.
        slug = url[GPT_BASE_URL_L:].split('\n')[0]
        # The id runs up to the first '-'; everything after it is the name.
        gpt_id, _, name = slug.partition('-')
        return GptIdentifier(gpt_id, name.strip())

    def __str__(self) -> str:
        """
        Serialize the non-empty fields in SUPPORTED_FIELDS order, one
        "{FIELD_PREFIX} {display}: {value}" entry per field, joined by CRLF.
        """
        sorted_fields = sorted(self.fields.items(), key=lambda x: SUPPORTED_FIELDS[x[0]].order)
        field_strings = []
        for key, value in sorted_fields:
            if not value:
                continue
            # Put a line break before the start marker of the markdown block;
            # only the first occurrence of ```markdown is touched.
            modified_value = value.replace("```markdown", "\r\n```markdown", 1)
            field_strings.append(f"{FIELD_PREFIX} {SUPPORTED_FIELDS[key].display}: {modified_value}")
        return "\r\n".join(field_strings)

    @staticmethod
    def parse(file_path: str) -> Tuple[bool, Union['GptMarkdownFile', str]]:
        """
        Parse a markdown file and return a GptMarkdownFile object.

        A field starts at a line of the form "{FIELD_PREFIX} {name}:" where
        {name} is either a SUPPORTED_FIELDS key or its display label (matched
        case-insensitively), and extends until the next such line. Lines
        before the first field header are ignored.

        :param file_path: str, path to the markdown file.
        :return: (True, GptMarkdownFile) if successful, otherwise (False, error message).
        """
        if not os.path.exists(file_path):
            return (False, f"File '{file_path}' does not exist.")

        fields = {key.lower(): [] for key in SUPPORTED_FIELDS}

        # Accept both the key and the display label of every field so that a
        # file written by __str__ (which emits display labels such as
        # "KB Files List") can be parsed back.
        labels = {}
        for key, info in SUPPORTED_FIELDS.items():
            labels[key.lower()] = key.lower()
            labels[info.display.lower()] = key.lower()
        alternatives = '|'.join(re.escape(label) for label in labels)
        field_re = re.compile(
            rf"^\s*{re.escape(FIELD_PREFIX)}\s+({alternatives}):",
            re.IGNORECASE)

        try:
            with open(file_path, 'r', encoding='utf-8') as file:
                current_field = None
                for line in file:
                    if m := field_re.match(line):
                        label = m.group(1).lower()
                        current_field = labels.get(label)
                        if current_field is None:
                            # Case-insensitive matching can accept characters
                            # whose lower-case form is not a known label.
                            return (False, f"Field '{label}' is not supported.")
                        # Keep only the text after the header. It is stripped
                        # (newline included), so it is joined directly to the
                        # next line of the same field.
                        line = line[len(m.group(0)):].strip()

                    if current_field:
                        fields[current_field].append(line)
        except (OSError, UnicodeDecodeError) as e:
            # Report unreadable files the same way save() reports write errors.
            return (False, f"Failed to read file '{file_path}': {e}")

        gpt = GptMarkdownFile(
            {key: ''.join(value) for key, value in fields.items()},
            filename=file_path)
        return (True, gpt)

    def save(self, file_path: str) -> Tuple[bool, Union[str, None]]:
        """
        Save the GptMarkdownFile object to a markdown file.

        :param file_path: str, path to the markdown file.
        :return: (True, None) on success, otherwise (False, error message).
        """
        try:
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(str(self))
            return (True, None)
        except Exception as e:
            return (False, f"Failed to save file '{file_path}': {e}")


def get_prompts_path() -> str:
    """Return the absolute path of the `prompts/gpts` directory that sits one level above this module's directory."""
    module_dir = os.path.dirname(__file__)
    relative = os.path.join(module_dir, '..', 'prompts', 'gpts')
    return os.path.abspath(relative)

def enum_gpts() -> Generator[Tuple[bool, Union[GptMarkdownFile, str]], None, None]:
    """
    Enumerate all the GPT files in the prompts directory.

    Every directory entry whose extension is exactly '.md' is parsed.
    Yields (True, GptMarkdownFile) for each file that parses and
    (False, error message) for each one that does not.
    """
    base_dir = get_prompts_path()
    for entry in os.listdir(base_dir):
        # Skip anything that is not a markdown file.
        if os.path.splitext(entry)[1] != '.md':
            continue

        full_path = os.path.join(base_dir, entry)
        ok, result = GptMarkdownFile.parse(full_path)
        if not ok:
            yield (False, f"Failed to parse '{full_path}': {result}")
            continue
        yield (True, result)
Loading

0 comments on commit c022623

Please sign in to comment.