Skip to content

Commit

Permalink
2503 doc autogen w/ commit signature (#2570)
Browse files Browse the repository at this point in the history
* doc: autogen documentation updates

* Remove generated files

* chore: Update MD file [skip ci]

* add SDK doc gen

* chore: Update MD files [skip ci]

* chore: Update MD files [skip ci]

* chore: Update MD files [skip ci]

* no op

* chore: Update MD files [skip ci]

* update makefile comment to trigger CI

* chore: Update MD files [skip ci]

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Raymond Sukanto <[email protected]>
Co-authored-by: arturrez <[email protected]>
  • Loading branch information
4 people authored Jan 30, 2025
1 parent 8c51edd commit 4d7708c
Show file tree
Hide file tree
Showing 7 changed files with 4,742 additions and 2 deletions.
262 changes: 262 additions & 0 deletions .github/scripts/cli_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
import subprocess
import json
import re

def replace_angle_brackets(text):
    """
    Rewrite every `<...>` span in the text as the same content in backticks.

    Angle-bracketed placeholders can be swallowed as HTML tags when the
    Markdown is rendered, so "<snapshotName>" becomes "`snapshotName`".
    Matching is non-greedy and never crosses a line break.
    """
    bracketed = re.compile(r'<(.*?)>')
    return bracketed.sub(r'`\1`', text)

def generate_anchor_id(cli_tool, command_chain):
    """
    Build the anchor ID of a command from its complete invocation path.

    The tool name and every element of the command chain are joined with
    hyphens and lowercased; any character that is neither a word character
    nor a hyphen is then dropped.
    Example:
        generate_anchor_id("avalanche", ["blockchain", "create"])
        -> "avalanche-blockchain-create"
    """
    path_parts = [cli_tool] + command_chain
    # Strip everything that is not safe inside an anchor.
    return re.sub(r'[^\w\-]', '', '-'.join(path_parts).lower())

def get_command_structure(cli_tool, command_chain=None, max_depth=10, current_depth=0, processed_commands=None):
    """
    Recursively build a dictionary describing a CLI tool by parsing `--help` output.

    Args:
        cli_tool: Name or path of the executable to inspect.
        command_chain: Subcommand names leading to the command being
            inspected; None or an empty list means the tool itself.
        max_depth: Recursion stops once current_depth exceeds this value.
        current_depth: Depth of this call; the recursion increments it.
        processed_commands: Memo of already parsed commands, keyed by the
            space-joined command line. A fresh dict is used when None.

    Returns:
        A dict with the keys "description" (str), "flags" (list of
        {"flag", "description"} dicts, local flags first, then global
        flags) and "subcommands" (dict mapping a subcommand name to a
        structure of the same shape). Returns None when the depth limit is
        exceeded, when running the command fails or times out, or when the
        command prints nothing.

    Progress, warnings and errors are reported with print().
    """
    if command_chain is None:
        command_chain = []
    if processed_commands is None:
        processed_commands = {}

    current_command = [cli_tool] + command_chain
    command_key = ' '.join(current_command)

    # Prevent re-processing of the same command
    if command_key in processed_commands:
        return processed_commands[command_key]

    # Prevent going too deep
    if current_depth > max_depth:
        return None

    print(f"Processing command: {command_key}")

    output = _run_help(current_command, command_key)
    if output is None:
        return None
    if not output.strip():
        print(f"[WARNING] No output for command: {command_key}")
        return None

    command_structure = {
        "description": "",
        "flags": [],
        "subcommands": {}
    }

    # The description is everything that precedes the "Usage:" line.
    description_match = re.search(r"(?s)^\s*(.*?)\n\s*Usage:", output)
    if description_match:
        description = description_match.group(1).strip()
        command_structure['description'] = replace_angle_brackets(description)

    # Local flags first, then the inherited "Global Flags:" section.
    flags = _extract_flags(output, "Flags:") + _extract_flags(output, "Global Flags:")
    if flags:
        command_structure["flags"] = [
            {
                "flag": names.strip(),
                "description": replace_angle_brackets(text.strip())
            }
            for names, text in flags
        ]

    # sorted(set(...)) removes duplicate lines and makes the traversal order
    # deterministic.
    for subcmd, sub_desc in sorted(set(_extract_subcommands(output))):
        sub_desc_clean = replace_angle_brackets(sub_desc.strip())
        sub_structure = get_command_structure(
            cli_tool,
            command_chain + [subcmd],
            max_depth,
            current_depth + 1,
            processed_commands
        )
        if sub_structure is not None:
            # Fall back to the one-line summary from the parent's listing
            # when the subcommand's own help has no description.
            if not sub_structure.get('description'):
                sub_structure['description'] = sub_desc_clean
            command_structure["subcommands"][subcmd] = sub_structure
        else:
            # The subcommand could not be inspected; keep a stub so it is
            # still listed.
            command_structure["subcommands"][subcmd] = {
                "description": sub_desc_clean,
                "flags": [],
                "subcommands": {}
            }

    processed_commands[command_key] = command_structure
    return command_structure


# Both line patterns separate the name from its description with horizontal
# whitespace only ([ \t]) and treat the description as optional. With \s the
# gap could run across a newline, so an entry that has no description would
# take the following line as its description and hide that entry.
_FLAG_LINE_RE = re.compile(
    r"^[ \t]+(-{1,2}[^\s,]+(?:,[ \t]*-{1,2}[^\s,]+)*)(?:[ \t]+(.*))?$",
    re.MULTILINE,
)
_SUBCOMMAND_LINE_RE = re.compile(r"^[ \t]+(\S+)(?:[ \t]+(.*))?$", re.MULTILINE)


def _run_help(command, label):
    """Run `command --help`; return its combined stdout/stderr text, or None on failure."""
    try:
        completed = subprocess.run(
            command + ["--help"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # help text may be printed on stderr
            text=True,
            timeout=10,
            stdin=subprocess.DEVNULL,  # the child gets no interactive input
        )
    except subprocess.TimeoutExpired:
        print(f"[ERROR] Timeout expired for command: {label}")
        return None
    except Exception as e:
        # Deliberately broad: one command that cannot be run must not abort
        # the whole scrape.
        print(f"[ERROR] Exception while running: {label} -> {e}")
        return None
    # Some CLIs return a non-zero exit code but still provide help text, so
    # the return code is not checked.
    return completed.stdout


def _extract_flags(output, header):
    """Return (flag names, description) pairs from the help section that starts with header."""
    section = re.search(
        rf"(?sm)^{re.escape(header)}\n(.*?)(?:\n\n|^\S|\Z)",
        output,
    )
    if not section:
        return []
    return _FLAG_LINE_RE.findall(section.group(1))


def _extract_subcommands(output):
    """Return (name, summary) pairs from the "Available Commands:" / "Commands:" section."""
    section = re.search(
        r"(?sm)(?:^Available Commands?:\n|^Commands?:\n)(.*?)(?:\n\n|^\S|\Z)",
        output,
    )
    if not section:
        return []
    # Lines like: "  create      Create a new something"
    return _SUBCOMMAND_LINE_RE.findall(section.group(1))

def generate_markdown(cli_structure, cli_tool, file_path):
    """
    Render the nested CLI structure as a Markdown reference and save it to file_path.

    The root node produces no output of its own. Every command below it gets
    an anchor, a heading, its description, a usage snippet, a linked index
    of its subcommands and an aligned block listing its flags.
    """
    # Leading words of a flag description that are displayed as the flag's
    # value type. Adjust as needed.
    type_words = {
        "string", "bool", "int", "uint", "float", "duration",
        "strings", "uint16", "uint32", "uint64", "int16", "int32", "int64",
        "float32", "float64"
    }

    def split_type(text):
        """Return (type, remainder) when text starts with a known type, else ("", text)."""
        words = text.split(None, 1)
        if len(words) == 2 and words[0].lower() in type_words:
            return words[0], words[1]
        return "", text

    def write_section(structure, file, command_chain=None):
        chain = [] if command_chain is None else command_chain
        children = structure.get('subcommands', {})

        # The root (empty chain) has no section; only its subcommands do.
        if chain:
            emit = file.write

            # Headings deepen with the chain but cannot go below H6.
            level = min(len(chain) + 1, 6)
            # Top-level commands are titled with the tool name; deeper ones
            # with the chain minus its first element.
            title = f"{cli_tool} {chain[0]}" if len(chain) == 1 else ' '.join(chain[1:])

            emit(f'<a id="{generate_anchor_id(cli_tool, chain)}"></a>\n')
            emit(f"{'#' * level} {title}\n\n")

            summary = structure.get('description')
            if summary:
                emit(f"{summary}\n\n")

            emit("**Usage:**\n")
            emit(f"```bash\n{cli_tool} {' '.join(chain)} [subcommand] [flags]\n```\n\n")

            if children:
                emit("**Subcommands:**\n\n")
                for name in sorted(children):
                    blurb = children[name].get('description', '')
                    target = generate_anchor_id(cli_tool, chain + [name])
                    emit(f"- [`{name}`](#{target}): {blurb}\n")
                emit("\n")

            flag_entries = structure.get('flags')
            if flag_entries:
                emit("**Flags:**\n\n")
                rows = []
                for entry in flag_entries:
                    names = entry['flag']
                    kind, text = split_type(entry['description'].strip())
                    rows.append((f"{names} {kind}" if kind else names, text))

                # Pad every label to the widest one so descriptions line up.
                width = max((len(label) for label, _ in rows), default=0)
                emit("```bash\n")
                for label, text in rows:
                    emit(f"{label.ljust(width)} {text}\n")
                emit("```\n\n")

        for name in sorted(children):
            write_section(children[name], file, chain + [name])

    with open(file_path, "w", encoding="utf-8") as handle:
        write_section(cli_structure, handle)

def main():
    """Scrape the help output of the `avalanche` CLI and write cmd/commands.md."""
    cli_tool = "avalanche"  # Adjust if needed
    depth_limit = 10

    # Walk the whole command tree first; nothing is written if that fails.
    cli_structure = get_command_structure(cli_tool, max_depth=depth_limit)
    if not cli_structure:
        print("[ERROR] Failed to retrieve CLI structure")
        return

    generate_markdown(cli_structure, cli_tool, "cmd/commands.md")
    print("Markdown documentation saved to cmd/commands.md")


if __name__ == "__main__":
    main()
Loading

0 comments on commit 4d7708c

Please sign in to comment.