-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
553 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
.git | ||
__pycache__ | ||
LICENSE | ||
output.md | ||
assets | ||
Style-Bert-VITS2 | ||
output | ||
streamlit | ||
SourceSage.md | ||
data | ||
.gitignore | ||
.SourceSageignore | ||
*.png | ||
Changelog | ||
SourceSageAssets | ||
SourceSageAssetsDemo | ||
__pycache__ | ||
.pyc | ||
**/__pycache__/** | ||
modules\__pycache__ | ||
.svg | ||
sourcesage.egg-info | ||
.pytest_cache | ||
dist | ||
build | ||
.env | ||
example | ||
|
||
.gaiah.md | ||
.Gaiah.md | ||
tmp.md | ||
tmp2.md | ||
.SourceSageAssets | ||
tests | ||
template | ||
aira.egg-info | ||
aira.Gaiah.md | ||
README_template.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
aira: | ||
gaiah: # 共通設定 | ||
run: true | ||
repo: | ||
repo_name: "PEGASUS" | ||
description: "Evolutionary Merge Experiment" | ||
private: false | ||
local: | ||
repo_dir: "C:/Prj/PEGASUS" | ||
no_initial_commit: false | ||
commit: | ||
commit_msg_path: ".Gaiah.md" | ||
branch_name: null | ||
|
||
dev: # 開発時の設定 (必要に応じて上書き) | ||
repo: | ||
create_repo: false | ||
local: | ||
init_repo: false | ||
commit: | ||
process_commits: true | ||
|
||
init: # 初期化時の設定 (必要に応じて上書き) | ||
repo: | ||
create_repo: true | ||
local: | ||
init_repo: true | ||
commit: | ||
process_commits: false | ||
|
||
llm: | ||
model: "gemini/gemini-1.5-pro-latest" # 利用するLLMモデル | ||
|
||
repository_summary_output_dir: .aira # リポジトリ概要の出力ディレクトリ | ||
readme_prompt_template_path: .aira/readme_prompt_template.txt # README生成のプロンプトテンプレートのパス | ||
|
||
harmon_ai: | ||
run: true | ||
environment: | ||
repo_name: "PEGASUS" | ||
owner_name: "Sunwood-ai-labs" | ||
package_name: "PEGASUS" | ||
icon_url: "https://huggingface.co/datasets/MakiAi/IconAssets/resolve/main/PEGASUS.jpeg" | ||
title: "PEGASUS" | ||
subtitle: "~ Evolutionary Merge Experiment ~" | ||
website_url: "https://hamaruki.com/" | ||
github_url: "https://github.com/Sunwood-ai-labs" | ||
twitter_url: "https://x.com/hAru_mAki_ch" | ||
blog_url: "https://hamaruki.com/" | ||
|
||
product: | ||
important_message_file: "important_template.md" | ||
sections_content_file: "sections_template.md" | ||
output_file: "README_template.md" | ||
cicd_file_path: "publish-to-pypi.yml" | ||
cicd_main_path: "publish-to-pypi.yml" | ||
github_cicd_dir: ".github/workflows" | ||
|
||
llm_product: | ||
sections_content_file: "sections_template_llm.md" | ||
|
||
development: | ||
output_dir: "C:/Prj/PEGASUS/.harmon_ai" | ||
|
||
main: | ||
main_dir: "C:/Prj/PEGASUS/" | ||
replace_readme: true | ||
|
||
instructions_prompt: .aira/instructions.md |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
from pegasus.pegasus import Pegasus | ||
|
||
# Example run: configure a Pegasus instance starting from the Eraser docs
# landing page, then execute it. The meaning of each keyword argument is
# defined by pegasus.pegasus.Pegasus, which is not shown in this file.
pegasus = Pegasus(
    base_url="https://docs.eraser.io/docs/what-is-eraser",
    output_dir="eraser_docs",
    exclude_selectors=[
        'header',
        'footer',
        'nav',
        'aside',
        '.sidebar',
        '.header',
        '.footer',
        '.navigation',
        '.breadcrumbs',
    ],
    include_domain="docs.eraser.io",
    exclude_keywords=["login"],
)
pegasus.run()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import requests | ||
import html2text | ||
|
||
def download_and_convert(url, output_file, timeout=10):
    """Download a web page and save it to a file as Markdown.

    Errors are reported on stdout rather than raised: a failed download
    prints "Error downloading ..." and a failed write prints
    "Error writing to ...".

    Args:
        url: Address of the page to download.
        output_file: Path of the Markdown file to write (UTF-8).
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, ``requests.get`` may block indefinitely on an
            unresponsive host.

    Returns:
        None.
    """
    try:
        # Download the web page; bound the wait so the script cannot hang.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Convert the HTML to Markdown, dropping hyperlinks from the output.
        converter = html2text.HTML2Text()
        converter.ignore_links = True
        markdown_content = converter.handle(response.text)

        # Save the Markdown to the output file.
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write(markdown_content)

        print(f"Successfully converted {url} to {output_file}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading {url}: {e}")
    except IOError as e:
        print(f"Error writing to {output_file}: {e}")
|
||
# Usage example: convert one Eraser docs page into example.md.
url = "https://docs.eraser.io/docs/what-is-eraser"
output_file = "example.md"
download_and_convert(url=url, output_file=output_file)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import requests | ||
import html2text | ||
from bs4 import BeautifulSoup | ||
from urllib.parse import urljoin, urlparse | ||
|
||
def download_and_convert(url, output_dir, visited_urls,
                         include_domain="docs.eraser.io", timeout=10):
    """Crawl pages starting at ``url`` and save each one as Markdown.

    Every page reached is downloaded, converted to Markdown with links
    stripped, and written to ``output_dir`` under a name derived from the
    URL path (``/`` replaced by ``_``). Links on each saved page are then
    followed depth-first, restricted to hosts equal to ``include_domain``.
    Download and write errors are printed to stdout, not raised; a page
    that fails is skipped and its links are not followed.

    Args:
        url: Page at which to start crawling.
        output_dir: Directory that receives the ``.md`` files. It is
            created if it does not exist.
        visited_urls: Set of URLs already processed. It is updated in
            place, and URLs already in it are skipped.
        include_domain: Host name a link must have to be followed.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None.
    """
    # Local import: os is not among the file's top-level imports.
    import os

    try:
        # Without this, every write fails when the directory is missing.
        os.makedirs(output_dir, exist_ok=True)
    except OSError as e:
        print(f"Error writing to {output_dir}: {e}")
        return

    # An explicit stack replaces recursion so that a large site cannot
    # exhaust the interpreter's recursion limit.
    pending = [url]
    while pending:
        current = pending.pop()
        if current in visited_urls:
            continue
        visited_urls.add(current)

        try:
            # Download the web page; bound the wait so a dead host cannot
            # stall the whole crawl.
            response = requests.get(current, timeout=timeout)
            response.raise_for_status()

            # Convert the HTML to Markdown, dropping hyperlinks.
            converter = html2text.HTML2Text()
            converter.ignore_links = True
            markdown_content = converter.handle(response.text)

            # Save the Markdown to a file named after the URL path.
            parsed_url = urlparse(current)
            output_file = f"{output_dir}/{parsed_url.path.replace('/', '_')}.md"
            with open(output_file, 'w', encoding='utf-8') as file:
                file.write(markdown_content)

            print(f"Successfully converted {current} to {output_file}")
        except requests.exceptions.RequestException as e:
            print(f"Error downloading {current}: {e}")
            continue
        except IOError as e:
            print(f"Error writing to {output_file}: {e}")
            continue

        # Collect the links found on this page.
        soup = BeautifulSoup(response.text, 'html.parser')
        discovered = []
        for link in soup.find_all('a'):
            href = link.get('href')
            if not href:
                continue
            # Strip the fragment so anchors within one page do not count
            # as separate pages.
            absolute_url = urljoin(current, href).split('#')[0]
            # Compare the host itself: a substring test on the whole URL
            # would also match foreign sites that merely mention the
            # domain in a path or query string.
            if urlparse(absolute_url).hostname == include_domain:
                discovered.append(absolute_url)

        # Push in reverse so links are visited in document order,
        # depth-first, as the recursive version did.
        pending.extend(reversed(discovered))
|
||
# Usage example: crawl the Eraser docs from the landing page into the
# eraser_docs directory, starting with an empty visited set.
base_url = "https://docs.eraser.io/docs/what-is-eraser"
output_dir = "eraser_docs"
visited_urls = set()
download_and_convert(
    url=base_url,
    output_dir=output_dir,
    visited_urls=visited_urls,
)
Oops, something went wrong.