diff --git a/README.md b/README.md
index d998a7a..660508c 100644
--- a/README.md
+++ b/README.md
@@ -18,7 +18,8 @@ This project is a chatbot for Mattermost that integrates with the Anthropic API
- **Extracts text content from links** shared in the messages. Also supports **FlareSolverr** to bypass
Javascript/CAPTCHA restrictions
- Supports the **Vision API** for describing images provided as URLs within the chat message
-- **Gets transcripts of YouTube videos** for easy tl;dw summarizations
+- **Gets transcripts of YouTube videos** for easy tl;dw summarizations. The video's title, description and uploader are
+  also provided to the chatbot
- Maintains context of the conversation within a thread
- Sends typing indicators to show that the chatbot is processing the message
- Utilizes a thread pool to handle multiple requests concurrently (due to `mattermostdriver-asyncio` being outdated)
@@ -26,7 +27,7 @@ This project is a chatbot for Mattermost that integrates with the Anthropic API
## Prerequisites
-- Python 3.11 or just a server with [Docker](https://docs.docker.com/get-started/). _(you can get away with using 3.8 if
+- Python 3.11 or just a server with [Docker](https://docs.docker.com/get-started/) _(you can get away with using 3.8 if
you use datetime.datetime.utcnow() instead of datetime.datetime.now(datetime.UTC))_
- Anthropic API key
- Mattermost server with API access
@@ -49,7 +50,7 @@ This project is a chatbot for Mattermost that integrates with the Anthropic API
```
_or alternatively:_
```bash
- python3.12 -m pip install anthropic mattermostdriver certifi beautifulsoup4 pillow httpx youtube-transcript-api
+ python3.12 -m pip install anthropic mattermostdriver certifi beautifulsoup4 pillow httpx youtube-transcript-api yt-dlp
```
3. Set the following environment variables with your own values (most are optional):
@@ -137,4 +138,5 @@ This project is licensed under the MIT License.
- [Mattermost](https://mattermost.com/) for the messaging platform
- [mattermostdriver](https://github.com/Vaelor/python-mattermost-driver) for the Mattermost API client library
- [chatgpt-mattermost-bot](https://github.com/yGuy/chatgpt-mattermost-bot) for inspiring me to write this python code
-- [youtube-transcript-api](https://pypi.org/project/youtube-transcript-api/) for the YouTube Transcript Fetch library
\ No newline at end of file
+- [youtube-transcript-api](https://pypi.org/project/youtube-transcript-api/) for the YouTube Transcript Fetch library
+- [yt-dlp](https://pypi.org/project/yt-dlp/) for the library that fetches YouTube video details (title, description and uploader)
\ No newline at end of file
diff --git a/chatbot.py b/chatbot.py
index 0dc54a9..28e5f8b 100644
--- a/chatbot.py
+++ b/chatbot.py
@@ -16,6 +16,7 @@
from mattermostdriver.driver import Driver
from bs4 import BeautifulSoup
from youtube_transcript_api import YouTubeTranscriptApi
+from yt_dlp import YoutubeDL
from anthropic import Anthropic
logging.basicConfig(level=logging.INFO)
@@ -428,8 +429,18 @@ async def message_handler(event):
continue
try:
if yt_is_valid_url(link):
- transcript_text = yt_get_transcript(link)
- extracted_text += transcript_text
+ title, description, uploader = yt_get_video_info(
+ link
+ )
+ transcript = yt_get_transcript(link)
+ extracted_text += f"""
+
+ {title}
+ {description}
+ {uploader}
+ {transcript}
+
+ """
continue
with client.stream(
@@ -465,7 +476,7 @@ async def message_handler(event):
image_data += chunk
total_size += len(chunk)
if total_size > max_response_size:
- extracted_text += "*WEBSITE SIZE EXCEEDED THE MAXIMUM LIMIT FOR THE CHATBOT, WARN THE CHATBOT USER*"
+ extracted_text += "website size exceeded the maximum limit for the chatbot, warn the chatbot user"
raise Exception(
"Response size exceeds the maximum limit at image processing"
)
@@ -549,11 +560,8 @@ async def message_handler(event):
# Handle text content
try:
if flaresolverr_endpoint:
- extracted_text += (
- extract_content_with_flaresolverr(
- link
- )
- )
+ website_text = extract_content_with_flaresolverr(link)
+ extracted_text += f"{website_text}"
else:
raise Exception(
"FlareSolverr endpoint not available"
@@ -568,15 +576,16 @@ async def message_handler(event):
content_chunks.append(chunk)
total_size += len(chunk)
if total_size > max_response_size:
- extracted_text += "*WEBSITE SIZE EXCEEDED THE MAXIMUM LIMIT FOR THE CHATBOT, WARN THE CHATBOT USER*"
+ extracted_text += "website size exceeded the maximum limit for the chatbot, warn the chatbot user"
raise Exception(
"Response size exceeds the maximum limit"
)
content = b"".join(content_chunks)
soup = BeautifulSoup(content, "html.parser")
- extracted_text += soup.get_text(
+ website_text = soup.get_text(
" | ", strip=True
)
+ extracted_text += f"{website_text}"
except Exception as e:
logging.error(
f"Error extracting content from link {link}: {str(e)} {traceback.format_exc()}"
@@ -671,9 +680,26 @@ def yt_get_transcript(url):
except Exception as e:
logging.info(f"YouTube Transcript Exception: {str(e)}")
- return (
- "*COULD NOT FETCH THE VIDEO TRANSCRIPT FOR THE CHATBOT, WARN THE CHATBOT USER*"
- )
+ return "could not fetch the video transcript for the chatbot, warn the chatbot user"
+
+
+def yt_get_video_info(url):
+    """Fetch a video's title, description and uploader with yt-dlp.
+
+    Only metadata is extracted (``download=False``). yt-dlp documents most
+    info-dict fields as optional, so missing or ``None`` values become ""
+    rather than raising ``KeyError`` or rendering as the text "None".
+    Errors raised by ``extract_info`` itself still propagate to the caller.
+    """
+    ydl_opts = {
+        "quiet": True,
+    }
+
+    with YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=False)
+
+    # ``or ""`` also covers keys that are present but set to None.
+    return tuple(info.get(key) or "" for key in ("title", "description", "uploader"))
def yt_is_valid_url(url):
diff --git a/requirements.txt b/requirements.txt
index b2a7a5a..491e2c2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,5 @@ beautifulsoup4
pillow
httpx
youtube-transcript-api
+yt-dlp
anthropic
\ No newline at end of file