Yifwei/week1 anatomy translator #42

Draft · wants to merge 3 commits into main
204 changes: 204 additions & 0 deletions .virtual_documents/week1/day1.ipynb
@@ -0,0 +1,204 @@



# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# If you get an error running this cell, then please head over to the troubleshooting notebook!





# Load environment variables in a file called .env

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")



openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.
# If it STILL doesn't work (horrors!) then please see the troubleshooting notebook, or try the below line instead:
# openai = OpenAI(api_key="your-key-here-starting-sk-proj-")





# To give you a preview -- calling OpenAI with these messages is this easy:

message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(model="gpt-4o-mini", messages=[{"role":"user", "content":message}])
print(response.choices[0].message.content)





# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)


# Let's try one out. Change the website and add print statements to follow along.

ed = Website("https://edwarddonner.com")
print(ed.title)
print(ed.text)





# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."


# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website are as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt


print(user_prompt_for(ed))





messages = [
    {"role": "system", "content": "You are a snarky assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]


# To give you a preview -- calling OpenAI with system and user messages:

response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
print(response.choices[0].message.content)





# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]


# Try this out, and then try for a few more websites

messages_for(ed)





# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content


summarize("https://edwarddonner.com")


# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))


display_summary("https://edwarddonner.com")





display_summary("https://cnn.com")


display_summary("https://anthropic.com")





# Step 1: Create your prompts

system_prompt = "something here"
user_prompt = """
Lots of text
Can be pasted here
"""

# Step 2: Make the messages list

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}
]

# Step 3: Call OpenAI

response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)

# Step 4: print the result

print(response.choices[0].message.content)









133 changes: 133 additions & 0 deletions .virtual_documents/week1/week1 EXERCISE.ipynb
@@ -0,0 +1,133 @@



# imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


# constants

MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'


# set up environment
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

openai = OpenAI()  # create the client that get_links and the calls below rely on


# A class to represent a Webpage

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"


dr = Website("https://www.drbruceforciea.com")
print(dr.get_contents())
print(dr.links)


link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to learn anatomy and physiology, \
such as links to an Anatomy or Physiology page, Learning page, or Book page.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "anatomy and physiology page", "url": "https://full.url/goes/here/anatomy-and-physiology"},
        {"type": "learning page", "url": "https://another.full.url/learning"}
    ]
}
"""


def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links to learn anatomy and physiology, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt


print(get_links_user_prompt(dr))


def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)


# Try a medicine-related website.

nationalcancerinstitute = Website("https://training.seer.cancer.gov/modules_reg_surv.html")
nationalcancerinstitute.links


get_links("https://training.seer.cancer.gov/modules_reg_surv.html")


def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result


# here is the question; type over this to ask something new

question = """
Please explain what this code does and why:
yield from {book.get("author") for book in books if book.get("author")}
"""


# Get gpt-4o-mini to answer, with streaming
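
# A minimal sketch of one way to do this (the tutor system prompt below is an
# assumption, not part of the original exercise): stream the completion and
# re-render the accumulating markdown with update_display.

def stream_gpt_answer(question):
    stream = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=[
            {"role": "system", "content": "You are a helpful technical tutor."},
            {"role": "user", "content": question}
        ],
        stream=True
    )
    reply = ""
    handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
        update_display(Markdown(reply), display_id=handle.display_id)
    return reply

stream_gpt_answer(question)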


# Get Llama 3.2 to answer
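
# A sketch assuming Ollama is running locally with llama3.2 pulled, reached
# through its OpenAI-compatible endpoint; the api_key value is a required
# placeholder, not a real key.

ollama = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

response = ollama.chat.completions.create(
    model=MODEL_LLAMA,
    messages=[
        {"role": "system", "content": "You are a helpful technical tutor."},
        {"role": "user", "content": question}
    ]
)
display(Markdown(response.choices[0].message.content))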