Yifwei/week1 anatomy translator #42

Draft · wants to merge 3 commits into main
204 changes: 204 additions & 0 deletions .virtual_documents/week1/day1.ipynb
@@ -0,0 +1,204 @@



# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# If you get an error running this cell, then please head over to the troubleshooting notebook!





# Load environment variables in a file called .env

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Check the key

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")



openai = OpenAI()

# If this doesn't work, try Kernel menu >> Restart Kernel and Clear Outputs Of All Cells, then run the cells from the top of this notebook down.
# If it STILL doesn't work (horrors!) then please see the troubleshooting notebook, or try the below line instead:
# openai = OpenAI(api_key="your-key-here-starting-sk-proj-")





# To give you a preview -- calling OpenAI with these messages is this easy:

message = "Hello, GPT! This is my first ever message to you! Hi!"
response = openai.chat.completions.create(model="gpt-4o-mini", messages=[{"role":"user", "content":message}])
print(response.choices[0].message.content)





# A class to represent a Webpage
# If you're not familiar with Classes, check out the "Intermediate Python" notebook

class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)


# Let's try one out. Change the website and add print statements to follow along.

ed = Website("https://edwarddonner.com")
print(ed.title)
print(ed.text)





# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'

system_prompt = "You are an assistant that analyzes the contents of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."


# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website are as follows; \
please provide a short summary of this website in markdown. \
If it includes news or announcements, then summarize these too.\n\n"
    user_prompt += website.text
    return user_prompt


print(user_prompt_for(ed))





messages = [
    {"role": "system", "content": "You are a snarky assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]


# To give you a preview -- calling OpenAI with system and user messages:

response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)
print(response.choices[0].message.content)





# See how this function creates exactly the format above

def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]


# Try this out, and then try for a few more websites

messages_for(ed)





# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content


summarize("https://edwarddonner.com")


# A function to display this nicely in the Jupyter output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))


display_summary("https://edwarddonner.com")





display_summary("https://cnn.com")


display_summary("https://anthropic.com")





# Step 1: Create your prompts

system_prompt = "something here"
user_prompt = """
Lots of text
Can be pasted here
"""

# Step 2: Make the messages list

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt}
]

# Step 3: Call OpenAI

response = openai.chat.completions.create(model="gpt-4o-mini", messages=messages)

# Step 4: print the result

print(response.choices[0].message.content)









133 changes: 133 additions & 0 deletions .virtual_documents/week1/week1 EXERCISE.ipynb
@@ -0,0 +1,133 @@



# imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


# constants

MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'


# set up environment
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key) > 10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")

openai = OpenAI()  # create the client that get_links and the calls below rely on


# A class to represent a Webpage

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"


dr = Website("https://www.drbruceforciea.com")
print(dr.get_contents())
print(dr.links)


link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to learn anatomy and physiology, \
such as links to an Anatomy or Physiology page, Learning page, or Book page.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "anatomy and physiology page", "url": "https://full.url/goes/here/anatomy-and-physiology"},
        {"type": "learning page", "url": "https://another.full.url/learning"}
    ]
}
"""


def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links to learn anatomy and physiology, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt


print(get_links_user_prompt(dr))


def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)


# Try a medicine-related website.

nationalcancerinstitute = Website("https://training.seer.cancer.gov/modules_reg_surv.html")
nationalcancerinstitute.links


get_links("https://training.seer.cancer.gov/modules_reg_surv.html")


def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result


# here is the question; type over this to ask something new

question = """
Please explain what this code does and why:
yield from {book.get("author") for book in books if book.get("author")}
"""


# Get gpt-4o-mini to answer, with streaming
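
# A minimal sketch of one way to do this (the tutor system prompt below is an
# assumption, not part of the original exercise): stream the completion and
# re-render the accumulating markdown with update_display.

def stream_gpt_answer(question):
    stream = openai.chat.completions.create(
        model=MODEL_GPT,
        messages=[
            {"role": "system", "content": "You are a helpful technical tutor."},
            {"role": "user", "content": question}
        ],
        stream=True
    )
    reply = ""
    handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        reply += chunk.choices[0].delta.content or ""
        update_display(Markdown(reply), display_id=handle.display_id)
    return reply

stream_gpt_answer(question)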


# Get Llama 3.2 to answer
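
# A sketch assuming Ollama is running locally with llama3.2 pulled, reached
# through its OpenAI-compatible endpoint; the api_key value is a required
# placeholder, not a real key.

ollama = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

response = ollama.chat.completions.create(
    model=MODEL_LLAMA,
    messages=[
        {"role": "system", "content": "You are a helpful technical tutor."},
        {"role": "user", "content": question}
    ]
)
display(Markdown(response.choices[0].message.content))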