Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Atividade - Pandas #24

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions exercicios/musicas_populares.json

Large diffs are not rendered by default.

28 changes: 28 additions & 0 deletions exercicios/para-casa/AtvCasa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pandas as pd

# Most-streamed-songs exercise: load the 2024 CSV, convert the text-encoded
# numeric columns to floats, add a combined streaming total, show the popular
# tracks and export the DataFrame as JSON.

# NOTE(review): absolute path tied to one machine, kept verbatim from the
# original script -- consider a path relative to the repository instead.
CSV_PATH = "C:/Users/thai/Downloads/reprograma/on33-python-s09-pandas-numpy-I/material/mais_ouvidas_2024.csv"
JSON_PATH = "../musicas_populares.json"

# Columns that hold numbers written as text with "," thousands separators.
NUMERIC_TEXT_COLUMNS = (
    'All Time Rank', 'Spotify Streams', 'Spotify Playlist Count',
    'Spotify Playlist Reach', 'YouTube Views', 'YouTube Likes',
    'TikTok Posts', 'TikTok Likes', 'TikTok Views', 'YouTube Playlist Reach',
    'Deezer Playlist Reach', 'Pandora Streams', 'Pandora Track Stations',
    'Soundcloud Streams', 'Shazam Counts',
)

# Columns summed into 'Total Streaming'.
STREAMING_COLUMNS = (
    'Spotify Streams', 'YouTube Views', 'TikTok Views',
    'Pandora Streams', 'Soundcloud Streams',
)


def parse_numeric_columns(df, columns=NUMERIC_TEXT_COLUMNS):
    """Return a copy of *df* with *columns* converted from text to float.

    Each listed column must hold strings; every "," is removed before the
    conversion, so "1,234" becomes 1234.0. Missing values stay missing.
    """
    parsed = df.copy()
    for column in columns:
        # regex=False: the comma is a literal character, not a pattern.
        parsed[column] = parsed[column].str.replace(",", "", regex=False).astype(float)
    return parsed


def add_total_streaming(df, columns=STREAMING_COLUMNS):
    """Return a copy of *df* with a 'Total Streaming' column.

    The new column is the row-wise sum of *columns*; ``DataFrame.sum`` skips
    missing values by default, so a missing entry contributes nothing.
    """
    result = df.copy()
    result['Total Streaming'] = result[list(columns)].sum(axis=1)
    return result


def filter_popular(df, min_popularity=80, min_total_streaming=1_000_000):
    """Return the rows whose popularity and total streaming exceed both limits.

    Both comparisons are strict (``>``), matching the original thresholds of
    80 for 'Spotify Popularity' and 1,000,000 for 'Total Streaming'.
    """
    is_popular = df['Spotify Popularity'] > min_popularity
    is_streamed = df['Total Streaming'] > min_total_streaming
    return df[is_popular & is_streamed]


def main():
    """Run the full load -> clean -> enrich -> filter -> export pipeline."""
    df = pd.read_csv(CSV_PATH)
    df = parse_numeric_columns(df)
    df['Release Date'] = pd.to_datetime(df['Release Date'], format="%m/%d/%Y")

    df = add_total_streaming(df)
    print(df['Total Streaming'].head())

    filtered_df = filter_popular(df)
    print(filtered_df.head())

    # orient="records" writes one object per row and never includes the index.
    # The previous `index=False` with the default orient is not a supported
    # combination (rejected or ignored depending on the pandas version).
    # NOTE(review): the complete DataFrame is exported, as in the original
    # script; confirm whether `filtered_df` was the intended output.
    df.to_json(JSON_PATH, orient="records")


if __name__ == "__main__":
    main()



29 changes: 29 additions & 0 deletions exercicios/para-sala/ETL_pandas.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,31 @@
<<<<<<< HEAD
import pandas as pd

##['TransactionID', 'Date', 'MobileModel', 'Brand', 'Price', 'UnitsSold','TotalRevenue', 'CustomerAge', 'CustomerGender', 'Location','PaymentMethod']

# Mobile-sales ETL walkthrough: load the CSV, inspect it, normalise the date
# column, derive two revenue columns and print the high-value transactions.
# NOTE(review): in the file as shown this script sits between unresolved
# merge-conflict markers ("<<<<<<< HEAD" / "=======") -- resolve them before
# running.
df = pd.read_csv("C:/Users/thai/Downloads/reprograma/on33-python-s09-pandas-numpy-I/material/mobile_sales.csv")

# --- Inspection ---
print(df.head(10))  # first 10 rows of the file
print(df.columns)
df_valores_nulos = df.isnull()  # boolean frame marking the null cells
print(df_valores_nulos.sum())  # null count per column
print(df.duplicated())  # per-row flag for duplicated rows

# --- Transformation ---
df['Date'] = pd.to_datetime(df['Date'], format="mixed")

# New column: Total Sales Value = Price x UnitsSold.
df["Total Sales Value"] = df["Price"] * df["UnitsSold"]
print(df["Total Sales Value"])
print(df.columns)

# New column: Profit Margin = 30% of Price, times UnitsSold.
df["Profit Margin"] = (df["Price"] * 0.30) * df["UnitsSold"]
print(df["Profit Margin"])
print(df.columns)

# --- Filtering ---
# Keep rows above both thresholds (strict comparisons).
filtered_df = df[
    (df["Total Sales Value"] > 100_000)
    & (df["Profit Margin"] > 20_000)
]
print(filtered_df)
=======
import pandas as pd

# ['TransactionID', 'Date', 'MobileModel', 'Brand', 'Price', 'UnitsSold','TotalRevenue', 'CustomerAge', 'CustomerGender', 'Location','PaymentMethod']
Expand Down Expand Up @@ -28,3 +56,4 @@
print(filtered_df.head())

filtered_df.to_csv("./exercicios/para-sala/filtered_list.csv", index=False)
>>>>>>> d945731276aa7dccf62e91cd8b5e6b8d11577dbf