Pandas ETL exercise on the most-listened songs of 2024 #14

Open · wants to merge 1 commit into base: main
27 changes: 27 additions & 0 deletions exercicios/para-casa/ETL_pandas.py
@@ -0,0 +1,27 @@
import pandas as pd

df = pd.read_csv("../../material/mais_ouvidas_2024.csv")

print(df.head())
print(df.info())

for column in df.columns:
    if df[column].dtype == "object":
Collaborator
This condition invites errors in two ways:

1 - You are converting every object-typed column without distinction, including the ones that SHOULD stay object. Two things can happen: either the conversion fails and raises an error, or for some reason it converts to an unexpected value, which corrupts your data.
2 - You are converting ALL the data to float. I understand that may be the intention, but checking whether the data really should be float, and adjusting each type as needed, is also something worth thinking about.
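
A minimal sketch of what the reviewer is suggesting: convert only the columns known to hold numeric values, instead of every object column. The column names below are taken from the rest of this diff, and the sketch assumes the raw CSV stores these values as strings with thousands separators.

numeric_columns = [
    "Spotify Streams", "Spotify Popularity", "YouTube Views",
    "TikTok Views", "TikTok Likes", "Pandora Streams",
    "Soundcloud Streams", "Shazam Counts",
]

for column in numeric_columns:
    # Drop the thousands separators, then let pandas parse the numbers;
    # errors="coerce" turns unparseable values into NaN instead of silently keeping bad data.
    df[column] = pd.to_numeric(df[column].astype(str).str.replace(",", ""), errors="coerce")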

        df[column] = df[column].str.replace(",", "").astype(float, errors='ignore')

df["Release Date"] = pd.to_datetime(df["Release Date"])
print(df.dtypes)

df["Streaming Popularity"] = df[["Spotify Popularity", "YouTube Views", "TikTok Likes", "Shazam Counts"]].mean(axis=1)
Collaborator

As a good practice, store this list of columns in a variable whose name describes what the values represent; that makes the code more readable for whoever works on it.
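
One way to apply the suggestion (popularity_metrics is a hypothetical name, not from the original code):

popularity_metrics = ["Spotify Popularity", "YouTube Views", "TikTok Likes", "Shazam Counts"]
df["Streaming Popularity"] = df[popularity_metrics].mean(axis=1)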


print(df["Streaming Popularity"])

df["Total Streams"] = df[["Spotify Streams", "YouTube Views", "TikTok Views", "Pandora Streams", "Soundcloud Streams"]].sum(axis=1)
Collaborator

The same goes for the list used in the sum here.
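
Same idea for the totals (stream_columns is a hypothetical name):

stream_columns = ["Spotify Streams", "YouTube Views", "TikTok Views", "Pandora Streams", "Soundcloud Streams"]
df["Total Streams"] = df[stream_columns].sum(axis=1)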


print(df["Total Streams"])

filtered_df = df[(df["Spotify Popularity"] > 80) & (df["Total Streams"] > 1_000_000)]

print(filtered_df.head())

filtered_df.to_json("./filtered_list.json", index=False)
1 change: 1 addition & 0 deletions exercicios/para-casa/filtered_list.json

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions exercicios/para-sala/ETL_pandas_by_me.py
@@ -0,0 +1,33 @@
import pandas as pd

# ['TransactionID', 'Date', 'MobileModel', 'Brand', 'Price', 'UnitsSold', 'TotalRevenue', 'CustomerAge', 'CustomerGender', 'Location', 'PaymentMethod']

df = pd.read_csv("../../material/mobile_sales.csv")

# Show the first 10 rows instead of the default 5
# print(df.head(n=10))

# print(df.head())
# print(df.columns)
df_valores_nulos = df.isnull()
# print(df_valores_nulos.sum())
# Check for duplicated rows
#print(df.duplicated().sum())
df = df.drop_duplicates()  # drop_duplicates returns a new DataFrame, so reassign it

df["Date"] = pd.to_datetime(df["Date"], format="mixed")
#print(df.dtypes)
#print(df["Date"])

df["Total Sales Value"] = df["Price"] * df["UnitsSold"]
# print(df.columns)
# print(df["Total Sales Value"])

profit_per_product = 0.30
df["Profit Margin"] = (df["Price"] * profit_per_product ) * df["UnitsSold"]

filtered_df = df[(df["Total Sales Value"] > 100_000) & (df["Profit Margin"] > 20_000)]

print(filtered_df.head())

filtered_df.to_csv("./filtered_list.csv", index=False)