-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpandas-vlookup.py
63 lines (47 loc) · 1.58 KB
/
pandas-vlookup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import pandas as pd
def remove_whitespace_from_columns(df, columns_to_clean=None):
"""
Remove leading and trailing whitespace from specified columns in a DataFrame.
Parameters:
df (pd.DataFrame): The input DataFrame.
columns_to_clean (list or None): A list of column names to clean.
If None, all string columns are cleaned.
Returns:
pd.DataFrame: The DataFrame with whitespace removed from specified columns.
"""
if not isinstance(df, pd.DataFrame):
raise ValueError("Input must be a pandas DataFrame.")
if columns_to_clean is None:
# If columns_to_clean is not specified, clean all string columns
columns_to_clean = [col for col in df.columns if df[col].dtype == "object"]
cleaned_df = df.copy()
for col in columns_to_clean:
if col in cleaned_df.columns:
cleaned_df[col] = cleaned_df[col].str.strip()
return cleaned_df
df_main_orig = pd.read_excel(
"Excel.xlsx", sheet_name="Master File"
)
df_lookup_orig = pd.read_csv("file.csv")
df_main = remove_whitespace_from_columns(df_main_orig, "column")
df_lookup = remove_whitespace_from_columns(df_newfuel_orig, "column")
result = pd.merge(
df_main[
[
"Column 1",
"Column 2",
"Column 3"
]
],
df_lookup,
on="column1",
how="inner",
)
# Select only the specified columns in the fuel_result DataFrame
selected_columns = [
"Column 1",
"Column 2",
"Column 3"
]
final_result = result[selected_columns]
result.to_csv("result.csv", index=False)