Skip to content

Commit

Permalink
Moved read htm code into loader.py
Browse files Browse the repository at this point in the history
  • Loading branch information
inkenbrandt committed Oct 24, 2024
1 parent 6ce8141 commit 0add628
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 50 deletions.
2 changes: 1 addition & 1 deletion .idea/loggerloader.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.4.1
2.4.3
2 changes: 1 addition & 1 deletion loggerloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
except:
from .llgui import *

version = "2.4.1"
version = "2.4.3"
__version__ = version
__author__ = 'Paul Inkenbrandt'
__name__ = 'loggerloader'
Expand Down
2 changes: 1 addition & 1 deletion loggerloader/llgui.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from tkcalendar import DateEntry

from pylab import rcParams
from PIL import ImageTk, Image
#from PIL import ImageTk, Image

import platform
import os
Expand Down
60 changes: 57 additions & 3 deletions loggerloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1144,6 +1144,59 @@ def __init__(self, infile, trim_end=True, jumptol=1.0):
print('Bad File')
return

def read_troll_htm(self):
    """Parse an In-Situ Troll .htm export into a pandas DataFrame.

    Reads the HTML report at ``self.infile``, pulls the header row
    (``tr.dataHeader``) for column names and the data rows (``tr.data``)
    for values, then coerces columns by name: date columns become the
    datetime index, pressure columns are converted to numeric and also
    used to derive a ``Level`` column in feet of water, and other known
    sensor columns are converted to numeric.

    Returns:
        pandas.DataFrame: table of readings indexed by the date column
        (if one is present); other columns keep their header names.

    Raises:
        ValueError: if the file contains no ``dataHeader`` row.
    """
    # Read the whole report; Troll exports are small enough to slurp.
    # errors='replace' keeps a stray mis-encoded byte from aborting the import.
    with open(self.infile, 'r', errors='replace') as f:
        html_string = f.read()

    # Parse the HTML with the stdlib-backed parser (no external lxml dependency).
    soup = BeautifulSoup(html_string, "html.parser")

    # Header row carries the column names; data rows carry the readings.
    table_rows = soup.find_all('tr', {'class': 'data'})
    header = soup.find_all('tr', {'class': 'dataHeader'})
    if not header:
        # Fail loudly with context instead of an opaque IndexError on header[0].
        raise ValueError(f"No 'dataHeader' row found in {self.infile}; not a Troll htm export?")

    colnames = [head.text.strip() for head in header[0].find_all('td')]

    # One list of cell texts per data row.
    data = [[col.text.strip() for col in row.find_all('td')]
            for row in table_rows]

    df = pd.DataFrame(data)
    df.columns = colnames

    for col in df.columns:
        if "Date" in col or "date" in col:
            # Use the date column as the index.
            df[col] = pd.to_datetime(df[col])
            df = df.set_index(col)
        elif "Press" in col:
            df[col] = pd.to_numeric(df[col])
            # Convert pressure to feet of water:
            # 1 psi = 2.3067 ft; 1 mmHg = 0.044603 ft.
            if "psi" in col:
                df['Level'] = df[col] * 2.3067
            elif "Hg" in col:
                df['Level'] = df[col] * 0.044603
        elif "Depth" in col or "Cond" in col or "Total" in col or "Salin" in col or "Dens" in col or "Temp" in col:
            # Remaining known sensor columns are plain numerics.
            df[col] = pd.to_numeric(df[col])

    return df

def new_csv_imp(self):
"""This function uses an exact file path to upload a csv transducer file.
Expand Down Expand Up @@ -1258,7 +1311,7 @@ def new_lev_imp(self):
# start_time = txt[inst_info_ind+6].split('=')[-1].strip()
# stop_time = txt[inst_info_ind+7].split('=')[-1].strip()

df = pd.read_table(self.infile, parse_dates=[[0, 1]], sep='\s+', skiprows=data_ind + 2,
df = pd.read_table(self.infile, parse_dates=[[0, 1]], sep=r'\s+', skiprows=data_ind + 2,
names=['Date', 'Time', level, temp],
skipfooter=1, engine='python')
df.rename(columns={'Date_Time': 'DateTime'}, inplace=True)
Expand Down Expand Up @@ -1607,11 +1660,12 @@ def csv_head(self, file):

def getwellid(infile, wellinfo):
    """Look up a well's id number from its data-file name.

    The well name is taken as the prefix of the file name: everything
    before the first digit when that digit appears past position 3,
    otherwise everything before the first whitespace character.  The
    name is then matched against the ``Well`` column of *wellinfo*.

    Args:
        infile (str): path to the transducer data file.
        wellinfo (pandas.DataFrame): well info table with ``Well`` and
            ``wellid`` columns; ``Well`` values are lower-case names.

    Returns:
        tuple: ``(wellname, wellid)`` for the matched well.

    Note:
        Assumes the file name contains a digit or a whitespace character
        and that the derived name exists in *wellinfo* — otherwise this
        raises (AttributeError / IndexError), matching prior behavior.
    """
    # Hoist the repeated helper call; raw strings keep the regex escapes literal.
    fname = getfilename(infile)
    m = re.search(r"\d", fname)
    s = re.search(r"\s", fname)
    # Prefer the digit boundary unless the digit starts the name (e.g. "3rd East").
    if m.start() > 3:
        wellname = fname[0:m.start()].strip().lower()
    else:
        wellname = fname[0:s.start()].strip().lower()
    wellid = wellinfo[wellinfo['Well'] == wellname]['wellid'].values[0]
    return wellname, wellid

20 changes: 20 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
[build-system]
requires = ["setuptools >= 61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "loggerloader"
# version and dependencies come from setuptools' dynamic metadata below;
# declaring them statically here at the same time is invalid PEP 621 metadata.
dynamic = ["version", "dependencies"]
requires-python = ">= 3.8"
authors = [{name = "Paul Inkenbrandt", email = "[email protected]"}]
maintainers = [{name = "Paul Inkenbrandt", email = "[email protected]"}]
description = "Import and process transducer data; hydrograph analysis; hydrology;"
readme = "README.md"
license = {file = "LICENSE.txt"}
keywords = ["well", "groundwater", "transducer"]

[tool.setuptools.dynamic]
version = {attr = "loggerloader.__version__"}
dependencies = {file = "requirements.txt"}

[project.urls]
Homepage = "https://github.com/utah-geological-survey/loggerloader"
Issues = "https://github.com/utah-geological-survey/loggerloader/issues"
42 changes: 0 additions & 42 deletions setup.py

This file was deleted.

0 comments on commit 0add628

Please sign in to comment.