diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..69478e4 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,42 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python + +name: Python package + +on: + push: + branches: [ "dev" ] + pull_request: + branches: [ "dev" ] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8","3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install flake8 pytest + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m spacy download fi_core_news_sm + python -m pip install . + - name: Lint with flake8 + run: | + # stop the build if there are Python syntax errors or undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: Test with pytest + run: | + pytest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a4d7943 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# Ignore Python cache files +__pycache__/ + +# Ignore system and editor files +.DS_Store +*.swp +*.swo +*.bak +*~ diff --git a/finger/__init__.py b/finger/__init__.py deleted file mode 100644 index d2cc938..0000000 --- a/finger/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# __init__.py - -#from finger.location_coder import * -#from finger.location_tagger import * -#from finger.output_formatter import * diff --git a/finger/geoparser.py b/finger/geoparser.py deleted file mode 100644 index 5062437..0000000 --- a/finger/geoparser.py +++ /dev/null @@ -1,148 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 24 18:55:46 2021 - -@author: Tatu Leppämäki -""" - - -from finger.location_tagger import location_tagger -from finger.location_coder import location_coder -from finger.output_formatter import create_eupeg_json - -import time - -class geoparser: - """ - The geoparser handles a whole geoparsing pipeline from geotagging to geocoding. - It accepts a list of Finnish text strings as input. It then runs those texts - through a BERT-based neural linguistic and NER analysis pipeline built on Spacy. - The objective of this analysis is to find references to locations, such as - countries, towns, remarkable places etc., although the pipeline also runs general - named entity recognition and things like dependency parsing and part-of-speech tagging - on the side. Each input sentence can have zero to n locations in them. The locations are - lemmatized using the Voikko library. The first part of the geoparsing process is called (geo)tagging. - - The tagger results are gathered on a Pandas dataframe consisting of five columns, - with each analysis of a sentence on a single row. The dataframe is passed to - the (geo)coder, which attempts to return coordinate representations of the locations. - Currently, it relies on the GeoNames gazetteer, which is queried using a Python - module called GeoCoder. 
If locations are found, coordinate point representations - of them are returned as tuples or as Shapely points in WGS84 (EPSG:4326) CRS. - - """ - - def __init__(self, pipeline_path="fi_geoparser", use_gpu=False, - output_df=True, gn_username="", verbose=True): - """ - Parameters: - pipeline_path | String: name of the Spacy pipeline, which is called with spacy.load(). - "fi_geoparser", which is the installation name, by default, - however, a path to the files can also be provided. - - - use_gpu | Boolean: Whether the pipeline is run on the GPU (significantly faster, but often missing in - e.g. laptops) or CPU (slower but should run every time). Default True. - - output_df | Boolean: If True, the output will be a Pandas DataFrame. False does nothing currently. - - gn_username | String: GeoNames API key, or username, which is used for geocoding. - Mandatory, get from https://www.geonames.org/ - - verbose | Boolean: Prints progress reports. Default True. - - - """ - - self.tagger = location_tagger(pipeline_path, use_gpu=use_gpu) - - self.coder = location_coder(gn_username=gn_username) - - self.verbose=verbose - - - def geoparse(self, texts, ids=None, explode_df=False, return_shapely_points=False, - drop_non_locations=False, output='all', filter_toponyms=True, entity_tags=['LOC']): - """ - The whole geoparsing pipeline. - - Input: - texts | A string or a list of input strings: The input text(s) - *ids | String, int, float or a list: Identifying element of each input, e.g. tweet id. Must be - the same length as texts - *explode_df | Boolean: Whether to have each location "hit" on separate rows in the output. Default False - *return_shapely_points | Boolean: Whether the coordinate points of the locations are - regular tuples or Shapely points. Default False. - *drop_non_locations | Boolean: Whether the sentences where no locations were found are - included in the output. Default False (non-locs are included). - *output | String: What's included in the output and in what format it is. - Possible values: - 1. 'all': All columns listed below as a dataframe - TODO 2. 'essential': Dataframe trimmed down selection of columns - 3. 'eupeg': - *filter_toponyms | Boolean: Whether to filter out almost certain false positive toponyms. - Currently removes toponyms with length less than 2. Default True. - - Output columns: - Pandas Dataframe containing columns: - 1. input_text: the input sentence | String - 2. doc: Spacy doc object of the sent analysis. See https://spacy.io/api/doc | Doc - 3. locations_found: Whether locations were found in the input sent | Bool - 4. locations: locations in the input text, if found | list of strings or None - 5. loc_lemmas: lemmatized versions of the locations | list of strings or None - 6. loc_spans: the index of the start and end characters of the identified - locations in the input text string | tuple - 7. input_order: the index of the inserted texts. i.e. the first text is 0, the second 1 etc. - Makes it easier to reassemble the results if they're exploded | int' - 8. names: versions of the names returned by querying GeoNames | List of strins or None - 9. coord_points: long/lat coordinate points in WGS84 | list of long/lat tuples or Shapely points - 10.*id: The identifying element tied to each input text, if provided | string, int, float - OR - EUPEG (see here: https://github.com/geoai-lab/EUPEG) style json dump, with restucturing data and renaming headers to be in line. - Mostly meant for evaluation purposes. 
This option only allows one text to be processed at once (no batch processing). - - """ - assert texts, "Input missing. Expecting a (list of) strings." - - # fix if someone passes just a string - if isinstance(texts, str): - texts = [texts] - - if output.lower() == 'eupeg': - explode_df = True - - # check that ids are in proper formats and lengths - if ids: - if isinstance(ids, (str, int, float)): - ids = [ids] - assert len(texts) == len(ids), "If ids are passed, the number of ids and texts must be equal." - - - if self.verbose: - print("Starting geotagging...") - t = time.time() - - # TOPONYM RECOGNITION - tag_results = self.tagger.tag_sentences(texts, ids, explode_df=explode_df, - drop_non_locs=drop_non_locations, - filter_toponyms=filter_toponyms, - entity_tags=entity_tags) - - if self.verbose: - successfuls = tag_results['locations_found'].tolist() - print("Finished geotagging after", round(time.time()-t, 2),"s.", successfuls.count(True), "location hits found.") - print("Starting geocoding...") - - # TOPONYM RESOLVING - geocode_results = self.coder.geocode_batch(tag_results, shp_points=False, - exploded=explode_df) - - - if self.verbose: - print("Finished geocoding, returning output.") - print("Total elapsed time:", round(time.time()-t, 2),"s") - - if output.lower() == 'eupeg': - return create_eupeg_json(geocode_results) - else: - return geocode_results diff --git a/finger/location_coder.py b/finger/location_coder.py deleted file mode 100644 index db1965a..0000000 --- a/finger/location_coder.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 24 18:53:37 2021 - -@author: Tatu Leppämäki -""" -import geocoder.geonames as gn -#import pandas as pd -""" -try: - from shapely.geometry import Point -except (ImportError, FileNotFoundError) as e: - print("Unable to import Shapely. The geoparser works, but exporting to Shapely points is unavailable.") -""" -class location_coder: - - def __init__(self, gn_username=""): - """ - A geocoder, which currently accepts a Pandas dataframe (must be of certain - format, which mostly makes this usable as part of a geoparser pipeline) - and outputs a dataframe. The following columns are appended to the input df: - - 1. gn_names: versions of the locations returned by querying GeoNames | List of strings or None - 2. gn_points: long/lat coordinate points in WGS84 | list of long/lat tuples or Shapely points - - TO RUN THIS GEOCODER, YOU CURRENTLY NEED A GEONAMES API KEY. The API key - can be acquired simply by creating an account in https://www.geonames.org/ - Pass your account name as gn_username parameter. - """ - - self.username_count = 0 - - if isinstance(gn_username, (list, tuple, set)): - self.username=gn_username[self.username_count] - self.username_list = gn_username - #self.username_list_flag = True - else: - self.username=gn_username - self.username_list = [gn_username] - #self.username_list_flag = False - - self.username=gn_username - - assert self.username, "GeoNames API key (username) must be provided for the geocoder." - - test_result = gn("London", key=self.username) - assert test_result.ok, "Geocoding failed. Did you enter a valid GeoNames API key?" - - def geocode_batch(self, locations, input_type="df", shp_points=False, exploded=False): - """ - Applies geocoding to the lemmatized locations in the input dataframe. 
- """ - - - locations['names'] = None - locations['coord_points'] = None - - self.shp_points = shp_points - - self.exploded = exploded - - self.geocoded_count = 0 - - self.username_count = 0 - - - locations = locations.apply(self.geocode_set, axis=1) - - return locations - - - def geocode_set(self, row): - """ - Geocodes input Pandas series (rows). - """ - - # if locs present, continue. otherwise do nothing - if row['locations_found']: - loc_coord_points = [] - loc_names = [] - - # fixes the problem of the next step expecting a list as an input - if self.exploded: - lemma = row['loc_lemmas'] - lemma_list = [] - lemma_list.append(lemma) - row['loc_lemmas'] = lemma_list - - - for loc in row['loc_lemmas']: - #query geonames - gn_result = gn(loc, key=self.username) - # for every query, add one to the count - self.geocoded_count += 1 - # if succesful, add the name of the place in GN and coordinates - if gn_result.ok: - loc_coord_points.append(self.form_point(gn_result)) - loc_names.append(gn_result.address) - else: - loc_coord_points.append(None) - loc_names.append(None) - - # if no error present, continue as normal - if isinstance(gn_result.error, int): - pass - # if the system throws an error, switch the username or warn the user - elif ("the hourly limit of 1000 credits") in gn_result.error: - switched = self.switch_username() - if switched: - print("\nUsername switched to username no.", str(self.username_count+1), "\n") - else: - print("\nHourly rate limit exceeded and no more GN usernames left. Rest of queries will fail.\n") - self.geocoded_count = 0 - - if all(place==None for place in loc_names): - loc_coord_points = None - loc_names = None - - row['names'] = loc_names - row['coord_points'] = loc_coord_points - - # if count nears 1000, i.e. the hourly rate limit of a GN account - # is filling, try to switch the account - - - return row - else: - return row - - def switch_username(self): - # if there are unused usernames left on the list, - if self.username_count+1 < len(self.username_list): - self.username_count += 1 - self.username=self.username_list[self.username_count] - return True - else: - return False - - def form_point(self, gn_result): - if self.shp_points: - return Point(float(gn_result.lng), float(gn_result.lat)) - else: - return (gn_result.lng, gn_result.lat) diff --git a/finger/location_tagger.py b/finger/location_tagger.py deleted file mode 100644 index d8facca..0000000 --- a/finger/location_tagger.py +++ /dev/null @@ -1,165 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Created on Wed Mar 24 18:51:53 2021 - -@author: Tatu Leppämäki -""" - - -import spacy - - -class location_tagger: - """ - This class initiates a Finnish NER tagger using Spacy. - - A NER tagger object can be used to tag location mentions in input texts. It accepts list of strings - as input and outputs a Pandas dataframe, which is then passed on to the geocoder. - - Parameters: - pipeline_path | String: name of the Spacy pipeline, which is called with spacy.load(). - "fi_geoparser", which is the installation name, by default, - however, a path to the files can also be provided. - - - use_gpu | Boolean: Whether the pipeline is run on the GPU (significantly faster, but often missing in - e.g. laptops) or CPU (slower but should run every time) - - output_df | Boolean: If True, the output will be a Pandas DataFrame. If False, a dictionary. - Currently, False does nothing. Left in if newer versions implement something - other than Pandas (just nested dictionaries?) 
- """ - - - def __init__(self, pipeline_path="fi_geoparser", use_gpu=True, - output_df=True): - if use_gpu: - spacy.require_gpu() - else: - spacy.require_cpu() - - self.output_df = output_df - - self.ner_pipeline = spacy.load(pipeline_path) - - - - - - def tag_sentences(self, input_texts, ids, explode_df=False, drop_non_locs=False, - filter_toponyms = True, entity_tags=['LOC'] - ): - """Input: - texts | A string or a list of input strings: The input - *ids | String, int, float or a list: Identifying element of each input, e.g. tweet id. Must be - the same length as texts - *explode_df | Boolean: Whether to have each location "hit" on separate rows in the output. Default False - *drop_non_locations | Boolean: Whether the sentences where no locations were found are - included in the output. Default False (locs are included). - *filter_toponyms | Boolean: Whether to filter out almost certain false positive toponyms. - Currently removes toponyms with length less than 2. Default True. - - Output: Pandas DF containing columns: - 1. input_text: the input sentence | String - 2. doc: Spacy doc object of the sent analysis. See https://spacy.io/api/doc | Doc - 3. locations_found: Whether locations were found in the input sent | Bool - 4. locations: locations in the input text, if found | list of strings or None - 5. loc_lemmas: lemmatized versions of the locations | list of strings or None - 6. loc_spans: the index of the start and end characters of the identified - locations in the input text string | tuple - 7. input_order: the index of the inserted texts. i.e. the first text is 0, the second 1 etc. - Makes it easier to reassemble the results if they're exploded | int' - *8. id: The identifying element tied to each input text, if provided | string, int, float - """ - assert input_texts, "No input provided. Make sure to input a list of strings." - tagged_sentences = [] - - self.explode_df = explode_df - - self.drop_non_locs = drop_non_locs - - self.filter_toponyms = filter_toponyms - - self.entity_tags = entity_tags - - # loop input sentences, gather the tagged dictionary results to a list - for sent in input_texts: - tag_results = self.tag_sentence(sent) - tagged_sentences.append(tag_results) - - return self.to_dataframe(tagged_sentences, ids) - """ - if self.output_df: - return self.to_dataframe(tagged_sentences) - else: - return tagged_sentences - """ - - def tag_sentence(self, sent): - """Input: a sentence to tag (string) - Output: a dictionary with the same variables as listed in 'tag_sentences'""" - doc = self.ner_pipeline(sent) - - # if the tagger created an output, i.e. at least one of the words in the input - # was tagged, create an output of that. 
Otherwise, return a mostly empty dict - docs = [] - locs = [] - loc_lemmas = [] - loc_spans = [] - locations_found = False - - if doc: - # gather the NER labels found to a list - labels = [ent.label_ for ent in doc.ents] - - locs = [] - # looping through the entities, collecting required information - # namely, the raw toponym text, its lemmatized form and the span as tuple - for ent in doc.ents: - if ent.label_ in self.entity_tags: - # apply filtering if requested - if self.filter_toponyms: - # length filtering - if len(ent.text)>1: - locs.append(ent.text) - loc_lemmas.append(ent.lemma_.replace("#","")) - loc_spans.append((ent.start_char, ent.end_char)) - locations_found = True - else: - locs.append(ent.text) - loc_lemmas.append(ent.lemma_.replace("#","")) - loc_spans.append((ent.start_char, ent.end_char)) - locations_found = True - docs.append(doc) - - if locations_found: - sent_results = {'input_text': sent, 'doc': doc, 'locations_found': locations_found, - 'locations': locs, 'loc_lemmas': loc_lemmas, 'loc_spans': loc_spans} - else: - sent_results = {'input_text': sent, 'doc': doc, 'locations': None, 'loc_lemmas': None, - 'loc_spans': None, 'locations_found': locations_found} - - return sent_results - - def to_dataframe(self, results, ids): - import pandas as pd - - df = pd.DataFrame(results) - - - if ids: - df['id'] = ids - - df['input_order'] = df.index - - # split the possible list contents into multiple rows - if self.explode_df: - df = df.apply(lambda x: x.explode() if x.name in ['locations', 'loc_lemmas', 'loc_spans'] else x) - if self.drop_non_locs: - return self.drop_non_locations(df) - else: - return df - - def drop_non_locations(self, df): - df = df[df['locations_found']] - return df \ No newline at end of file diff --git a/fingerGeoparser/__init__.py b/fingerGeoparser/__init__.py new file mode 100644 index 0000000..64b18de --- /dev/null +++ b/fingerGeoparser/__init__.py @@ -0,0 +1,5 @@ +# __init__.py + +#from finger.toponym_coder import * +#from finger.toponym_tagger import * +#from finger.output_formatter import * diff --git a/fingerGeoparser/geoparser.py b/fingerGeoparser/geoparser.py new file mode 100644 index 0000000..68f4377 --- /dev/null +++ b/fingerGeoparser/geoparser.py @@ -0,0 +1,188 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Mar 24 18:55:46 2021 + +@author: Tatu Leppämäki + +""" + + +from fingerGeoparser.toponym_tagger import toponym_tagger +from fingerGeoparser.toponym_coder import toponym_coder +from fingerGeoparser.output_formatter import create_eupeg_json + + +import time, asyncio, pandas as pd + +class geoparser: + """ + The geoparser handles a whole geoparsing pipeline from geotagging to geocoding. + It accepts a list of Finnish text strings as input. It then runs those texts + through a BERT-based neural linguistic and NER analysis pipeline built on Spacy. + The objective of this analysis is to find references to locations, such as + countries, towns, remarkable places etc., although the pipeline also runs general + named entity recognition and things like dependency parsing and part-of-speech tagging + on the side. Each input sentence can have zero to n locations in them. The locations are + lemmatized using the Voikko library. The first part of the geoparsing process is called (geo)tagging. + + The tagger results are gathered on a Pandas dataframe consisting of five columns, + with each analysis of a sentence on a single row. The dataframe is passed to + the (geo)coder, which attempts to return coordinate representations of the locations. 
+    Currently, it relies on a Pelias geocoding service, which is queried over HTTP.
+    If locations are found, their coordinates are returned in WGS84 (EPSG:4326) CRS,
+    along with the other attributes requested from the geocoder.
+
+    """
+
+    def __init__(self,
+                 pipeline_path="fi_geoparser",
+                 use_gpu=False,
+                 output_df=True,
+                 verbose=True,
+                 geocoder_url="http://vm5121.kaj.pouta.csc.fi:4000/v1/"):
+        """
+        Parameters:
+            pipeline_path | String: name of the Spacy pipeline, which is called with spacy.load().
+                            "fi_geoparser", which is the installation name, by default,
+                            however, a path to the files can also be provided.
+
+            use_gpu | Boolean: Whether the pipeline is run on the GPU (significantly faster, but often missing in
+                            e.g. laptops) or CPU (slower but should run every time). Default False.
+
+            output_df | Boolean: If True, the output will be a Pandas DataFrame. False does nothing currently.
+
+            verbose | Boolean: Prints progress reports. Default True.
+
+            geocoder_url : str, optional
+                URL for the Pelias geocoder instance. Default instance is maintained by the author for now and is located at "http://vm5121.kaj.pouta.csc.fi:4000/v1/".
+
+        """
+
+        self.tagger = toponym_tagger(pipeline_path, use_gpu=use_gpu)
+
+        self.coder = toponym_coder(geocoder_url)
+
+        self.verbose = verbose
+
+    def geoparse(self,
+                 texts,
+                 ids=None,
+                 explode_df=True,
+                 return_shapely_points=False,
+                 preprocess_texts=False,
+                 drop_non_locations=False,
+                 output='all',
+                 filter_toponyms=True,
+                 entity_tags=['LOC', 'FAC', 'GPE'],
+                 geocoder_columns=['coordinates', 'gid', 'layer', 'label', 'bbox'],
+                 geocoder_params=None):
+        """
+        The whole geoparsing pipeline.
+
+        Input:
+            texts | str or List[str]: A string or a list of input strings representing the text(s) to be processed.
+
+            ids | str, int, float, or List[str/int/float], optional: Identifying element of each input, e.g., tweet id.
+                            Must be the same length as texts. Default is None.
+
+            explode_df | bool, optional: Whether to have each location "hit" on separate rows in the output. Default is True.
+
+            return_shapely_points | bool, optional: Whether the coordinate points of the locations are regular tuples
+                            or Shapely points. Default is False.
+
+            preprocess_texts | bool, optional: Whether to preprocess the input texts before geoparsing. Default is False.
+
+            drop_non_locations | bool, optional: Whether the sentences where no locations were found are included in the output.
+                            Default is False (non-locs are included).
+
+            output | str, optional: What's included in the output and in what format it is. Possible values:
+                            'all': All columns listed below as a dataframe
+                            'eupeg': EUPEG style JSON dump. Default is 'all'.
+
+            filter_toponyms | bool, optional: Whether to filter out almost certain false positive toponyms.
+                            Currently removes toponyms with a length less than 2. Default is True.
+
+            entity_tags | List[str], optional: Which named entity tags to count as toponyms. Default is ['LOC', 'FAC', 'GPE'].
+
+            geocoder_columns | List[str], optional: Columns to include in the geocoder results. Default is
+                            ['coordinates', 'gid', 'layer', 'label', 'bbox'].
+            geocoder_params | Dict[str], optional: Parameters to limit the search to, for example, a certain country. Provide as {'parameter':'value'} dictionaries. For example: {'boundary.country':'FIN'} See https://github.com/pelias/documentation/blob/master/search.md for a full list of search parameters. 
+
+        Output:
+            Pandas DataFrame containing columns:
+            - input_text: the input sentence
+            - toponyms_found: Whether any toponyms were found in the input sentence.
+            - toponyms: toponyms in the input text, if found.
+            - topo_lemmas: lemmatized versions of the toponyms.
+            - topo_labels: named entity labels of the toponyms.
+            - topo_spans: the index of the start and end characters of the identified
+              toponyms in the input text string.
+            - input_order: the index of the inserted texts. i.e., the first text is 0, the second 1, etc.
+              Makes it easier to reassemble the results if they're exploded.
+            - id: the identifying element tied to each input text, if ids were provided.
+            - the geocoder columns requested with 'geocoder_columns', e.g. coordinates (long/lat in WGS84), gid, layer, label and bbox.
+
+        Returns:
+            Pandas DataFrame or dict: Depending on the 'output' parameter, either a Pandas DataFrame is returned
+            containing the geoparsing results, or a dictionary in EUPEG style JSON format.
+        """
+
+        # Validate inputs
+        if not texts:
+            raise ValueError("Input texts are missing. Expecting a string or a list of strings.")
+
+        # fix if someone passes just a string
+        if isinstance(texts, str):
+            texts = [texts]
+
+        if output.lower() == 'eupeg':
+            explode_df = True
+
+        # check that ids are in proper formats and lengths
+        if ids:
+            if isinstance(ids, (str, int, float)):
+                ids = [ids]
+            if len(ids) != len(texts):
+                raise ValueError("If ids are provided, the number of ids must be equal to the number of texts.")
+
+        if self.verbose:
+            print("Starting geotagging...")
+            t = time.time()
+
+        # TOPONYM RECOGNITION
+        tag_results = self.tagger.tag_sentences(texts, ids, explode_df=explode_df,
+                                                drop_non_locs=drop_non_locations,
+                                                filter_toponyms=filter_toponyms,
+                                                entity_tags=entity_tags,
+                                                preprocess=preprocess_texts)
+
+        if self.verbose:
+            successfuls = tag_results['toponyms_found'].tolist()
+            print("Finished geotagging after", round(time.time()-t, 2),"s.", successfuls.count(True), "location hits found.")
+            print("Starting geocoding...")
+
+        # TOPONYM RESOLVING
+        # TODO: Reimplement shp_points
+        geocode_results = asyncio.run(self.coder.geocode_toponyms(tag_results['topo_lemmas'].tolist(),
+                                                                  columns=geocoder_columns,
+                                                                  params=geocoder_params))
+
+        geocoded = pd.DataFrame(geocode_results)
+
+        tag_results = tag_results.reset_index(drop=True)
+
+        # concatenate (add the columns) from the geocoder results to the tagging results to produce the final result
+        results = pd.concat([tag_results, geocoded], axis=1)
+
+        if self.verbose:
+            print("Finished geocoding, returning output.")
+            print("Total elapsed time:", round(time.time()-t, 2),"s")
+
+        if output.lower() == 'eupeg':
+            return create_eupeg_json(results)
+        else:
+            return results
diff --git a/finger/output_formatter.py b/fingerGeoparser/output_formatter.py
similarity index 100%
rename from finger/output_formatter.py
rename to fingerGeoparser/output_formatter.py
diff --git a/fingerGeoparser/toponym_coder.py b/fingerGeoparser/toponym_coder.py
new file mode 100644
index 0000000..5ba3206
--- /dev/null
+++ b/fingerGeoparser/toponym_coder.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Mar 24 18:53:37 2021
+
+@author: Tatu Leppämäki
+"""
+
+#import pandas as pd
+import requests
+import aiohttp
+import asyncio
+from tqdm.asyncio import tqdm
+
+#try:
+#    from shapely.geometry import Point
+#except (ImportError, FileNotFoundError) as e:
+#    print("Unable to import Shapely. 
The geoparser works, but exporting to Shapely points is unavailable.")
+
+
+class toponym_coder:
+
+    def __init__(self, geocoder_url="http://vm5121.kaj.pouta.csc.fi:4000/v1/"):
+        """
+        Calls a geocoder at the defined URL and returns a dictionary of responses.
+        """
+
+        self.geocoder_url = geocoder_url
+        assert self.geocoder_url, "A valid URL pointing to a running Pelias geocoding service must be provided."
+
+        # test the service with a simple query before accepting the URL
+        params = {'text':'Kamppi'}
+        res = requests.get(geocoder_url+'search', params=params)
+        assert res.status_code == 200, f"Geocoder from url {geocoder_url} did not return all OK. The path could be faulty or the service unavailable."
+
+    async def geocode_toponyms(self, toponyms, columns=['coordinates', 'gid', 'layer', 'label', 'bbox'], params=None):
+        """Input: a list of toponyms; in default operation, these are the lemmatized toponyms recognized in the previous step.
+        'columns' selects which fields of the geocoder response are collected, and 'params' passes extra Pelias search parameters (e.g. {'boundary.country':'FIN'}).
+        Output: a dictionary with one list per requested column, for example:
+            coordinates - coordinates in WGS84 longitude-latitude format
+            label - a textual description of the toponym as returned by the geocoder
+            gid - a unique label that internally identifies the location. These are not stable and can change as the data in the geocoder is updated.
+            bbox - the bounding box of the location, if available."""
+
+        lists = {key: list() for key in columns}
+
+        responses = await self.batch_get(toponyms, params=params)
+
+        for response in responses:
+            # for each response, check if the geocoder returned something (if it failed, it will not have 'features'). NB! The status will still be 200 for empty responses
+            if response and response['features']:
+
+                for key in lists.keys():
+                    # loop through the requested columns, append values
+                    # because the keys may not be at the base level, I need to do this clumsy hardcode for acquiring the correct values
+
+                    # related to geometry
+                    if key in ('type', 'coordinates'):
+                        lists[key].append(response['features'][0]['geometry'][key])
+                    # if not, it's probably at the properties level
+                    elif key != 'bbox':
+                        lists[key].append(response['features'][0]['properties'][key])
+                    # else a bounding box, which is at the base level
+                    else:
+                        lists[key].append(response['features'][0][key])
+            else:
+                # if nothing was returned, append None to all lists
+                for this_list in lists.values():
+                    this_list.append(None)
+
+        return lists
+
+    async def batch_get(self, topos, params=None):
+        """This function forms the query URLs, which are then asynchronously requested from the geocoder."""
+        # avoid badgering the server with too many requests at once -> leads to http errors
+        # this limits the concurrent connections to 15 (default 100)
+        connector = aiohttp.TCPConnector(limit=15)
+
+        async with aiohttp.ClientSession(connector=connector) as session:
+            tasks = []
+            for topo in topos:
+                if topo:
+                    # if there's a lemmatized toponym, try searching with that. If not, return an empty string
+                    url = f"{self.geocoder_url}search"
+                    if params:
+                        url_params = {'text': topo, **params}
+                    else:
+                        url_params = {'text': topo}
+                    task = asyncio.ensure_future(self.get_response(session, url, params=url_params))
+                    tasks.append(task)
+                else:
+                    task = asyncio.ensure_future(self.return_none())
+                    tasks.append(task)
+            # tqdm.gather works as a wrapper for asyncio.gather: it adds a progress bar
+            responses = await tqdm.gather(*tasks, desc="Geocoding...")
+
+        return responses
+
+    async def get_response(self, session, url, params=None):
+        """Setup one request. 
If the response code is something other than 200, print the error code.""" + async with session.get(url, params=params) as response: + if response.status != 200: + print(url, response.status) + return await response.json() + + async def return_none(self): + return "" +""" + def form_point(self, gn_result): + if self.shp_points: + return Point(float(gn_result.lng), float(gn_result.lat)) + else: + return (gn_result.lng, gn_result.lat) +""" diff --git a/fingerGeoparser/toponym_tagger.py b/fingerGeoparser/toponym_tagger.py new file mode 100644 index 0000000..10df125 --- /dev/null +++ b/fingerGeoparser/toponym_tagger.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed Mar 24 18:51:53 2021 + +@author: Tatu Leppämäki +""" + + +import spacy, pandas as pd, re + +from tqdm import tqdm + + +class toponym_tagger: + """ + This class initiates a Finnish NER tagger using Spacy. + + A NER tagger object can be used to tag location mentions in input texts. It accepts list of strings + as input and outputs a Pandas dataframe, which is then passed on to the geocoder. + + Parameters: + pipeline_path | String: name of the Spacy pipeline, which is called with spacy.load(). + "fi_geoparser", which is the installation name, by default, + however, a path to the files can also be provided. + + + use_gpu | Boolean: Whether the pipeline is run on the GPU (significantly faster, but often missing in + e.g. laptops) or CPU (slower but should run every time) + + output_df | Boolean: If True, the output will be a Pandas DataFrame. If False, a dictionary. + Currently, False does nothing. Left in if newer versions implement something + other than Pandas (just nested dictionaries?) + """ + + + def __init__(self, pipeline_path="fi_geoparser", use_gpu=True, + output_df=True): + if use_gpu: + resp = spacy.prefer_gpu() + + if use_gpu and not resp: + print("Using GPU failed, falling back on CPU...") + + self.output_df = output_df + + self.ner_pipeline = spacy.load(pipeline_path) + + def tag_sentences(self, input_texts, ids, explode_df=False, drop_non_locs=False, preprocess=False, + filter_toponyms = True, entity_tags=['LOC', 'FAC', 'GPE'] + ): + """Input: + texts | A string or a list of input strings: The input + *ids | String, int, float or a list: Identifying element of each input, e.g. tweet id. Must be + the same length as texts + *explode_df | Boolean: Whether to have each location "hit" on separate rows in the output. Default False + *drop_non_locations | Boolean: Whether the sentences where no locations were found are + included in the output. Default False. + *preprocess | Boolean: Whether to remove noise from the input texts, such as @-mentions and urls. + *filter_toponyms | Boolean: Whether to filter out almost certain false positive toponyms. + Currently removes toponyms with length less than 2. Default True. + + Output: Pandas DF containing columns: + 1. input_text: the input sentence | String + 2. doc: Spacy doc object of the sent analysis. See https://spacy.io/api/doc | Doc + 3. toponyms_found: Whether locations were found in the input sent | Bool + 4. locations: locations in the input text, if found | list of strings or None + 5. topo_lemmas: lemmatized versions of the locations | list of strings or None + 6. topo_spans: the index of the start and end characters of the identified + locations in the input text string | tuple + 7. input_order: the index of the inserted texts. i.e. the first text is 0, the second 1 etc. + Makes it easier to reassemble the results if they're exploded | int' + *8. 
id: The identifying element tied to each input text, if provided | string, int, float + """ + assert input_texts, "No input provided. Make sure to input a list of strings." + tagged_sentences = [] + + self.explode_df = explode_df + + self.drop_non_locs = drop_non_locs + + self.filter_toponyms = filter_toponyms + + self.entity_tags = entity_tags + + # apply preprocessing step, if requested + if preprocess: + input_texts = [self.preprocess_sent(sent) for sent in tqdm(input_texts, desc="Preprocessing input...")] + + # run spacy pipeline + tag_results = list(tqdm(self.ner_pipeline.pipe(input_texts), total=len(input_texts), desc="Running toponym recognition...")) + + # gather the wanted features from spacy doc objects into a dictionary of lists + tagged_sentences = [self.get_features(sent) for sent in tag_results] + + return self.to_dataframe(tagged_sentences, ids) + """ + if self.output_df: + return self.to_dataframe(tagged_sentences) + else: + return tagged_sentences + """ + + def get_features(self, doc): + """Input: a sentence to tag (string) + Output: a dictionary with the same variables as listed in 'tag_sentences'""" + #doc = self.ner_pipeline(sent) + + # if the tagger created an output, i.e. at least one of the words in the input + # was tagged, create an output of that. Otherwise, return a mostly empty dict + #docs = [] + toponyms = [] + topo_labels = [] + topo_lemmas = [] + topo_spans = [] + toponyms_found = False + + # gather the NER labels found to a list + labels = [ent.label_ for ent in doc.ents] + + + # looping through the entities, collecting required information + # namely, the raw toponym text, its lemmatized form and the span as tuple + for ent in doc.ents: + if ent.label_ in self.entity_tags: + # apply filtering if requested + if self.filter_toponyms: + # length filtering + if len(ent.text)>1: + toponyms.append(ent.text) + topo_labels.append(ent.label_) + + # remove hashtags, which mark word boundaries in compound words + lemma = ent.lemma_.replace("#","") + # in addition, remove punctuation characters, if they were captured by the tagger + # included are various quotation marks + lemma = re.sub(r'[.?!;:\'"“”‘’]', '', lemma) + + # add lemmatized versions of the toponyms to list + topo_lemmas.append(lemma) + # spans; character start and end locations + topo_spans.append((ent.start_char, ent.end_char)) + toponyms_found = True + else: + toponyms.append(ent.text) + topo_labels.append(ent.label_) + topo_lemmas.append(ent.lemma_.replace("#","")) + topo_spans.append((ent.start_char, ent.end_char)) + toponyms_found = True + #docs.append(doc) + + if toponyms_found: + doc_results = {'input_text': doc.text, 'toponyms': toponyms, 'topo_lemmas': topo_lemmas, + 'topo_labels':topo_labels, 'topo_spans': topo_spans,'toponyms_found': toponyms_found} + else: + doc_results = {'input_text': doc.text, 'locations': None, 'topo_lemmas': None, + 'topo_labels':None,'topo_spans': None, 'toponyms_found': toponyms_found} + + return doc_results + + def preprocess_sent(self, sent): + """Optionally cleans up noise (especially prominent in social media posts): removes emojis (TODO), mentions (@xyz), hashtags (#, but not the content) and URLs. + Based on work by Hiippala et al. 2020: Mapping the languages of Twitter in Finland: richness and diversity in space and time. 
See: https://zenodo.org/record/4279402
+        """
+        # Remove all mentions (@) in the input
+        sent = re.sub(r'@\S+ *', '', sent)
+
+        # remove hashes from hashtags
+        sent = sent.replace('#', '')
+
+        # remove old school heart emojis <3
+        sent = sent.replace('<3', '')
+
+        # remove ampersands, both the HTML entity (&amp;) and the bare character (&)
+        sent = re.sub(r'&amp;|&', '', sent)
+
+        # remove URLs: i.e. remove everything that follows http(s) until a whitespace
+        sent = re.sub(r'http[s]?://\S+', "", sent)
+
+        return sent
+
+    def to_dataframe(self, results, ids):
+        df = pd.DataFrame(results)
+
+        if ids:
+            df['id'] = ids
+
+        df['input_order'] = df.index
+
+        # split the possible list contents into multiple rows
+        if self.explode_df:
+            df = df.apply(lambda x: x.explode() if x.name in ['toponyms', 'topo_labels', 'topo_lemmas', 'topo_spans'] else x)
+        if self.drop_non_locs:
+            return self.drop_non_locations(df)
+        else:
+            return df
+
+    def drop_non_locations(self, df):
+        """Remove input strings / rows where the tagger did not find any toponyms."""
+        df = df[df['toponyms_found']]
+        return df
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ef6e9c4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+pandas
+spacy==3.5.2
+spacy-transformers==1.1.9
+tqdm
+aiohttp
diff --git a/setup.cfg b/setup.cfg
index 61ea5db..ffb148a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = fingerGeoparser
-version = 0.1.0
+version = 0.2.0
 author = Tatu Leppämäki
 author_email = tatu.leppamaki@helsinki.fi
 url = https://github.com/Tadusko/fi-geoparser
@@ -13,11 +13,12 @@ license = MIT License
 [options]
 install_requires =
     pandas
-    click==7.1.2
-    spacy==3.0.8
-    spacy_transformers==1.0.0
-    libvoikko
-    geocoder
-    voikko
+    spacy==3.5.2
+    spacy_transformers==1.1.9
+    tqdm
+    aiohttp
+
+
 package_dir =
-    = finger
+    = .
+
diff --git a/tests/test_geoparsing.py b/tests/test_geoparsing.py
new file mode 100644
index 0000000..5c88ff5
--- /dev/null
+++ b/tests/test_geoparsing.py
@@ -0,0 +1,14 @@
+import sys
+print(sys.path)
+
+from fingerGeoparser import geoparser
+
+def test_constructor():
+    gp = geoparser.geoparser(pipeline_path="fi_core_news_sm")
+    assert isinstance(gp, geoparser.geoparser)
+
+def test_method():
+    gp = geoparser.geoparser(pipeline_path="fi_core_news_sm")
+    res = gp.geoparse(["Helsinki on kaunis tänään", "Paris Hilton mokasi."])
+
+    #assert isinstance(gp, geoparser)
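
Usage note (not part of the patch): a minimal sketch of how the renamed package and the new Pelias-backed pipeline fit together, assuming the fi_geoparser model is installed and the default geocoder instance from this diff is reachable. The example sentences, ids and the boundary.country filter are illustrative only.

    from fingerGeoparser import geoparser

    # Build the pipeline; both defaults below are the ones introduced in this diff.
    gp = geoparser.geoparser(pipeline_path="fi_geoparser",
                             use_gpu=False,
                             geocoder_url="http://vm5121.kaj.pouta.csc.fi:4000/v1/")

    # Geoparse two Finnish sentences; geocoder_params is meant to be forwarded to the
    # Pelias search, here restricting hits to Finland.
    results = gp.geoparse(["Helsinki on kaunis tänään.", "Matkustan huomenna Turkuun."],
                          ids=[1, 2],
                          geocoder_params={'boundary.country': 'FIN'})

    # With explode_df=True (the default), there is one row per recognized toponym,
    # combining tagger columns with the requested geocoder columns.
    print(results[['input_text', 'topo_lemmas', 'label', 'coordinates']])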
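
For reference, geocode_toponyms() in toponym_coder.py reads its columns from the first feature of each Pelias /v1/search response. A sketch of that access pattern on an abridged, made-up response; the field values are illustrative, not real geocoder output.

    # Abridged shape of one Pelias search response, as consumed by geocode_toponyms().
    response = {
        "features": [
            {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [24.94, 60.17]},
                "properties": {
                    "gid": "whosonfirst:locality:101748417",
                    "layer": "locality",
                    "label": "Helsinki, Finland",
                },
                "bbox": [24.82, 60.14, 25.25, 60.29],
            }
        ]
    }

    first = response["features"][0]
    # 'type' and 'coordinates' are taken from the geometry...
    print(first["geometry"]["coordinates"])   # [24.94, 60.17]
    # ...'bbox' from the feature itself...
    print(first["bbox"])                      # [24.82, 60.14, 25.25, 60.29]
    # ...and everything else from the properties.
    print(first["properties"]["label"])       # Helsinki, Finland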
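
The new test_method in tests/test_geoparsing.py leaves its assertion commented out. A possible tightening, sketched under the same assumptions as the CI workflow (fi_core_news_sm installed, the default Pelias instance reachable); the test name and assertions are suggestions, not part of the patch.

    import pandas as pd

    from fingerGeoparser import geoparser

    def test_geoparse_returns_dataframe():
        gp = geoparser.geoparser(pipeline_path="fi_core_news_sm")
        res = gp.geoparse(["Helsinki on kaunis tänään", "Paris Hilton mokasi."])

        # The default output is a DataFrame with one row per toponym hit.
        assert isinstance(res, pd.DataFrame)
        assert 'toponyms_found' in res.columns
        # Assumes the small model tags "Helsinki"; drop this line if it proves flaky.
        assert res['toponyms_found'].any()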