diff --git a/corenlp.py b/corenlp.py index 753e51c..3c6479a 100644 --- a/corenlp.py +++ b/corenlp.py @@ -69,10 +69,15 @@ def parse_parser_results(text): interface of the CoreNLP tools. Takes a string of the parser results and then returns a Python list of dictionaries, one for each parsed sentence. + + Parameters + ---------- + text : str + UTF-8 encoded string of CoreNLP parser results """ results = {"sentences": []} state = STATE_START - for line in text.encode('utf-8').split("\n"): + for line in text.split("\n"): line = line.strip() if line.startswith("Sentence #"): @@ -132,25 +137,26 @@ def __init__(self, corenlp_path=None): Checks the location of the jar files. Spawns the server as a process. """ + here = os.path.abspath(os.path.dirname(__file__)) jars = ["stanford-corenlp-3.4.1.jar", "stanford-corenlp-3.4.1-models.jar", "joda-time.jar", "xom.jar", "jollyday.jar"] - + # if CoreNLP libraries are in a different directory, # change the corenlp_path variable to point to them if not corenlp_path: - corenlp_path = "./stanford-corenlp-full-2014-08-27/" - + corenlp_path = os.path.join(here, "stanford-corenlp-full-2014-08-27") + java_path = "java" classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP" # include the properties file, so you can change defaults # but any changes in output format will break parse_parser_results() - props = "-props default.properties" - + props = "-props {}".format(os.path.join(here, 'default.properties')) + # add and check classpaths - jars = [corenlp_path + jar for jar in jars] + jars = [os.path.join(corenlp_path, jar) for jar in jars] for jar in jars: if not os.path.exists(jar): logger.error("Error! 
Cannot locate %s" % jar)
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..711fbe8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+"""Packaging script for the Python wrapper around Stanford CoreNLP v3.4.1."""
+
+import sys
+import os
+try:
+    from setuptools import setup
+except ImportError:
+    # fall back to distutils when setuptools is not installed
+    from distutils.core import setup
+
+here = os.path.abspath(os.path.dirname(__file__))
+# use a context manager so the README file handle is closed after reading
+with open(os.path.join(here, 'README.md')) as readme_file:
+    README = readme_file.read()
+
+setup(name='corenlp',
+      version='3.4.1',
+      description='Python wrapper for Stanford CoreNLP tools v3.4.1',
+      long_description=README,
+      author='Dustin Smith',
+      author_email='dustin@media.mit.edu',
+      url='https://github.com/dasmith/stanford-corenlp-python',
+      py_modules=['client', 'corenlp', 'jsonrpc', 'progressbar'],
+      license='GPL v2+',
+      install_requires=['pexpect', 'unidecode'],
+      # non-Python files to ship: the properties file and the CoreNLP jars
+      # NOTE(review): corenlp.py looks these up next to the module, while
+      # data_files targets are relative to the install prefix -- confirm
+      data_files=[
+          ('.', ['default.properties']),
+          ('stanford-corenlp-full-2014-08-27',
+           ['stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1.jar',
+            'stanford-corenlp-full-2014-08-27/stanford-corenlp-3.4.1-models.jar',
+            'stanford-corenlp-full-2014-08-27/joda-time.jar',
+            'stanford-corenlp-full-2014-08-27/xom.jar',
+            'stanford-corenlp-full-2014-08-27/jollyday.jar',
+            ]),
+      ],
+      )