Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added setup.py and made library available regardless of current working directory #18

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions corenlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,15 @@ def parse_parser_results(text):
interface of the CoreNLP tools. Takes a string of the parser results
and then returns a Python list of dictionaries, one for each parsed
sentence.

Parameters
----------
text : str
UTF-8 encoded string of CoreNLP parser results
"""
results = {"sentences": []}
state = STATE_START
for line in text.encode('utf-8').split("\n"):
for line in text.split("\n"):
line = line.strip()

if line.startswith("Sentence #"):
Expand Down Expand Up @@ -132,25 +137,26 @@ def __init__(self, corenlp_path=None):
Checks the location of the jar files.
Spawns the server as a process.
"""
here = os.path.abspath(os.path.dirname(__file__))
jars = ["stanford-corenlp-3.4.1.jar",
"stanford-corenlp-3.4.1-models.jar",
"joda-time.jar",
"xom.jar",
"jollyday.jar"]

# if CoreNLP libraries are in a different directory,
# change the corenlp_path variable to point to them
if not corenlp_path:
corenlp_path = "./stanford-corenlp-full-2014-08-27/"
corenlp_path = os.path.join(here, "stanford-corenlp-full-2014-08-27")

java_path = "java"
classname = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
# include the properties file, so you can change defaults
# but any changes in output format will break parse_parser_results()
props = "-props default.properties"
props = "-props {}".format(os.path.join(here, 'default.properties'))

# add and check classpaths
jars = [corenlp_path + jar for jar in jars]
jars = [os.path.join(corenlp_path, jar) for jar in jars]
for jar in jars:
if not os.path.exists(jar):
logger.error("Error! Cannot locate %s" % jar)
Expand Down
34 changes: 34 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python

import sys
import os
try:
from setuptools import setup
except ImportError:
from distutils.core import setup

# Absolute directory containing this setup script; used to locate README.md
# regardless of the current working directory.
here = os.path.abspath(os.path.dirname(__file__))

# Read the long description inside a context manager so the file handle is
# closed deterministically instead of being left open until interpreter exit.
with open(os.path.join(here, 'README.md')) as readme_file:
    README = readme_file.read()

# Directory (relative to this script) that holds the bundled CoreNLP jars.
_CORENLP_DIR = 'stanford-corenlp-full-2014-08-27'

# Jar files shipped with the wrapper. Each file is listed exactly once; the
# original list named stanford-corenlp-3.4.1.jar twice.
_CORENLP_JARS = [
    'stanford-corenlp-3.4.1.jar',
    'stanford-corenlp-3.4.1-models.jar',
    'joda-time.jar',
    'xom.jar',
    'jollyday.jar',
]

# NOTE(review): relative data_files targets are documented as being resolved
# against the installation prefix, while corenlp.py looks for
# default.properties and the jar directory next to its own __file__.
# Confirm that the installed layout actually places these files beside the
# installed module.
setup(
    name='corenlp',
    version='3.4.1',
    description='Python wrapper for Stanford CoreNLP tools v3.4.1',
    long_description=README,
    author='Dustin Smith',
    author_email='[email protected]',
    url='https://github.com/dasmith/stanford-corenlp-python',
    py_modules=['client', 'corenlp', 'jsonrpc', 'progressbar'],
    license='GPL v2+',
    install_requires=['pexpect', 'unidecode'],
    data_files=[
        ('.', ['default.properties']),
        # Source paths in a setup script are always '/'-separated, so join
        # with '/' rather than os.path.join.
        (_CORENLP_DIR,
         ['/'.join((_CORENLP_DIR, jar)) for jar in _CORENLP_JARS]),
    ],
)