-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
170 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Container image for the grpc-pattern service.
FROM python:2.7
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir pattern grpcio-tools==1.16.1
COPY . /app
WORKDIR /app
# Port of the default listen address in server.py ('[::]:50051').
EXPOSE 50051
CMD ["./start.sh"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# grpc-pattern as a microservice
|
||
Pattern is a great framework for Natural Language Processing for Python. | ||
|
||
gRPC is a new, efficient and effective transfer protocol, powered by Google, for creating microservices.
|
||
grpc-pattern offers Pattern as a microservice, delivered as a Docker container.
|
||
# Run it | ||
|
||
To use it, just execute: | ||
``` | ||
docker run -p 50051:50051 digitalilusion/grpc-pattern | ||
``` | ||
|
||
## Try out | ||
|
||
You can use the `grpcc` tool to try out the microservice:
|
||
``` | ||
grpcc -i -p api.proto -a localhost:50051 | ||
``` | ||
This launches a Node-based REPL interface. Let's try the parser. Use `language = 1` for English, `language = 2` for Spanish, and so on (see `api.proto` for the full list of language options).
|
||
``` | ||
client.parse({'language': 1, 'text': 'The cat is over the roof. And the dog?'}, pr) | ||
``` | ||
|
||
And this is the result: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
syntax = "proto3"; | ||
|
||
package api; | ||
|
||
// Parsing service: a single unary RPC that takes a ParseRequest and
// returns a ParseResponse.
service API {
  rpc parse (ParseRequest) returns (ParseResponse);
}
|
||
// Language of the text to parse.
enum Language {
  // Zero value, i.e. what a request carries when no language is set.
  NONE = 0;
  EN = 1;
  ES = 2;
  DE = 3;
  FR = 4;
  IT = 5;
  NL = 6;
}
|
||
// Input of the parse RPC.
message ParseRequest {
  // Language the text is written in.
  Language language = 1;
  // Text to be parsed.
  string text = 2;
}
|
||
// Output of the parse RPC.
message ParseResponse {
  // Success flag for the request.
  bool isOk = 1;
  // Explanation accompanying a failed request.
  string reason = 2;
  // Parsed sentences of the request text.
  repeated Sentence sentences = 3;
}
|
||
// One parsed sentence.
message Sentence {
  // Words of the sentence.
  repeated Word words = 1;
  // Chunks (groups of words) found in the sentence.
  repeated Chunk chunks = 2;
}
|
||
// A single word of a sentence or chunk.
message Word {
  // The word as a string.
  string text = 1;
  // Type assigned to the word by the parser
  // (presumably its part-of-speech tag; confirm against Pattern).
  string type = 2;
}
|
||
// A group of words within a sentence.
message Chunk {
  // Type assigned to the chunk by the parser.
  string type = 1;
  // Words that make up the chunk.
  repeated Word words = 2;
}
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# -*- coding: utf-8 -*- | ||
import os | ||
import time | ||
import logging | ||
from concurrent import futures | ||
|
||
from google.protobuf import json_format | ||
import grpc | ||
|
||
from api import api_pb2_grpc, api_pb2 | ||
|
||
|
||
# Listen address (host:port) used when GRPC_PORT is not set in the environment
DEFAULT_GRPC_PORT = '[::]:50051'

GRPC_PORT = os.getenv('GRPC_PORT', DEFAULT_GRPC_PORT)

# Suffix of the ``pattern.<code>`` module for each supported language
langCode = {
    api_pb2.EN: 'en',
    api_pb2.ES: 'es',
    api_pb2.DE: 'de',
    api_pb2.FR: 'fr',
    api_pb2.IT: 'it',
    api_pb2.NL: 'nl'
}

# Cache of imported language modules, one slot per supported language;
# every slot starts as None and is filled in by getParser() on first use
parsers = dict.fromkeys(langCode)
|
||
|
||
def getParser(lang):
    """
    Return the ``parsetree`` function for the given language.

    The matching ``pattern.<code>`` module is imported the first time a
    language is requested and kept in ``parsers`` for later calls.
    """
    module = parsers[lang]
    if module is not None:
        return module.parsetree

    code = langCode[lang]
    logging.debug('Loading parsetree for %s...' % code)
    # A non-empty fromlist makes __import__ return the submodule itself
    # instead of the top-level ``pattern`` package
    module = __import__('pattern.' + code, None, None, code)
    parsers[lang] = module
    logging.debug('loaded!')
    return module.parsetree
|
||
|
||
class APIServicer(api_pb2_grpc.APIServicer):
    """
    gRPC servicer that answers ``parse`` requests with Pattern parse trees
    """

    def parse(self, request, context):
        """
        Get the parsed tree of sentences

        ``request`` is a ParseRequest carrying ``language`` and ``text``;
        ``context`` is the gRPC call context (unused).

        Returns a ParseResponse. For a language without a parser the
        response has ``isOk=False`` and a ``reason``; otherwise it has
        ``isOk=True`` and one entry in ``sentences`` per parsed sentence,
        each with its words and its chunks.
        """
        # ``parsers`` holds exactly the languages that can be loaded, so
        # anything else (including NONE) is rejected here
        if request.language not in parsers:
            return api_pb2.ParseResponse(
                isOk=False,
                reason='Requested language is not implemented')
        parsetree = getParser(request.language)
        tree = parsetree(request.text, relations=True, lemmata=True)
        sentences = []
        for sentence in tree:
            words = [{'type': word.type, 'text': word.string}
                     for word in sentence.words]
            chunks = []
            for chunk in sentence.chunks:
                # Separate name: reusing ``words`` here would overwrite the
                # sentence-level word list with the last chunk's words
                chunk_words = [{'text': w.string, 'type': w.type}
                               for w in chunk.words]
                chunks.append({'type': chunk.type, 'words': chunk_words})
            sentences.append({'words': words, 'chunks': chunks})
        data = {'isOk': True, 'reason': None, 'sentences': sentences}
        return json_format.ParseDict(data, api_pb2.ParseResponse())
|
||
|
||
# Script entry point: serve the API until the process is interrupted.
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    api_pb2_grpc.add_APIServicer_to_server(APIServicer(), server)
    server.add_insecure_port(GRPC_PORT)
    logging.debug('Starting gRPC-Pattern service in %s...' % GRPC_PORT)
    # start() does not block, so the main thread has to stay alive itself
    server.start()
    try:
        while True:
            time.sleep(100)
    except KeyboardInterrupt:
        # Shut the server down on Ctrl-C instead of exiting with a traceback
        server.stop(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash
# Generate the gRPC Python stubs from api.proto, then run the server.

# Stop right away if stub generation fails, rather than starting a server
# that cannot import its generated modules.
set -e

python -m grpc_tools.protoc -I . --python_out=api --grpc_python_out=api api.proto

# exec replaces this shell with the Python process, so signals sent to the
# script (e.g. SIGINT) are delivered to the server itself.
exec python server.py