Skip to content

Commit

Permalink
V1.1 (#1)
Browse files Browse the repository at this point in the history
v1.1
  • Loading branch information
kenarsa authored Dec 24, 2018
1 parent 7ddf051 commit 4f156f5
Show file tree
Hide file tree
Showing 43 changed files with 662 additions and 415 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.idea/
binding/python/__pycache__/
binding/python/*.pyc
resources/porcupine/binding/python/__pycache__/
17 changes: 9 additions & 8 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
## Ideas for Contributing

* Adding new language/platform bindings. JavaScript, maybe? When adding a new binding please do make sure it
is tested. Adding accompanying unit test is a great way to assure that. Take a look at
[binding/python/](/binding/python) to find out how to unit test new bindings.
* Adding new language/platform bindings. When adding a new binding, please make sure it is tested. Adding an accompanying
unit test is a great way to ensure that. Take a look at [binding/python/](/binding/python) to find out how to unit test
new bindings.

* Adding new demos. Feel free to add new demos showcasing Rhino's capabilities on new platforms. Even better,
if you have a cool application idea using Rhino feel free to add it under [demo/](/demo). If you end up making a new
repository for your application idea let us know and we'll be more than happy to provide a link to your project in
Rhino's documentation.
* Adding new demos. Feel free to add new demos showcasing Rhino's capabilities on new platforms. Even better, if you
have a cool application idea using Rhino feel free to add it under [demo/](/demo). If you end up making a new repository
for your application idea let us know and we'll be more than happy to provide a link to your project in Rhino's
documentation.

* Adding tutorials. Step-by-step tutorials are a great way of sharing knowledge with the community. These are extremely
helpful especially when some hardware setup is involved (e.g. Raspberry Pi project). These can go under [docs]() directory.
helpful, especially when some hardware setup is involved (e.g. a Raspberry Pi project). These can go under the `docs/`
directory.
305 changes: 180 additions & 125 deletions README.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions binding/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
If you'd like to add a binding please submit a pull request.
23 changes: 23 additions & 0 deletions binding/python/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Prerequisites

Python 3.5 or higher is required to use the binding and run its accompanying unit tests.

The unit test uses [PySoundFile](https://pypi.python.org/pypi/PySoundFile) for reading audio test files. It can be
installed using

```bash
pip install pysoundfile
```

# Running Unit Tests

Using the command line (from the root of the repository):

```bash
python binding/python/test_rhino.py
```

# Binding Class

Rhino's Python binding uses [ctypes](https://docs.python.org/3.5/library/ctypes.html) to access Rhino's C
library. For example usage, refer to the [Rhino demo application](/demo/python/rhino_demo.py).
156 changes: 91 additions & 65 deletions binding/python/rhino.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


class Rhino(object):
"""Python binding for Picovoice's Speech to Intent (a.k.a Rhino) library."""
"""Python binding for Picovoice's Speech to Intent (a.k.a Rhino) engine."""

class PicovoiceStatuses(Enum):
"""Status codes corresponding to 'pv_status_t' defined in 'include/picovoice.h'"""
Expand All @@ -31,13 +31,15 @@ class PicovoiceStatuses(Enum):
INVALID_ARGUMENT = 3
STOP_ITERATION = 4
KEY_ERROR = 5
INVALID_STATE = 6

_PICOVOICE_STATUS_TO_EXCEPTION = {
PicovoiceStatuses.OUT_OF_MEMORY: MemoryError,
PicovoiceStatuses.IO_ERROR: IOError,
PicovoiceStatuses.INVALID_ARGUMENT: ValueError,
PicovoiceStatuses.STOP_ITERATION: StopIteration,
PicovoiceStatuses.KEY_ERROR: KeyError
PicovoiceStatuses.KEY_ERROR: KeyError,
PicovoiceStatuses.INVALID_STATE: RuntimeError
}

class CRhino(Structure):
Expand All @@ -48,12 +50,13 @@ def __init__(self, library_path, model_file_path, context_file_path):
Constructor.
:param library_path: Absolute path to Rhino's dynamic library.
:param model_file_path: Absolute path to Rhino's model parameter file.
:param context_file_path: Absolute path to Rhino's context file.
:param model_file_path: Absolute path to file containing model parameters.
:param context_file_path: Absolute path to file containing context parameters. A context represents the set of
expressions (commands), intents, and intent arguments (slots) within a domain of interest.
"""

if not os.path.exists(library_path):
raise ValueError("couldn't find library path at '%s'" % library_path)
raise ValueError("couldn't find library at '%s'" % library_path)

library = cdll.LoadLibrary(library_path)

Expand Down Expand Up @@ -85,121 +88,144 @@ def __init__(self, library_path, model_file_path, context_file_path):
self._is_understood_func.argtypes = [POINTER(self.CRhino), POINTER(c_bool)]
self._is_understood_func.restype = self.PicovoiceStatuses

self._get_num_attributes_func = library.pv_rhino_get_num_attributes
self._get_num_attributes_func.argtypes = [POINTER(self.CRhino), POINTER(c_int)]
self._get_num_attributes_func.restype = self.PicovoiceStatuses

self._get_attribute_func = library.pv_rhino_get_attribute
self._get_attribute_func.argtypes = [POINTER(self.CRhino), c_int, POINTER(c_char_p)]
self._get_attribute_func.restype = self.PicovoiceStatuses

self._get_attribute_value_func = library.pv_rhino_get_attribute_value
self._get_attribute_value_func.argtypes = [POINTER(self.CRhino), c_char_p, POINTER(c_char_p)]
self._get_attribute_value_func.restype = self.PicovoiceStatuses
self._get_intent_func = library.pv_rhino_get_intent
self._get_intent_func.argtypes = [
POINTER(self.CRhino),
POINTER(c_char_p),
POINTER(c_int),
POINTER(POINTER(c_char_p)),
POINTER(POINTER(c_char_p))]
self._get_intent_func.restype = self.PicovoiceStatuses

self._reset_func = library.pv_rhino_reset
self._reset_func.argtypes = [POINTER(self.CRhino)]
self._reset_func.restype = self.PicovoiceStatuses

context_expressions_func = library.pv_rhino_context_expressions
context_expressions_func.argtypes = [POINTER(self.CRhino), POINTER(c_char_p)]
context_expressions_func.restype = self.PicovoiceStatuses

expressions = c_char_p()
status = context_expressions_func(self._handle, byref(expressions))
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('getting expressions failed')

self._context_expressions = expressions.value.decode('utf-8')

version_func = library.pv_rhino_version
version_func.argtypes = []
version_func.restype = c_char_p
self._version = version_func().decode('utf-8')

self._frame_length = library.pv_rhino_frame_length()

self._sample_rate = library.pv_sample_rate()

def delete(self):
"""Releases resources acquired by Rhino's library."""

self._delete_func(self._handle)

def process(self, pcm):
"""
Processes a frame of audio.
Processes a frame of audio and emits a flag indicating if the engine has finalized intent extraction. When
finalized, 'self.is_understood()' should be called to check if the command was valid
(is within context of interest).
:param pcm: An array (or array-like) of consecutive audio samples. For more information regarding required audio
properties (i.e. sample rate, number of channels encoding, and number of samples per frame) please refer to
'include/pv_rhino.h'.
:param pcm: A frame of audio samples. The number of samples per frame can be attained by calling
'self.frame_length'. The incoming audio needs to have a sample rate equal to 'self.sample_rate' and be 16-bit
linearly-encoded. Furthermore, Rhino operates on single channel audio.
:return: A flag if the engine has finalized intent extraction.
:return: Flag indicating whether the engine has finalized intent extraction.
"""

assert len(pcm) == self.frame_length
if len(pcm) != self.frame_length:
raise ValueError("invalid frame length")

is_finalized = c_bool()
status = self._process_func(self._handle, (c_short * len(pcm))(*pcm), byref(is_finalized))
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('processing failed')

return is_finalized.value

def is_understood(self):
"""
Indicates weather the engine understood the intent within speech command.
Indicates if the spoken command is valid, is within the domain of interest (context), and the engine understood
it.
:return: Flag indicating if the engine understood the intent.
:return: Flag indicating if the spoken command is valid, is within the domain of interest (context), and the
engine understood it.
"""

is_understood = c_bool()
status = self._is_understood_func(self._handle, byref(is_understood))
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('failed to verify if the spoken command is understood')

return is_understood.value

def get_attributes(self):
def get_intent(self):
"""
Retrieves the attributes within the speech command.
Getter for the intent inferred from spoken command. The intent is presented as an intent string and a
dictionary mapping slots to their values. It should be called only after intent extraction is finalized and it
is verified that the spoken command is valid and understood via calling 'self.is_understood()'.
:return: Inferred attributes.
:return: Tuple of intent string and slot dictionary.
"""

num_attributes = c_int()
status = self._get_num_attributes_func(self._handle, byref(num_attributes))
intent = c_char_p()
num_slots = c_int()
slots = POINTER(c_char_p)()
values = POINTER(c_char_p)()
status = self._get_intent_func(
self._handle,
byref(intent),
byref(num_slots),
byref(slots),
byref(values))
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting number of attributes failed')

attributes = list()

for i in range(num_attributes.value):
attribute = c_char_p()
status = self._get_attribute_func(self._handle, i, byref(attribute))
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting attribute failed')
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('getting intent failed')

attributes.append(attribute.value.decode('utf-8'))
slot_values = dict()
for i in range(num_slots.value):
slot_values[slots[i].decode('utf-8')] = values[i].decode('utf-8')

return set(attributes)
return intent.value.decode('utf-8'), slot_values

def get_attribute_value(self, attribute):
def reset(self):
"""
Retrieves the value of a given attribute.
:param attribute: Attribute.
:return: Attribute's value.
Resets the internal state of the engine. It should be called before the engine can be used to infer intent from
a new stream of audio.
"""

attribute_value = c_char_p()
status = self._get_attribute_value_func(
self._handle,
create_string_buffer(attribute.encode('utf-8')),
byref(attribute_value))
status = self._reset_func(self._handle)
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting attribute value failed')
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('reset failed')

return attribute_value.value.decode('utf-8')

def reset(self):
"""Reset's the internal state of Speech to Intent engine."""
@property
def context_expressions(self):
"""
Getter for expressions. Each expression maps a set of spoken phrases to an intent and possibly a number of slots
(intent arguments).
"""

status = self._reset_func(self._handle)
if status is not self.PicovoiceStatuses.SUCCESS:
raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Reset failed')
return self._context_expressions

def delete(self):
"""Releases resources acquired by Rhino's library."""
@property
def version(self):
"""Getter for version string."""

self._delete_func(self._handle)
return self._version

@property
def frame_length(self):
"""Number of audio samples per frame expected by C library."""
"""Getter for length (number of audio samples) per frame."""

return self._frame_length

@property
def sample_rate(self):
"""Audio sample rate accepted by Rhino library."""
"""Audio sample rate accepted by Picovoice."""

return self._sample_rate
Loading

0 comments on commit 4f156f5

Please sign in to comment.