V1.1 (#1)

v1.1
Picovoice · Dec 24, 2018 · 4f156f5 · 4f156f5
1 parent 7ddf051
commit 4f156f5
Show file tree

Hide file tree

Showing 43 changed files with 662 additions and 415 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,4 @@
 .idea/
 binding/python/__pycache__/
+binding/python/*.pyc
 resources/porcupine/binding/python/__pycache__/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -1,13 +1,14 @@
 ## Ideas for Contributing
 
-* Adding new language/platform bindings. JavaScript, maybe? When adding a new binding please do make sure it
-is tested. Adding accompanying unit test is a great way to assure that. Take a look at
-[binding/python/](/binding/python) to find out how to unit test new bindings.
+* Adding new language/platform bindings. When adding a new binding, please make sure it is tested. Adding accompanying
+unit tests is a great way to ensure that. Take a look at [binding/python/](/binding/python) to find out how to unit test
+new bindings.
 
-* Adding new demos. Feel free to add new demos showcasing Rhino's capabilities on new platforms. Even better,
-if you have a cool application idea using Rhino feel free to add it under [demo/](/demo). If you end up making a new 
-repository for your application idea let us know and we'll be more than happy to provide a link to your project in 
-Rhino's documentation.
+* Adding new demos. Feel free to add new demos showcasing Rhino's capabilities on new platforms. Even better, if you
+have a cool application idea using Rhino feel free to add it under [demo/](/demo). If you end up making a new repository
+for your application idea let us know and we'll be more than happy to provide a link to your project in Rhino's
+documentation.
 
 * Adding tutorials. Step-by-step tutorials are a great way of sharing knowledge with the community. These are extremely
-helpful especially when some hardware setup is involved (e.g. Raspberry Pi project). These can go under [docs]() directory. 
+helpful especially when some hardware setup is involved (e.g. Raspberry Pi project). These can go under the `docs/`
+directory.
diff --git a/README.md b/README.md
diff --git a/binding/README.md b/binding/README.md
@@ -0,0 +1 @@
+If you'd like to add a binding please submit a pull request.
diff --git a/binding/python/README.md b/binding/python/README.md
@@ -0,0 +1,23 @@
+# Prerequisites
+
+Python 3.5 or higher is required to use the binding and run its accompanying unit tests.
+
+The unit test uses [PySoundFile](https://pypi.python.org/pypi/PySoundFile) for reading audio test files. It can be
+installed using
+
+```bash
+pip install pysoundfile
+```
+
+# Running Unit Tests
+
+Using command line (from the root of the repository)
+
+```bash
+python binding/python/test_rhino.py
+```
+
+# Binding Class
+
+Rhino's Python binding uses [ctypes](https://docs.python.org/3.5/library/ctypes.html) to access Rhino's C
+library. For example usage, refer to the [Rhino demo application](/demo/python/rhino_demo.py).
diff --git a/binding/python/rhino.py b/binding/python/rhino.py
@@ -20,7 +20,7 @@
 
 
 class Rhino(object):
-    """Python binding for Picovoice's Speech to Intent (a.k.a Rhino) library."""
+    """Python binding for Picovoice's Speech to Intent (a.k.a Rhino) engine."""
 
     class PicovoiceStatuses(Enum):
         """Status codes corresponding to 'pv_status_t' defined in 'include/picovoice.h'"""
@@ -31,13 +31,15 @@ class PicovoiceStatuses(Enum):
         INVALID_ARGUMENT = 3
         STOP_ITERATION = 4
         KEY_ERROR = 5
+        INVALID_STATE = 6
 
     _PICOVOICE_STATUS_TO_EXCEPTION = {
         PicovoiceStatuses.OUT_OF_MEMORY: MemoryError,
         PicovoiceStatuses.IO_ERROR: IOError,
         PicovoiceStatuses.INVALID_ARGUMENT: ValueError,
         PicovoiceStatuses.STOP_ITERATION: StopIteration,
-        PicovoiceStatuses.KEY_ERROR: KeyError
+        PicovoiceStatuses.KEY_ERROR: KeyError,
+        PicovoiceStatuses.INVALID_STATE: RuntimeError
     }
 
     class CRhino(Structure):
@@ -48,12 +50,13 @@ def __init__(self, library_path, model_file_path, context_file_path):
         Constructor.
 
         :param library_path: Absolute path to Rhino's dynamic library.
-        :param model_file_path: Absolute path to Rhino's model parameter file.
-        :param context_file_path: Absolute path to Rhino's context file.
+        :param model_file_path: Absolute path to file containing model parameters.
+        :param context_file_path: Absolute path to file containing context parameters. A context represents the set of
+        expressions (commands), intents, and intent arguments (slots) within a domain of interest.
         """
 
         if not os.path.exists(library_path):
-            raise ValueError("couldn't find library path at '%s'" % library_path)
+            raise ValueError("couldn't find library at '%s'" % library_path)
 
         library = cdll.LoadLibrary(library_path)
 
@@ -85,121 +88,144 @@ def __init__(self, library_path, model_file_path, context_file_path):
         self._is_understood_func.argtypes = [POINTER(self.CRhino), POINTER(c_bool)]
         self._is_understood_func.restype = self.PicovoiceStatuses
 
-        self._get_num_attributes_func = library.pv_rhino_get_num_attributes
-        self._get_num_attributes_func.argtypes = [POINTER(self.CRhino), POINTER(c_int)]
-        self._get_num_attributes_func.restype = self.PicovoiceStatuses
-
-        self._get_attribute_func = library.pv_rhino_get_attribute
-        self._get_attribute_func.argtypes = [POINTER(self.CRhino), c_int, POINTER(c_char_p)]
-        self._get_attribute_func.restype = self.PicovoiceStatuses
-
-        self._get_attribute_value_func = library.pv_rhino_get_attribute_value
-        self._get_attribute_value_func.argtypes = [POINTER(self.CRhino), c_char_p, POINTER(c_char_p)]
-        self._get_attribute_value_func.restype = self.PicovoiceStatuses
+        self._get_intent_func = library.pv_rhino_get_intent
+        self._get_intent_func.argtypes = [
+            POINTER(self.CRhino),
+            POINTER(c_char_p),
+            POINTER(c_int),
+            POINTER(POINTER(c_char_p)),
+            POINTER(POINTER(c_char_p))]
+        self._get_intent_func.restype = self.PicovoiceStatuses
 
         self._reset_func = library.pv_rhino_reset
         self._reset_func.argtypes = [POINTER(self.CRhino)]
         self._reset_func.restype = self.PicovoiceStatuses
 
+        context_expressions_func = library.pv_rhino_context_expressions
+        context_expressions_func.argtypes = [POINTER(self.CRhino), POINTER(c_char_p)]
+        context_expressions_func.restype = self.PicovoiceStatuses
+
+        expressions = c_char_p()
+        status = context_expressions_func(self._handle, byref(expressions))
+        if status is not self.PicovoiceStatuses.SUCCESS:
+            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('getting expressions failed')
+
+        self._context_expressions = expressions.value.decode('utf-8')
+
+        version_func = library.pv_rhino_version
+        version_func.argtypes = []
+        version_func.restype = c_char_p
+        self._version = version_func().decode('utf-8')
+
         self._frame_length = library.pv_rhino_frame_length()
 
         self._sample_rate = library.pv_sample_rate()
 
+    def delete(self):
+        """Releases resources acquired by Rhino's library."""
+
+        self._delete_func(self._handle)
+
     def process(self, pcm):
         """
-        Processes a frame of audio.
+        Processes a frame of audio and emits a flag indicating if the engine has finalized intent extraction. When
+        finalized, 'self.is_understood()' should be called to check if the command was valid
+        (is within context of interest).
 
-        :param pcm: An array (or array-like) of consecutive audio samples. For more information regarding required audio
-        properties (i.e. sample rate, number of channels encoding, and number of samples per frame) please refer to
-        'include/pv_rhino.h'.
+        :param pcm: A frame of audio samples. The number of samples per frame can be obtained from
+        'self.frame_length'. The incoming audio needs to have a sample rate equal to 'self.sample_rate' and be 16-bit
+        linearly-encoded. Furthermore, Rhino operates on single channel audio.
 
-        :return: A flag if the engine has finalized intent extraction.
+        :return: Flag indicating whether the engine has finalized intent extraction.
         """
 
-        assert len(pcm) == self.frame_length
+        if len(pcm) != self.frame_length:
+            raise ValueError("invalid frame length")
+
         is_finalized = c_bool()
         status = self._process_func(self._handle, (c_short * len(pcm))(*pcm), byref(is_finalized))
         if status is not self.PicovoiceStatuses.SUCCESS:
-            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')
+            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('processing failed')
 
         return is_finalized.value
 
     def is_understood(self):
         """
-        Indicates weather the engine understood the intent within speech command.
+        Indicates if the spoken command is valid, is within the domain of interest (context), and the engine understood
+        it.
 
-        :return: Flag indicating if the engine understood the intent.
+        :return: Flag indicating if the spoken command is valid, is within the domain of interest (context), and the
+        engine understood it.
         """
 
         is_understood = c_bool()
         status = self._is_understood_func(self._handle, byref(is_understood))
         if status is not self.PicovoiceStatuses.SUCCESS:
-            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Processing failed')
+            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('failed to verify if the spoken command is understood')
 
         return is_understood.value
 
-    def get_attributes(self):
+    def get_intent(self):
         """
-        Retrieves the attributes within the speech command.
+        Getter for the intent inferred from spoken command. The intent is presented as an intent string and a
+        dictionary mapping slots to their values. It should be called only after intent extraction is finalized and it
+        is verified that the spoken command is valid and understood via calling 'self.is_understood()'.
 
-        :return: Inferred attributes.
+        :return: Tuple of intent string and slot dictionary.
         """
 
-        num_attributes = c_int()
-        status = self._get_num_attributes_func(self._handle, byref(num_attributes))
+        intent = c_char_p()
+        num_slots = c_int()
+        slots = POINTER(c_char_p)()
+        values = POINTER(c_char_p)()
+        status = self._get_intent_func(
+            self._handle,
+            byref(intent),
+            byref(num_slots),
+            byref(slots),
+            byref(values))
         if status is not self.PicovoiceStatuses.SUCCESS:
-            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting number of attributes failed')
-
-        attributes = list()
-
-        for i in range(num_attributes.value):
-            attribute = c_char_p()
-            status = self._get_attribute_func(self._handle, i, byref(attribute))
-            if status is not self.PicovoiceStatuses.SUCCESS:
-                raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting attribute failed')
+            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('getting intent failed')
 
-            attributes.append(attribute.value.decode('utf-8'))
+        slot_values = dict()
+        for i in range(num_slots.value):
+            slot_values[slots[i].decode('utf-8')] = values[i].decode('utf-8')
 
-        return set(attributes)
+        return intent.value.decode('utf-8'), slot_values
 
-    def get_attribute_value(self, attribute):
+    def reset(self):
         """
-        Retrieves the value of a given attribute.
-
-        :param attribute: Attribute.
-        :return: Attribute's value.
+        Resets the internal state of the engine. It should be called before the engine can be used to infer intent from
+        a new stream of audio.
         """
 
-        attribute_value = c_char_p()
-        status = self._get_attribute_value_func(
-            self._handle,
-            create_string_buffer(attribute.encode('utf-8')),
-            byref(attribute_value))
+        status = self._reset_func(self._handle)
         if status is not self.PicovoiceStatuses.SUCCESS:
-            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Getting attribute value failed')
+            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('reset failed')
 
-        return attribute_value.value.decode('utf-8')
-
-    def reset(self):
-        """Reset's the internal state of Speech to Intent engine."""
+    @property
+    def context_expressions(self):
+        """
+        Getter for expressions. Each expression maps a set of spoken phrases to an intent and possibly a number of slots
+        (intent arguments).
+        """
 
-        status = self._reset_func(self._handle)
-        if status is not self.PicovoiceStatuses.SUCCESS:
-            raise self._PICOVOICE_STATUS_TO_EXCEPTION[status]('Reset failed')
+        return self._context_expressions
 
-    def delete(self):
-        """Releases resources acquired by Rhino's library."""
+    @property
+    def version(self):
+        """Getter for version string."""
 
-        self._delete_func(self._handle)
+        return self._version
 
     @property
     def frame_length(self):
-        """Number of audio samples per frame expected by C library."""
+        """Getter for length (number of audio samples) per frame."""
 
         return self._frame_length
 
     @property
     def sample_rate(self):
-        """Audio sample rate accepted by Rhino library."""
+        """Audio sample rate accepted by Picovoice."""
 
         return self._sample_rate