From e5a79645959971bae5a37f23946aa2f2c3d7cb6b Mon Sep 17 00:00:00 2001 From: kratu92 Date: Fri, 10 Jun 2022 12:31:58 +0200 Subject: [PATCH 1/4] Configurable Google Speech to Text automatic punctuation --- README.md | 6 +++++ .../GoogleCloudTranscriptionService.java | 23 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/README.md b/README.md index bac2c60e5..0057068a6 100644 --- a/README.md +++ b/README.md @@ -126,6 +126,12 @@ sudo apt-get update && sudo apt-get install google-cloud-sdk google-cloud-sdk-ap gcloud init gcloud auth application-default login ``` +It is possible to enable or disable the functionality of Google Cloud Speech to Text. +By default, the properties +`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION=false` +in +`jigasi-home/sip-communicator.properties` +To change this, simply set the desired property to `true` or `false`. Vosk configuration ================== diff --git a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java index 3894ae4d7..e176fd4dc 100644 --- a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java +++ b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java @@ -184,11 +184,23 @@ public class GoogleCloudTranscriptionService private final static String P_NAME_USE_VIDEO_MODEL = "org.jitsi.jigasi.transcription.USE_VIDEO_MODEL"; + /** + * Property name to determine whether the Google Speech API should get + * automatic punctuation + */ + private final static String P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION + = "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION"; + /** * The default value for the property USE_VIDEO_MODEL */ private final static boolean DEFAULT_VALUE_USE_VIDEO_MODEL = false; + /** + * The default value for the property ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION + */ + private final static boolean 
DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION = false; + /** * Check whether the given string contains a supported language tag * @@ -224,6 +236,11 @@ private static void validateLanguageTag(String tag) */ private boolean useVideoModel; + /** + * Whether to get automatic punctuation + */ + private boolean enableAutomaticPunctuation; + /** * Creates the RecognitionConfig the Google service uses based * on the TranscriptionRequest @@ -263,6 +280,9 @@ private RecognitionConfig getRecognitionConfig(TranscriptionRequest request) builder.setModel("video"); } + // set punctuation mode + builder.setEnableAutomaticPunctuation(enableAutomaticPunctuation); + // set the Language tag String languageTag = request.getLocale().toLanguageTag(); validateLanguageTag(languageTag); @@ -284,6 +304,9 @@ public GoogleCloudTranscriptionService() { useVideoModel = JigasiBundleActivator.getConfigurationService() .getBoolean(P_NAME_USE_VIDEO_MODEL, DEFAULT_VALUE_USE_VIDEO_MODEL); + + enableAutomaticPunctuation = JigasiBundleActivator.getConfigurationService() + .getBoolean(P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION, DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION); } /** From f4fe59dc902b355a5fcb41186627b06506204cf9 Mon Sep 17 00:00:00 2001 From: kratu92 Date: Fri, 10 Jun 2022 12:49:31 +0200 Subject: [PATCH 2/4] Configurable Google Speech to Text transcription profanity filter --- README.md | 2 ++ .../GoogleCloudTranscriptionService.java | 23 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/README.md b/README.md index 0057068a6..28f83caf2 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,8 @@ gcloud auth application-default login It is possible to enable or disable the functionality of Google Cloud Speech to Text. 
By default, the properties `org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION=false` +and +`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER=false` in `jigasi-home/sip-communicator.properties` To change this, simply set the desired property to `true` or `false`. diff --git a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java index e176fd4dc..67bb28176 100644 --- a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java +++ b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java @@ -191,6 +191,13 @@ public class GoogleCloudTranscriptionService private final static String P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION = "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION"; + /** + * Property name to determine whether the Google Speech API should censor + * profane words + */ + private final static String P_NAME_ENABLE_GOOGLE_PROFANITY_FILTER + = "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER"; + /** * The default value for the property USE_VIDEO_MODEL */ @@ -201,6 +208,11 @@ public class GoogleCloudTranscriptionService */ private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION = false; + /** + * The default value for the property ENABLE_GOOGLE_PROFANITY_FILTER + */ + private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_PROFANITY_FILTER = false; + /** * Check whether the given string contains a supported language tag * @@ -241,6 +253,11 @@ private static void validateLanguageTag(String tag) */ private boolean enableAutomaticPunctuation; + /** + * Whether to enable the profanity filter + */ + private boolean enableProfanityFilter; + /** * Creates the RecognitionConfig the Google service uses based * on the TranscriptionRequest @@ -283,6 +300,9 @@ private RecognitionConfig getRecognitionConfig(TranscriptionRequest request) 
// set punctuation mode builder.setEnableAutomaticPunctuation(enableAutomaticPunctuation); + // set profanity filter + builder.setProfanityFilter(enableProfanityFilter); + // set the Language tag String languageTag = request.getLocale().toLanguageTag(); validateLanguageTag(languageTag); @@ -307,6 +327,9 @@ public GoogleCloudTranscriptionService() enableAutomaticPunctuation = JigasiBundleActivator.getConfigurationService() .getBoolean(P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION, DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION); + + enableProfanityFilter = JigasiBundleActivator.getConfigurationService() + .getBoolean(P_NAME_ENABLE_GOOGLE_PROFANITY_FILTER, DEFAULT_VALUE_ENABLE_GOOGLE_PROFANITY_FILTER); } /** From 40d7fb20cf86f5e2d08a8453e45d9111255ae104 Mon Sep 17 00:00:00 2001 From: kratu92 Date: Fri, 10 Jun 2022 13:05:42 +0200 Subject: [PATCH 3/4] Configurable Google Speech to Text interim results --- README.md | 3 ++ .../GoogleCloudTranscriptionService.java | 33 +++++++++++++++++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 28f83caf2..d5dc23b99 100644 --- a/README.md +++ b/README.md @@ -131,8 +131,11 @@ By default, the properties `org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION=false` and `org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER=false` +and +`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_INTERIM_RESULTS=false` in `jigasi-home/sip-communicator.properties` +disable automatic punctuation, profanity filter and interim results for the transcription. To change this, simply set the desired property to `true` or `false`. 
Vosk configuration diff --git a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java index 67bb28176..7d1ef65b7 100644 --- a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java +++ b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java @@ -184,6 +184,12 @@ public class GoogleCloudTranscriptionService private final static String P_NAME_USE_VIDEO_MODEL = "org.jitsi.jigasi.transcription.USE_VIDEO_MODEL"; + /** + * Property name to determine whether to send the interim results + */ + private final static String P_NAME_ENABLE_GOOGLE_INTERIM_RESULTS + = "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_INTERIM_RESULTS"; + /** * Property name to determine whether the Google Speech API should get * automatic punctuation @@ -203,6 +209,11 @@ public class GoogleCloudTranscriptionService */ private final static boolean DEFAULT_VALUE_USE_VIDEO_MODEL = false; + /** + * The default value for the property ENABLE_GOOGLE_INTERIM_RESULTS + */ + private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_INTERIM_RESULTS = false; + /** * The default value for the property ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION */ @@ -248,6 +259,11 @@ private static void validateLanguageTag(String tag) */ private boolean useVideoModel; + /** + * Whether to send interim non-final results + */ + private boolean enableInterimResults; + /** * Whether to get automatic punctuation */ @@ -325,6 +341,9 @@ public GoogleCloudTranscriptionService() useVideoModel = JigasiBundleActivator.getConfigurationService() .getBoolean(P_NAME_USE_VIDEO_MODEL, DEFAULT_VALUE_USE_VIDEO_MODEL); + enableInterimResults = JigasiBundleActivator.getConfigurationService() + .getBoolean(P_NAME_ENABLE_GOOGLE_INTERIM_RESULTS, DEFAULT_VALUE_ENABLE_GOOGLE_INTERIM_RESULTS); + enableAutomaticPunctuation = JigasiBundleActivator.getConfigurationService() 
.getBoolean(P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION, DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION); @@ -726,7 +745,8 @@ private ApiStreamObserver createObserver( new ResponseApiStreamingObserver( this, config.getLanguageCode(), - debugName); + debugName, + enableInterimResults); // StreamingRecognitionConfig which will hold information // about the streaming session, including the RecognitionConfig @@ -907,6 +927,11 @@ private static class ResponseApiStreamingObserver */ private UUID messageID; + /** + * Whether to send interim results + */ + private Boolean enableInterimResults; + /** * Google provides multiple results per API response where the first one * contains the most stable part of the sentence and freshly transcribed @@ -926,11 +951,13 @@ private static class ResponseApiStreamingObserver */ ResponseApiStreamingObserver(RequestApiStreamObserverManager manager, String languageTag, - String debugName) + String debugName, + Boolean enableInterimResults) { this.requestManager = manager; this.languageTag = languageTag; this.debugName = debugName; + this.enableInterimResults = enableInterimResults; messageID = UUID.randomUUID(); } @@ -1064,7 +1091,7 @@ private void handleResult(StreamingRecognitionResult result) TranscriptionResult transcriptionResult = new TranscriptionResult( null, this.messageID, - !result.getIsFinal(), + !enableInterimResults && !result.getIsFinal(), this.languageTag, result.getStability(), new TranscriptionAlternative( From 886a387632314a2d4315ed4599d56e4715d38d5d Mon Sep 17 00:00:00 2001 From: kratu92 Date: Tue, 14 Jun 2022 15:54:07 +0200 Subject: [PATCH 4/4] Moved ENABLE_INTERIM_RESULTS parameter from GoogleCloudTranscriptionService to RemotePublisherTranscriptionHandler --- README.md | 9 +++-- .../GoogleCloudTranscriptionService.java | 33 ++----------------- .../RemotePublisherTranscriptionHandler.java | 22 ++++++++++++- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 
d5dc23b99..8d2bb10d2 100644 --- a/README.md +++ b/README.md @@ -131,11 +131,9 @@ By default, the properties `org.jitsi.jigasi.transcription.ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION=false` and `org.jitsi.jigasi.transcription.ENABLE_GOOGLE_PROFANITY_FILTER=false` -and -`org.jitsi.jigasi.transcription.ENABLE_GOOGLE_INTERIM_RESULTS=false` in `jigasi-home/sip-communicator.properties` -disable automatic punctuation, profanity filter and interim results for the transcription. +disable automatic punctuation and the profanity filter for the transcription. To change this, simply set the desired property to `true` or `false`. Vosk configuration @@ -217,6 +215,11 @@ XMPP account must also be set to make Jigasi be able to join a conference room. in plain text. Note that this will result in the chat being somewhat spammed. + + org.jitsi.jigasi.transcription.ENABLE_INTERIM_RESULTS + false + Whether or not to send interim non-final results. Note that interim results should be handled so that no repeated transcriptions are displayed to the user. 
+ Call control MUCs (brewery) diff --git a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java index 7d1ef65b7..67bb28176 100644 --- a/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java +++ b/src/main/java/org/jitsi/jigasi/transcription/GoogleCloudTranscriptionService.java @@ -184,12 +184,6 @@ public class GoogleCloudTranscriptionService private final static String P_NAME_USE_VIDEO_MODEL = "org.jitsi.jigasi.transcription.USE_VIDEO_MODEL"; - /** - * Property name to determine whether to send the interim results - */ - private final static String P_NAME_ENABLE_GOOGLE_INTERIM_RESULTS - = "org.jitsi.jigasi.transcription.ENABLE_GOOGLE_INTERIM_RESULTS"; - /** * Property name to determine whether the Google Speech API should get * automatic punctuation @@ -209,11 +203,6 @@ public class GoogleCloudTranscriptionService */ private final static boolean DEFAULT_VALUE_USE_VIDEO_MODEL = false; - /** - * The default value for the property ENABLE_GOOGLE_INTERIM_RESULTS - */ - private final static boolean DEFAULT_VALUE_ENABLE_GOOGLE_INTERIM_RESULTS = false; - /** * The default value for the property ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION */ @@ -259,11 +248,6 @@ private static void validateLanguageTag(String tag) */ private boolean useVideoModel; - /** - * Whether to send interim non-final results - */ - private boolean enableInterimResults; - /** * Whether to get automatic punctuation */ @@ -341,9 +325,6 @@ public GoogleCloudTranscriptionService() useVideoModel = JigasiBundleActivator.getConfigurationService() .getBoolean(P_NAME_USE_VIDEO_MODEL, DEFAULT_VALUE_USE_VIDEO_MODEL); - enableInterimResults = JigasiBundleActivator.getConfigurationService() - .getBoolean(P_NAME_ENABLE_GOOGLE_INTERIM_RESULTS, DEFAULT_VALUE_ENABLE_GOOGLE_INTERIM_RESULTS); - enableAutomaticPunctuation = JigasiBundleActivator.getConfigurationService() 
.getBoolean(P_NAME_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION, DEFAULT_VALUE_ENABLE_GOOGLE_AUTOMATIC_PUNCTUATION); @@ -745,8 +726,7 @@ private ApiStreamObserver createObserver( new ResponseApiStreamingObserver( this, config.getLanguageCode(), - debugName, - enableInterimResults); + debugName); // StreamingRecognitionConfig which will hold information // about the streaming session, including the RecognitionConfig @@ -927,11 +907,6 @@ private static class ResponseApiStreamingObserver */ private UUID messageID; - /** - * Whether to send interim results - */ - private Boolean enableInterimResults; - /** * Google provides multiple results per API response where the first one * contains the most stable part of the sentence and freshly transcribed @@ -951,13 +926,11 @@ private static class ResponseApiStreamingObserver */ ResponseApiStreamingObserver(RequestApiStreamObserverManager manager, String languageTag, - String debugName, - Boolean enableInterimResults) + String debugName) { this.requestManager = manager; this.languageTag = languageTag; this.debugName = debugName; - this.enableInterimResults = enableInterimResults; messageID = UUID.randomUUID(); } @@ -1091,7 +1064,7 @@ private void handleResult(StreamingRecognitionResult result) TranscriptionResult transcriptionResult = new TranscriptionResult( null, this.messageID, - !enableInterimResults && !result.getIsFinal(), + !result.getIsFinal(), this.languageTag, result.getStability(), new TranscriptionAlternative( diff --git a/src/main/java/org/jitsi/jigasi/transcription/RemotePublisherTranscriptionHandler.java b/src/main/java/org/jitsi/jigasi/transcription/RemotePublisherTranscriptionHandler.java index c8c975ac6..6a1f401e8 100644 --- a/src/main/java/org/jitsi/jigasi/transcription/RemotePublisherTranscriptionHandler.java +++ b/src/main/java/org/jitsi/jigasi/transcription/RemotePublisherTranscriptionHandler.java @@ -17,6 +17,7 @@ */ package org.jitsi.jigasi.transcription; +import org.jitsi.jigasi.*; import 
net.java.sip.communicator.service.protocol.*; import org.json.*; @@ -31,11 +32,27 @@ public class RemotePublisherTranscriptionHandler extends LocalJsonTranscriptHandler implements TranscriptionEventListener { + /** + * Property name to determine whether to send the interim results + */ + private final static String P_NAME_ENABLE_INTERIM_RESULTS + = "org.jitsi.jigasi.transcription.ENABLE_INTERIM_RESULTS"; + + /** + * The default value for the property ENABLE_INTERIM_RESULTS + */ + private final static boolean DEFAULT_VALUE_ENABLE_INTERIM_RESULTS = false; + /** * List of remote services to notify for transcriptions. */ private List urls = new ArrayList<>(); + /** + * Whether to send interim non-final results + */ + private boolean enableInterimResults; + /** * Constructs RemotePublisherTranscriptionHandler, initializing its config. * @@ -52,12 +69,15 @@ public RemotePublisherTranscriptionHandler(String urlsStr) { urls.add(tokens.nextToken().trim()); } + + enableInterimResults = JigasiBundleActivator.getConfigurationService() + .getBoolean(P_NAME_ENABLE_INTERIM_RESULTS, DEFAULT_VALUE_ENABLE_INTERIM_RESULTS); } @Override public void publish(ChatRoom room, TranscriptionResult result) { - if (result.isInterim()) + if (!enableInterimResults && result.isInterim()) return; JSONObject eventObject = createTranscriptionJSONObject(result);