Skip to content

Commit

Permalink
JIT-12948 allow the selection of any google model
Browse files Browse the repository at this point in the history
  • Loading branch information
rpurdel committed Mar 11, 2024
1 parent 415c576 commit 38fe12e
Showing 1 changed file with 13 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -180,16 +180,15 @@ public class GoogleCloudTranscriptionService
private final static int STREAMING_SESSION_TIMEOUT_MS = 2000;

/**
* Property name to determine whether to use the Google Speech API's
* video model
* Name of the configuration property that selects which Google Speech API
* model to use
*/
private final static String P_NAME_USE_VIDEO_MODEL
= "org.jitsi.jigasi.transcription.USE_VIDEO_MODEL";
private final static String GOOGLE_MODEL
= "org.jitsi.jigasi.transcription.google_model";

/**
* The default value for the property USE_VIDEO_MODEL
* The default value for the property GOOGLE_MODEL
*/
private final static boolean DEFAULT_VALUE_USE_VIDEO_MODEL = false;
private final static String DEFAULT_VALUE_GOOGLE_MODEL = "latest_long";

/**
* Check whether the given string contains a supported language tag
Expand Down Expand Up @@ -229,10 +228,9 @@ public boolean supportsLanguageRouting()
private List<SpeechContext> speechContexts = null;

/**
* Whether to use the more expensive video model when making
* requests.
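* The name of the Google speech-to-text (STT) model set on each
* recognition config, as read from the configuration service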
*/
private boolean useVideoModel;
private final String useModel;

/**
* Creates the RecognitionConfig the Google service uses based
Expand Down Expand Up @@ -262,19 +260,10 @@ private RecognitionConfig getRecognitionConfig(TranscriptionRequest request)
"encoding");
}

// set the default model to "latest_long" instead of "default"
// https://cloud.google.com/speech-to-text/docs/transcription-model#transcription_models
// and https://cloud.google.com/speech-to-text/docs/latest-models#pricing for pricing
builder.setModel("latest_long");

// set the model to video
if (useVideoModel)
builder.setModel(useModel);
if (logger.isDebugEnabled())
{
if (logger.isDebugEnabled())
{
logger.debug("Using the more expensive video model");
}
builder.setModel("video");
logger.debug("Using model " + useModel);
}

// set the Language tag
Expand All @@ -296,8 +285,8 @@ private RecognitionConfig getRecognitionConfig(TranscriptionRequest request)
*/
public GoogleCloudTranscriptionService()
{
useVideoModel = JigasiBundleActivator.getConfigurationService()
.getBoolean(P_NAME_USE_VIDEO_MODEL, DEFAULT_VALUE_USE_VIDEO_MODEL);
useModel = JigasiBundleActivator.getConfigurationService()
.getString(GOOGLE_MODEL, DEFAULT_VALUE_GOOGLE_MODEL);
}

/**
Expand Down Expand Up @@ -704,8 +693,7 @@ private ApiStreamObserver<StreamingRecognizeRequest> createObserver(
StreamingRecognitionConfig.newBuilder()
.setConfig(config)
.setInterimResults(RETRIEVE_INTERIM_RESULTS)
.setSingleUtterance(!useVideoModel &&
SINGLE_UTTERANCE_ONLY)
.setSingleUtterance(SINGLE_UTTERANCE_ONLY)
.build();

// StreamingCallable manages sending the audio and receiving
Expand Down

0 comments on commit 38fe12e

Please sign in to comment.