@@ -82,12 +82,71 @@ def recognize_whisper_api_from_file(file_name: str, whisper_model: str):
8282 transcript = recognize_whisper_api (audio_file , whisper_model )
8383 return transcript
8484
85+
def recognize_azure_speech_to_text_from_file(file_path: str, key: str, region: str, *, languages=None):
    """
    Recognize speech from an audio file with automatic language detection.

    The recognizer is given a short list of candidate languages and picks
    the best match for the audio before transcribing a single utterance.

    Args:
        file_path (str): Path to the audio file.
        key (str): Azure Speech Service subscription key.
        region (str): Azure service region.
        languages (Sequence[str] | None): Candidate locale codes for language
            detection. Defaults to ``["en-US", "zh-CN", "hi-IN", "es-ES"]``.
            The original implementation notes that the SDK only supports
            4 candidate languages, so keep the list at or below that size.

    Returns:
        str: Transcribed text.

    Raises:
        RuntimeError: If the SDK fails, no speech is recognized, or the
            recognition is canceled.
    """
    # Default candidates: four widely spoken languages. The SDK limits the
    # number of candidates, hence four rather than a longer list.
    if languages is None:
        languages = ["en-US", "zh-CN", "hi-IN", "es-ES"]

    # Only the SDK calls live inside the try block, so the RuntimeErrors
    # raised below for specific result reasons are not re-wrapped with a
    # second, generic message.
    try:
        speech_config = speechsdk.SpeechConfig(subscription=key, region=region)
        audio_config = speechsdk.AudioConfig(filename=file_path)
        auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
            languages=list(languages)
        )
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            audio_config=audio_config,
            auto_detect_source_language_config=auto_detect_source_language_config,
        )
        result = speech_recognizer.recognize_once_async().get()
    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError(f"An error occurred during speech recognition: {e}") from e

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        detected_language = result.properties.get(
            speechsdk.PropertyId.SpeechServiceConnection_AutoDetectSourceLanguageResult,
            "Unknown",
        )
        # No exc_info here: this is not an exception context, and passing
        # exc_info=True would append a spurious "NoneType: None" traceback.
        logging.debug("Detected Language %s", detected_language)
        return result.text

    if result.reason == speechsdk.ResultReason.NoMatch:
        raise RuntimeError("No speech could be recognized from the audio.")

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speechsdk.CancellationDetails(result)
        raise RuntimeError(
            f"Speech Recognition canceled: {cancellation_details.reason}. "
            f"Error details: {cancellation_details.error_details}"
        )

    raise RuntimeError("Unknown error occurred during speech recognition.")
149+
91150
92151def speech_to_text_from_file (file_path : str ):
93152 """
0 commit comments