Последняя активность 1747790553

capitalex's Avatar — capitalex создал ревизию этого фрагмента 1747790553. К ревизии

1 file changed, 369 insertions

GoogleVoiceStreaming.cs(файл создан)

@@ -0,0 +1,369 @@
using System;
using System.Collections;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Google.Apis.Auth.OAuth2;
using Google.Cloud.Speech.V1;
using Google.Protobuf.Collections;
using Grpc.Auth;
using UnityEngine;
using UnityEngine.Events;
#if PLATFORM_ANDROID
using UnityEngine.Android;
#endif
/// <summary>
/// Streams microphone audio to Google Cloud Speech-to-Text and raises
/// UnityEvents with interim and final transcripts. Audio is captured into a
/// one-second looping ring buffer and sent in half-buffer chunks; silent
/// chunks close the gRPC stream (Google treats audio gaps as an error) and
/// the stream is reopened when speech resumes.
/// </summary>
public class GoogleVoiceStreaming : MonoBehaviour
{
    /// <summary>
    /// UnityEvent carrying a streaming recognition result so listeners can be
    /// wired up in the inspector.
    /// </summary>
    [Serializable]
    public class TranscriptEvent : UnityEvent<StreamingRecognitionResult> { }

    /// <summary>Raised for every recognition result, interim or final.</summary>
    public TranscriptEvent OnResultEvent;
    /// <summary>Raised only for results Google marks as final.</summary>
    public TranscriptEvent OnFinalResultEvent;

    // Peak absolute amplitude below which a chunk is treated as silence.
    const float SILENCE_THRESHOLD = 0.01f;
    // Streaming calls are restarted after this many seconds of recording
    // because the API limits the duration of a single stream.
    const float MAX_STREAM_SECONDS = 340f;

    AudioClip _audioClip = null;
    bool _recording = false;
    readonly int _recordingHZ = 22050;
    bool _stream_started = false;
    Task _listenTask;
    float _streamTime = 0f;

    // FIX: recognition results arrive on a gRPC background thread, where
    // invoking UnityEvents (and most Unity APIs) is not safe. The listener
    // enqueues results here and Update() delivers them on the main thread.
    readonly ConcurrentQueue<StreamingRecognitionResult> _pendingResults =
        new ConcurrentQueue<StreamingRecognitionResult>();

    RecognitionConfig _config;
    SpeechClient _speech;
    SpeechClient.StreamingRecognizeStream _streamingCall;
#if PLATFORM_ANDROID
    GameObject dialog = null;
#endif

    // NOTE: the original class also had a finalizer that called
    // _streamingCall.WriteCompleteAsync().Wait(). Blocking (and potentially
    // throwing) on the GC finalizer thread is unsafe, and OnDestroy already
    // closes the stream, so the finalizer was removed.

    /// <summary>Closes the gRPC stream when the component is torn down.</summary>
    public void OnDestroy()
    {
        if (_stream_started)
        {
            _streamingCall.WriteCompleteAsync().Wait();
            _stream_started = false;
        }
    }

    void OnDisable()
    {
        Debug.Log("Disabled");
        if (_stream_started)
        {
            EndRecording();
        }
    }

    void OnActive()
    {
    }

    public bool StartActive = false;
    public bool _active = false;

    /// <summary>
    /// Turns recognition on or off. Setting true starts the microphone and the
    /// streaming call; setting false shuts both down.
    /// </summary>
    public bool Active
    {
        set
        {
            if (value && !_recording)
            {
                _active = true;
                BeginRecognition();
            }
            else if (!value && _recording)
            {
                _active = false;
                EndRecording();
            }
        }
    }

    public void Start()
    {
#if PLATFORM_ANDROID
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
            dialog = new GameObject();
        }
#endif
        if (_speech == null)
        {
            CreateChannel();
            Active = StartActive;
        }

        Debug.Log(_speech);
    }

    public void Update()
    {
        // Deliver transcripts queued by the background listener. UnityEvents
        // are only invoked here, on the main thread.
        StreamingRecognitionResult result;
        while (_pendingResults.TryDequeue(out result))
        {
            OnResultEvent.Invoke(result);
            if (result.IsFinal)
            {
                OnFinalResultEvent.Invoke(result);
            }
        }

        // Track how long the stream has been running and restart it before it
        // exceeds MAX_STREAM_SECONDS (the API limits stream duration).
        if (_recording)
        {
            _streamTime += Time.deltaTime;
            if (_streamTime > MAX_STREAM_SECONDS)
            {
                EndRecording();
                BeginRecognition();
                _streamTime = 0f;
            }
        }
    }

    void OnGUI()
    {
#if PLATFORM_ANDROID
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            // The user denied permission to use the microphone.
            // Display a message explaining why you need it with Yes/No buttons.
            // If the user says yes then present the request again.
            dialog.AddComponent<PermissionsRationaleDialog>();
            return;
        }
        else if (dialog != null)
        {
            Destroy(dialog);
        }
#endif
        // Now you can do things with the microphone.
    }

    /// <summary>
    /// Builds the recognition config and an authenticated SpeechClient from
    /// the service-account key stored as a JSON TextAsset at Resources/key.
    /// </summary>
    public void CreateChannel()
    {
        var key = Resources.Load<TextAsset>("key");
        _config = new RecognitionConfig()
        {
            Encoding = RecognitionConfig.Types.AudioEncoding.Linear16,
            // FIX: was a hard-coded 22050 that could drift out of sync with
            // the microphone sample rate.
            SampleRateHertz = _recordingHZ,
            LanguageCode = "en",
        };

        var credentials = GoogleCredential.FromJson(key.text)
            .CreateScoped();
        var channel = new Grpc.Core.Channel(
            SpeechClient.DefaultEndpoint.Host,
            credentials.ToChannelCredentials()
        );
        _speech = SpeechClient.Create(channel);
    }

    /// <summary>
    /// Starts microphone capture and opens the streaming recognition call.
    /// </summary>
    public void BeginRecognition()
    {
        if (_speech == null)
        {
            CreateChannel();
        }
        // FIX: this method previously duplicated StartStream() verbatim;
        // the shared setup now lives in one place.
        StartStream();
        _recording = true;
        StartCoroutine(AudioListener());
    }

    /// <summary>
    /// Stops microphone capture and, if still open, closes the gRPC stream.
    /// </summary>
    public void EndRecording()
    {
        // FIX: these must run even when a silent chunk already closed the
        // stream (_stream_started == false) — previously the microphone and
        // the capture coroutine were left running in that case, and the
        // restart timer was never reset.
        _recording = false;
        _streamTime = 0f;
        Microphone.End(null);
        if (_stream_started)
        {
            _streamingCall.WriteCompleteAsync();
            _stream_started = false;
        }
    }

    /// <summary>
    /// Coroutine that records microphone audio into a one-second looping ring
    /// buffer and streams it in half-buffer chunks. Once the buffer is full it
    /// wraps and overwrites old data; sending half buffers keeps recording
    /// latency around half a second.
    /// </summary>
    /// <returns>Coroutine enumerator.</returns>
    IEnumerator AudioListener()
    {
        _audioClip = Microphone.Start(null, true, 1, _recordingHZ);

        yield return null;

        if (_audioClip == null)
        {
            yield break;
        }

        float[] samples = null;
        bool inFirstHalf = true;
        int midPoint = _audioClip.samples / 2;

        while (_recording && _audioClip != null)
        {
            int writePos = Microphone.GetPosition(null);

            if (writePos > _audioClip.samples || !Microphone.IsRecording(null))
            {
                Debug.Log("Failed to get Microphone");
                yield break;
            }

            // A half is ready when the write head has crossed the midpoint
            // (first half full) or wrapped back to the start (second half full).
            if ((inFirstHalf && writePos >= midPoint) ||
                (!inFirstHalf && writePos < midPoint))
            {
                samples = new float[midPoint];
                _audioClip.GetData(samples, inFirstHalf ? 0 : midPoint);

                SendAudio(samples);
                inFirstHalf = !inFirstHalf;
            }
            else
            {
                // Otherwise wait roughly the time it takes to fill the
                // remainder of the current half.
                int remaining = inFirstHalf ? (midPoint - writePos) : (_audioClip.samples - writePos);
                float timeRemaining = remaining / (float)_recordingHZ;
                yield return new WaitForSeconds(timeRemaining);
            }
        }
    }

    /// <summary>
    /// Sends one chunk of audio to Google, unless it is below
    /// SILENCE_THRESHOLD. Google expects a continuous stream of real-time
    /// audio and treats a gap as an error, so on silence the stream is closed
    /// and it is reopened later when speech resumes.
    /// </summary>
    /// <param name="samples">Audio chunk as floating-point samples.</param>
    public void SendAudio(float[] samples)
    {
        if (DetectSilence(samples, SILENCE_THRESHOLD))
        {
            if (_stream_started)
            {
                _streamingCall.WriteCompleteAsync().Wait();
                // Drain remaining responses before discarding the call.
                if (_listenTask != null)
                {
                    _listenTask.Wait();
                }
                _stream_started = false;
            }
        }
        else
        {
            // Reopen the stream if silence previously closed it.
            if (!_stream_started)
            {
                StartStream();
            }
            var bytes = AudioUtils.FloatsToLinear16(samples);
            // NOTE(review): this write is not awaited; gRPC forbids
            // overlapping writes on one stream. The ~0.5s chunk cadence makes
            // an overlap unlikely, but this should be confirmed.
            _streamingCall.WriteAsync(
                new StreamingRecognizeRequest()
                {
                    AudioContent = Google.Protobuf.ByteString
                        .CopyFrom(bytes, 0, bytes.Length)
                });
        }
    }

    /// <summary>
    /// Opens the streaming recognizer and sends the configuration request.
    /// Blocks until the config write completes, then starts the background
    /// response listener.
    /// </summary>
    private void StartStream()
    {
        _streamingCall = _speech.StreamingRecognize();
        _streamingCall.WriteAsync(
            new StreamingRecognizeRequest()
            {
                StreamingConfig = new StreamingRecognitionConfig()
                {
                    Config = _config,
                    InterimResults = true,
                }
            }).Wait();
        _stream_started = true;
        // FIX: the listener used to be an async-void method wrapped in
        // new Task(...).RunSynchronously() — the wrapper completed immediately,
        // exceptions were unobservable, and Wait() on it waited for nothing.
        // Task.Run keeps a real Task (off the Unity synchronization context,
        // so Wait() on the main thread cannot deadlock on its continuations).
        _listenTask = Task.Run(ListenAsync);
    }

    /// <summary>
    /// To avoid wasted calls to Google's Speech API, this detects whether the
    /// audio chunk is quieter than the threshold.
    /// </summary>
    /// <param name="samples">The buffer of audio samples as floating points.</param>
    /// <param name="threshold">The peak level to be treated as silence.</param>
    /// <returns>True when the chunk's peak amplitude is below the threshold.</returns>
    private static bool DetectSilence(float[] samples, float threshold)
    {
        float maxLevel = Mathf.Max(
            Mathf.Abs(Mathf.Min(samples)),
            Mathf.Abs(Mathf.Max(samples))
        );
        return maxLevel < threshold;
    }

    /// <summary>
    /// Background reader for the response stream. Runs until the stream ends
    /// and queues each result; Update() invokes the UnityEvents on the main
    /// thread, since this loop resumes on gRPC worker threads.
    /// </summary>
    private async Task ListenAsync()
    {
        while (await _streamingCall.ResponseStream.MoveNext(default))
        {
            foreach (var result in _streamingCall.ResponseStream.Current.Results)
            {
                _pendingResults.Enqueue(result);
            }
        }
    }

    // Remembers whether recognition was active so it can resume after an
    // application pause.
    private bool resumeWhenUnpaused = false;
    private void OnApplicationPause(bool pause)
    {
        if (pause)
        {
            resumeWhenUnpaused = _active;
            Active = false;
        }
        else
        {
            Active = resumeWhenUnpaused;
        }
    }

}
Новее Старее