1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.car.test.concurrent.hotword;
18 
19 import android.annotation.Nullable;
20 import android.app.Service;
21 import android.content.Intent;
22 import android.media.AudioAttributes;
23 import android.media.AudioFormat;
24 import android.media.AudioRecord;
25 import android.media.MediaRecorder;
26 import android.os.Bundle;
27 import android.os.Handler;
28 import android.os.IBinder;
29 import android.os.Looper;
30 import android.os.Message;
31 import android.os.Messenger;
32 import android.os.RemoteException;
33 import android.speech.RecognitionListener;
34 import android.speech.RecognizerIntent;
35 import android.speech.SpeechRecognizer;
36 import android.util.Log;
37 
38 import com.android.internal.annotations.GuardedBy;
39 
40 import java.util.ArrayList;
41 import java.util.concurrent.atomic.AtomicBoolean;
42 import java.util.stream.Collectors;
43 
44 public class ConcurrentHotwordDetectionService extends Service {
45 
46     static final String TAG = "ConcurrentHotwordDetectionService";
47 
48     public static final int MSG_START_DETECT = 1;
49     public static final int MSG_STOP_DETECT = 2;
50     // Only runs recognizer for one minute
51     public static final int MSG_START_RECOGNIZER = 3;
52     public static final int MSG_STOP_SERVICE = 4;
53 
54     public static final int MSG_START_DETECT_REPLY = 1;
55     public static final int MSG_STOP_DETECT_REPLY = 2;
56     public static final int MSG_START_RECOGNIZER_REPLY = 3;
57 
58     public static final String MESSAGE_REPLY = "reply yo!";
59     public static final int RECOGNIZER_RUN_TIME = 60_000;
60 
61     private final Object mLock = new Object();
62 
63     private Messenger mMessenger;
64     @GuardedBy("mLock")
65     private SpeechRecognizer mSpeechRecognizer;
66 
67     @GuardedBy("mLock")
68     private Thread mRecordingThread;
69 
70     private final AtomicBoolean mStopRecording = new AtomicBoolean(true);
71 
72     @Override
onBind(Intent intent)73     public IBinder onBind(Intent intent) {
74         mMessenger = new Messenger(new Handler(Looper.getMainLooper()) {
75 
76             @Override
77             public void handleMessage(Message msg) {
78                 Log.d(TAG, "Handle Message " + msg);
79                 Message replyMessage = null;
80                 switch (msg.what) {
81                     case MSG_START_DETECT:
82                         onDetect();
83                         replyMessage =
84                                 createMessage("Detection Started", MSG_START_DETECT_REPLY);
85                         break;
86                     case MSG_STOP_DETECT:
87                         onStopDetection();
88                         replyMessage =
89                                 createMessage("Detection Stopped", MSG_STOP_DETECT_REPLY);
90                         break;
91                     case MSG_STOP_SERVICE:
92                         onStopDetection();
93                         stopSelf();
94                         return;
95                     case MSG_START_RECOGNIZER:
96                         startRecognizer(msg.replyTo);
97                         replyMessage =
98                                 createMessage("Starting Recognizer", MSG_START_RECOGNIZER_REPLY);
99                         break;
100                     default:
101                         super.handleMessage(msg);
102                         Log.d(TAG, "Error no handler for message " + msg);
103                         return;
104                 }
105                 sendReply(msg.replyTo, replyMessage);
106             }
107         });
108         return mMessenger.getBinder();
109     }
110 
onDetect()111     private void onDetect() {
112         Log.d(TAG, "onDetect for Mic source");
113         Thread recordingThread = new Thread(this::recordAudio);
114         recordingThread.start();
115         synchronized (mLock) {
116             mRecordingThread = recordingThread;
117         }
118     }
119 
onStopDetection()120     private void onStopDetection() {
121         Log.d(TAG, "onStopDetection");
122         Thread recordingThread;
123         synchronized (mLock) {
124             recordingThread = mRecordingThread;
125             mRecordingThread = null;
126         }
127 
128         mStopRecording.set(true);
129 
130         try {
131             recordingThread.join(/* timeout= */ 100);
132         } catch (InterruptedException e) {
133             Log.e(TAG, "onStopDetection could join thread", e);
134         }
135         Log.d(TAG, "onStopDetection detection stopped");
136     }
137 
recordAudio()138     private void recordAudio() {
139         Log.d(TAG, "recordAudio for Mic source");
140         mStopRecording.set(false);
141         int bytesPerSample = 2; // for ENCODING_PCM_16BIT
142         int sampleRate = 16000;
143         int bytesPerSecond = bytesPerSample * sampleRate; // for single channel
144         AudioRecord record = null;
145         try {
146             AudioRecord.Builder recordBuilder =
147                     new AudioRecord.Builder()
148                             .setAudioAttributes(
149                                     new AudioAttributes.Builder()
150                                             .setInternalCapturePreset(
151                                                     MediaRecorder.AudioSource.HOTWORD)
152                                             .build())
153                             .setAudioFormat(
154                                     new AudioFormat.Builder()
155                                             .setChannelMask(AudioFormat.CHANNEL_IN_MONO)
156                                             .setEncoding(AudioFormat.ENCODING_PCM_16BIT)
157                                             .setSampleRate(sampleRate)
158                                             .build())
159                             .setBufferSizeInBytes(bytesPerSecond);
160 
161             Log.d(TAG, "recordAudio building");
162             record = recordBuilder.build();
163             Log.d(TAG, "recordAudio built");
164         } catch (Exception e) {
165             Log.e(TAG, "recordAudio error", e);
166         }
167 
168         if (record == null) {
169             return;
170         }
171 
172         if (record.getState() != AudioRecord.STATE_INITIALIZED) {
173             Log.e(TAG, "Failed to initialize AudioRecord");
174             record.release();
175             return;
176         }
177 
178         Log.d(TAG, "recordAudio recording starting");
179         record.startRecording();
180         Log.d(TAG, "recordAudio recording started");
181 
182         while (!mStopRecording.get()) {
183             boolean canRead = canReadAudio(record, bytesPerSecond);
184             Log.i(TAG, "recordAudio can record " + canRead);
185         }
186         record.stop();
187         Log.i(TAG, "recordAudio stopped");
188     }
189 
canReadAudio(AudioRecord record, int bytesPerSecond)190     private boolean canReadAudio(AudioRecord record, int bytesPerSecond) {
191         byte[] buffer = new byte[bytesPerSecond]; // read 1 second of audio
192         int numBytes = 0;
193         while (numBytes < buffer.length) {
194             int bytesRead =
195                     record.read(buffer, numBytes, Math.min(1024, buffer.length - numBytes));
196             if (bytesRead < 0) {
197                 Log.e(TAG, "Error reading from mic: " + bytesRead);
198                 return false;
199             }
200             numBytes += bytesRead;
201         }
202 
203         int counter = 100;
204         for (byte b : buffer) {
205             if ((b != 0) && (counter-- < 0)) {
206                 return true;
207             }
208         }
209         Log.d(TAG, "All data are zero");
210         return false;
211     }
212 
createMessage(String replyString, int what)213     private Message createMessage(String replyString, int what) {
214         Message replyMessage =
215                 Message.obtain(/* handler= */ null, what, /* arg1= */ 0, /* arg2= */ 0);
216         Bundle data = new Bundle();
217         data.putString(MESSAGE_REPLY, replyString);
218         replyMessage.setData(data);
219         return replyMessage;
220     }
221 
sendReply(@ullable Messenger messenger, @Nullable Message reply)222     private void sendReply(@Nullable Messenger messenger, @Nullable Message reply) {
223         if (messenger == null) {
224             Log.i(TAG, "reply null messenger");
225             return;
226         }
227 
228         if (reply == null) {
229             Log.i(TAG, "reply null message");
230             return;
231         }
232 
233         try {
234             messenger.send(reply);
235             Log.i(TAG, "reply message sent " + reply);
236         } catch (RemoteException e) {
237             Log.e(TAG, "replay error ", e);
238         }
239     }
240 
startRecognizer(Messenger replyTo)241     private void startRecognizer(Messenger replyTo) {
242         synchronized (mLock) {
243             mSpeechRecognizer =
244                     SpeechRecognizer.createOnDeviceSpeechRecognizer(getApplicationContext());
245 
246             RecognitionListener recognitionListener = new RecognitionListener() {
247 
248                 @Override
249                 public void onReadyForSpeech(Bundle params) {
250                     sendReply(replyTo, createMessage("Got ready for speech",
251                             MSG_START_RECOGNIZER_REPLY));
252                 }
253 
254                 @Override
255                 public void onBeginningOfSpeech() {
256                     sendReply(replyTo, createMessage("Got beginning of speech",
257                             MSG_START_RECOGNIZER_REPLY));
258                 }
259 
260                 @Override
261                 public void onRmsChanged(float rmsdB) {
262                     sendReply(replyTo, createMessage("Sound level changed, rms[dB]: " + rmsdB,
263                             MSG_START_RECOGNIZER_REPLY));
264                 }
265 
266                 @Override
267                 public void onBufferReceived(byte[] buffer) {
268                     sendReply(replyTo, createMessage("Buffer Received, length:" + buffer.length,
269                             MSG_START_RECOGNIZER_REPLY));
270                 }
271 
272                 @Override
273                 public void onEndOfSpeech() {
274                     sendReply(replyTo, createMessage("End of Speech",
275                             MSG_START_RECOGNIZER_REPLY));
276                 }
277 
278                 @Override
279                 public void onError(int error) {
280                     sendReply(replyTo, createMessage("Got an error:" + error,
281                             MSG_START_RECOGNIZER_REPLY));
282                 }
283 
284                 @Override
285                 public void onResults(Bundle results) {
286                     replyWithResults(results);
287 
288                 }
289 
290                 @Override
291                 public void onPartialResults(Bundle partialResults) {
292                     replyWithResults(partialResults);
293                 }
294 
295                 @Override
296                 public void onSegmentResults(Bundle segmentResults) {
297                     replyWithResults(segmentResults);
298                 }
299 
300                 @Override
301                 public void onEndOfSegmentedSession() {
302                     sendReply(replyTo, createMessage("End of segmented session",
303                             MSG_START_RECOGNIZER_REPLY));
304                 }
305 
306                 @Override
307                 public void onEvent(int eventType, Bundle params) {
308 
309                 }
310 
311                 private void replyWithResults(Bundle recognitionResults) {
312                     ArrayList<String> results = recognitionResults
313                             .getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
314                     String reply = results.stream().collect(Collectors.joining(" "));
315 
316                     sendReply(replyTo, createMessage(reply, MSG_START_RECOGNIZER_REPLY));
317                 }
318             };
319 
320             mSpeechRecognizer.setRecognitionListener(recognitionListener);
321 
322             Intent requestIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
323             requestIntent.putExtra(
324                     RecognizerIntent.EXTRA_SEGMENTED_SESSION,
325                     RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS);
326             requestIntent.putExtra(
327                     RecognizerIntent.EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, RECOGNIZER_RUN_TIME);
328 
329             mSpeechRecognizer.startListening(requestIntent);
330         }
331     }
332 }
333