Android 语音开发(无交互)

ANDROID SDK TTS Package Summary


Starting the TTS engine

1
2
3
4
5
6
TextToSpeech tts = new TextToSpeech(this, new OnInitListener() {
public void onInit(int status){
if (status == TextToSpeech.SUCCESS)
speak("Hello world", TextToSpeech.QUEUE_ADD, null);
}
}
  • QUEUE_ADD: The new entry is placed at the end of the playback queue.
  • QUEUE_FLUSH: All entries in the playback queue are dropped and replaced by the new entry.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
/**
 * Starts the check for installed text-to-speech data.
 * The outcome of the check is delivered to onActivityResult with the
 * TTS_DATA_CHECK request code.
 */
private void initTTS() {
    // Keep the speak button disabled while the text-to-speech engine is being initialized
    disableSpeakButton();

    // Check whether the engine data is installed; when the check finishes,
    // the onActivityResult method is executed
    startActivityForResult(new Intent(TextToSpeech.Engine.ACTION_CHECK_TTS_DATA), TTS_DATA_CHECK);
}
/**
 * Callback for the text-to-speech data check started with the TTS_DATA_CHECK request code.
 * If the check passed, creates the <code>TextToSpeech</code> instance, selects US English
 * when it is available and enables the speak button once initialization has finished.
 * If the check did not pass, launches the activity that installs the TTS data, or shows
 * a Toast when no such activity can be resolved.
 *
 * @param requestCode code the activity was started with
 * @param resultCode  result reported by the finished activity
 * @param data        intent returned by the finished activity (not read here)
 */
@Override
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
    // Results belonging to any other request are not handled here
    if (requestCode != TTS_DATA_CHECK) {
        return;
    }

    if (resultCode != TextToSpeech.Engine.CHECK_VOICE_DATA_PASS) {
        // Voice data is missing: look for an activity that can install it
        Intent installIntent = new Intent();
        installIntent.setAction(TextToSpeech.Engine.ACTION_INSTALL_TTS_DATA);

        PackageManager packageManager = getPackageManager();
        ResolveInfo installer =
                packageManager.resolveActivity(installIntent, PackageManager.MATCH_DEFAULT_ONLY);

        if (installer != null) {
            startActivity(installIntent);
        } else {
            Toast.makeText(TTSWithIntent.this,
                    "There is no TTS installed, please download it from Google Play",
                    Toast.LENGTH_LONG).show();
        }
        return;
    }

    // Voice data is present: create the TextToSpeech instance
    tts = new TextToSpeech(this, new OnInitListener() {
        public void onInit(int status) {
            boolean initialized = (status == TextToSpeech.SUCCESS);
            // The language is only queried after a successful initialization
            if (initialized && tts.isLanguageAvailable(Locale.US) >= 0) {
                tts.setLanguage(Locale.US);
            }
            // The button is re-enabled whatever the initialization status was
            enableSpeakButton();
        }
    });
}

Speech Recognition

Automatic Speech Recognition (ASR) is the process that converts spoken words to written text.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Wire the speech button: a click either reports that ASR is unavailable or starts recognition
Button speak = (Button) findViewById(R.id.speech_btn);
speak.setOnClickListener(
    new View.OnClickListener() {
        @Override
        public void onClick(View v) {
            // Speech recognition does not currently work on simulated devices, so when the
            // build brand is "generic" the user gets a Toast instead of a recognition attempt.
            // equalsIgnoreCase is used because toLowerCase() depends on the default locale
            // (e.g. a Turkish locale maps 'I' to a dotless 'ı'), which could break the match.
            if ("generic".equalsIgnoreCase(Build.BRAND)) {
                Toast.makeText(getApplicationContext(),
                        "ASR is not supported on virtual devices",
                        Toast.LENGTH_SHORT).show();
            } else {
                setRecognitionParams(); // Read parameters from GUI
                listen(); // Set up the recognizer and start listening
            }
        }
    });
// Build the speech recognition request
Intent asrIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

// Specify language model
asrIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, languageModel);

// Specify how many results to receive. Results listed in order of confidence
asrIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, numberRecoResults);

// Launch the recognition activity; its result is identified by ASR_CODE
startActivityForResult(asrIntent, ASR_CODE);

EXTRA_LANGUAGE_MODEL

  • LANGUAGE_MODEL_FREE_FORM: This language model is based on free-form speech recognition and is used to recognize free-form speech, for example, in the dictation of an e-mail.
  • LANGUAGE_MODEL_WEB_SEARCH: This language model is based on web search terms and is used to model more restricted forms of input such as shorter, search-like phrases, for example, "flights to London", "weather in Madrid", and so on.

Other extras:

  • EXTRA_PROMPT: This provides a text prompt that is shown to users when they are asked to speak.
  • EXTRA_MAX_RESULTS: This integer value specifies a limit on the maximum number of results to be returned. If omitted, the recognizer will choose how many results to return. The results are the different possible texts corresponding to the user’s input, sorted from most to least probable.
  • EXTRA_LANGUAGE: This specifies a language that can be used instead of the default provided on the device.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
protected void onActivityResult(int requestCode, int resultCode, Intent data) {
if (requestCode == ASR_CODE) {
if (resultCode == RESULT_OK) {
//Retrieves the N-best list and the confidences from the ASR result
ArrayList<String> nBestList = data.getStringArrayListExtra(RecognizerIntent.EXTRA_RESULTS);
float[] nBestConfidences = data.getFloatArrayExtra(RecognizerIntent.EXTRA_CONFIDENCE_SCORES;
// Creates a collection of strings, each one with a recognition result and its confidence, e.g. "Phrase matched (conf: 0.5)"
ArrayList<String> nBestView = new ArrayList<String>();
for(int i=0; i<nBestList.size(); i++){
if(nBestConfidences[i]<0) {
nBestView.add(nBestList.get(i) + " (no confidence value available)");
} else {
nBestView.add(nBestList.get(i) + " (conf: " + nBestConfidences[i] + ")");
}
}
//Includes the collection in the listview of the GUI
setListView(nBestView);
} else {
Log.e(LOGTAG, "Recognition was not successful");
}
}
}

ASRWithLib app

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/**
 * Creates the single SpeechRecognizer instance and registers this object as its listener.
 * When no activity can handle the speech recognition intent, the recognizer is set to
 * <code>null</code> instead.
 * See CustomRecognitionListener.java.
 *
 * @param ctx context of the interaction
 */
public void createRecognizer(Context ctx) {
    this.ctx = ctx;

    // Find out whether speech recognition is supported by asking the package manager
    // which activities can handle the recognition intent
    Intent recognizeIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
    List<ResolveInfo> handlers = ctx.getPackageManager().queryIntentActivities(recognizeIntent, 0);

    if (handlers.isEmpty()) {
        // Nothing can perform recognition
        myASR = null;
        return;
    }

    myASR = SpeechRecognizer.createSpeechRecognizer(ctx);
    myASR.setRecognitionListener(this);
}
/**
 * Starts speech recognition.
 *
 * @param languageModel Type of language model used (see Chapter 3 in the book for further details);
 *                      must be LANGUAGE_MODEL_FREE_FORM or LANGUAGE_MODEL_WEB_SEARCH
 * @param maxResults Maximum number of recognition results; must not be negative
 * @throws Exception if the input parameters are not valid
 * @throws IllegalStateException if no recognizer is available (createRecognizer was not
 *         called, or it found no speech recognition support)
 */
public void listen(String languageModel, int maxResults) throws Exception {
    // Constant-first equals keeps a null languageModel on the "invalid params" path
    // instead of failing with a NullPointerException
    boolean validModel = RecognizerIntent.LANGUAGE_MODEL_FREE_FORM.equals(languageModel)
            || RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH.equals(languageModel);

    if (validModel && (maxResults >= 0)) {
        // createRecognizer leaves myASR null when recognition is unsupported;
        // report that explicitly rather than dereferencing null below
        if (myASR == null) {
            throw new IllegalStateException("Speech recognizer is not available");
        }

        Intent intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);

        // Specify the calling package to identify the application
        // Caution: be careful not to use: getClass().getPackage().getName()
        intent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE, ctx.getPackageName());

        // Specify language model
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, languageModel);

        // Specify how many results to receive. Results listed in order of confidence
        intent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, maxResults);

        myASR.startListening(intent);
    } else {
        // If the input parameters are not valid, it throws an exception
        throw new Exception("Invalid params to listen method");
    }
}
/**
 * Receives the recognition results and forwards the recognized strings together with
 * their confidence scores to processAsrResults.
 *
 * @param results bundle holding the recognition results and confidence scores
 */
public void onResults(Bundle results) {
    ArrayList<String> recognizedTexts =
            results.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
    float[] confidenceScores = results.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);

    processAsrResults(recognizedTexts, confidenceScores);
}
1
<uses-permission android:name="android.permission.RECORD_AUDIO"/>