Подключение ASR в Assistant SDK Android
Обновлено 22 ноября 2023
ASR (Automatic Speech Recognition) — это распознавание речи. Вы можете интегрировать ASR в свое приложение.
Ниже описан пример интеграции ASR. Его можно найти в файле ActiveSpeechRecognizerActivity.kt, начиная со сборки 22.02.1.9010.
package ru.sberbank.sdakit.sdk.client.ext.app.presentation
import android.Manifest
import android.os.Bundle
import android.view.View
import androidx.appcompat.app.AppCompatActivity
import io.reactivex.Single
import io.reactivex.disposables.CompositeDisposable
import io.reactivex.disposables.Disposable
import ru.sberbank.sdakit.base.core.threading.rx.di.ThreadingRxApi
import ru.sberbank.sdakit.core.di.platform.api
import ru.sberbank.sdakit.core.platform.di.CorePlatformApi
import ru.sberbank.sdakit.core.platform.domain.permissions.ActivityBasedPermissions
import ru.sberbank.sdakit.core.platform.domain.permissions.Permissions
import ru.sberbank.sdakit.core.utils.rx.subscribeBy
import ru.sberbank.sdakit.sdk.client.ext.app.R
import ru.sberbank.sdakit.sdk.client.ext.app.databinding.ActivityActiveSpeechRecognizerBinding
import ru.sberbank.sdakit.voice.MusicRecognizer
import ru.sberbank.sdakit.voice.VoiceRecognizer
import ru.sberbank.sdakit.voice.di.VoiceRecognitionComponent
import ru.sberbank.sdakit.vps.config.ClientInfo
import ru.sberbank.sdakit.vps.config.ClientInfoFactory
import ru.sberbank.sdakit.vps.config.DefaultVpsTokenInvalidator
import ru.sberbank.sdakit.vps.config.NoopUfsMetaInfoProvider
import ru.sberbank.sdakit.vps.config.StreamingConfig
import ru.sberbank.sdakit.vps.config.TokenValue
import ru.sberbank.sdakit.vps.config.VPSClientConfig
import ru.sberbank.sdakit.vps.config.VPSTokenProvider
import ru.sberbank.sdakit.vps.config.VpsTokenMode
import ru.sberbank.sdakit.vps.config.di.VpsConfigDependencies
/**
 * Sample screen that converts the user's speech into text.
 *
 * The screen has a start/stop button, a radio group that selects between voice and
 * music recognition, a progress indicator and a text view showing the recognition result.
 * Recognition is stopped when the activity is stopped.
 */
class ActiveSpeechRecognizerActivity : AppCompatActivity() {

    private val rxSchedulers by api(ThreadingRxApi::rxSchedulers)
    private val permissionsCache by api(CorePlatformApi::permissionsCache)
    private val permissionsRequestStateProvider by api(CorePlatformApi::permissionsRequestStateProvider)

    // Test configuration, not suitable for production. Production values must be obtained from your manager.
    private val assistantConfig: VPSClientConfig by lazy {
        VPSClientConfig(
            serverUrl = getString(R.string.preference_assistant_vps_endpoint_uri_demo_2),
            channel = "FEBRUARY",
            clientInfo = ClientInfoFactory.create(
                surface = "DEMO_APP",
                surfaceVersion = "12.0",
                packageName = "ru.sberbank.sdakit.sdk.client.ext.app"
            ),
            streamingConfig = StreamingConfig(dubbingEnabled = false)
        )
    }

    // Token provider that always answers with a hard-coded test token.
    private val tokenMode: VpsTokenMode = VpsTokenMode.SingleTokenProvider(object : VPSTokenProvider {
        override fun requestToken(cause: VPSTokenProvider.RequestCause): Single<TokenValue> =
            Single.just(TokenValue.esa("TEST_TOKEN"))
    })

    /** Builds the dependencies handed to [VoiceRecognitionComponent.createWithDependencies]. */
    private fun vpsConfigDependencies() = object : VpsConfigDependencies {
        override val vpsClientConfig = assistantConfig
        override val vpsTokenMode = tokenMode
        override val vpsTokenInvalidator = DefaultVpsTokenInvalidator()
        override val ufsMetaInfoProvider = NoopUfsMetaInfoProvider()
    }

    private val voiceRecognizer: VoiceRecognizer by lazy {
        VoiceRecognitionComponent.createWithDependencies(vpsConfigDependencies()).voiceRecognizer
    }

    private val musicRecognizer: MusicRecognizer by lazy {
        VoiceRecognitionComponent.createWithDependencies(vpsConfigDependencies()).musicRecognizer
    }

    private lateinit var binding: ActivityActiveSpeechRecognizerBinding
    private lateinit var permissions: Permissions

    // Holds the subscriptions of the current recognition session; cleared in stop().
    private val disposables = CompositeDisposable()

    // True between start() and stop().
    private var started = false

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        binding = ActivityActiveSpeechRecognizerBinding.inflate(layoutInflater)
        setContentView(binding.root)

        binding.recognizedText.text = ""
        binding.recordingProgress.visibility = View.INVISIBLE

        // Select the default recognition type BEFORE rendering the button, because the
        // button label is derived from the currently checked radio button.
        binding.recognitionTypeVoice.isChecked = true
        updateButtonAppearance()

        // Keep the button label in sync when the user switches the recognition type.
        binding.recognitionType.setOnCheckedChangeListener { _, _ -> updateButtonAppearance() }
        binding.startStopButton.setOnClickListener { startStop() }

        permissions = ActivityBasedPermissions(this, permissionsCache, permissionsRequestStateProvider)
    }

    override fun onRequestPermissionsResult(requestCode: Int, permissions: Array<String>, grantResults: IntArray) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        // Forward the result to the SDK permissions helper (the parameter shadows the property, hence `this.`).
        this.permissions.onRequestPermissionsResult(requestCode, permissions, grantResults)
    }

    override fun onStop() {
        super.onStop()
        stop()
    }

    /** Toggles recognition: stops it when it is running, starts it otherwise. */
    private fun startStop() {
        if (started) stop() else start()
    }

    /**
     * Starts the recognition that matches the checked radio button.
     * Falls back to voice recognition when no known radio button is checked.
     */
    private fun startRecognition(): Disposable =
        when (binding.recognitionType.checkedRadioButtonId) {
            R.id.recognition_type_music -> startMusicRecognition()
            else -> startVoiceRecognition()
        }

    /**
     * Subscribes to voice recognition and shows every emitted value in the text view.
     * Both completion and error end the session via [stop].
     */
    private fun startVoiceRecognition(): Disposable =
        voiceRecognizer.startRecognition()
            .observeOn(rxSchedulers.ui())
            .subscribeBy(
                onNext = { recognized -> binding.recognizedText.text = recognized },
                onComplete = { stop() },
                onError = { stop() }
            )

    /**
     * Subscribes to music recognition. Shows the response once a value is final and
     * a progress placeholder for intermediate values.
     * Both completion and error end the session via [stop].
     */
    private fun startMusicRecognition(): Disposable =
        musicRecognizer.startRecognition()
            .observeOn(rxSchedulers.ui())
            .subscribeBy(
                onNext = { value ->
                    binding.recognizedText.text = if (value.isFinal) value.response else "В процессе"
                },
                onComplete = { stop() },
                onError = { stop() }
            )

    /**
     * Switches the UI to the "recording" state, starts listening for audio-recording
     * permission requests coming from the recognizer and starts the selected recognition.
     */
    private fun start() {
        started = true
        binding.recognizedText.text = ""
        binding.recordingProgress.visibility = View.VISIBLE
        updateButtonAppearance()

        val permissionRequests = voiceRecognizer.observeAudioRecordingPermissionRequests()
            .observeOn(rxSchedulers.ui())
            .subscribeBy(
                onNext = { permissions.request(Manifest.permission.RECORD_AUDIO) },
                // Errors of the permission-request stream are ignored in this sample.
                onError = {}
            )
        disposables.add(permissionRequests)
        disposables.add(startRecognition())
    }

    /**
     * Stops the running recognition session and restores the idle UI.
     *
     * Does nothing when no session is running. In particular, `onStop()` of a screen
     * on which recognition was never started no longer forces the lazy recognizers
     * to be created just to stop them.
     */
    private fun stop() {
        if (!started) return
        started = false
        binding.recordingProgress.visibility = View.INVISIBLE
        updateButtonAppearance()
        voiceRecognizer.stopRecognition()
        musicRecognizer.stopRecognition()
        disposables.clear()
    }

    /** Renders the start/stop button label for the current state and recognition type. */
    @Suppress("SetTextI18n")
    private fun updateButtonAppearance() {
        binding.startStopButton.text = if (started) "ОСТАНОВИТЬ" else "РАСПОЗНАТЬ$recognitionTypeName"
    }

    // Label suffix for the checked recognition type; empty when nothing known is checked.
    private val recognitionTypeName: String
        get() = when (binding.recognitionType.checkedRadioButtonId) {
            R.id.recognition_type_voice -> " ГОЛОС"
            R.id.recognition_type_music -> " МУЗЫКУ"
            else -> ""
        }
}