diff --git a/homeassistant/components/assist_pipeline/__init__.py b/homeassistant/components/assist_pipeline/__init__.py index 8f4c6efd355..481f787c8ef 100644 --- a/homeassistant/components/assist_pipeline/__init__.py +++ b/homeassistant/components/assist_pipeline/__init__.py @@ -3,11 +3,14 @@ from __future__ import annotations from collections.abc import AsyncIterable +from http import HTTPStatus +from pathlib import Path from typing import Any +from aiohttp import web import voluptuous as vol -from homeassistant.components import stt +from homeassistant.components import http, stt from homeassistant.core import Context, HomeAssistant from homeassistant.helpers import chat_session from homeassistant.helpers.typing import ConfigType @@ -86,6 +89,8 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool: await async_setup_pipeline_store(hass) async_register_websocket_api(hass) + hass.http.register_view(DefaultSoundsView(hass)) + return True @@ -133,3 +138,19 @@ async def async_pipeline_from_audio_stream( ) await pipeline_input.validate() await pipeline_input.execute() + + +class DefaultSoundsView(http.HomeAssistantView): + url = f"/api/{DOMAIN}/sounds/{{filename}}" + name = f"api:{DOMAIN}:sounds" + requires_auth = False + + def __init__(self, hass: HomeAssistant) -> None: + self.hass = hass + self.base_dir = Path(__file__).parent / "sounds" + + async def get(self, request: web.Request, filename: str): + if filename not in ("acknowledge.mp3",): + return web.Response(body="Invalid filename", status=HTTPStatus.BAD_REQUEST) + + return web.FileResponse(self.base_dir / filename) diff --git a/homeassistant/components/assist_pipeline/manifest.json b/homeassistant/components/assist_pipeline/manifest.json index 3a59d8f87f1..1f61e09aeab 100644 --- a/homeassistant/components/assist_pipeline/manifest.json +++ b/homeassistant/components/assist_pipeline/manifest.json @@ -3,7 +3,7 @@ "name": "Assist pipeline", "after_dependencies": ["repairs"], "codeowners": 
["@balloob", "@synesthesiam"], - "dependencies": ["conversation", "stt", "tts", "wake_word"], + "dependencies": ["conversation", "stt", "tts", "wake_word", "http"], "documentation": "https://www.home-assistant.io/integrations/assist_pipeline", "integration_type": "system", "iot_class": "local_push", diff --git a/homeassistant/components/assist_pipeline/pipeline.py b/homeassistant/components/assist_pipeline/pipeline.py index 0cd593e9666..2781db7a382 100644 --- a/homeassistant/components/assist_pipeline/pipeline.py +++ b/homeassistant/components/assist_pipeline/pipeline.py @@ -19,11 +19,24 @@ import wave import hass_nabucasa import voluptuous as vol -from homeassistant.components import conversation, stt, tts, wake_word, websocket_api +from homeassistant.components import ( + conversation, + media_source, + stt, + tts, + wake_word, + websocket_api, +) from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL from homeassistant.core import Context, HomeAssistant, callback from homeassistant.exceptions import HomeAssistantError -from homeassistant.helpers import chat_session, intent +from homeassistant.helpers import ( + chat_session, + device_registry as dr, + entity_registry as er, + intent, + network, +) from homeassistant.helpers.collection import ( CHANGE_UPDATED, CollectionError, @@ -91,6 +104,8 @@ KEY_PIPELINE_CONVERSATION_DATA: HassKey[dict[str, PipelineConversationData]] = H # Number of response parts to handle before streaming the response STREAM_RESPONSE_CHARS = 60 +DEFAULT_ACKNOWLEDGE_MEDIA_ID = f"/api/{DOMAIN}/sounds/acknowledge.mp3" + def validate_language(data: dict[str, Any]) -> Any: """Validate language settings.""" @@ -412,6 +427,8 @@ class Pipeline: wake_word_entity: str | None wake_word_id: str | None prefer_local_intents: bool = False + acknowledge_same_area: bool = True + acknowledge_media_id: str | None = None id: str = field(default_factory=ulid_util.ulid_now) @@ -436,6 +453,10 @@ class Pipeline: 
wake_word_entity=data["wake_word_entity"], wake_word_id=data["wake_word_id"], prefer_local_intents=data.get("prefer_local_intents", False), + acknowledge_same_area=data.get("acknowledge_same_area", True), + acknowledge_media_id=data.get( + "acknowledge_media_id", DEFAULT_ACKNOWLEDGE_MEDIA_ID + ), ) def to_json(self) -> dict[str, Any]: @@ -454,6 +475,7 @@ class Pipeline: "wake_word_entity": self.wake_word_entity, "wake_word_id": self.wake_word_id, "prefer_local_intents": self.prefer_local_intents, + "acknowledge_media_id": self.acknowledge_media_id, } @@ -1059,7 +1081,7 @@ class PipelineRun: conversation_id: str, device_id: str | None, conversation_extra_system_prompt: str | None, - ) -> str: + ) -> tuple[str, bool]: """Run intent recognition portion of pipeline. Returns text to speak.""" if self.intent_agent is None or self._conversation_data is None: raise RuntimeError("Recognize intent was not prepared") @@ -1107,6 +1129,7 @@ class PipelineRun: agent_id = self.intent_agent.id processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT + all_same_area = False intent_response: intent.IntentResponse | None = None if not processed_locally and not self._intent_agent_only: # Sentence triggers override conversation agent @@ -1136,7 +1159,8 @@ class PipelineRun: # Try local intents if ( - intent_response is None + self.pipeline.acknowledge_same_area + and intent_response is None and self.pipeline.prefer_local_intents and ( intent_response := await conversation.async_handle_intents( @@ -1280,6 +1304,43 @@ class PipelineRun: if tts_input_stream and self._streamed_response_text: tts_input_stream.put_nowait(None) + intent_response = conversation_result.response + device_registry = dr.async_get(self.hass) + if ( + ( + intent_response.response_type + == intent.IntentResponseType.ACTION_DONE + ) + and intent_response.matched_states + and device_id + and (device := device_registry.async_get(device_id)) + and device.area_id + ): + entity_registry = er.async_get(self.hass) + 
all_same_area = True + for state in intent_response.matched_states: + entity = entity_registry.async_get(state.entity_id) + if ( + (not entity) + or ( + entity.area_id + and (entity.area_id != device.area_id) + ) + or ( + entity.device_id + and ( + entity_device := device_registry.async_get( + entity.device_id + ) + ) + and entity_device.area_id != device.area_id + ) + ): + all_same_area = False + break + + _LOGGER.error("All same area: %s", all_same_area) + except Exception as src_error: _LOGGER.exception("Unexpected error during intent recognition") raise IntentRecognitionError( @@ -1302,7 +1363,7 @@ class PipelineRun: if conversation_result.continue_conversation: self._conversation_data.continue_conversation_agent = agent_id - return speech + return speech, all_same_area async def prepare_text_to_speech(self) -> None: """Prepare text-to-speech.""" @@ -1370,6 +1431,30 @@ class PipelineRun: PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output}) ) + async def acknowledge(self, media_id: str, tts_input: str | None) -> None: + self.process_event( + PipelineEvent( + PipelineEventType.TTS_START, + { + "language": self.pipeline.tts_language, + "voice": self.pipeline.tts_voice, + "tts_input": tts_input or "", + }, + ) + ) + + if media_source.is_media_source_id(media_id): + media = await media_source.async_resolve_media(self.hass, media_id, None) + media_id = media.url + else: + media_id = network.get_url(self.hass) + media_id + + tts_output = {"url": media_id} + + self.process_event( + PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output}) + ) + def _capture_chunk(self, audio_bytes: bytes | None) -> None: """Forward audio chunk to various capturing mechanisms.""" if self.debug_recording_queue is not None: @@ -1649,17 +1734,18 @@ class PipelineInput: if self.run.end_stage != PipelineStage.STT: tts_input = self.tts_input + all_same_area = False if current_stage == PipelineStage.INTENT: # intent-recognition assert intent_input is not None - 
tts_input = await self.run.recognize_intent( + tts_input, all_same_area = await self.run.recognize_intent( intent_input, self.session.conversation_id, self.device_id, self.conversation_extra_system_prompt, ) - if tts_input.strip(): + if all_same_area or tts_input.strip(): current_stage = PipelineStage.TTS else: # Skip TTS @@ -1668,8 +1754,13 @@ class PipelineInput: if self.run.end_stage != PipelineStage.INTENT: # text-to-speech if current_stage == PipelineStage.TTS: - assert tts_input is not None - await self.run.text_to_speech(tts_input) + if all_same_area and self.run.pipeline.acknowledge_media_id: + await self.run.acknowledge( + self.run.pipeline.acknowledge_media_id, tts_input + ) + else: + assert tts_input is not None + await self.run.text_to_speech(tts_input) except PipelineError as err: self.run.process_event( diff --git a/homeassistant/components/assist_pipeline/sounds/acknowledge.mp3 b/homeassistant/components/assist_pipeline/sounds/acknowledge.mp3 new file mode 100644 index 00000000000..603e79e4f2a Binary files /dev/null and b/homeassistant/components/assist_pipeline/sounds/acknowledge.mp3 differ