First pass at acknowledgement

This commit is contained in:
Michael Hansen
2025-08-13 17:04:38 -05:00
parent cf68214c4d
commit 36d3086864
4 changed files with 123 additions and 11 deletions

View File

@@ -3,11 +3,14 @@
from __future__ import annotations from __future__ import annotations
from collections.abc import AsyncIterable from collections.abc import AsyncIterable
from http import HTTPStatus
from pathlib import Path
from typing import Any from typing import Any
from aiohttp import web
import voluptuous as vol import voluptuous as vol
from homeassistant.components import stt from homeassistant.components import http, stt
from homeassistant.core import Context, HomeAssistant from homeassistant.core import Context, HomeAssistant
from homeassistant.helpers import chat_session from homeassistant.helpers import chat_session
from homeassistant.helpers.typing import ConfigType from homeassistant.helpers.typing import ConfigType
@@ -86,6 +89,8 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
await async_setup_pipeline_store(hass) await async_setup_pipeline_store(hass)
async_register_websocket_api(hass) async_register_websocket_api(hass)
hass.http.register_view(DefaultSoundsView(hass))
return True return True
@@ -133,3 +138,19 @@ async def async_pipeline_from_audio_stream(
) )
await pipeline_input.validate() await pipeline_input.validate()
await pipeline_input.execute() await pipeline_input.execute()
class DefaultSoundsView(http.HomeAssistantView):
    """Serve the built-in pipeline sounds (e.g. the acknowledge chime).

    Authentication is deliberately disabled (``requires_auth = False``) so
    media players on voice satellites can fetch the sound URL without Home
    Assistant credentials; access is constrained by the strict filename
    allow-list in :meth:`get`, which also prevents path traversal.
    """

    # NOTE: {{filename}} renders as the literal "{filename}" aiohttp path
    # variable, matching the ``filename`` parameter of ``get`` below.
    url = f"/api/{DOMAIN}/sounds/{{filename}}"
    name = f"api:{DOMAIN}:sounds"
    requires_auth = False

    def __init__(self, hass: HomeAssistant) -> None:
        """Initialize the view with the bundled sounds directory."""
        self.hass = hass
        # Sound files ship alongside this module in a "sounds" subdirectory.
        self.base_dir = Path(__file__).parent / "sounds"

    async def get(self, request: web.Request, filename: str) -> web.StreamResponse:
        """Return the requested bundled sound, rejecting unknown filenames."""
        # Allow-list check: only known bundled files may be served, so a
        # crafted filename (e.g. "../secrets.yaml") can never escape base_dir.
        if filename not in ("acknowledge.mp3",):
            return web.Response(body="Invalid filename", status=HTTPStatus.BAD_REQUEST)
        return web.FileResponse(self.base_dir / filename)

View File

@@ -3,7 +3,7 @@
"name": "Assist pipeline", "name": "Assist pipeline",
"after_dependencies": ["repairs"], "after_dependencies": ["repairs"],
"codeowners": ["@balloob", "@synesthesiam"], "codeowners": ["@balloob", "@synesthesiam"],
"dependencies": ["conversation", "stt", "tts", "wake_word"], "dependencies": ["conversation", "stt", "tts", "wake_word", "http"],
"documentation": "https://www.home-assistant.io/integrations/assist_pipeline", "documentation": "https://www.home-assistant.io/integrations/assist_pipeline",
"integration_type": "system", "integration_type": "system",
"iot_class": "local_push", "iot_class": "local_push",

View File

@@ -19,11 +19,24 @@ import wave
import hass_nabucasa import hass_nabucasa
import voluptuous as vol import voluptuous as vol
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api from homeassistant.components import (
conversation,
media_source,
stt,
tts,
wake_word,
websocket_api,
)
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
from homeassistant.core import Context, HomeAssistant, callback from homeassistant.core import Context, HomeAssistant, callback
from homeassistant.exceptions import HomeAssistantError from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import chat_session, intent from homeassistant.helpers import (
chat_session,
device_registry as dr,
entity_registry as er,
intent,
network,
)
from homeassistant.helpers.collection import ( from homeassistant.helpers.collection import (
CHANGE_UPDATED, CHANGE_UPDATED,
CollectionError, CollectionError,
@@ -91,6 +104,8 @@ KEY_PIPELINE_CONVERSATION_DATA: HassKey[dict[str, PipelineConversationData]] = H
# Number of response parts to handle before streaming the response # Number of response parts to handle before streaming the response
STREAM_RESPONSE_CHARS = 60 STREAM_RESPONSE_CHARS = 60
DEFAULT_ACKNOWLEDGE_MEDIA_ID = f"/api/{DOMAIN}/sounds/acknowledge.mp3"
def validate_language(data: dict[str, Any]) -> Any: def validate_language(data: dict[str, Any]) -> Any:
"""Validate language settings.""" """Validate language settings."""
@@ -412,6 +427,8 @@ class Pipeline:
wake_word_entity: str | None wake_word_entity: str | None
wake_word_id: str | None wake_word_id: str | None
prefer_local_intents: bool = False prefer_local_intents: bool = False
acknowledge_same_area: bool = True
acknowledge_media_id: str | None = None
id: str = field(default_factory=ulid_util.ulid_now) id: str = field(default_factory=ulid_util.ulid_now)
@@ -436,6 +453,10 @@ class Pipeline:
wake_word_entity=data["wake_word_entity"], wake_word_entity=data["wake_word_entity"],
wake_word_id=data["wake_word_id"], wake_word_id=data["wake_word_id"],
prefer_local_intents=data.get("prefer_local_intents", False), prefer_local_intents=data.get("prefer_local_intents", False),
acknowledge_same_area=data.get("acknowledge_same_area", True),
acknowledge_media_id=data.get(
"acknowledge_media_id", DEFAULT_ACKNOWLEDGE_MEDIA_ID
),
) )
def to_json(self) -> dict[str, Any]: def to_json(self) -> dict[str, Any]:
@@ -454,6 +475,7 @@ class Pipeline:
"wake_word_entity": self.wake_word_entity, "wake_word_entity": self.wake_word_entity,
"wake_word_id": self.wake_word_id, "wake_word_id": self.wake_word_id,
"prefer_local_intents": self.prefer_local_intents, "prefer_local_intents": self.prefer_local_intents,
"acknowledge_media_id": self.acknowledge_media_id,
} }
@@ -1059,7 +1081,7 @@ class PipelineRun:
conversation_id: str, conversation_id: str,
device_id: str | None, device_id: str | None,
conversation_extra_system_prompt: str | None, conversation_extra_system_prompt: str | None,
) -> str: ) -> tuple[str, bool]:
"""Run intent recognition portion of pipeline. Returns text to speak.""" """Run intent recognition portion of pipeline. Returns text to speak."""
if self.intent_agent is None or self._conversation_data is None: if self.intent_agent is None or self._conversation_data is None:
raise RuntimeError("Recognize intent was not prepared") raise RuntimeError("Recognize intent was not prepared")
@@ -1107,6 +1129,7 @@ class PipelineRun:
agent_id = self.intent_agent.id agent_id = self.intent_agent.id
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
all_same_area = False
intent_response: intent.IntentResponse | None = None intent_response: intent.IntentResponse | None = None
if not processed_locally and not self._intent_agent_only: if not processed_locally and not self._intent_agent_only:
# Sentence triggers override conversation agent # Sentence triggers override conversation agent
@@ -1136,7 +1159,8 @@ class PipelineRun:
# Try local intents # Try local intents
if ( if (
intent_response is None self.pipeline.acknowledge_same_area
and intent_response is None
and self.pipeline.prefer_local_intents and self.pipeline.prefer_local_intents
and ( and (
intent_response := await conversation.async_handle_intents( intent_response := await conversation.async_handle_intents(
@@ -1280,6 +1304,43 @@ class PipelineRun:
if tts_input_stream and self._streamed_response_text: if tts_input_stream and self._streamed_response_text:
tts_input_stream.put_nowait(None) tts_input_stream.put_nowait(None)
intent_response = conversation_result.response
device_registry = dr.async_get(self.hass)
if (
(
intent_response.response_type
== intent.IntentResponseType.ACTION_DONE
)
and intent_response.matched_states
and device_id
and (device := device_registry.async_get(device_id))
and device.area_id
):
entity_registry = er.async_get(self.hass)
all_same_area = True
for state in intent_response.matched_states:
entity = entity_registry.async_get(state.entity_id)
if (
(not entity)
or (
entity.area_id
and (entity.area_id != device.area_id)
)
or (
entity.device_id
and (
entity_device := device_registry.async_get(
entity.device_id
)
)
and entity_device.area_id != device.area_id
)
):
all_same_area = False
break
_LOGGER.error("All same area: %s", all_same_area)
except Exception as src_error: except Exception as src_error:
_LOGGER.exception("Unexpected error during intent recognition") _LOGGER.exception("Unexpected error during intent recognition")
raise IntentRecognitionError( raise IntentRecognitionError(
@@ -1302,7 +1363,7 @@ class PipelineRun:
if conversation_result.continue_conversation: if conversation_result.continue_conversation:
self._conversation_data.continue_conversation_agent = agent_id self._conversation_data.continue_conversation_agent = agent_id
return speech return speech, all_same_area
async def prepare_text_to_speech(self) -> None: async def prepare_text_to_speech(self) -> None:
"""Prepare text-to-speech.""" """Prepare text-to-speech."""
@@ -1370,6 +1431,30 @@ class PipelineRun:
PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output}) PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output})
) )
async def acknowledge(self, media_id: str, tts_input: str | None) -> None:
    """Emit TTS start/end events pointing at a pre-recorded acknowledge sound.

    Instead of synthesizing speech, this publishes the resolved URL of
    *media_id* as the TTS output so the satellite plays the canned sound.
    """
    start_payload = {
        "language": self.pipeline.tts_language,
        "voice": self.pipeline.tts_voice,
        "tts_input": tts_input or "",
    }
    self.process_event(PipelineEvent(PipelineEventType.TTS_START, start_payload))

    # Media-source ids are resolved to a playable URL; plain paths are made
    # absolute by prefixing the instance's base URL.
    if media_source.is_media_source_id(media_id):
        resolved = await media_source.async_resolve_media(self.hass, media_id, None)
        sound_url = resolved.url
    else:
        sound_url = network.get_url(self.hass) + media_id

    self.process_event(
        PipelineEvent(PipelineEventType.TTS_END, {"tts_output": {"url": sound_url}})
    )
def _capture_chunk(self, audio_bytes: bytes | None) -> None: def _capture_chunk(self, audio_bytes: bytes | None) -> None:
"""Forward audio chunk to various capturing mechanisms.""" """Forward audio chunk to various capturing mechanisms."""
if self.debug_recording_queue is not None: if self.debug_recording_queue is not None:
@@ -1649,17 +1734,18 @@ class PipelineInput:
if self.run.end_stage != PipelineStage.STT: if self.run.end_stage != PipelineStage.STT:
tts_input = self.tts_input tts_input = self.tts_input
all_same_area = False
if current_stage == PipelineStage.INTENT: if current_stage == PipelineStage.INTENT:
# intent-recognition # intent-recognition
assert intent_input is not None assert intent_input is not None
tts_input = await self.run.recognize_intent( tts_input, all_same_area = await self.run.recognize_intent(
intent_input, intent_input,
self.session.conversation_id, self.session.conversation_id,
self.device_id, self.device_id,
self.conversation_extra_system_prompt, self.conversation_extra_system_prompt,
) )
if tts_input.strip(): if all_same_area or tts_input.strip():
current_stage = PipelineStage.TTS current_stage = PipelineStage.TTS
else: else:
# Skip TTS # Skip TTS
@@ -1668,6 +1754,11 @@ class PipelineInput:
if self.run.end_stage != PipelineStage.INTENT: if self.run.end_stage != PipelineStage.INTENT:
# text-to-speech # text-to-speech
if current_stage == PipelineStage.TTS: if current_stage == PipelineStage.TTS:
if all_same_area and self.run.pipeline.acknowledge_media_id:
await self.run.acknowledge(
self.run.pipeline.acknowledge_media_id, tts_input
)
else:
assert tts_input is not None assert tts_input is not None
await self.run.text_to_speech(tts_input) await self.run.text_to_speech(tts_input)