First pass at acknowledgement

This commit is contained in:
Michael Hansen
2025-08-13 17:04:38 -05:00
parent cf68214c4d
commit 36d3086864
4 changed files with 123 additions and 11 deletions

View File

@@ -3,11 +3,14 @@
from __future__ import annotations
from collections.abc import AsyncIterable
from http import HTTPStatus
from pathlib import Path
from typing import Any
from aiohttp import web
import voluptuous as vol
from homeassistant.components import stt
from homeassistant.components import http, stt
from homeassistant.core import Context, HomeAssistant
from homeassistant.helpers import chat_session
from homeassistant.helpers.typing import ConfigType
@@ -86,6 +89,8 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
await async_setup_pipeline_store(hass)
async_register_websocket_api(hass)
hass.http.register_view(DefaultSoundsView(hass))
return True
@@ -133,3 +138,19 @@ async def async_pipeline_from_audio_stream(
)
await pipeline_input.validate()
await pipeline_input.execute()
class DefaultSoundsView(http.HomeAssistantView):
    """HTTP view that serves sound files shipped in the "sounds" directory.

    Only filenames on a fixed allow-list are served; every other filename is
    rejected with HTTP 400 before any path is built from it.
    """

    # NOTE(review): presumably unauthenticated so that playback devices can
    # fetch the sound without a token — confirm this is intended.
    requires_auth = False
    name = f"api:{DOMAIN}:sounds"
    url = f"/api/{DOMAIN}/sounds/{{filename}}"

    def __init__(self, hass: HomeAssistant) -> None:
        """Store the hass instance and locate the sounds directory."""
        # The sounds directory sits next to this module on disk.
        self.base_dir = Path(__file__).parent / "sounds"
        self.hass = hass

    async def get(self, request: web.Request, filename: str) -> web.StreamResponse:
        """Return the requested sound file, or a 400 response if not allowed."""
        # The allow-list check runs before the path join, so the
        # request-supplied filename never reaches the filesystem unchecked.
        if filename in ("acknowledge.mp3",):
            return web.FileResponse(self.base_dir / filename)

        return web.Response(body="Invalid filename", status=HTTPStatus.BAD_REQUEST)

View File

@@ -3,7 +3,7 @@
"name": "Assist pipeline",
"after_dependencies": ["repairs"],
"codeowners": ["@balloob", "@synesthesiam"],
"dependencies": ["conversation", "stt", "tts", "wake_word"],
"dependencies": ["conversation", "stt", "tts", "wake_word", "http"],
"documentation": "https://www.home-assistant.io/integrations/assist_pipeline",
"integration_type": "system",
"iot_class": "local_push",

View File

@@ -19,11 +19,24 @@ import wave
import hass_nabucasa
import voluptuous as vol
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api
from homeassistant.components import (
conversation,
media_source,
stt,
tts,
wake_word,
websocket_api,
)
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
from homeassistant.core import Context, HomeAssistant, callback
from homeassistant.exceptions import HomeAssistantError
from homeassistant.helpers import chat_session, intent
from homeassistant.helpers import (
chat_session,
device_registry as dr,
entity_registry as er,
intent,
network,
)
from homeassistant.helpers.collection import (
CHANGE_UPDATED,
CollectionError,
@@ -91,6 +104,8 @@ KEY_PIPELINE_CONVERSATION_DATA: HassKey[dict[str, PipelineConversationData]] = H
# Number of response parts to handle before streaming the response
STREAM_RESPONSE_CHARS = 60
DEFAULT_ACKNOWLEDGE_MEDIA_ID = f"/api/{DOMAIN}/sounds/acknowledge.mp3"
def validate_language(data: dict[str, Any]) -> Any:
"""Validate language settings."""
@@ -412,6 +427,8 @@ class Pipeline:
wake_word_entity: str | None
wake_word_id: str | None
prefer_local_intents: bool = False
acknowledge_same_area: bool = True
acknowledge_media_id: str | None = None
id: str = field(default_factory=ulid_util.ulid_now)
@@ -436,6 +453,10 @@ class Pipeline:
wake_word_entity=data["wake_word_entity"],
wake_word_id=data["wake_word_id"],
prefer_local_intents=data.get("prefer_local_intents", False),
acknowledge_same_area=data.get("acknowledge_same_area", True),
acknowledge_media_id=data.get(
"acknowledge_media_id", DEFAULT_ACKNOWLEDGE_MEDIA_ID
),
)
def to_json(self) -> dict[str, Any]:
@@ -454,6 +475,7 @@ class Pipeline:
"wake_word_entity": self.wake_word_entity,
"wake_word_id": self.wake_word_id,
"prefer_local_intents": self.prefer_local_intents,
"acknowledge_media_id": self.acknowledge_media_id,
}
@@ -1059,7 +1081,7 @@ class PipelineRun:
conversation_id: str,
device_id: str | None,
conversation_extra_system_prompt: str | None,
) -> str:
) -> tuple[str, bool]:
"""Run intent recognition portion of pipeline. Returns text to speak."""
if self.intent_agent is None or self._conversation_data is None:
raise RuntimeError("Recognize intent was not prepared")
@@ -1107,6 +1129,7 @@ class PipelineRun:
agent_id = self.intent_agent.id
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
all_same_area = False
intent_response: intent.IntentResponse | None = None
if not processed_locally and not self._intent_agent_only:
# Sentence triggers override conversation agent
@@ -1136,7 +1159,8 @@ class PipelineRun:
# Try local intents
if (
intent_response is None
self.pipeline.acknowledge_same_area
and intent_response is None
and self.pipeline.prefer_local_intents
and (
intent_response := await conversation.async_handle_intents(
@@ -1280,6 +1304,43 @@ class PipelineRun:
if tts_input_stream and self._streamed_response_text:
tts_input_stream.put_nowait(None)
intent_response = conversation_result.response
device_registry = dr.async_get(self.hass)
if (
(
intent_response.response_type
== intent.IntentResponseType.ACTION_DONE
)
and intent_response.matched_states
and device_id
and (device := device_registry.async_get(device_id))
and device.area_id
):
entity_registry = er.async_get(self.hass)
all_same_area = True
for state in intent_response.matched_states:
entity = entity_registry.async_get(state.entity_id)
if (
(not entity)
or (
entity.area_id
and (entity.area_id != device.area_id)
)
or (
entity.device_id
and (
entity_device := device_registry.async_get(
entity.device_id
)
)
and entity_device.area_id != device.area_id
)
):
all_same_area = False
break
_LOGGER.error("All same area: %s", all_same_area)
except Exception as src_error:
_LOGGER.exception("Unexpected error during intent recognition")
raise IntentRecognitionError(
@@ -1302,7 +1363,7 @@ class PipelineRun:
if conversation_result.continue_conversation:
self._conversation_data.continue_conversation_agent = agent_id
return speech
return speech, all_same_area
async def prepare_text_to_speech(self) -> None:
"""Prepare text-to-speech."""
@@ -1370,6 +1431,30 @@ class PipelineRun:
PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output})
)
async def acknowledge(self, media_id: str, tts_input: str | None) -> None:
    """Emit TTS start/end events whose output URL points at ``media_id``.

    No speech is synthesized here: a TTS_START event is sent, ``media_id``
    is turned into a URL, and a TTS_END event carrying that URL is sent.

    ``media_id`` is either a media source ID, which is resolved through
    ``media_source``, or any other string, which is appended to the URL
    returned by ``network.get_url``.
    ``tts_input`` is reported in the start event; ``None`` is sent as "".
    """
    start_data = {
        "language": self.pipeline.tts_language,
        "voice": self.pipeline.tts_voice,
        "tts_input": tts_input or "",
    }
    self.process_event(PipelineEvent(PipelineEventType.TTS_START, start_data))

    if not media_source.is_media_source_id(media_id):
        # Not a media source ID: prefix it with the instance URL.
        output_url = network.get_url(self.hass) + media_id
    else:
        resolved = await media_source.async_resolve_media(self.hass, media_id, None)
        output_url = resolved.url

    self.process_event(
        PipelineEvent(
            PipelineEventType.TTS_END, {"tts_output": {"url": output_url}}
        )
    )
def _capture_chunk(self, audio_bytes: bytes | None) -> None:
"""Forward audio chunk to various capturing mechanisms."""
if self.debug_recording_queue is not None:
@@ -1649,17 +1734,18 @@ class PipelineInput:
if self.run.end_stage != PipelineStage.STT:
tts_input = self.tts_input
all_same_area = False
if current_stage == PipelineStage.INTENT:
# intent-recognition
assert intent_input is not None
tts_input = await self.run.recognize_intent(
tts_input, all_same_area = await self.run.recognize_intent(
intent_input,
self.session.conversation_id,
self.device_id,
self.conversation_extra_system_prompt,
)
if tts_input.strip():
if all_same_area or tts_input.strip():
current_stage = PipelineStage.TTS
else:
# Skip TTS
@@ -1668,8 +1754,13 @@ class PipelineInput:
if self.run.end_stage != PipelineStage.INTENT:
# text-to-speech
if current_stage == PipelineStage.TTS:
assert tts_input is not None
await self.run.text_to_speech(tts_input)
if all_same_area and self.run.pipeline.acknowledge_media_id:
await self.run.acknowledge(
self.run.pipeline.acknowledge_media_id, tts_input
)
else:
assert tts_input is not None
await self.run.text_to_speech(tts_input)
except PipelineError as err:
self.run.process_event(