mirror of
https://github.com/home-assistant/core.git
synced 2025-08-30 09:51:37 +02:00
First pass at acknowledgement
This commit is contained in:
@@ -3,11 +3,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import AsyncIterable
|
||||
from http import HTTPStatus
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from aiohttp import web
|
||||
import voluptuous as vol
|
||||
|
||||
from homeassistant.components import stt
|
||||
from homeassistant.components import http, stt
|
||||
from homeassistant.core import Context, HomeAssistant
|
||||
from homeassistant.helpers import chat_session
|
||||
from homeassistant.helpers.typing import ConfigType
|
||||
@@ -86,6 +89,8 @@ async def async_setup(hass: HomeAssistant, config: ConfigType) -> bool:
|
||||
await async_setup_pipeline_store(hass)
|
||||
async_register_websocket_api(hass)
|
||||
|
||||
hass.http.register_view(DefaultSoundsView(hass))
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@@ -133,3 +138,19 @@ async def async_pipeline_from_audio_stream(
|
||||
)
|
||||
await pipeline_input.validate()
|
||||
await pipeline_input.execute()
|
||||
|
||||
|
||||
class DefaultSoundsView(http.HomeAssistantView):
    """Serve the default sound files bundled with the assist pipeline integration.

    Registered without authentication so media players on the local network can
    fetch acknowledgement sounds directly.
    """

    # Doubled braces produce the literal aiohttp route placeholder "{filename}",
    # which is delivered to get() as the `filename` argument.
    url = f"/api/{DOMAIN}/sounds/{{filename}}"
    name = f"api:{DOMAIN}:sounds"
    requires_auth = False

    def __init__(self, hass: HomeAssistant) -> None:
        """Initialize the view with the bundled sounds directory."""
        self.hass = hass
        # Sound files ship inside the integration package next to this module.
        self.base_dir = Path(__file__).parent / "sounds"

    async def get(self, request: web.Request, filename: str) -> web.StreamResponse:
        """Return a bundled sound file.

        Only an explicit allow-list of filenames is served, which prevents
        path traversal out of the sounds directory on this unauthenticated
        endpoint.
        """
        if filename not in ("acknowledge.mp3",):
            return web.Response(body="Invalid filename", status=HTTPStatus.BAD_REQUEST)

        return web.FileResponse(self.base_dir / filename)
|
||||
|
@@ -3,7 +3,7 @@
|
||||
"name": "Assist pipeline",
|
||||
"after_dependencies": ["repairs"],
|
||||
"codeowners": ["@balloob", "@synesthesiam"],
|
||||
"dependencies": ["conversation", "stt", "tts", "wake_word"],
|
||||
"dependencies": ["conversation", "stt", "tts", "wake_word", "http"],
|
||||
"documentation": "https://www.home-assistant.io/integrations/assist_pipeline",
|
||||
"integration_type": "system",
|
||||
"iot_class": "local_push",
|
||||
|
@@ -19,11 +19,24 @@ import wave
|
||||
import hass_nabucasa
|
||||
import voluptuous as vol
|
||||
|
||||
from homeassistant.components import conversation, stt, tts, wake_word, websocket_api
|
||||
from homeassistant.components import (
|
||||
conversation,
|
||||
media_source,
|
||||
stt,
|
||||
tts,
|
||||
wake_word,
|
||||
websocket_api,
|
||||
)
|
||||
from homeassistant.const import ATTR_SUPPORTED_FEATURES, MATCH_ALL
|
||||
from homeassistant.core import Context, HomeAssistant, callback
|
||||
from homeassistant.exceptions import HomeAssistantError
|
||||
from homeassistant.helpers import chat_session, intent
|
||||
from homeassistant.helpers import (
|
||||
chat_session,
|
||||
device_registry as dr,
|
||||
entity_registry as er,
|
||||
intent,
|
||||
network,
|
||||
)
|
||||
from homeassistant.helpers.collection import (
|
||||
CHANGE_UPDATED,
|
||||
CollectionError,
|
||||
@@ -91,6 +104,8 @@ KEY_PIPELINE_CONVERSATION_DATA: HassKey[dict[str, PipelineConversationData]] = H
|
||||
# Number of response parts to handle before streaming the response
|
||||
STREAM_RESPONSE_CHARS = 60
|
||||
|
||||
DEFAULT_ACKNOWLEDGE_MEDIA_ID = f"/api/{DOMAIN}/sounds/acknowledge.mp3"
|
||||
|
||||
|
||||
def validate_language(data: dict[str, Any]) -> Any:
|
||||
"""Validate language settings."""
|
||||
@@ -412,6 +427,8 @@ class Pipeline:
|
||||
wake_word_entity: str | None
|
||||
wake_word_id: str | None
|
||||
prefer_local_intents: bool = False
|
||||
acknowledge_same_area: bool = True
|
||||
acknowledge_media_id: str | None = None
|
||||
|
||||
id: str = field(default_factory=ulid_util.ulid_now)
|
||||
|
||||
@@ -436,6 +453,10 @@ class Pipeline:
|
||||
wake_word_entity=data["wake_word_entity"],
|
||||
wake_word_id=data["wake_word_id"],
|
||||
prefer_local_intents=data.get("prefer_local_intents", False),
|
||||
acknowledge_same_area=data.get("acknowledge_same_area", True),
|
||||
acknowledge_media_id=data.get(
|
||||
"acknowledge_media_id", DEFAULT_ACKNOWLEDGE_MEDIA_ID
|
||||
),
|
||||
)
|
||||
|
||||
def to_json(self) -> dict[str, Any]:
|
||||
@@ -454,6 +475,7 @@ class Pipeline:
|
||||
"wake_word_entity": self.wake_word_entity,
|
||||
"wake_word_id": self.wake_word_id,
|
||||
"prefer_local_intents": self.prefer_local_intents,
|
||||
"acknowledge_media_id": self.acknowledge_media_id,
|
||||
}
|
||||
|
||||
|
||||
@@ -1059,7 +1081,7 @@ class PipelineRun:
|
||||
conversation_id: str,
|
||||
device_id: str | None,
|
||||
conversation_extra_system_prompt: str | None,
|
||||
) -> str:
|
||||
) -> tuple[str, bool]:
|
||||
"""Run intent recognition portion of pipeline. Returns text to speak."""
|
||||
if self.intent_agent is None or self._conversation_data is None:
|
||||
raise RuntimeError("Recognize intent was not prepared")
|
||||
@@ -1107,6 +1129,7 @@ class PipelineRun:
|
||||
|
||||
agent_id = self.intent_agent.id
|
||||
processed_locally = agent_id == conversation.HOME_ASSISTANT_AGENT
|
||||
all_same_area = False
|
||||
intent_response: intent.IntentResponse | None = None
|
||||
if not processed_locally and not self._intent_agent_only:
|
||||
# Sentence triggers override conversation agent
|
||||
@@ -1136,7 +1159,8 @@ class PipelineRun:
|
||||
|
||||
# Try local intents
|
||||
if (
|
||||
intent_response is None
|
||||
self.pipeline.acknowledge_same_area
|
||||
and intent_response is None
|
||||
and self.pipeline.prefer_local_intents
|
||||
and (
|
||||
intent_response := await conversation.async_handle_intents(
|
||||
@@ -1280,6 +1304,43 @@ class PipelineRun:
|
||||
if tts_input_stream and self._streamed_response_text:
|
||||
tts_input_stream.put_nowait(None)
|
||||
|
||||
intent_response = conversation_result.response
|
||||
device_registry = dr.async_get(self.hass)
|
||||
if (
|
||||
(
|
||||
intent_response.response_type
|
||||
== intent.IntentResponseType.ACTION_DONE
|
||||
)
|
||||
and intent_response.matched_states
|
||||
and device_id
|
||||
and (device := device_registry.async_get(device_id))
|
||||
and device.area_id
|
||||
):
|
||||
entity_registry = er.async_get(self.hass)
|
||||
all_same_area = True
|
||||
for state in intent_response.matched_states:
|
||||
entity = entity_registry.async_get(state.entity_id)
|
||||
if (
|
||||
(not entity)
|
||||
or (
|
||||
entity.area_id
|
||||
and (entity.area_id != device.area_id)
|
||||
)
|
||||
or (
|
||||
entity.device_id
|
||||
and (
|
||||
entity_device := device_registry.async_get(
|
||||
entity.device_id
|
||||
)
|
||||
)
|
||||
and entity_device.area_id != device.area_id
|
||||
)
|
||||
):
|
||||
all_same_area = False
|
||||
break
|
||||
|
||||
_LOGGER.error("All same area: %s", all_same_area)
|
||||
|
||||
except Exception as src_error:
|
||||
_LOGGER.exception("Unexpected error during intent recognition")
|
||||
raise IntentRecognitionError(
|
||||
@@ -1302,7 +1363,7 @@ class PipelineRun:
|
||||
if conversation_result.continue_conversation:
|
||||
self._conversation_data.continue_conversation_agent = agent_id
|
||||
|
||||
return speech
|
||||
return speech, all_same_area
|
||||
|
||||
async def prepare_text_to_speech(self) -> None:
|
||||
"""Prepare text-to-speech."""
|
||||
@@ -1370,6 +1431,30 @@ class PipelineRun:
|
||||
PipelineEvent(PipelineEventType.TTS_END, {"tts_output": tts_output})
|
||||
)
|
||||
|
||||
async def acknowledge(self, media_id: str, tts_input: str | None) -> None:
    """Emit TTS start/end events that play *media_id* as an acknowledgement.

    No speech is synthesized; the pipeline's TTS event stream is reused to
    deliver a pre-recorded sound URL to the listening client.
    """
    start_payload = {
        "language": self.pipeline.tts_language,
        "voice": self.pipeline.tts_voice,
        "tts_input": tts_input or "",
    }
    self.process_event(PipelineEvent(PipelineEventType.TTS_START, start_payload))

    # Media-source ids must be resolved to a playable URL; anything else is
    # treated as a path relative to this Home Assistant instance's base URL.
    if media_source.is_media_source_id(media_id):
        resolved = await media_source.async_resolve_media(self.hass, media_id, None)
        url = resolved.url
    else:
        url = network.get_url(self.hass) + media_id

    self.process_event(
        PipelineEvent(PipelineEventType.TTS_END, {"tts_output": {"url": url}})
    )
|
||||
|
||||
def _capture_chunk(self, audio_bytes: bytes | None) -> None:
|
||||
"""Forward audio chunk to various capturing mechanisms."""
|
||||
if self.debug_recording_queue is not None:
|
||||
@@ -1649,17 +1734,18 @@ class PipelineInput:
|
||||
|
||||
if self.run.end_stage != PipelineStage.STT:
|
||||
tts_input = self.tts_input
|
||||
all_same_area = False
|
||||
|
||||
if current_stage == PipelineStage.INTENT:
|
||||
# intent-recognition
|
||||
assert intent_input is not None
|
||||
tts_input = await self.run.recognize_intent(
|
||||
tts_input, all_same_area = await self.run.recognize_intent(
|
||||
intent_input,
|
||||
self.session.conversation_id,
|
||||
self.device_id,
|
||||
self.conversation_extra_system_prompt,
|
||||
)
|
||||
if tts_input.strip():
|
||||
if all_same_area or tts_input.strip():
|
||||
current_stage = PipelineStage.TTS
|
||||
else:
|
||||
# Skip TTS
|
||||
@@ -1668,8 +1754,13 @@ class PipelineInput:
|
||||
if self.run.end_stage != PipelineStage.INTENT:
|
||||
# text-to-speech
|
||||
if current_stage == PipelineStage.TTS:
|
||||
assert tts_input is not None
|
||||
await self.run.text_to_speech(tts_input)
|
||||
if all_same_area and self.run.pipeline.acknowledge_media_id:
|
||||
await self.run.acknowledge(
|
||||
self.run.pipeline.acknowledge_media_id, tts_input
|
||||
)
|
||||
else:
|
||||
assert tts_input is not None
|
||||
await self.run.text_to_speech(tts_input)
|
||||
|
||||
except PipelineError as err:
|
||||
self.run.process_event(
|
||||
|
BIN
homeassistant/components/assist_pipeline/sounds/acknowledge.mp3
Normal file
BIN
homeassistant/components/assist_pipeline/sounds/acknowledge.mp3
Normal file
Binary file not shown.
Reference in New Issue
Block a user