OpenAI thinking content (#150340)

Denis Shulyaka
2025-08-11 23:28:36 +03:00
committed by GitHub
parent e13702d9b1
commit 9cae0e0acc
4 changed files with 259 additions and 77 deletions

View File

@@ -3,11 +3,11 @@
 from __future__ import annotations
 
 import base64
-from collections.abc import AsyncGenerator, Callable
+from collections.abc import AsyncGenerator, Callable, Iterable
 import json
 from mimetypes import guess_file_type
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal
 
 import openai
 from openai._streaming import AsyncStream
@@ -29,14 +29,15 @@ from openai.types.responses import (
     ResponseOutputItemAddedEvent,
     ResponseOutputItemDoneEvent,
     ResponseOutputMessage,
-    ResponseOutputMessageParam,
     ResponseReasoningItem,
     ResponseReasoningItemParam,
+    ResponseReasoningSummaryTextDeltaEvent,
     ResponseStreamEvent,
     ResponseTextDeltaEvent,
     ToolParam,
     WebSearchToolParam,
 )
+from openai.types.responses.response_create_params import ResponseCreateParamsStreaming
 from openai.types.responses.response_input_param import FunctionCallOutput
 from openai.types.responses.tool_param import (
     CodeInterpreter,
@@ -143,70 +144,116 @@ def _format_tool(
 def _convert_content_to_param(
-    content: conversation.Content,
+    chat_content: Iterable[conversation.Content],
 ) -> ResponseInputParam:
     """Convert any native chat message for this agent to the native format."""
     messages: ResponseInputParam = []
-    if isinstance(content, conversation.ToolResultContent):
-        return [
-            FunctionCallOutput(
-                type="function_call_output",
-                call_id=content.tool_call_id,
-                output=json.dumps(content.tool_result),
-            )
-        ]
+    reasoning_summary: list[str] = []
 
-    if content.content:
-        role: Literal["user", "assistant", "system", "developer"] = content.role
-        if role == "system":
-            role = "developer"
-        messages.append(
-            EasyInputMessageParam(type="message", role=role, content=content.content)
-        )
+    for content in chat_content:
+        if isinstance(content, conversation.ToolResultContent):
+            messages.append(
+                FunctionCallOutput(
+                    type="function_call_output",
+                    call_id=content.tool_call_id,
+                    output=json.dumps(content.tool_result),
+                )
+            )
+            continue
 
-    if isinstance(content, conversation.AssistantContent) and content.tool_calls:
-        messages.extend(
-            ResponseFunctionToolCallParam(
-                type="function_call",
-                name=tool_call.tool_name,
-                arguments=json.dumps(tool_call.tool_args),
-                call_id=tool_call.id,
-            )
-            for tool_call in content.tool_calls
-        )
+        if content.content:
+            role: Literal["user", "assistant", "system", "developer"] = content.role
+            if role == "system":
+                role = "developer"
+            messages.append(
+                EasyInputMessageParam(
+                    type="message", role=role, content=content.content
+                )
+            )
+
+        if isinstance(content, conversation.AssistantContent):
+            if content.tool_calls:
+                messages.extend(
+                    ResponseFunctionToolCallParam(
+                        type="function_call",
+                        name=tool_call.tool_name,
+                        arguments=json.dumps(tool_call.tool_args),
+                        call_id=tool_call.id,
+                    )
+                    for tool_call in content.tool_calls
+                )
+
+            if content.thinking_content:
+                reasoning_summary.append(content.thinking_content)
+
+            if isinstance(content.native, ResponseReasoningItem):
+                messages.append(
+                    ResponseReasoningItemParam(
+                        type="reasoning",
+                        id=content.native.id,
+                        summary=[
+                            {
+                                "type": "summary_text",
+                                "text": summary,
+                            }
+                            for summary in reasoning_summary
+                        ]
+                        if content.thinking_content
+                        else [],
+                        encrypted_content=content.native.encrypted_content,
+                    )
+                )
+                reasoning_summary = []
+
     return messages
 
 
 async def _transform_stream(
     chat_log: conversation.ChatLog,
-    result: AsyncStream[ResponseStreamEvent],
-    messages: ResponseInputParam,
+    stream: AsyncStream[ResponseStreamEvent],
 ) -> AsyncGenerator[conversation.AssistantContentDeltaDict]:
     """Transform an OpenAI delta stream into HA format."""
-    async for event in result:
+    last_summary_index = None
+
+    async for event in stream:
         LOGGER.debug("Received event: %s", event)
 
         if isinstance(event, ResponseOutputItemAddedEvent):
             if isinstance(event.item, ResponseOutputMessage):
                 yield {"role": event.item.role}
+                last_summary_index = None
             elif isinstance(event.item, ResponseFunctionToolCall):
                 # OpenAI has tool calls as individual events
                 # while HA puts tool calls inside the assistant message.
                 # We turn them into individual assistant content for HA
                 # to ensure that tools are called as soon as possible.
                 yield {"role": "assistant"}
+                last_summary_index = None
                 current_tool_call = event.item
         elif isinstance(event, ResponseOutputItemDoneEvent):
-            item = event.item.model_dump()
-            item.pop("status", None)
             if isinstance(event.item, ResponseReasoningItem):
-                messages.append(cast(ResponseReasoningItemParam, item))
-            elif isinstance(event.item, ResponseOutputMessage):
-                messages.append(cast(ResponseOutputMessageParam, item))
-            elif isinstance(event.item, ResponseFunctionToolCall):
-                messages.append(cast(ResponseFunctionToolCallParam, item))
+                yield {
+                    "native": ResponseReasoningItem(
+                        type="reasoning",
+                        id=event.item.id,
+                        summary=[],  # Remove summaries
+                        encrypted_content=event.item.encrypted_content,
+                    )
+                }
         elif isinstance(event, ResponseTextDeltaEvent):
             yield {"content": event.delta}
+        elif isinstance(event, ResponseReasoningSummaryTextDeltaEvent):
+            # OpenAI can output several reasoning summaries
+            # in a single ResponseReasoningItem. We split them into separate
+            # AssistantContent messages. Only the last of them will have
+            # the reasoning `native` field set.
+            if (
+                last_summary_index is not None
+                and event.summary_index != last_summary_index
+            ):
+                yield {"role": "assistant"}
+            last_summary_index = event.summary_index
+            yield {"thinking_content": event.delta}
         elif isinstance(event, ResponseFunctionCallArgumentsDeltaEvent):
             current_tool_call.arguments += event.delta
         elif isinstance(event, ResponseFunctionCallArgumentsDoneEvent):
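Note: the two functions above form a round trip. `_transform_stream` emits each reasoning summary as its own `thinking_content` message, and `_convert_content_to_param` folds consecutive summaries back into one reasoning item when it reaches the content carrying the `native` field. A minimal sketch of that fold, using a hypothetical `SimpleContent` stand-in rather than the real HA `conversation` classes:

from dataclasses import dataclass


@dataclass
class SimpleContent:
    """Hypothetical stand-in for conversation.AssistantContent."""

    thinking_content: str | None = None
    native_id: str | None = None  # only the last summary message carries this


def fold_summaries(items: list[SimpleContent]) -> list[dict]:
    """Collect thinking_content until a native reasoning id appears, then
    emit one reasoning param carrying all accumulated summaries."""
    out: list[dict] = []
    pending: list[str] = []
    for item in items:
        if item.thinking_content:
            pending.append(item.thinking_content)
        if item.native_id is not None:
            out.append(
                {
                    "type": "reasoning",
                    "id": item.native_id,
                    "summary": [
                        {"type": "summary_text", "text": text} for text in pending
                    ],
                }
            )
            pending = []
    return out


# Two streamed summary messages fold back into a single reasoning item:
result = fold_summaries(
    [
        SimpleContent(thinking_content="Thinking"),
        SimpleContent(thinking_content="Thinking more", native_id="rs_A"),
    ]
)
assert result[0]["summary"] == [
    {"type": "summary_text", "text": "Thinking"},
    {"type": "summary_text", "text": "Thinking more"},
]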
@@ -335,16 +382,18 @@ class OpenAIBaseLLMEntity(Entity):
                 )
             )
 
-        model_args = {
-            "model": options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL),
-            "input": [],
-            "max_output_tokens": options.get(CONF_MAX_TOKENS, RECOMMENDED_MAX_TOKENS),
-            "top_p": options.get(CONF_TOP_P, RECOMMENDED_TOP_P),
-            "temperature": options.get(CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE),
-            "user": chat_log.conversation_id,
-            "store": False,
-            "stream": True,
-        }
+        messages = _convert_content_to_param(chat_log.content)
+
+        model_args = ResponseCreateParamsStreaming(
+            model=options.get(CONF_CHAT_MODEL, RECOMMENDED_CHAT_MODEL),
+            input=messages,
+            max_output_tokens=options.get(CONF_MAX_TOKENS, RECOMMENDED_MAX_TOKENS),
+            top_p=options.get(CONF_TOP_P, RECOMMENDED_TOP_P),
+            temperature=options.get(CONF_TEMPERATURE, RECOMMENDED_TEMPERATURE),
+            user=chat_log.conversation_id,
+            store=False,
+            stream=True,
+        )
 
         if tools:
             model_args["tools"] = tools
@@ -352,7 +401,8 @@ class OpenAIBaseLLMEntity(Entity):
             model_args["reasoning"] = {
                 "effort": options.get(
                     CONF_REASONING_EFFORT, RECOMMENDED_REASONING_EFFORT
-                )
+                ),
+                "summary": "auto",
             }
             model_args["include"] = ["reasoning.encrypted_content"]
@@ -361,12 +411,6 @@ class OpenAIBaseLLMEntity(Entity):
                 "verbosity": options.get(CONF_VERBOSITY, RECOMMENDED_VERBOSITY)
             }
 
-        messages = [
-            m
-            for content in chat_log.content
-            for m in _convert_content_to_param(content)
-        ]
-
         last_content = chat_log.content[-1]
 
         # Handle attachments by adding them to the last user message
@@ -399,16 +443,19 @@ class OpenAIBaseLLMEntity(Entity):
         # To prevent infinite loops, we limit the number of iterations
         for _iteration in range(MAX_TOOL_ITERATIONS):
-            model_args["input"] = messages
-
             try:
-                result = await client.responses.create(**model_args)
-                async for content in chat_log.async_add_delta_content_stream(
-                    self.entity_id, _transform_stream(chat_log, result, messages)
-                ):
-                    if not isinstance(content, conversation.AssistantContent):
-                        messages.extend(_convert_content_to_param(content))
+                stream = await client.responses.create(**model_args)
+                messages.extend(
+                    _convert_content_to_param(
+                        [
+                            content
+                            async for content in chat_log.async_add_delta_content_stream(
+                                self.entity_id, _transform_stream(chat_log, stream)
+                            )
+                        ]
+                    )
+                )
             except openai.RateLimitError as err:
                 LOGGER.error("Rate limited by OpenAI: %s", err)
                 raise HomeAssistantError("Rate limited or insufficient funds") from err
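Note: dropping the per-iteration `model_args["input"] = messages` assignment is safe because `ResponseCreateParamsStreaming` is a TypedDict holding a reference to the same `messages` list, so an in-place `messages.extend(...)` is already visible to the next `client.responses.create(**model_args)` call. A tiny sketch of that aliasing (placeholder values, plain dicts instead of the real types):

# The dict stores a reference, not a copy, so extending the list in place
# updates what the next request will send.
messages: list[dict] = [{"type": "message", "role": "user", "content": "hi"}]
model_args = {"model": "gpt-test", "input": messages}  # "gpt-test" is a placeholder

messages.append({"type": "function_call_output", "call_id": "c1", "output": '"ok"'})
assert model_args["input"] is messages  # same object
assert len(model_args["input"]) == 2  # next request includes the new item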

View File

@@ -18,6 +18,10 @@ from openai.types.responses import (
     ResponseOutputMessage,
     ResponseOutputText,
     ResponseReasoningItem,
+    ResponseReasoningSummaryPartAddedEvent,
+    ResponseReasoningSummaryPartDoneEvent,
+    ResponseReasoningSummaryTextDeltaEvent,
+    ResponseReasoningSummaryTextDoneEvent,
     ResponseStreamEvent,
     ResponseTextDeltaEvent,
     ResponseTextDoneEvent,
@@ -26,6 +30,7 @@ from openai.types.responses import (
     ResponseWebSearchCallSearchingEvent,
 )
 from openai.types.responses.response_function_web_search import ActionSearch
+from openai.types.responses.response_reasoning_item import Summary
 
 
 def create_message_item(
@@ -173,9 +178,23 @@ def create_function_tool_call_item(
     return events
 
 
-def create_reasoning_item(id: str, output_index: int) -> list[ResponseStreamEvent]:
+def create_reasoning_item(
+    id: str,
+    output_index: int,
+    reasoning_summary: list[list[str]] | list[str] | str | None = None,
+) -> list[ResponseStreamEvent]:
     """Create a reasoning item."""
-    return [
+    if reasoning_summary is None:
+        reasoning_summary = [[]]
+    elif isinstance(reasoning_summary, str):
+        reasoning_summary = [reasoning_summary]
+    if isinstance(reasoning_summary, list) and all(
+        isinstance(item, str) for item in reasoning_summary
+    ):
+        reasoning_summary = [reasoning_summary]
+
+    events = [
         ResponseOutputItemAddedEvent(
             item=ResponseReasoningItem(
                 id=id,
@@ -187,11 +206,60 @@ def create_reasoning_item(id: str, output_index: int) -> list[ResponseStreamEven
             output_index=output_index,
             sequence_number=0,
             type="response.output_item.added",
-        ),
+        )
+    ]
+
+    for summary_index, summary in enumerate(reasoning_summary):
+        events.append(
+            ResponseReasoningSummaryPartAddedEvent(
+                item_id=id,
+                output_index=output_index,
+                part={"text": "", "type": "summary_text"},
+                sequence_number=0,
+                summary_index=summary_index,
+                type="response.reasoning_summary_part.added",
+            )
+        )
+        events.extend(
+            ResponseReasoningSummaryTextDeltaEvent(
+                delta=delta,
+                item_id=id,
+                output_index=output_index,
+                sequence_number=0,
+                summary_index=summary_index,
+                type="response.reasoning_summary_text.delta",
+            )
+            for delta in summary
+        )
+        events.extend(
+            [
+                ResponseReasoningSummaryTextDoneEvent(
+                    item_id=id,
+                    output_index=output_index,
+                    sequence_number=0,
+                    summary_index=summary_index,
+                    text="".join(summary),
+                    type="response.reasoning_summary_text.done",
+                ),
+                ResponseReasoningSummaryPartDoneEvent(
+                    item_id=id,
+                    output_index=output_index,
+                    part={"text": "".join(summary), "type": "summary_text"},
+                    sequence_number=0,
+                    summary_index=summary_index,
+                    type="response.reasoning_summary_part.done",
+                ),
+            ]
+        )
+
+    events.append(
         ResponseOutputItemDoneEvent(
             item=ResponseReasoningItem(
                 id=id,
-                summary=[],
+                summary=[
+                    Summary(text="".join(summary), type="summary_text")
+                    for summary in reasoning_summary
+                ],
                 type="reasoning",
                 status=None,
                 encrypted_content="AAABBB",
@@ -200,7 +268,9 @@ def create_reasoning_item(id: str, output_index: int) -> list[ResponseStreamEven
             sequence_number=0,
             type="response.output_item.done",
         ),
-    ]
+    )
+
+    return events
 
 
 def create_web_search_item(id: str, output_index: int) -> list[ResponseStreamEvent]:
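Note: a usage sketch of the extended helper (mirroring the updated test further down). Based on the normalization branches above, the parameter accepts four shapes:

#   None           -> [[]]            one summary part, no text deltas
#   "abc"          -> [["abc"]]       one part, one delta
#   ["a", "b"]     -> [["a", "b"]]    one part, two deltas
#   [["a"], ["b"]] -> unchanged       two parts, one delta each
events = create_reasoning_item(
    id="rs_A",
    output_index=0,
    reasoning_summary=[["Thinking"], ["Thinking ", "more"]],
)
# Produces item.added, then part.added / text.delta... / text.done / part.done
# per summary part, then item.done with both joined summaries attached.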

View File

@@ -6,6 +6,22 @@
       'content': 'Please call the test function',
       'role': 'user',
     }),
+    dict({
+      'agent_id': 'conversation.openai_conversation',
+      'content': None,
+      'native': None,
+      'role': 'assistant',
+      'thinking_content': 'Thinking',
+      'tool_calls': None,
+    }),
+    dict({
+      'agent_id': 'conversation.openai_conversation',
+      'content': None,
+      'native': ResponseReasoningItem(id='rs_A', summary=[], type='reasoning', content=None, encrypted_content='AAABBB', status=None),
+      'role': 'assistant',
+      'thinking_content': 'Thinking more',
+      'tool_calls': None,
+    }),
     dict({
       'agent_id': 'conversation.openai_conversation',
       'content': None,
@@ -62,6 +78,57 @@
     }),
   ])
 # ---
+# name: test_function_call.1
+  list([
+    dict({
+      'content': 'Please call the test function',
+      'role': 'user',
+      'type': 'message',
+    }),
+    dict({
+      'encrypted_content': 'AAABBB',
+      'id': 'rs_A',
+      'summary': list([
+        dict({
+          'text': 'Thinking',
+          'type': 'summary_text',
+        }),
+        dict({
+          'text': 'Thinking more',
+          'type': 'summary_text',
+        }),
+      ]),
+      'type': 'reasoning',
+    }),
+    dict({
+      'arguments': '{"param1": "call1"}',
+      'call_id': 'call_call_1',
+      'name': 'test_tool',
+      'type': 'function_call',
+    }),
+    dict({
+      'call_id': 'call_call_1',
+      'output': '"value1"',
+      'type': 'function_call_output',
+    }),
+    dict({
+      'arguments': '{"param1": "call2"}',
+      'call_id': 'call_call_2',
+      'name': 'test_tool',
+      'type': 'function_call',
+    }),
+    dict({
+      'call_id': 'call_call_2',
+      'output': '"value2"',
+      'type': 'function_call_output',
+    }),
+    dict({
+      'content': 'Cool',
+      'role': 'assistant',
+      'type': 'message',
+    }),
+  ])
+# ---
 # name: test_function_call_without_reasoning
   list([
     dict({

View File

@@ -252,7 +252,11 @@ async def test_function_call(
         # Initial conversation
         (
             # Wait for the model to think
-            *create_reasoning_item(id="rs_A", output_index=0),
+            *create_reasoning_item(
+                id="rs_A",
+                output_index=0,
+                reasoning_summary=[["Thinking"], ["Thinking ", "more"]],
+            ),
             # First tool call
             *create_function_tool_call_item(
                 id="fc_1",
@@ -288,16 +292,10 @@ async def test_function_call(
         agent_id="conversation.openai_conversation",
     )
 
-    assert mock_create_stream.call_args.kwargs["input"][2] == {
-        "content": None,
-        "id": "rs_A",
-        "summary": [],
-        "type": "reasoning",
-        "encrypted_content": "AAABBB",
-    }
-
     assert result.response.response_type == intent.IntentResponseType.ACTION_DONE
     # Don't test the prompt, as it's not deterministic
     assert mock_chat_log.content[1:] == snapshot
+    assert mock_create_stream.call_args.kwargs["input"][1:] == snapshot
 
 
 async def test_function_call_without_reasoning(
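Note: a small replay (simplified, hypothetical event tuples) of the summary-splitting rule in `_transform_stream`, showing why the snapshot above holds two assistant entries, 'Thinking' and 'Thinking more':

# Each tuple is (delta, summary_index); a changed index starts a new message.
deltas = [("Thinking", 0), ("Thinking ", 1), ("more", 1)]

messages: list[str] = []
last_summary_index = None
for delta, summary_index in deltas:
    if last_summary_index is not None and summary_index != last_summary_index:
        messages.append("")  # like yield {"role": "assistant"}: new message
    last_summary_index = summary_index
    if not messages:
        messages.append("")
    messages[-1] += delta  # like yield {"thinking_content": delta}

assert messages == ["Thinking", "Thinking more"]  # matches the snapshot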