Compare commits

...

10 Commits

Author SHA1 Message Date
Suren Gabrielyan
767a090dc5 Merge pull request #924 from gabsuren/fix/ws_race_on_abort
fix(websocket): Fix websocket client race on abort and memory leak(IDFGH-16555)
2025-12-16 21:02:17 +04:00
surengab
23ca97d5ec fix(websocket): Fix race conditions, memory leak, and data loss
- Add state check in abort_connection to prevent double-close
- Fix memory leak: free errormsg_buffer on disconnect
- Reset connection state on reconnect to prevent stale data
- Implement lock ordering for separate TX lock mode
- Read buffered data immediately after connection to prevent data loss
- Added sdkconfig.ci.tx_lock config
2025-12-16 20:11:29 +04:00
david-cermak
77abff48e1 Merge pull request #973 from david-cermak/fix/mosq_build
fix(mosq): Fix build with the new picolibc
2025-12-16 08:59:55 +01:00
david-cermak
7f8494a4b1 Merge pull request #974 from david-cermak/fix/asio_picolib
[asio]: Fix picolib missing pthread_sigmask() declaration
2025-12-15 13:31:08 +01:00
david-cermak
fdec046428 Merge pull request #972 from david-cermak/fix/modem_ap2ppp_deinit
[modem]: Fix deinit function in ap2ppp example
2025-12-15 12:43:52 +01:00
David Cermak
25d54efd3a fix(asio): Fix picolib missing pthread_sigmask() declaration 2025-12-15 11:11:35 +01:00
David Cermak
7f4e3690db fix(test): Fix catch based target tests with v6.0 2025-12-15 10:52:21 +01:00
David Cermak
ebc1258ea6 fix(mosq): Fix mosquitto build on latest master 2025-12-15 10:06:28 +01:00
David Cermak
dc68bf87cc fix(mosq): Fix build with the new picolibc 2025-12-15 08:34:46 +01:00
David Cermak
853e8e2810 fix(modem): Fix deinit function in ap2ppp example 2025-12-12 16:08:55 +01:00
11 changed files with 166 additions and 14 deletions

View File

@@ -6,3 +6,4 @@ Warning: Deprecated: Command 'sign_data' is deprecated. Use 'sign-data' instead.
Warning: Deprecated: Command 'extract_public_key' is deprecated. Use 'extract-public-key' instead.
warning: unknown kconfig symbol 'EXAMPLE_ETH_PHY_IP101'
WARNING: The following Kconfig variables were used in "if" clauses, but not
warning: unknown kconfig symbol 'LIBC_NEWLIB'

View File

@@ -7,5 +7,6 @@
#include "sys/socket.h"
#include "socketpair.h"
#include "asio_stub.hpp"
#include_next "asio/detail/config.hpp"

View File

@@ -0,0 +1,9 @@
/*
* SPDX-FileCopyrightText: 2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include <signal.h>
extern "C" int pthread_sigmask(int, const sigset_t *, sigset_t *);

View File

@@ -198,7 +198,7 @@ extern "C" bool modem_stop_network()
extern "C" void modem_deinit_network()
{
free(dce);
delete dce;
dce = nullptr;
}

View File

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: 2022-2024 Espressif Systems (Shanghai) CO LTD
* SPDX-FileCopyrightText: 2022-2025 Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Unlicense OR CC0-1.0
*/
@@ -12,6 +12,7 @@
#include "esp_system.h"
#include "esp_event.h"
#include "esp_log.h"
#include "esp_idf_version.h"
#include "esp_netif.h"
#include "esp_netif_ppp.h"
#include "freertos/FreeRTOS.h"
@@ -123,7 +124,7 @@ TEST_CASE("Disconnection test", "[esp_modem]")
CHECK(b == 2);
}
#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(6, 0, 0)
extern "C" {
static void handle(int nr)
@@ -131,10 +132,10 @@ extern "C" {
ESP_LOGE(TAG, "Signal handler %d", nr);
}
_sig_func_ptr signal (int nr, _sig_func_ptr)
_sig_func_ptr signal(int nr, _sig_func_ptr)
{
return handle;
}
}
#endif // ESP_IDF_VERSION < v6

View File

@@ -241,9 +241,29 @@ static esp_err_t esp_websocket_client_dispatch_event(esp_websocket_client_handle
return esp_event_loop_run(client->event_handle, 0);
}
/**
* @brief Abort the WebSocket connection and initiate reconnection or shutdown
*
* @param client WebSocket client handle
* @param error_type Type of error that caused the abort
*
* @return ESP_OK on success, ESP_FAIL on failure
*
* @note PRECONDITION: client->lock MUST be held by the calling thread before calling this function.
* This function does NOT acquire the lock itself. Calling without the lock will result in
* race conditions and undefined behavior.
*/
static esp_err_t esp_websocket_client_abort_connection(esp_websocket_client_handle_t client, esp_websocket_error_type_t error_type)
{
ESP_WS_CLIENT_STATE_CHECK(TAG, client, return ESP_FAIL);
if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW ||
client->state == WEBSOCKET_STATE_WAIT_TIMEOUT) {
ESP_LOGW(TAG, "Connection already closing/closed, skipping abort");
goto cleanup;
}
esp_transport_close(client->transport);
if (!client->config->auto_reconnect) {
@@ -256,6 +276,18 @@ static esp_err_t esp_websocket_client_abort_connection(esp_websocket_client_hand
}
client->error_handle.error_type = error_type;
esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DISCONNECTED, NULL, 0);
cleanup:
if (client->errormsg_buffer) {
ESP_LOGD(TAG, "Freeing error buffer (%d bytes) - Free heap before: %" PRIu32 " bytes",
client->errormsg_size, esp_get_free_heap_size());
free(client->errormsg_buffer);
client->errormsg_buffer = NULL;
client->errormsg_size = 0;
} else {
ESP_LOGD(TAG, "Disconnect - Free heap: %" PRIu32 " bytes", esp_get_free_heap_size());
}
return ESP_OK;
}
@@ -453,6 +485,8 @@ static void destroy_and_free_resources(esp_websocket_client_handle_t client)
esp_websocket_client_destroy_config(client);
if (client->transport_list) {
esp_transport_list_destroy(client->transport_list);
client->transport_list = NULL;
client->transport = NULL;
}
vSemaphoreDelete(client->lock);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
@@ -671,6 +705,11 @@ static int esp_websocket_client_send_with_exact_opcode(esp_websocket_client_hand
if (wlen < 0 || (wlen == 0 && need_write != 0)) {
ret = wlen;
esp_websocket_free_buf(client, true);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
xSemaphoreGiveRecursive(client->tx_lock);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
#endif
esp_tls_error_handle_t error_handle = esp_transport_get_error_handle(client->transport);
if (error_handle) {
esp_websocket_client_error(client, "esp_transport_write() returned %d, transport_error=%s, tls_error_code=%i, tls_flags=%i, errno=%d",
@@ -679,8 +718,16 @@ static int esp_websocket_client_send_with_exact_opcode(esp_websocket_client_hand
} else {
esp_websocket_client_error(client, "esp_transport_write() returned %d, errno=%d", ret, errno);
}
ESP_LOGD(TAG, "Calling abort_connection due to send error");
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
return ret;
#else
// Already holding client->lock, safe to call
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
goto unlock_and_return;
#endif
}
opcode = 0;
widx += wlen;
@@ -1019,7 +1066,6 @@ static esp_err_t esp_websocket_client_recv(esp_websocket_client_handle_t client)
esp_websocket_free_buf(client, false);
return ESP_OK;
}
esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_DATA, client->rx_buffer, rlen);
client->payload_offset += rlen;
@@ -1030,15 +1076,35 @@ static esp_err_t esp_websocket_client_recv(esp_websocket_client_handle_t client)
const char *data = (client->payload_len == 0) ? NULL : client->rx_buffer;
ESP_LOGD(TAG, "Sending PONG with payload len=%d", client->payload_len);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
xSemaphoreGiveRecursive(client->lock);
// Now acquire tx_lock with timeout (consistent with PING/CLOSE handling)
if (xSemaphoreTakeRecursive(client->tx_lock, WEBSOCKET_TX_LOCK_TIMEOUT_MS) != pdPASS) {
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout for PONG", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY); // Re-acquire client->lock before returning
esp_websocket_free_buf(client, false);
return ESP_FAIL;
}
#endif
// Re-acquire client->lock to maintain consistency
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
// Another thread may have closed it while we didn't hold client->lock
if (client->state == WEBSOCKET_STATE_CLOSING || client->state == WEBSOCKET_STATE_UNKNOW ||
client->state == WEBSOCKET_STATE_WAIT_TIMEOUT || client->transport == NULL) {
ESP_LOGW(TAG, "Transport closed while preparing PONG, skipping send");
xSemaphoreGiveRecursive(client->tx_lock);
esp_websocket_free_buf(client, false);
return ESP_OK; // Caller expects client->lock to be held, which it is
}
esp_transport_ws_send_raw(client->transport, WS_TRANSPORT_OPCODES_PONG | WS_TRANSPORT_OPCODES_FIN, data, client->payload_len,
client->config->network_timeout_ms);
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
xSemaphoreGiveRecursive(client->tx_lock);
#else
esp_transport_ws_send_raw(client->transport, WS_TRANSPORT_OPCODES_PONG | WS_TRANSPORT_OPCODES_FIN, data, client->payload_len,
client->config->network_timeout_ms);
#endif
} else if (client->last_opcode == WS_TRANSPORT_OPCODES_PONG) {
client->wait_for_pong_resp = false;
@@ -1136,7 +1202,29 @@ static void esp_websocket_client_task(void *pv)
client->state = WEBSOCKET_STATE_CONNECTED;
client->wait_for_pong_resp = false;
client->error_handle.error_type = WEBSOCKET_ERROR_TYPE_NONE;
client->payload_len = 0;
client->payload_offset = 0;
client->last_fin = false;
client->last_opcode = WS_TRANSPORT_OPCODES_NONE;
esp_websocket_client_dispatch_event(client, WEBSOCKET_EVENT_CONNECTED, NULL, 0);
// Check if there is data pending to be read (e.g. piggybacked with handshake)
if (esp_transport_poll_read(client->transport, 0) > 0) {
esp_err_t recv_result = esp_websocket_client_recv(client);
if (recv_result == ESP_OK) {
xSemaphoreGiveRecursive(client->lock);
esp_event_loop_run(client->event_handle, 0);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
if (client->state != WEBSOCKET_STATE_CONNECTED || client->transport == NULL) {
ESP_LOGD(TAG, "Connection state changed during handshake data processing");
break;
}
} else if (recv_result == ESP_FAIL) {
ESP_LOGE(TAG, "Error receive data during initial connection");
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
}
}
break;
case WEBSOCKET_STATE_CONNECTED:
if ((CLOSE_FRAME_SENT_BIT & xEventGroupGetBits(client->status_bits)) == 0) { // only send and check for PING
@@ -1145,8 +1233,23 @@ static void esp_websocket_client_task(void *pv)
client->ping_tick_ms = _tick_get_ms();
ESP_LOGD(TAG, "Sending PING...");
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
// Release client->lock first to avoid deadlock with send error path
xSemaphoreGiveRecursive(client->lock);
// Now acquire tx_lock with timeout (consistent with PONG handling)
if (xSemaphoreTakeRecursive(client->tx_lock, WEBSOCKET_TX_LOCK_TIMEOUT_MS) != pdPASS) {
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout for PING", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY); // Re-acquire client->lock before break
break;
}
// Re-acquire client->lock to check state
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
// Another thread may have closed it while we didn't hold client->lock
if (client->state != WEBSOCKET_STATE_CONNECTED || client->transport == NULL) {
ESP_LOGW(TAG, "Transport closed while preparing PING, skipping send");
xSemaphoreGiveRecursive(client->tx_lock);
break;
}
#endif
@@ -1182,8 +1285,23 @@ static void esp_websocket_client_task(void *pv)
if ((CLOSE_FRAME_SENT_BIT & xEventGroupGetBits(client->status_bits)) == 0) {
ESP_LOGD(TAG, "Closing initiated by the server, sending close frame");
#ifdef CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK
// Release client->lock first to avoid deadlock with send error path
xSemaphoreGiveRecursive(client->lock);
// Now acquire tx_lock with timeout (consistent with PONG/PING handling)
if (xSemaphoreTakeRecursive(client->tx_lock, WEBSOCKET_TX_LOCK_TIMEOUT_MS) != pdPASS) {
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
ESP_LOGE(TAG, "Could not lock ws-client within %d timeout for CLOSE", WEBSOCKET_TX_LOCK_TIMEOUT_MS);
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY); // Re-acquire client->lock before break
break;
}
// Re-acquire client->lock to check state
xSemaphoreTakeRecursive(client->lock, portMAX_DELAY);
// Another thread may have closed it while we didn't hold client->lock
if (client->state != WEBSOCKET_STATE_CLOSING || client->transport == NULL) {
ESP_LOGW(TAG, "Transport closed while preparing CLOSE frame, skipping send");
xSemaphoreGiveRecursive(client->tx_lock);
break;
}
#endif
@@ -1202,6 +1320,7 @@ static void esp_websocket_client_task(void *pv)
if (WEBSOCKET_STATE_CONNECTED == client->state) {
read_select = esp_transport_poll_read(client->transport, 1000); //Poll every 1000ms
if (read_select < 0) {
xSemaphoreTakeRecursive(client->lock, lock_timeout);
esp_tls_error_handle_t error_handle = esp_transport_get_error_handle(client->transport);
if (error_handle) {
esp_websocket_client_error(client, "esp_transport_poll_read() returned %d, transport_error=%s, tls_error_code=%i, tls_flags=%i, errno=%d",
@@ -1210,16 +1329,15 @@ static void esp_websocket_client_task(void *pv)
} else {
esp_websocket_client_error(client, "esp_transport_poll_read() returned %d, errno=%d", read_select, errno);
}
xSemaphoreTakeRecursive(client->lock, lock_timeout);
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
} else if (read_select > 0) {
xSemaphoreTakeRecursive(client->lock, lock_timeout);
if (esp_websocket_client_recv(client) == ESP_FAIL) {
ESP_LOGE(TAG, "Error receive data");
xSemaphoreTakeRecursive(client->lock, lock_timeout);
esp_websocket_client_abort_connection(client, WEBSOCKET_ERROR_TYPE_TCP_TRANSPORT);
xSemaphoreGiveRecursive(client->lock);
}
xSemaphoreGiveRecursive(client->lock);
} else {
ESP_LOGV(TAG, "Read poll timeout: skipping esp_transport_poll_read().");
}

View File

@@ -0,0 +1,15 @@
CONFIG_IDF_TARGET="esp32"
CONFIG_IDF_TARGET_LINUX=n
CONFIG_WEBSOCKET_URI_FROM_STDIN=n
CONFIG_WEBSOCKET_URI_FROM_STRING=y
CONFIG_EXAMPLE_CONNECT_ETHERNET=y
CONFIG_EXAMPLE_CONNECT_WIFI=n
CONFIG_EXAMPLE_USE_INTERNAL_ETHERNET=y
CONFIG_EXAMPLE_ETH_PHY_IP101=y
CONFIG_EXAMPLE_ETH_MDC_GPIO=23
CONFIG_EXAMPLE_ETH_MDIO_GPIO=18
CONFIG_EXAMPLE_ETH_PHY_RST_GPIO=5
CONFIG_EXAMPLE_ETH_PHY_ADDR=1
CONFIG_EXAMPLE_CONNECT_IPV6=y
CONFIG_ESP_WS_CLIENT_SEPARATE_TX_LOCK=y
CONFIG_ESP_WS_CLIENT_TX_LOCK_TIMEOUT_MS=2000

View File

@@ -101,3 +101,5 @@ set(sources_that_define_gnu_source ${m_src_dir}/loop.c ${m_src_dir}/mux_poll.c)
foreach(offending_src ${sources_that_define_gnu_source})
set_source_files_properties(${offending_src} PROPERTIES COMPILE_OPTIONS "-U_GNU_SOURCE")
endforeach()
set_source_files_properties(${m_src_dir}/security_default.c PROPERTIES COMPILE_OPTIONS "-Wno-char-subscripts")

View File

@@ -0,0 +1 @@
CONFIG_LIBC_NEWLIB=y

View File

@@ -4,3 +4,4 @@ CONFIG_PTHREAD_TASK_STACK_SIZE_DEFAULT=32768
CONFIG_LWIP_SNTP_MAX_SERVERS=2
CONFIG_MBEDTLS_SSL_PROTO_DTLS=y
CONFIG_MBEDTLS_SSL_DTLS_SRTP=y
CONFIG_LIBC_NEWLIB=y

View File

@@ -16,8 +16,11 @@
#include <ctype.h>
#include "net/if.h"
#include "esp_idf_version.h"
#if ESP_IDF_VERSION_MAJOR < 6
#undef isspace
#define isspace(__c) (__ctype_lookup((int)__c)&_S)
#endif
#define VERSION "v2.0.20~5"