From 6f379678f014deae1a5103cec2bf4f4536a56c6b Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Wed, 5 May 2021 23:56:48 +0200 Subject: [PATCH 1/2] Better fix for "ack timeout 4" client disconnects. After my first attempt at fixing the client disconnects (https://github.com/OttoWinter/AsyncTCP/pull/4) got merged into AsyncTCP, it turned out that there was regression for some devices: the connection stability actually went down instead of up. After a lot of debugging and discussion with @glmnet (some of the results can be found in the above pull request discussion), we came up with an improved fix for the disconnect issues. **Changed:** The code that checks for ACK timeouts has been simplified in such way, that only two timestamps are now used to determine if an ACK timeout has happened: the time of the last sent packet (this was already recorded), and the time of the last received ACK from the client (this has been added). Using these timestamps, there is no more need for a separate field to keep track if we are waiting for an ACK or not (`_pcb_busy`). Therefore, this field was completely removed from the code. While I was at it, I renamed a few variables to make the code easier to read and more consistent. **Results:** I connected Home Assistant plus 8 OTA loggers at the same time, using very verbose logging output. This normally was an easy way to trigger the disconnect errors. It turned out, this solution runs as solid for me, as when disabling the ACK timeout checks completely (using `AsyncClient::setAckTimeout(0)`). --- src/AsyncTCP.cpp | 41 ++++++++++++++++++++--------------------- src/AsyncTCP.h | 6 +++--- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index 471e8fc..771c0a8 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -575,11 +575,10 @@ AsyncClient::AsyncClient(tcp_pcb* pcb) , _pb_cb_arg(0) , _timeout_cb(0) , _timeout_cb_arg(0) -, _pcb_busy(0) -, _pcb_sent_at(0) , _ack_pcb(true) -, _rx_last_packet(0) -, _rx_since_timeout(0) +, _tx_last_packet(0) +, _rx_timeout(0) +, _rx_last_ack(0) , _ack_timeout(ASYNC_MAX_ACK_TIME) , _connect_port(0) , prev(NULL) @@ -783,14 +782,12 @@ size_t AsyncClient::add(const char* data, size_t size, uint8_t apiflags) { } bool AsyncClient::send(){ - auto pcb_sent_at_backup = _pcb_sent_at; - _pcb_sent_at = millis(); - _pcb_busy++; + auto backup = _tx_last_packet; + _tx_last_packet = millis(); if (_tcp_output(_pcb, _closed_slot) == ERR_OK) { return true; } - _pcb_sent_at = pcb_sent_at_backup; - _pcb_busy--; + _tx_last_packet = backup; return false; } @@ -870,7 +867,6 @@ int8_t AsyncClient::_connected(void* pcb, int8_t err){ _pcb = reinterpret_cast(pcb); if(_pcb){ _rx_last_packet = millis(); - _pcb_busy = 0; // tcp_recv(_pcb, &_tcp_recv); // tcp_sent(_pcb, &_tcp_sent); // tcp_poll(_pcb, &_tcp_poll, 1); @@ -932,10 +928,10 @@ int8_t AsyncClient::_fin(tcp_pcb* pcb, int8_t err) { int8_t AsyncClient::_sent(tcp_pcb* pcb, uint16_t len) { _rx_last_packet = millis(); + _rx_last_ack = millis(); //log_i("%u", len); - _pcb_busy--; if(_sent_cb) { - _sent_cb(_sent_cb_arg, this, len, (millis() - _pcb_sent_at)); + _sent_cb(_sent_cb_arg, this, len, (millis() - _tx_last_packet)); } return ERR_OK; } @@ -978,15 +974,18 @@ int8_t AsyncClient::_poll(tcp_pcb* pcb){ uint32_t now = millis(); // ACK Timeout - if(_pcb_busy > 0 && _ack_timeout && (now - _pcb_sent_at) >= _ack_timeout){ - _pcb_busy = 0; - log_w("ack timeout %d", pcb->state); - if(_timeout_cb) - _timeout_cb(_timeout_cb_arg, this, (now - _pcb_sent_at)); - return ERR_OK; + if(_ack_timeout){ + uint32_t one_day = 86400000; + bool last_tx_is_after_last_ack = (_rx_last_ack - _tx_last_packet + one_day) < one_day; + if(last_tx_is_after_last_ack && (now - _tx_last_packet) >= _ack_timeout) { + log_w("ack timeout %d", pcb->state); + if(_timeout_cb) + _timeout_cb(_timeout_cb_arg, this, (now - _tx_last_packet)); + return ERR_OK; + } } // RX Timeout - if(_rx_since_timeout && (now - _rx_last_packet) >= (_rx_since_timeout * 1000)){ + if(_rx_timeout && (now - _rx_last_packet) >= (_rx_timeout * 1000)) { log_w("rx timeout %d", pcb->state); _close(); return ERR_OK; @@ -1045,11 +1044,11 @@ size_t AsyncClient::write(const char* data, size_t size, uint8_t apiflags) { } void AsyncClient::setRxTimeout(uint32_t timeout){ - _rx_since_timeout = timeout; + _rx_timeout = timeout; } uint32_t AsyncClient::getRxTimeout(){ - return _rx_since_timeout; + return _rx_timeout; } uint32_t AsyncClient::getAckTimeout(){ diff --git a/src/AsyncTCP.h b/src/AsyncTCP.h index fcd511b..9786490 100644 --- a/src/AsyncTCP.h +++ b/src/AsyncTCP.h @@ -160,12 +160,12 @@ class AsyncClient { AcConnectHandler _poll_cb; void* _poll_cb_arg; - uint32_t _pcb_busy; - uint32_t _pcb_sent_at; bool _ack_pcb; + uint32_t _tx_last_packet; uint32_t _rx_ack_len; uint32_t _rx_last_packet; - uint32_t _rx_since_timeout; + uint32_t _rx_timeout; + uint32_t _rx_last_ack; uint32_t _ack_timeout; uint16_t _connect_port; From 18ac673e82bfd2542c0ba12f10615963c13fb5b9 Mon Sep 17 00:00:00 2001 From: Maurice Makaay Date: Thu, 6 May 2021 00:41:06 +0200 Subject: [PATCH 2/2] Define one_day var as a const. --- src/AsyncTCP.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index 771c0a8..59ef186 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -975,7 +975,7 @@ int8_t AsyncClient::_poll(tcp_pcb* pcb){ // ACK Timeout if(_ack_timeout){ - uint32_t one_day = 86400000; + const uint32_t one_day = 86400000; bool last_tx_is_after_last_ack = (_rx_last_ack - _tx_last_packet + one_day) < one_day; if(last_tx_is_after_last_ack && (now - _tx_last_packet) >= _ack_timeout) { log_w("ack timeout %d", pcb->state);