Better fix for "ack timeout 4" client disconnects.

After my first attempt at fixing the client disconnects
(https://github.com/OttoWinter/AsyncTCP/pull/4) got merged
into AsyncTCP, it turned out that there was regression
for some devices: the connection stability actually went
down instead of up.

After a lot of debugging and discussion with @glmnet (some of
the results can be found in the above pull request discussion),
we came up with an improved fix for the disconnect issues.

**Changed:**

The code that checks for ACK timeouts has been simplified in
such way, that only two timestamps are now used to determine if
an ACK timeout has happened: the time of the last sent packet
(this was already recorded), and the time of the last received
ACK from the client (this has been added).

Using these timestamps, there is no more need for a separate field
to keep track if we are waiting for an ACK or not (`_pcb_busy`).
Therefore, this field was completely removed from the code.

While I was at it, I renamed a few variables to make the code
easier to read and more consistent.

**Results:**

I connected Home Assistant plus 8 OTA loggers at the same time,
using very verbose logging output. This normally was an easy way
to trigger the disconnect errors.

It turned out, this solution runs as solid for me, as when disabling
the ACK timeout checks completely
(using `AsyncClient::setAckTimeout(0)`).
This commit is contained in:
Maurice Makaay
2021-05-05 23:56:48 +02:00
parent f278522a59
commit 6f379678f0
2 changed files with 23 additions and 24 deletions

View File

@@ -575,11 +575,10 @@ AsyncClient::AsyncClient(tcp_pcb* pcb)
, _pb_cb_arg(0)
, _timeout_cb(0)
, _timeout_cb_arg(0)
, _pcb_busy(0)
, _pcb_sent_at(0)
, _ack_pcb(true)
, _rx_last_packet(0)
, _rx_since_timeout(0)
, _tx_last_packet(0)
, _rx_timeout(0)
, _rx_last_ack(0)
, _ack_timeout(ASYNC_MAX_ACK_TIME)
, _connect_port(0)
, prev(NULL)
@@ -783,14 +782,12 @@ size_t AsyncClient::add(const char* data, size_t size, uint8_t apiflags) {
}
bool AsyncClient::send(){
auto pcb_sent_at_backup = _pcb_sent_at;
_pcb_sent_at = millis();
_pcb_busy++;
auto backup = _tx_last_packet;
_tx_last_packet = millis();
if (_tcp_output(_pcb, _closed_slot) == ERR_OK) {
return true;
}
_pcb_sent_at = pcb_sent_at_backup;
_pcb_busy--;
_tx_last_packet = backup;
return false;
}
@@ -870,7 +867,6 @@ int8_t AsyncClient::_connected(void* pcb, int8_t err){
_pcb = reinterpret_cast<tcp_pcb*>(pcb);
if(_pcb){
_rx_last_packet = millis();
_pcb_busy = 0;
// tcp_recv(_pcb, &_tcp_recv);
// tcp_sent(_pcb, &_tcp_sent);
// tcp_poll(_pcb, &_tcp_poll, 1);
@@ -932,10 +928,10 @@ int8_t AsyncClient::_fin(tcp_pcb* pcb, int8_t err) {
int8_t AsyncClient::_sent(tcp_pcb* pcb, uint16_t len) {
_rx_last_packet = millis();
_rx_last_ack = millis();
//log_i("%u", len);
_pcb_busy--;
if(_sent_cb) {
_sent_cb(_sent_cb_arg, this, len, (millis() - _pcb_sent_at));
_sent_cb(_sent_cb_arg, this, len, (millis() - _tx_last_packet));
}
return ERR_OK;
}
@@ -978,15 +974,18 @@ int8_t AsyncClient::_poll(tcp_pcb* pcb){
uint32_t now = millis();
// ACK Timeout
if(_pcb_busy > 0 && _ack_timeout && (now - _pcb_sent_at) >= _ack_timeout){
_pcb_busy = 0;
log_w("ack timeout %d", pcb->state);
if(_timeout_cb)
_timeout_cb(_timeout_cb_arg, this, (now - _pcb_sent_at));
return ERR_OK;
if(_ack_timeout){
uint32_t one_day = 86400000;
bool last_tx_is_after_last_ack = (_rx_last_ack - _tx_last_packet + one_day) < one_day;
if(last_tx_is_after_last_ack && (now - _tx_last_packet) >= _ack_timeout) {
log_w("ack timeout %d", pcb->state);
if(_timeout_cb)
_timeout_cb(_timeout_cb_arg, this, (now - _tx_last_packet));
return ERR_OK;
}
}
// RX Timeout
if(_rx_since_timeout && (now - _rx_last_packet) >= (_rx_since_timeout * 1000)){
if(_rx_timeout && (now - _rx_last_packet) >= (_rx_timeout * 1000)) {
log_w("rx timeout %d", pcb->state);
_close();
return ERR_OK;
@@ -1045,11 +1044,11 @@ size_t AsyncClient::write(const char* data, size_t size, uint8_t apiflags) {
}
void AsyncClient::setRxTimeout(uint32_t timeout){
_rx_since_timeout = timeout;
_rx_timeout = timeout;
}
uint32_t AsyncClient::getRxTimeout(){
return _rx_since_timeout;
return _rx_timeout;
}
uint32_t AsyncClient::getAckTimeout(){

View File

@@ -160,12 +160,12 @@ class AsyncClient {
AcConnectHandler _poll_cb;
void* _poll_cb_arg;
uint32_t _pcb_busy;
uint32_t _pcb_sent_at;
bool _ack_pcb;
uint32_t _tx_last_packet;
uint32_t _rx_ack_len;
uint32_t _rx_last_packet;
uint32_t _rx_since_timeout;
uint32_t _rx_timeout;
uint32_t _rx_last_ack;
uint32_t _ack_timeout;
uint16_t _connect_port;