[ZigBee sample] Introduce debug counters

In the effort to increse reliability of the Zigbee sample, #126 and #130
introduced handling for possibly rare, hard-to-debug events that may be
occurring. While these changes seem to help in practive, we don't know
exactly how often, if ever, they are triggered.

This PR introduces debug_counters in hope to add some visilibty in these
hard-to-debug, rare conditions.

Calling `prst_debug_counters_increment("some_counter")` will increment a
value stored in flash. These values are then dumped via logging when the
device boots. Moving forward we can also consider exposing these values
via a ZigBee cluster.
This commit is contained in:
rbaron 2023-06-06 19:42:39 +02:00
parent bcc5a853d0
commit 509350457f
9 changed files with 229 additions and 20 deletions

View file

@ -12,8 +12,10 @@ include_directories(src)
target_sources(app PRIVATE
src/main.c
src/debug_counters.c
src/double_reset_detector.c
src/factory_reset.c
src/flash_fs.c
src/prst_zb_attrs.c
src/prst_zb_soil_moisture_defs.c
src/restart_handler.c

View file

@ -5,6 +5,9 @@ CONFIG_I2C=y
CONFIG_ADC=y
CONFIG_GPIO=y
CONFIG_USE_SEGGER_RTT=y
CONFIG_SEGGER_RTT_BUFFER_SIZE_UP=4096
CONFIG_PM=y
CONFIG_PM_DEVICE=y
@ -49,4 +52,7 @@ CONFIG_FLASH=y
CONFIG_FLASH_MAP=y
CONFIG_FLASH_PAGE_LAYOUT=y
CONFIG_FILE_SYSTEM=y
CONFIG_FILE_SYSTEM_LITTLEFS=y
CONFIG_FILE_SYSTEM_LITTLEFS=y
# Uncomment for debug log level.
# CONFIG_LOG_DEFAULT_LEVEL=4

View file

@ -0,0 +1,131 @@
#include "debug_counters.h"
#include <prstlib/macros.h>
#include <zephyr/fs/fs.h>
#include <zephyr/fs/littlefs.h>
#include <zephyr/kernel.h>
#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(debug_counters, CONFIG_LOG_DEFAULT_LEVEL);
static const char kDebugCountersDir[] = "/lfs/debug_counters";
#define PRST_MAX_COUNTER_NAME_LENGTH 64
#define PRST_MAX_FILE_NAME_LENGTH (sizeof(kDebugCountersDir) + 1 + PRST_MAX_COUNTER_NAME_LENGTH)
static int mk_filename(const char* counter_name, char* buff, size_t buff_size) {
RET_CHECK(strlen(counter_name) <= PRST_MAX_FILE_NAME_LENGTH, "Counter name too long");
strcpy(buff, kDebugCountersDir);
strcat(buff, "/");
strcat(buff, counter_name);
return 0;
}
int prst_debug_counters_init() {
struct fs_dirent entry;
int err = fs_stat(kDebugCountersDir, &entry);
if (err == 0) {
LOG_DBG("Directory %s already exists", kDebugCountersDir);
return 0;
} else if (err == -ENOENT) {
LOG_DBG("Creating directory %s", kDebugCountersDir);
return fs_mkdir(kDebugCountersDir);
}
LOG_ERR("Unexpected error in fs_stat for %s: %d", kDebugCountersDir, err);
return err;
}
int prst_debug_counters_increment(const char* counter_name) {
char filename[PRST_MAX_FILE_NAME_LENGTH];
RET_IF_ERR(mk_filename(counter_name, filename, sizeof(filename)));
LOG_DBG("Incrementing counter %s", filename);
// Open the file.
struct fs_file_t file;
fs_file_t_init(&file);
RET_IF_ERR(fs_open(&file, filename, FS_O_CREATE | FS_O_RDWR));
// Read the current value.
prst_debug_counter_t value;
int n_read = fs_read(&file, &value, sizeof(value));
if (n_read != sizeof(value)) {
LOG_WRN("fs_read returned %d, expected %d, assuming first access", n_read, sizeof(value));
value = 0;
}
// Increment the value.
value++;
// Write back to file.
RET_CHECK(fs_seek(&file, 0, SEEK_SET) == 0, "Unable to seek");
ssize_t written = fs_write(&file, &value, sizeof(value));
if (written != sizeof(value)) {
LOG_ERR("fs_write returned %d, expected %d", written, sizeof(value));
}
return fs_close(&file);
}
int prst_debug_counters_get(const char* counter_name, prst_debug_counter_t* value) {
char filename[PRST_MAX_FILE_NAME_LENGTH];
RET_IF_ERR(mk_filename(counter_name, filename, sizeof(filename)));
LOG_DBG("Getting counter %s", filename);
// Open the file.
struct fs_file_t file;
fs_file_t_init(&file);
RET_IF_ERR(fs_open(&file, filename, FS_O_CREATE | FS_O_READ));
// Read the current value.
int n_read = fs_read(&file, value, sizeof(value));
if (n_read != sizeof(prst_debug_counter_t)) {
LOG_WRN("fs_read returned %d, expected %d, assuming first access", n_read, sizeof(prst_debug_counter_t));
// return -1;
*value = 0;
}
return fs_close(&file);
}
int prst_debug_counters_get_all(prst_debug_counters_callback_t callback) {
LOG_DBG("Getting all counters from %s", kDebugCountersDir);
struct fs_dir_t dirp;
fs_dir_t_init(&dirp);
static struct fs_dirent entry;
RET_IF_ERR(fs_opendir(&dirp, kDebugCountersDir));
prst_debug_counter_t value;
for (;;) {
RET_IF_ERR(fs_readdir(&dirp, &entry));
// End of directory;
if (entry.name[0] == 0) {
break;
}
LOG_DBG("Found %s", entry.name);
prst_debug_counters_get(entry.name, &value);
callback(entry.name, value);
}
return fs_closedir(&dirp);
}
typedef struct {
struct k_work work;
const char* counter_name;
} counter_increment_work_t;
static void increment_counter_callback(struct k_work* work) {
counter_increment_work_t* counter_work = CONTAINER_OF(work, counter_increment_work_t, work);
LOG_DBG("Incrementing counter callback for %s", counter_work->counter_name);
prst_debug_counters_increment(counter_work->counter_name);
free(counter_work);
LOG_DBG("Freed memory for %s", counter_work->counter_name);
}
int prst_debug_counters_increment_async(const char* counter_name) {
counter_increment_work_t* work = malloc(sizeof(counter_increment_work_t));
work->counter_name = counter_name;
k_work_init(&work->work, increment_counter_callback);
LOG_DBG("Scheduling increment of %s", counter_name);
return k_work_submit(&work->work);
}

View file

@ -0,0 +1,20 @@
#ifndef _PRST_DEBUG_COUNTERS_H_
#define _PRST_DEBUG_COUNTERS_H_
#include <stdint.h>
typedef uint32_t prst_debug_counter_t;
typedef void (*prst_debug_counters_callback_t)(const char* counter_name, prst_debug_counter_t value);
int prst_debug_counters_init();
int prst_debug_counters_increment(const char* counter_name);
int prst_debug_counters_increment_async(const char* counter_name);
int prst_debug_counters_get(const char* counter_name, prst_debug_counter_t* value);
int prst_debug_counters_get_all(prst_debug_counters_callback_t callback);
#endif // _PRST_DEBUG_COUNTERS_H_

View file

@ -9,15 +9,6 @@
LOG_MODULE_REGISTER(double_reset_detector, CONFIG_LOG_DEFAULT_LEVEL);
FS_LITTLEFS_DECLARE_DEFAULT_CONFIG(storage);
static struct fs_mount_t lfs_storage_mnt = {
.type = FS_LITTLEFS,
.fs_data = &storage,
.storage_dev = (void *)FLASH_AREA_ID(storage),
.mnt_point = "/lfs",
};
static const char *flag_filename = "/lfs/reset_flag";
static const char flag_prefix[] = "prst-rst-count";
@ -38,12 +29,6 @@ void erase_flag_callback(struct k_work *work) {
K_WORK_DELAYABLE_DEFINE(erase_flag_work, erase_flag_callback);
int prst_detect_double_reset(prst_double_reset_callback_t on_double_reset) {
// TODO: if booting for the first time after a full flash erase, fs_mount will
// complain (via a LOG_ERR) and then automatically format the flash. It all works,
// but avoiding a scary red message would be ideal. Maybe somehow check if it's
// formatted before mounting?
RET_IF_ERR(fs_mount(&lfs_storage_mnt));
fs_file_t_init(&flag_file);
RET_IF_ERR(fs_open(&flag_file, flag_filename, FS_O_CREATE | FS_O_RDWR));
@ -64,7 +49,7 @@ int prst_detect_double_reset(prst_double_reset_callback_t on_double_reset) {
// Write the flag and erase it after some time.
ssize_t written = fs_write(&flag_file, flag_prefix, sizeof(flag_prefix));
if (written != sizeof(flag_prefix)) {
LOG_ERR("s_write returned %d, expected %d", written, sizeof(flag_prefix));
LOG_ERR("fs_write returned %d, expected %d", written, sizeof(flag_prefix));
return -1;
}

View file

@ -0,0 +1,26 @@
#include "flash_fs.h"
#include <prstlib/macros.h>
#include <zephyr/fs/fs.h>
#include <zephyr/fs/littlefs.h>
#include <zephyr/logging/log.h>
LOG_MODULE_REGISTER(flash_fs, CONFIG_LOG_DEFAULT_LEVEL);
FS_LITTLEFS_DECLARE_DEFAULT_CONFIG(storage);
static struct fs_mount_t lfs_storage_mnt = {
.type = FS_LITTLEFS,
.fs_data = &storage,
.storage_dev = (void *)FLASH_AREA_ID(storage),
.mnt_point = "/lfs",
};
int prst_flash_fs_init() {
// TODO: if booting for the first time after a full flash erase, fs_mount will
// complain (via a LOG_ERR) and then automatically format the flash. It all works,
// but avoiding a scary red message would be ideal. Maybe somehow check if it's
// formatted before mounting?
RET_IF_ERR(fs_mount(&lfs_storage_mnt));
return 0;
}

View file

@ -0,0 +1,9 @@
#ifndef _PRST_FLASH_FS_H_
#define _PRST_FLASH_FS_H_
#include <stddef.h>
// Initializes and mounts littlefs.
int prst_flash_fs_init();
#endif // _PRST_FLASH_FS_H_

View file

@ -15,7 +15,9 @@
#include <zigbee/zigbee_app_utils.h>
#include <zigbee/zigbee_error_handler.h>
#include "debug_counters.h"
#include "factory_reset.h"
#include "flash_fs.h"
#include "prst_zb_attrs.h"
#include "prst_zb_endpoint_defs.h"
#include "prst_zb_soil_moisture_defs.h"
@ -131,13 +133,15 @@ void zboss_signal_handler(zb_bufid_t bufid) {
LOG_DBG("Steering complete. Status: %d", status);
if (status == RET_OK) {
LOG_DBG("Steering successful. Status: %d", status);
prst_debug_counters_increment("steering_success");
prst_led_flash(/*times=*/3);
k_timer_stop(&led_flashing_timer);
prst_restart_watchdog_stop();
prst_led_off();
} else {
LOG_DBG("Steering failed. Status: %d", status);
prst_led_flash(7);
prst_debug_counters_increment("steering_failure");
prst_led_flash(/*times=*/7);
prst_restart_watchdog_start();
k_timer_stop(&led_flashing_timer); // Power saving
prst_led_off();
@ -173,6 +177,7 @@ void zboss_signal_handler(zb_bufid_t bufid) {
For details see: https://developer.nordicsemi.com/nRF_Connect_SDK/doc/latest/nrf/known_issues.html?v=v2-3-0
*/
if (stack_initialised && !joining_signal_received) {
prst_debug_counters_increment("krknwk_12017_reset");
zb_reset(0);
}
}
@ -194,6 +199,7 @@ void update_sensors_cb(zb_uint8_t arg) {
ZB_TIME_ONE_SECOND * CONFIG_PRST_ZB_SLEEP_DURATION_SEC);
if (prst_sensors_read_all(&sensors)) {
prst_debug_counters_increment("sensors_read_error");
LOG_ERR("Unable to read sensors");
return;
}
@ -235,10 +241,21 @@ void update_sensors_cb(zb_uint8_t arg) {
&log_lux);
}
void log_counter(const char *counter_name, prst_debug_counter_t value) {
LOG_INF("- %s: %d", counter_name, value);
}
int main(void) {
RET_IF_ERR(prst_adc_init());
RET_IF_ERR(prst_led_init());
RET_IF_ERR(prst_button_init());
RET_IF_ERR(prst_flash_fs_init());
RET_IF_ERR(prst_debug_counters_init());
prst_debug_counters_increment("boot");
LOG_INF("Dumping debug counters:");
prst_debug_counters_get_all(log_counter);
RET_IF_ERR(prst_zb_factory_reset_check());
@ -260,5 +277,7 @@ int main(void) {
zigbee_enable();
zigbee_configure_sleepy_behavior(/*enable=*/true);
prst_debug_counters_increment("main_finish");
return 0;
}

View file

@ -4,14 +4,25 @@
#include <zephyr/logging/log.h>
#include <zigbee/zigbee_app_utils.h>
#include "debug_counters.h"
LOG_MODULE_REGISTER(restart_handler, CONFIG_LOG_DEFAULT_LEVEL);
static void restart_network_steering_cb(struct k_timer *timer) {
LOG_DBG("Restart handler expired. Restarting network steering.");
void callback_work_handler(struct k_work *work) {
LOG_INF("Running restart callback_work_handler.");
prst_debug_counters_increment("steering_watchdog_restart");
// If the device is not commissioned, the rejoin procedure is started.
user_input_indicate();
}
K_WORK_DEFINE(callback_work, callback_work_handler);
// Runs in an ISR context. We offload the actual work to a workqueue.
static void restart_network_steering_cb(struct k_timer *timer) {
LOG_INF("Triggered restart_network_steering_cb. Offloading work.");
k_work_submit(&callback_work);
}
K_TIMER_DEFINE(restart_timer, restart_network_steering_cb, NULL);
void prst_restart_watchdog_start() {