diff --git a/doc/connectivity/networking/api/latmon.rst b/doc/connectivity/networking/api/latmon.rst new file mode 100644 index 0000000000000..2679953be1fec --- /dev/null +++ b/doc/connectivity/networking/api/latmon.rst @@ -0,0 +1,124 @@ +.. _latmon: + +Latmon Network Service +###################### + +.. contents:: + :local: + :depth: 2 + +Overview +******** + +Provides the functionality required for network-based latency monitoring, including socket +management, client-server communication, and data exchange with the Latmus service running +on the System Under Test (SUT). + +The Latmon network service is responsible for establishing and managing network +communication between the Latmon application (running on a Zephyr-based board) and +the Latmus service (running on the SUT). + +It uses TCP sockets for reliable communication and UDP sockets for broadcasting +the IP address of the Latmon device. + +API Reference +************* + +.. doxygengroup:: latmon + +Features +******** + +- **Socket Management**: Creates and manages TCP and UDP sockets for communication. +- **Client-Server Communication**: Handles incoming connections from the Latmus service. +- **Data Exchange**: Sends latency metrics and histogram data to the Latmus service. +- **IP Address Broadcasting**: Broadcasts the IP address of the Latmon device to facilitate + discovery by the Latmus service. +- **Thread-Safe Design**: Uses Zephyr's kernel primitives (e.g., message queues and semaphores) for + synchronization. + +Workflow +******** + +Socket Creation +=============== + +The :c:func:`net_latmon_get_socket()` function is called to create and configure a TCP socket to +communicate with the Latmus service. A connection address can be specified as a paramenter to +bind the socket to a specific interface and port. + +Connection Handling +=================== + +The :c:func:`net_latmon_connect()` function waits for a connection from the Latmus service. +If no connection is received within the timeout period, the service broadcasts its IP address +using UDP and returns ``-EAGAIN``. +If the broadcast request cannot be sent, the function returns ``-1``, and the client should quit. + +Monitoring Start +================ + +Once a connection is established, the :c:func:`net_latmon_start()` function is called to +start the monitoring process. This function uses a callback to calculate latency deltas +and sends the data to the Latmus service. + +Monitoring Status +================= + +The :c:func:`net_latmon_running()` function can be used to check if the monitoring process is active. + +Thread Management +================= + +The service uses Zephyr threads to handle incoming connections and manage the monitoring +process. + +Enabling the Latmon Service +*************************** + +The following configuration option must be enabled in the :file:`prj.conf` file. + +- :kconfig:option:`CONFIG_NET_LATMON` + +The following options may be configured to customize the Latmon service: + +- :kconfig:option:`CONFIG_NET_LATMON_PORT` - Port number for the Latmon service. +- :kconfig:option:`CONFIG_NET_LATMON_XFER_THREAD_STACK_SIZE` +- :kconfig:option:`CONFIG_NET_LATMON_XFER_THREAD_PRIORITY` +- :kconfig:option:`CONFIG_NET_LATMON_THREAD_STACK_SIZE` +- :kconfig:option:`CONFIG_NET_LATMON_THREAD_PRIORITY` +- :kconfig:option:`CONFIG_NET_LATMON_MONITOR_THREAD_STACK_SIZE` +- :kconfig:option:`CONFIG_NET_LATMON_MONITOR_THREAD_PRIORITY` + +Example Usage +************* + +.. code-block:: c + + #include + #include + + void main(void) + { + struct in_addr ip; + int server_socket, client_socket; + + /* Create and configure the server socket */ + server_socket = net_latmon_get_socket(NULL); + + while (1) { + /* Wait for a connection from the Latmus service */ + client_socket = net_latmon_connect(server_socket, &ip); + if (client_socket < 0) { + if (client_socket == -EAGAIN) { + continue; + } + goto out; + } + + /* Start the latency monitoring process */ + net_latmon_start(client_socket, measure_latency_cycles); + } + out: + close(server_socket); + } diff --git a/doc/connectivity/networking/api/protocols.rst b/doc/connectivity/networking/api/protocols.rst index f4f3b1f88c1af..b172089cd45a1 100644 --- a/doc/connectivity/networking/api/protocols.rst +++ b/doc/connectivity/networking/api/protocols.rst @@ -17,3 +17,4 @@ Protocols mqtt_sn ptp tftp + latmon diff --git a/include/zephyr/net/latmon.h b/include/zephyr/net/latmon.h new file mode 100644 index 0000000000000..1a6831761a9ba --- /dev/null +++ b/include/zephyr/net/latmon.h @@ -0,0 +1,97 @@ +/** @file + * @brief Latency Monitor API + */ + +/* + * Copyright (c) 2025 Jorge A. Ramirez Ortiz + * + * SPDX-License-Identifier: Apache-2.0 + */ + +#ifndef ZEPHYR_INCLUDE_NET_LATMON_H_ +#define ZEPHYR_INCLUDE_NET_LATMON_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Latency Monitor + * @defgroup latmon Latency Monitor + * @ingroup networking + * @{ + */ + +/** + * @typedef net_latmon_measure_t + * @brief Callback function type for retrieving latency deltas. + * + * @param delta Pointer to store the calculated latency delta in cycles. + * @return 0 on success, negative errno code on failure. + */ +typedef int (*net_latmon_measure_t)(uint32_t *delta); + +/** + * @brief Start the latency monitor. + * + * @details This function starts the latency monitor, which measures + * latency using the provided callback function to calculate deltas. Samples + * are sent to the connected Latmus client. + * + * @param latmus A valid socket descriptor connected to latmus + * @param measure_func A callback function to execute the delta calculation. + */ +void net_latmon_start(int latmus, net_latmon_measure_t measure_func); + +/** + * @brief Wait for a connection from a Latmus client. + * + * @details This function blocks until a Latmus client connects to the + * specified socket. Once connected, the client's IP address is stored + * in the provided `ip` structure. + * + * @param socket A valid socket descriptor for listening. + * @param ip The client's IP address. + * @return A valid client socket descriptor connected to latmus on success, + * negative errno code on failure. + * + */ +int net_latmon_connect(int socket, struct in_addr *ip); + +/** + * @brief Get a socket for the Latmus service. + * + * @details This function creates and returns a socket to wait for Latmus + * connections + * + * @param bind_addr The address to bind the socket to. If NULL, the socket + * will be bound to the first available address using the build time configured + * latmus port. + * + * @return A valid socket descriptor on success, negative errno code on failure. + */ +int net_latmon_get_socket(struct sockaddr *bind_addr); + +/** + * @brief Check if the latency monitor is running. + * + * @details This function checks whether the latency monitor is currently + * active and running. + * + * @return True if the latency monitor is running, false if it is waiting for a + * Latmus connection + */ +bool net_latmon_running(void); + +/** + * @} + */ + +#ifdef __cplusplus +} +#endif + +#endif /* ZEPHYR_INCLUDE_NET_LATMON_H_ */ diff --git a/samples/net/latmon/CMakeLists.txt b/samples/net/latmon/CMakeLists.txt new file mode 100644 index 0000000000000..85ded7c5c7493 --- /dev/null +++ b/samples/net/latmon/CMakeLists.txt @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(latmon) + +target_sources(app PRIVATE src/main.c) diff --git a/samples/net/latmon/Kconfig b/samples/net/latmon/Kconfig new file mode 100644 index 0000000000000..1486a998e363e --- /dev/null +++ b/samples/net/latmon/Kconfig @@ -0,0 +1,13 @@ +# Private config options for Latmon Sample app +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Jorge Ramirez-Ortiz + +mainmenu "Latmon Sample application" + +config LATMON_LOOPBACK_CALIBRATION + bool "Run the sample code in calibration mode" + default n + help + Run Latmon in calibration mode. + +source "Kconfig.zephyr" diff --git a/samples/net/latmon/README.rst b/samples/net/latmon/README.rst new file mode 100644 index 0000000000000..b945bdac6b035 --- /dev/null +++ b/samples/net/latmon/README.rst @@ -0,0 +1,190 @@ +.. zephyr:code-sample:: latmon-client + :name: Latmon Client + :relevant-api: latmon + + Measures delta time between GPIO events and reports the latency metrics via latmon to the latmus + service executing on the SUT. + +Overview +******** + +This project provides tools to measure the worst-case response time of a system under test (SUT) to +GPIO events using: + +- **Latmon**: + + Runs on a Zephyr-based board to generate and monitor GPIO events while collecting metrics + +- **Latmus**: + + Runs on the SUT to respond to the falling edge of input GPIO event, displaying the latency metrics + and generate histogram data. + +This project is part of the open-source initiative +`EVL Project - Latmus GPIO Response Time `_. + +The main program is designed to monitor latency using GPIO pins on a Zephyr-based system. It generates a +pulse signal on a GPIO pin and measures the time it takes for the SUT (executing Latmus) to respond to +it. + +The SUT must be running Latmus to capture the latency metrics and histogram information reported over the +network. The program uses LEDs to indicate the different states, such as DHCP binding(red), waiting for the +Latmus connection (blue) and sampling (green). + +Why Not Just Use a Timer? +========================= + +Timer tests miss external factors like GPIO signals, hardware, and interrupt handling. +Latmon and Latmus simulate real-world scenarios, capturing end-to-end latency metrics. +This ensures accurate assessment of real-time responsiveness across the entire system. + +- **Real-Time Thread Testing**: + + Evaluates how a user-space thread processes external interrupts. + +- **End-to-End Latency Measurement**: + + Captures delays from hardware, drivers, and user-space threads. + +- **Versatile Platform Support**: + + Works on EVL, PREEMPT_RT, and other platforms. + +Code Structure +============== + +The Latmon sample application is divided into two main components: + +- **Application Logic** (:zephyr_file:`samples/net/latmon/src/main.c`): + + This file contains the application logic for Latmon. + It initializes networking, provides the instrumentation mechanism and handles LED indicators for the + different states. + +- **Library** (:zephyr_file:`subsys/net/lib/latmon/latmon.c`): + + This file provides reusable functions and abstractions for latency monitoring via Latmus. + It includes the core logic for reporting latency metrics and histogram data. + +Requirements +************ + +- **Zephyr-Compatible Board**: + + A board with external GPIO support and an IPv4 network interface (e.g., FRDM-K64F). + +- **System under Test**: + + A system with external GPIO pins running the Latmus service and an IPv4 network interface. + +- **Network Connection**: + + A DHCP server for IP assignment. + +- **Physical Connection**: + + GPIO wires connecting the Zephyr board to the SUT and both systems connected to the network. + +Setup and Usage +*************** + +- **Flash Latmon onto the Zephyr board**: + + The application will connect to the network and wait for a connection from the SUT. The application + will use DHCP to obtain an IPv4 address. + +- **Connect GPIO pins for transmit (Zephyr to SUT) and receive (SUT to Zephyr)** + + On **FRDM-K64F**, the sample code uses the **Arduino header J2**, ``pin 20`` for transmit the pulse to + the SUT and ``pin 18`` to receive the acknowledgment from the SUT. + +- **Run Latmus on the SUT** + + Request the appropriate options with `Latmus `_. Users + can for example modify the sampling period with the ``-p`` option or generate historgram data for + postprocessing with the ``-g`` option, + +- **Monitor results from the SUT** + + Latmus will report latency figures and, if requested, generate the histogram data file. + +- **Calibrating the Latmus latencies: CONFIG_LATMON_LOOPBACK_CALIBRATION**: + + Users can connect the GPIO pins in loopback mode (transmit to ack) and build the Latmon sample application with + CONFIG_LATMON_LOOPBACK_CALIBRATION enabled. When connecting to Latmus in this configuration, Latmus is providing + a calibration value that can be used to adjust the final latencies. + +Example +======= + +On the host and to build and flash the Zephyr FRDM-K64F board with the Latmon sample: + +.. code-block:: console + + user@host:~$ west build -b frdm_k64f samples/net/latmon + user@host:~$ west flash + +On the SUT running on Linux, latmus **MUST** track the falling edge of the signal: + +.. code-block:: console + + root@target:~$ latmus -I gpiochip2,23,falling-edge -O gpiochip2,21 -z -g"histogram" "broadcast" + +Monitoring both consoles, you should see the following: + +.. code-block:: console + + [00:00:03.311,000] phy_mc_ksz8081: PHY 0 is up + [00:00:03.311,000] phy_mc_ksz8081: PHY (0) Link speed 100 Mb, full duplex + [00:00:03.312,000] eth_nxp_enet_mac: Link is up + *** Booting Zephyr OS build v4.1.0-3337-g886443a190b1 *** + [00:00:03.313,000] sample_latmon: DHCPv4: binding... + [00:00:03.313,000] latmon: Latmon server thread priority: 14 + [00:00:10.964,000] net_dhcpv4: Received: 192.168.1.58 + [00:00:10.964,000] sample_latmon: Listening on 192.168.1.58 + [00:00:30.966,000] latmon: Waiting for Latmus ... + [00:00:31.356,000] latmon: Monitor thread priority: -16 + [00:00:31.356,000] latmon: monitoring started: + [00:00:31.356,000] latmon: - samples per period: 1000 + [00:00:31.356,000] latmon: - period: 1000 usecs + [00:00:31.356,000] latmon: - histogram cells: 200 + [00:00:31.393,000] latmon: Transfer thread priority: 14 + +.. code-block:: console + + root@target:~$ latmus -I gpiochip2,23,falling-edge -O gpiochip2,21 -Z -g"histogram" broadcast + Received broadcast message: 192.168.1.58 + warming up on CPU0 (not isolated)... + connecting to latmon at 192.168.1.58:2306... + RTT| 00:00:16 (oob-gpio, 1000 us period, priority 98, CPU0-noisol) + RTH|----lat min|----lat avg|----lat max|-overrun|---msw|---lat best|--lat worst + RTD| 26.375| 30.839| 33.508| 0| 0| 26.375| 33.508 + RTD| 26.333| 30.801| 37.633| 0| 0| 26.333| 37.633 + RTD| 26.375| 30.801| 31.966| 0| 0| 26.333| 37.633 + RTD| 26.375| 30.911| 49.675| 0| 0| 26.333| 49.675 + RTD| 26.333| 30.830| 41.658| 0| 0| 26.333| 49.675 + RTD| 26.375| 31.107| 59.216| 0| 0| 26.333| 59.216 + RTD| 26.333| 30.767| 30.925| 0| 0| 26.333| 59.216 + RTD| 26.333| 30.781| 41.616| 0| 0| 26.333| 59.216 + RTD| 26.375| 30.768| 32.925| 0| 0| 26.333| 59.216 + RTD| 26.375| 30.768| 37.633| 0| 0| 26.333| 59.216 + +On completion and from your host, retrieve the histogram file from the SUT, and generate a plot (a PNG file) using +gnuplot: + +.. code-block:: console + + user@host:~$ gnuplot plot_data.gp + +The ``plot_data.gp`` script should look like this for a file named ``histogram``: + +.. code-block:: gnuplot + + set terminal pngcairo size 800,600 + set output 'plot.png' + set title "Data Plot" + set xlabel "Latency (usec)" + set ylabel "Sample Count" + set grid + set style data linespoints + plot 'histogram' using 1:2 with linespoints title "Data Points" diff --git a/samples/net/latmon/boards/frdm_k64f.overlay b/samples/net/latmon/boards/frdm_k64f.overlay new file mode 100644 index 0000000000000..97a32616629ad --- /dev/null +++ b/samples/net/latmon/boards/frdm_k64f.overlay @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2025 Jorge Ramirez-Ortiz + */ + +/ { + zephyr,user { + pulse_gpios = <&gpioe 24 GPIO_ACTIVE_HIGH>; + ack_gpios = <&gpioe 25 GPIO_ACTIVE_HIGH>; + }; +}; diff --git a/samples/net/latmon/prj.conf b/samples/net/latmon/prj.conf new file mode 100644 index 0000000000000..6d1ef64b8baa6 --- /dev/null +++ b/samples/net/latmon/prj.conf @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (c) 2025 Jorge Ramirez-Ortiz + +CONFIG_POSIX_API=y + +# General config +CONFIG_LOG=y +CONFIG_GPIO=y + +# Networking config +CONFIG_NETWORKING=y +CONFIG_NET_DHCPV4=y +CONFIG_NET_IPV4=y +CONFIG_NET_LOG=y +CONFIG_NET_TCP=y + +# Latmon config +CONFIG_NET_LATMON=y +CONFIG_LATMON_LOG_LEVEL_DBG=y + +# Heap for Latmon +CONFIG_HEAP_MEM_POOL_SIZE=8192 diff --git a/samples/net/latmon/sample.yaml b/samples/net/latmon/sample.yaml new file mode 100644 index 0000000000000..a1a405f92039d --- /dev/null +++ b/samples/net/latmon/sample.yaml @@ -0,0 +1,14 @@ +sample: + description: Latency Benchmarking Tool + name: Latmon sample app +common: + harness: net + tags: + - net + - latmon +tests: + sample.net.latmon: + build_only: true + platform_allow: + - frdm_k64f + depends_on: eth diff --git a/samples/net/latmon/src/main.c b/samples/net/latmon/src/main.c new file mode 100644 index 0000000000000..92eb1fc82aa6e --- /dev/null +++ b/samples/net/latmon/src/main.c @@ -0,0 +1,271 @@ +/* + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2025 Jorge Ramirez-Ortiz + */ + +#include +LOG_MODULE_REGISTER(sample_latmon, LOG_LEVEL_DBG); + +#include +#include +#include +#include +#include +/* + * Blink Control + * DHCP: red + * waiting for connection: blue + * sampling: green + */ +#define LED_WAIT_PERIOD 1000000 +#define LED_DHCP_PERIOD 500000 +#define LED_RUN_PERIOD 200000 + +#define BLINK_THREAD_PRIORITY K_IDLE_PRIO +#define BLINK_STACK_SIZE 4096 +static K_THREAD_STACK_DEFINE(blink_stack, BLINK_STACK_SIZE); + +static const struct gpio_dt_spec pulse = + GPIO_DT_SPEC_GET_OR(DT_PATH(zephyr_user), pulse_gpios, {0}); +static const struct gpio_dt_spec ack = + GPIO_DT_SPEC_GET_OR(DT_PATH(zephyr_user), ack_gpios, {0}); + +static K_SEM_DEFINE(ack_event, 0, 1); + +#define DHCP_DONE (atomic_test_bit(&dhcp_done, 0) == true) +#define SET_DHCP_DONE atomic_set_bit(&dhcp_done, 0) +static atomic_val_t dhcp_done; + +static struct k_spinlock lock; + +static void gpio_ack_handler(const struct device *port, + struct gpio_callback *cb, + gpio_port_pins_t pins) +{ + k_sem_give(&ack_event); +} + +static int configure_measurement_hardware(void) +{ + static struct gpio_callback gpio_cb = { }; + int ret = 0; + + if (!gpio_is_ready_dt(&pulse) || !gpio_is_ready_dt(&ack)) { + LOG_ERR("GPIO device not ready"); + return -ENODEV; + } + + ret = gpio_pin_configure_dt(&pulse, GPIO_OUTPUT_HIGH); + if (ret < 0) { + LOG_ERR("failed configuring pulse pin"); + return ret; + } + + ret = gpio_pin_configure_dt(&ack, GPIO_INPUT); + if (ret < 0) { + LOG_ERR("failed configuring ack pin"); + return ret; + } + +#if defined(CONFIG_LATMON_LOOPBACK_CALIBRATION) + /* + * Connect GPIO pins in loopback mode for validation (tx to ack) + * On FRDM_K64F, Latmus will show around 3.2 usec of latency. + * + * You can then use these values to adjust the reported latencies (ie, + * subtract the loopback latency from the measured latencies). + */ + ret = gpio_pin_interrupt_configure_dt(&ack, GPIO_INT_EDGE_FALLING); +#else + ret = gpio_pin_interrupt_configure_dt(&ack, GPIO_INT_EDGE_RISING); +#endif + if (ret < 0) { + LOG_ERR("failed configuring ack pin interrupt"); + return ret; + } + + gpio_init_callback(&gpio_cb, gpio_ack_handler, BIT(ack.pin)); + + ret = gpio_add_callback_dt(&ack, &gpio_cb); + if (ret < 0) { + LOG_ERR("failed adding ack pin callback"); + return ret; + } + + return ret; +} + +static void blink(void*, void*, void*) +{ + const struct gpio_dt_spec led_run = + GPIO_DT_SPEC_GET_OR(DT_ALIAS(led0), gpios, {0}); + const struct gpio_dt_spec led_wait = + GPIO_DT_SPEC_GET_OR(DT_ALIAS(led1), gpios, {0}); + const struct gpio_dt_spec led_dhcp = + GPIO_DT_SPEC_GET_OR(DT_ALIAS(led2), gpios, {0}); + const struct gpio_dt_spec *led = &led_dhcp, *tmp = NULL; + uint32_t period = LED_DHCP_PERIOD; + + if (gpio_is_ready_dt(&led_run)) { + gpio_pin_configure_dt(&led_run, GPIO_OUTPUT_INACTIVE); + } + + if (gpio_is_ready_dt(&led_wait)) { + gpio_pin_configure_dt(&led_wait, GPIO_OUTPUT_INACTIVE); + } + + if (gpio_is_ready_dt(&led_dhcp)) { + gpio_pin_configure_dt(&led_dhcp, GPIO_OUTPUT_INACTIVE); + } + + for (;;) { + k_usleep(period); + if (DHCP_DONE) { + led = net_latmon_running() ? &led_run : &led_wait; + } + + if (tmp && led != tmp) { + gpio_pin_set_dt(tmp, 0); + } + + if (!gpio_is_ready_dt(led)) { + continue; + } + + if (led == &led_wait) { + period = LED_WAIT_PERIOD; + } + + if (led == &led_run) { + period = LED_RUN_PERIOD; + } + + gpio_pin_toggle_dt(led); + tmp = led; + } + gpio_pin_set_dt(led, 0); +} + +static k_tid_t start_led_blinking_thread(struct k_thread *blink_thread, + k_thread_entry_t blink_thread_func) +{ + return k_thread_create(blink_thread, blink_stack, BLINK_STACK_SIZE, + (k_thread_entry_t)blink_thread_func, + NULL, NULL, NULL, + BLINK_THREAD_PRIORITY, 0, K_NO_WAIT); +} + +/* Raw ticks */ +#define CALCULATE_DELTA(ack, pulse) \ +((ack) < (pulse) ? \ +(~(pulse) + 1 + (ack)) : ((ack) - (pulse))) + +static int measure_latency_cycles(uint32_t *delta) +{ + k_spinlock_key_t key; + uint32_t tx = 0; + uint32_t rx = 0; + int ret = 0; + + /* Remove spurious events */ + k_sem_reset(&ack_event); + + /* Generate a falling edge pulse to the DUT */ + key = k_spin_lock(&lock); + if (gpio_pin_set_dt(&pulse, 0)) { + k_spin_unlock(&lock, key); + LOG_ERR("Failed to set pulse pin"); + ret = -1; + goto out; + } + tx = k_cycle_get_32(); + k_spin_unlock(&lock, key); + + /* Wait for a rising edge from the Latmus controlled DUT */ + if (k_sem_take(&ack_event, K_MSEC(1)) == 0) { + rx = k_cycle_get_32(); + /* Measure the cycles */ + *delta = CALCULATE_DELTA(rx, tx); + } else { + ret = -1; + } +out: + if (gpio_pin_set_dt(&pulse, 1)) { + LOG_ERR("Failed to clear pulse pin"); + ret = -1; + } + + return ret; +} + +int main(void) +{ + struct net_if *iface = net_if_get_default(); + struct k_thread blink_thread; + static k_tid_t blink_tid; + int client, socket = 0; + int ret = 0; + + /* Prepare the instrumentation */ + if (configure_measurement_hardware() < 0) { + LOG_ERR("Failed to configure the measurement hardware"); + return -1; + } + + /* Start visual indicators - dhcp/blue, waiting/red, running/green */ + blink_tid = start_led_blinking_thread(&blink_thread, blink); + if (!blink_tid) { + LOG_WRN("Failed to start led blinking thread"); + } + + /* Get a valid ip */ + LOG_INF("DHCPv4: binding..."); + net_dhcpv4_start(iface); + for (;;) { + ret = net_mgmt_event_wait(NET_EVENT_IPV4_DHCP_BOUND, NULL, + NULL, NULL, NULL, K_SECONDS(10)); + if (ret == -ETIMEDOUT) { + LOG_WRN("DHCPv4: binding timed out, retrying..."); + continue; + } + if (ret < 0) { + LOG_ERR("DHCPv4: binding failed, aborting..."); + goto out; + } + break; + } + + SET_DHCP_DONE; + + /* Get a socket to the Latmus port */ + socket = net_latmon_get_socket(NULL); + if (socket < 0) { + LOG_ERR("Failed to get a socket to latmon (errno %d)", socket); + ret = -1; + goto out; + } + + for (;;) { + /* Wait for Latmus to connect */ + client = net_latmon_connect(socket, + &iface->config.dhcpv4.requested_ip); + if (client < 0) { + if (client == -EAGAIN) { + continue; + } + LOG_ERR("Failed to connect to latmon"); + ret = -1; + goto out; + } + + /* Provide latency data until Latmus closes the connection */ + net_latmon_start(client, measure_latency_cycles); + } +out: + k_thread_abort(blink_tid); + close(socket); + + return ret; +} diff --git a/subsys/net/lib/CMakeLists.txt b/subsys/net/lib/CMakeLists.txt index ee23de2ab027d..1a0d432cdaafe 100644 --- a/subsys/net/lib/CMakeLists.txt +++ b/subsys/net/lib/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory_ifdef(CONFIG_SNTP sntp) add_subdirectory_ifdef(CONFIG_MQTT_LIB mqtt) add_subdirectory_ifdef(CONFIG_MQTT_SN_LIB mqtt_sn) add_subdirectory_ifdef(CONFIG_PTP ptp) +add_subdirectory_ifdef(CONFIG_NET_LATMON latmon) add_subdirectory_ifdef(CONFIG_TFTP_LIB tftp) add_subdirectory_ifdef(CONFIG_NET_CONFIG_SETTINGS config) add_subdirectory_ifdef(CONFIG_NET_SOCKETS sockets) diff --git a/subsys/net/lib/Kconfig b/subsys/net/lib/Kconfig index e530877125afa..84d88788cab48 100644 --- a/subsys/net/lib/Kconfig +++ b/subsys/net/lib/Kconfig @@ -7,6 +7,8 @@ source "subsys/net/lib/coap/Kconfig" source "subsys/net/lib/dns/Kconfig" +source "subsys/net/lib/latmon/Kconfig" + source "subsys/net/lib/mqtt/Kconfig" source "subsys/net/lib/mqtt_sn/Kconfig" diff --git a/subsys/net/lib/latmon/CMakeLists.txt b/subsys/net/lib/latmon/CMakeLists.txt new file mode 100644 index 0000000000000..ce29c56f0d5f7 --- /dev/null +++ b/subsys/net/lib/latmon/CMakeLists.txt @@ -0,0 +1,5 @@ +zephyr_library() + +zephyr_library_sources( + latmon.c +) diff --git a/subsys/net/lib/latmon/Kconfig b/subsys/net/lib/latmon/Kconfig new file mode 100644 index 0000000000000..d798d84838d54 --- /dev/null +++ b/subsys/net/lib/latmon/Kconfig @@ -0,0 +1,58 @@ +# Copyright (c) 2025 Jorge A. Ramirez-Ortiz +# SPDX-License-Identifier: Apache-2.0 + +config NET_LATMON + bool "Latency monitoring support" + select EXPERIMENTAL + select NET_SOCKETS + depends on NET_TCP + help + This option enables the latency monitoring support for Zephyr + +config NET_LATMON_PORT + int "Latmon - Latmus communication port" + default 2306 + help + Specify the port number used for Latmon - Latmus communication. + +config NET_LATMON_XFER_THREAD_STACK_SIZE + int "Stack size for the network transfer thread" + default 8192 + help + Specify the stack size for the network transfer thread used in latency monitoring. + +config NET_LATMON_XFER_THREAD_PRIORITY + int "Priority for the network transfer thread" + default 14 + help + Specify the priority for the network transfer thread used in latency monitoring. + +config NET_LATMON_THREAD_STACK_SIZE + int "Stack size for the Latmon thread" + default 8192 + help + Specify the stack size for the Latmon thread used in latency monitoring. + +config NET_LATMON_THREAD_PRIORITY + int "Priority for the Latmon thread" + default 14 + help + Specify the priority for the Latmon thread used in latency monitoring. + +config NET_LATMON_MONITOR_THREAD_STACK_SIZE + int "Stack size for the monitor thread" + default 8192 + help + Specify the stack size for the monitor thread used in latency monitoring. + +config NET_LATMON_MONITOR_THREAD_PRIORITY + int "Priority for the monitor thread" + default -16 + help + Specify the priority for the monitor thread used in latency monitoring. + +module = LATMON +module-dep = NET_LOG +module-str = Latency monitoring Service +module-help = This option enables the latency monitoring support for Zephyr +source "subsys/net/Kconfig.template.log_config.net" diff --git a/subsys/net/lib/latmon/latmon.c b/subsys/net/lib/latmon/latmon.c new file mode 100644 index 0000000000000..7b6fddc97c0df --- /dev/null +++ b/subsys/net/lib/latmon/latmon.c @@ -0,0 +1,568 @@ +/* + * + * SPDX-License-Identifier: Apache-2.0 + * + * Copyright (c) 2025 Jorge Ramirez-Ortiz + */ + + #include +LOG_MODULE_REGISTER(latmon, CONFIG_LATMON_LOG_LEVEL); + +#include +#include + +/* Latmon < -- > Latmus Interface */ +#define LATMON_NET_PORT CONFIG_NET_LATMON_PORT + +struct latmon_net_request { + uint32_t period_usecs; + uint32_t histogram_cells; +} __packed; + +struct latmon_net_data { + int32_t sum_lat_hi; + int32_t sum_lat_lo; + int32_t min_lat; + int32_t max_lat; + uint32_t overruns; + uint32_t samples; +} __packed; + +/* Private IPC: Zephyr application to Latmon service */ +struct latmon_message { + net_latmon_measure_t measure_func; + int latmus; /* latmus connection */ +}; + +K_MSGQ_DEFINE(latmon_msgq, sizeof(struct latmon_message), 2, 4); + +/* + * Note: Using a small period (e.g., less than 100 microseconds) may result in + * the reporting too good interrupt latencies during a short test due to cache + * effects. + */ +struct latmus_conf { + uint32_t max_samples; + uint32_t period; /* in usecs */ + uint32_t cells; +}; + +/* + * Each cell represents a 1 usec timespan. + * note: the sampling period cannot be longer than 1 sec. + */ +#define MAX_SAMPLING_PERIOD_USEC 1000000 +#define HISTOGRAM_CELLS_MAX 1000 +struct latmon_data { + bool warmed; /* sample data can be used */ + uint32_t histogram[HISTOGRAM_CELLS_MAX]; + uint32_t current_samples; + uint32_t overruns; + uint32_t min_lat; + uint32_t max_lat; + uint64_t sum_lat; +}; + +/* Message queue for sample data transfers */ +K_MSGQ_DEFINE(xfer_msgq, sizeof(struct latmon_data), 10, 4); + +/* Network transfer thread: sends data to Latmus */ +#define XFER_THREAD_STACK_SIZE CONFIG_NET_LATMON_XFER_THREAD_STACK_SIZE +#define XFER_THREAD_PRIORITY CONFIG_NET_LATMON_XFER_THREAD_PRIORITY +K_THREAD_STACK_DEFINE(xfer_thread_stack, XFER_THREAD_STACK_SIZE); +static struct k_thread xfer_thread; + +/* Latmon thread: receives application requests */ +#define LATMON_THREAD_PRIORITY CONFIG_NET_LATMON_THREAD_PRIORITY +#define LATMON_STACK_SIZE CONFIG_NET_LATMON_THREAD_STACK_SIZE + +/* Monitor thread: performs the sampling */ +#define MONITOR_THREAD_PRIORITY CONFIG_NET_LATMON_MONITOR_THREAD_PRIORITY +#define MONITOR_STACK_SIZE CONFIG_NET_LATMON_MONITOR_THREAD_STACK_SIZE +static K_THREAD_STACK_DEFINE(monitor_stack, MONITOR_STACK_SIZE); + +static struct k_thread monitor_thread; +static k_tid_t monitor_tid; +static bool abort_monitor; + +/* Synchronization */ +static K_SEM_DEFINE(latmon_done, 0, 1); +static K_SEM_DEFINE(monitor_done, 0, 1); + +static ssize_t send_net_data(int latmus, const void *buf, size_t count) +{ + ssize_t total_written = 0; + ssize_t bytes_written; + + while (count > 0) { + const char *p = (const char *)buf + total_written; + + bytes_written = zsock_send(latmus, p, count, 0); + if (bytes_written < 0) { + if (errno == EINTR) { + continue; + } + return -1; + } + if (bytes_written == 0) { + break; + } + + total_written += bytes_written; + count -= bytes_written; + } + + return total_written; +} + +static int send_sample_data(int latmus, struct latmon_data *data) +{ + struct latmon_net_data ndata = { + .sum_lat_lo = htonl(data->sum_lat & 0xffffffff), + .sum_lat_hi = htonl(data->sum_lat >> 32), + .samples = htonl(data->current_samples), + .overruns = htonl(data->overruns), + .min_lat = htonl(data->min_lat), + .max_lat = htonl(data->max_lat), + }; + + /* Reset the data */ + data->min_lat = UINT32_MAX; + data->current_samples = 0; + data->overruns = 0; + data->max_lat = 0; + data->sum_lat = 0; + + return (send_net_data(latmus, &ndata, sizeof(ndata)) <= 0 ? -1 : 0); +} + +static int send_trailing_data(int latmus, struct latmus_conf *conf, + struct latmon_data *data) +{ + int count = conf->cells * sizeof(data->histogram[0]); + ssize_t ret = 0; + + if (data->current_samples != 0 && send_sample_data(latmus, data) < 0) { + return -1; + } + + /* send empty frame */ + if (send_sample_data(latmus, data) < 0) { + return -1; + } + + /* send histogram if enabled (ie, conf->cells > 0) */ + for (int cell = 0; cell < conf->cells; cell++) { + data->histogram[cell] = htonl(data->histogram[cell]); + } + + ret = send_net_data(latmus, data->histogram, count); + memset(data->histogram, 0, count); + + if (ret < 0) { + LOG_INF("failed tx histogram (ret=%d, errno %d)", ret, errno); + return -1; + } + + return 0; +} + +static int prepare_sample_data(uint32_t delta, struct latmus_conf *conf, + struct latmon_data *data) +{ + uint32_t delta_ns = k_cyc_to_ns_floor64(delta); + uint32_t delta_us = delta_ns / 1000; + + data->sum_lat += delta_ns; + + if (delta_ns < data->min_lat) { + data->min_lat = delta_ns; + } + + if (delta_ns > data->max_lat) { + data->max_lat = delta_ns; + } + + while (delta_us > conf->period) { + data->overruns++; + delta_us -= conf->period; + } + + if (conf->cells != 0) { + if (delta_us >= conf->cells) { + /* histogram outlier */ + delta_us = conf->cells - 1; + } + + data->histogram[delta_us]++; + } + + return ++data->current_samples < conf->max_samples ? -EAGAIN : 0; +} + +static int enqueue_sample_data(struct latmon_data *data) +{ + int ret = 0; + + /* Drop the warming samples */ + if (data->warmed == false) { + data->warmed = true; + goto out; + } + + /* Enqueue the data for transfer */ + ret = k_msgq_put(&xfer_msgq, data, K_NO_WAIT); + if (ret < 0) { + LOG_ERR("Failed to enqueue netdata (queue full)"); + } +out: + /* Reset the data */ + data->min_lat = UINT32_MAX; + data->current_samples = 0; + data->overruns = 0; + data->max_lat = 0; + data->sum_lat = 0; + + return ret; +} + +static void xfer_thread_func(void *p1, void *p2, void *p3) +{ + int latmus = *(int *)p1; + struct latmon_data sample; + + LOG_INF("Transfer thread priority: %d", XFER_THREAD_PRIORITY); + + for (;;) { + if (k_msgq_get(&xfer_msgq, &sample, K_FOREVER) != 0) { + LOG_ERR("Failed to get sample data to transfer"); + continue; + } + + if (send_sample_data(latmus, &sample) < 0) { + LOG_ERR("Failed to transfer sample data"); + break; + } + } +} + +static void start_xfer_thread(int *latmus) +{ + k_thread_create(&xfer_thread, xfer_thread_stack, XFER_THREAD_STACK_SIZE, + xfer_thread_func, latmus, NULL, NULL, + XFER_THREAD_PRIORITY, 0, K_MSEC(10)); +} + +static void abort_xfer_thread(void) +{ + k_thread_abort(&xfer_thread); +} + +static int measure(uint32_t *delta, struct latmon_message *msg, + struct latmon_data *data, + struct latmus_conf *conf) +{ + if (data->warmed == true) { + k_usleep(conf->period); + } + + if (msg->measure_func(delta) < 0) { + if (data->overruns++ > conf->max_samples / 2) { + return -1; + } + /* Just an overrun */ + return 1; + } + return 0; +} + +static void monitor_thread_func(void *p1, void *p2, void *p3) +{ + struct latmon_message *msg = p1; + struct latmus_conf *conf = p2; + struct latmon_data *data = p3; + uint32_t delta = 0; + int ret = 0; + + LOG_INF("Monitor thread priority: %d", MONITOR_THREAD_PRIORITY); + + /* Prepare transfer thread */ + start_xfer_thread(&msg->latmus); + + LOG_INF("\tmonitoring started:"); + LOG_INF("\t - samples per period: %u", conf->max_samples); + LOG_INF("\t - period: %u usecs", conf->period); + LOG_INF("\t - histogram cells: %u", conf->cells); + + /* Sampling loop */ + memset(data, 0, sizeof(*data)); + data->min_lat = UINT32_MAX; + data->warmed = false; + delta = 0; + do { + ret = measure(&delta, msg, data, conf); + if (ret != 0) { + if (ret < 0) { + LOG_ERR("\tExcessive overruns, abort!"); + goto out; + } + continue; + } + + if (prepare_sample_data(delta, conf, data) == -EAGAIN) { + continue; + } + + ret = enqueue_sample_data(data); + /* Abort allowed after all samples have been queued */ + } while (abort_monitor == false && ret == 0); +out: + abort_xfer_thread(); + k_sem_give(&monitor_done); + monitor_tid = NULL; + + LOG_INF("\tmonitoring stopped"); +} + +static int broadcast_ip_address(struct in_addr *ip_addr) +{ + char ip_str[NET_IPV4_ADDR_LEN]; + struct sockaddr_in broadcast; + int sock = -1; + int ret = -1; + + if (ip_addr == NULL || ip_addr->s_addr == INADDR_ANY) { + LOG_ERR("Invalid IP address for broadcast"); + return -1; + } + + sock = zsock_socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock < 0) { + LOG_ERR("Failed to create broadcast socket : %d", errno); + return -1; + } + + broadcast.sin_addr.s_addr = htonl(INADDR_BROADCAST); + broadcast.sin_port = htons(LATMON_NET_PORT); + broadcast.sin_family = AF_INET; + + if (net_addr_ntop(AF_INET, ip_addr, ip_str, sizeof(ip_str)) == NULL) { + LOG_ERR("Failed to convert IP address to string"); + ret = -1; + goto out; + } + + ret = zsock_sendto(sock, ip_str, strlen(ip_str), 0, + (struct sockaddr *)&broadcast, sizeof(broadcast)); + +out: + zsock_close(sock); + + return ret; +} + +/* Get a socket to listen to Latmus requests */ +int net_latmon_get_socket(struct sockaddr *connection_addr) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + .sin_port = htons(LATMON_NET_PORT) + }; + int s, on = 1; + + if (connection_addr != NULL) { + memcpy(&addr, connection_addr, sizeof(addr)); + } + + s = zsock_socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s < 0) { + LOG_ERR("failed to create latmon socket : %d", errno); + return -1; + } + + zsock_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)); + if (zsock_bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + LOG_ERR("failed to bind latmon socket : %d", errno); + zsock_close(s); + return -1; + } + + if (zsock_listen(s, 1) < 0) { + LOG_ERR("failed to listen on latmon socket : %d", errno); + zsock_close(s); + return -1; + } + + return s; +} + +/* Waits for connection from Latmus */ +int net_latmon_connect(int socket, struct in_addr *ip) +{ + struct zsock_pollfd fd[1] = { {.fd = socket, .events = ZSOCK_POLLIN } }; + struct sockaddr_in clnt_addr; + const int timeout = 5000; + socklen_t len; + int latmus = -1; + int ret; + + LOG_INF("Waiting for Latmus ... "); + + /* Broadcast Latmon's address every timeout seconds until connected */ + ret = zsock_poll(fd, 1, timeout); + if (ret < 0) { + LOG_ERR("Poll error: %d", errno); + return -1; + } else if (ret == 0) { + /* Timeout waiting for connection */ + if (broadcast_ip_address(ip) < 0) { + LOG_ERR("Broadcast error"); + return -1; + } + + /* Client should retry the connection if broadcast succeeded */ + return -EAGAIN; + } + + /* + * As per MISRA guidelines, an 'else' clause is required. However, we + * chose to prioritize adherence to the project's code style guidelines. + */ + len = sizeof(clnt_addr); + latmus = zsock_accept(socket, (struct sockaddr *)&clnt_addr, &len); + if (latmus < 0) { + LOG_INF("Failed accepting new connection..."); + return -1; + } + + return latmus; +} + +void net_latmon_start(int latmus, net_latmon_measure_t measure_f) +{ + struct latmon_message msg = { + .measure_func = measure_f, + .latmus = latmus, + }; + + k_msgq_put(&latmon_msgq, &msg, K_NO_WAIT); + k_sem_take(&latmon_done, K_FOREVER); +} + +bool net_latmon_running(void) +{ + return monitor_tid ? true : false; +} + +static int get_latmus_conf(ssize_t len, struct latmon_net_request *req, + struct latmus_conf *conf) +{ + if (len != sizeof(*req)) { + return -1; + } + + if (ntohl(req->period_usecs) == 0) { + LOG_ERR("null period received, invalid\n"); + return -1; + } + + if (ntohl(req->period_usecs) > MAX_SAMPLING_PERIOD_USEC) { + LOG_ERR("invalid period received: %u usecs\n", + ntohl(req->period_usecs)); + return -1; + } + + if (ntohl(req->histogram_cells) > HISTOGRAM_CELLS_MAX) { + LOG_ERR("invalid histogram size received: %u > %u cells\n", + ntohl(req->histogram_cells), HISTOGRAM_CELLS_MAX); + return -1; + } + + conf->period = ntohl(req->period_usecs); + conf->cells = ntohl(req->histogram_cells); + conf->max_samples = MAX_SAMPLING_PERIOD_USEC / conf->period; + + return 0; +} + +static void start_monitoring(struct latmon_message *msg, + struct latmus_conf *conf, + struct latmon_data *data) +{ + k_sem_reset(&monitor_done); + abort_monitor = false; + + memset(data, 0, sizeof(*data)); + monitor_tid = k_thread_create(&monitor_thread, monitor_stack, + MONITOR_STACK_SIZE, monitor_thread_func, + msg, conf, data, MONITOR_THREAD_PRIORITY, 0, K_NO_WAIT); +} + +static void stop_monitoring(void) +{ + if (monitor_tid == 0) { + return; + } + + abort_monitor = true; + k_sem_take(&monitor_done, K_FOREVER); +} + +static void handle_connection(struct latmon_message *msg) +{ +#if (K_HEAP_MEM_POOL_SIZE > 0) + struct latmus_conf *conf = k_malloc(sizeof(*conf)); + struct latmon_data *data = k_malloc(sizeof(*data)); + struct latmon_net_request req; + ssize_t len; + + if (conf == 0 || data == 0) { + LOG_ERR("Failed to allocate memory, check HEAP_MEM_POOL_SIZE"); + goto out; + } + + memset(conf, 0, sizeof(*conf)); + + for (;;) { + len = zsock_recv(msg->latmus, &req, sizeof(req), 0); + stop_monitoring(); + if (get_latmus_conf(len, &req, conf) < 0) { + /* Send the histogram */ + if (send_trailing_data(msg->latmus, conf, data) < 0) { + break; + } + memset(conf, 0, sizeof(*conf)); + continue; + } + start_monitoring(msg, conf, data); + } +out: + k_free(conf); + k_free(data); + zsock_close(msg->latmus); + k_sem_give(&latmon_done); +#else + LOG_ERR("No heap configured"); +#endif +} + +static int latmon_server_thread_func(void *p1, void *p2, void *p3) +{ + struct latmon_message msg = { }; + + LOG_INF("Latmon server thread priority: %d", LATMON_THREAD_PRIORITY); + + for (;;) { + k_msgq_get(&latmon_msgq, &msg, K_FOREVER); + + /* Only latmus can stop the monitoring, so hang in there */ + handle_connection(&msg); + } + + return 0; +} + +K_THREAD_DEFINE(latmon_server_id, LATMON_STACK_SIZE, + latmon_server_thread_func, NULL, NULL, NULL, + LATMON_THREAD_PRIORITY, 0, 0);