#include <errno.h>
#include <inttypes.h>
#include <stdatomic.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include "cm_setup.h"
#include "cm_error_handler.h"
#include "scheduling_latency.h"
/* Forward declarations of the file-local functions defined further down. */
static void receive_func(
void *eo_context, em_event_t event,
em_event_type_t type,
em_queue_t queue, void *q_context);
static void update_stats(int64_t ts, int64_t diff, uint64_t count);
static void print_stats(int64_t start_time, int64_t loop_start_time, uint64_t count);
static int parse_args(int first, int argc, char *argv[]);
static void usage(void);
static void do_work(test_msg *msg);
static int64_t mask_from_str(
const char *optarg,
em_core_mask_t *mask);
static void entry_hook(em_eo_t eo, void **eo_ctx, em_event_t events[], int num,
em_queue_t *queue, void **q_ctx);
static void exit_hook(em_eo_t eo);
static uint64_t try_timestamp_overhead(void);
/* Shared test state; reserved or looked up by name ("PerfSharedMem") in test_init(). */
static perf_shm_t *perf_shm;
/* Per-thread timestamp written by entry_hook() and read by exit_hook(). */
static __thread uint64_t entry_ts;
/* Per-thread maximum of (time at exit_hook() - entry_ts) seen so far. */
static __thread int64_t max_eo_time;
/*
 * Default run-time options. parse_args() overwrites individual fields when the
 * corresponding command line option is given.
 */
config_data g_options = {
/* receive_func() stops the test once this many report periods have run (0 disables the check) */
.loops = 1,
/* count of background events; when non-zero, print_stats() also reports them */
.lo_events = 0,
/* set with -w; test_start() prints it as the per-event work time of background events */
.work_ns = 2000,
/* set with -a; test_start() then prints "Using atomic_processing_end()" */
.atomic_end = false,
/* set with -r; enables the entry/exit hook path and max_eo_time reporting */
.eo_receive = false
};
/**
 * Hook called on entry to an EO receive function.
 *
 * Stores the current strict global time (ns) in the thread-local entry_ts so
 * that exit_hook() can compute how long the receive function ran. None of the
 * hook arguments are inspected.
 */
void entry_hook(em_eo_t eo, void **eo_ctx, em_event_t events[], int num,
		em_queue_t *queue, void **q_ctx)
{
	/* Arguments are intentionally unused; only the timestamp matters. */
	(void)q_ctx;
	(void)queue;
	(void)num;
	(void)events;
	(void)eo_ctx;
	(void)eo;

	entry_ts = odp_time_global_strict_ns();
}
/**
 * Hook called on exit from an EO receive function.
 *
 * Computes the time elapsed since entry_hook() stored entry_ts and keeps the
 * largest value seen by this thread in max_eo_time.
 *
 * @param eo  Unused.
 */
void exit_hook(em_eo_t eo)
{
	const int64_t elapsed = odp_time_global_strict_ns() - entry_ts;

	(void)eo;

	/* Track the per-thread worst case only. */
	max_eo_time = (elapsed > max_eo_time) ? elapsed : max_eo_time;
}
{
switch (type) {
return "PARALLEL";
break;
return "ORDERED";
break;
return "ATOMIC";
break;
default:
break;
}
return "<?>";
}
{
uint64_t mask;
if (hex == NULL)
return 0;
if (sscanf(hex, "%lx", &mask) != 1)
return 0;
}
/**
 * Fold one timing sample into the shared statistics in perf_shm->times.
 *
 * Minimum and maximum are tracked independently: a single sample may update
 * both. This matters because the minimums start at INT64_MAX, so the very
 * first sample always sets a new minimum and must still be allowed to count
 * as the maximum.
 *
 * @param now    Timestamp (ns) the caller took for this sample.
 * @param diff   Measured latency (ns) of this sample.
 * @param count  Sequence number of the sample; stored next to a new min/max.
 */
void update_stats(int64_t now, int64_t diff, uint64_t count)
{
	/* Time since the EM_DEBUG_TSP_SCHED_RETURN timestamp, less timestamp overhead */
	int64_t dtime = now -
		em_debug_timestamp(EM_DEBUG_TSP_SCHED_RETURN) - perf_shm->ts_overhead;

	if (diff < perf_shm->times.mint) {
		perf_shm->times.mint = diff;
		perf_shm->times.minnum = count;
	}
	if (diff > perf_shm->times.maxt) {
		perf_shm->times.maxt = diff;
		perf_shm->times.maxnum = count;
		/* remember the dispatch delta that went with the worst latency */
		perf_shm->times.maxdisp = dtime;
	}

	perf_shm->times.sum += diff;

	if (dtime < perf_shm->times.disp_min)
		perf_shm->times.disp_min = dtime;
	if (dtime > perf_shm->times.disp_max)
		perf_shm->times.disp_max = dtime;

	/* Publish this thread's worst receive time when the hooks are in use */
	if (g_options.eo_receive && perf_shm->times.max_eo_time < max_eo_time)
		perf_shm->times.max_eo_time = max_eo_time;
}
/**
 * Account for one handled event and burn CPU for the requested time.
 *
 * Increments msg->count, then busy-waits until at least msg->work nanoseconds
 * of global time have passed. A work value of 0 skips the wait entirely.
 *
 * @param msg  Event payload; count is incremented, work is only read.
 */
void do_work(test_msg *msg)
{
	msg->count++;

	if (msg->work != 0) {
		const uint64_t begin = odp_time_global_strict_ns();

		/* Spin until the requested amount of time has elapsed */
		for (;;) {
			if ((odp_time_global_ns() - begin) >= msg->work)
				break;
		}
	}
}
/**
 * Print one statistics report and roll the per-period counters forward.
 *
 * @param start_time       Timestamp (ns) used as the start of the whole run.
 * @param loop_start_time  Timestamp (ns) used as the start of this period.
 * @param count            Current timing-event count.
 *
 * Side effects: clears perf_shm->times.maxdisp, stores count in
 * perf_shm->stat_mcount and, when background events are enabled, stores
 * perf_shm->num_lo in perf_shm->stat_lcount.
 */
void print_stats(int64_t start_time, int64_t loop_start_time, uint64_t count)
{
	const double period_ns = (double)odp_time_global_ns() - loop_start_time;
	const double runtime_ns = (double)odp_time_global_ns() - start_time;
	const double period_s = period_ns / 1000000000;
	const double runtime_s = runtime_ns / 1000000000;

	/* Events handled in this period, in millions per second */
	const double ev_rate = ((count - perf_shm->stat_mcount) / period_s) / 1000000;
	/* Average over the events counted after the first START_EVENTS */
	const uint64_t avg_ns = perf_shm->times.sum / (count - START_EVENTS);

	/*
	 * NOTE(review): maxdisp is cleared before the row below is printed, so
	 * the max_do column shows 0 as written - confirm whether a condition
	 * is missing here.
	 */
	perf_shm->times.maxdisp = 0;

	APPL_PRINT(": time(h) cores events(M) rate(M/s) min[ns] max[ns] avg[ns] min ev# max ev# max_do[ns] max_eo[ns]\n");
	APPL_PRINT(": %-7.3f %-5d %-9lu %-9.3f %-7lu %-7lu %-7lu %-11lu %-11lu %-10lu %lu\n",
		   runtime_s / (60 * 60), perf_shm->core_count, count / 1000000, ev_rate,
		   perf_shm->times.mint, perf_shm->times.maxt, avg_ns,
		   perf_shm->times.minnum, perf_shm->times.maxnum,
		   perf_shm->times.maxdisp, perf_shm->times.max_eo_time);

	if (g_options.lo_events) {
		const double bg_rate =
			((perf_shm->num_lo - perf_shm->stat_lcount) / period_s) / 1000000;

		APPL_PRINT(": bg events(M) rate(M/s)\n");
		APPL_PRINT(": %-12.3f %.3f\n", ((double)perf_shm->num_lo) / 1000000, bg_rate);
		perf_shm->stat_lcount = perf_shm->num_lo;
	}

	/* Baseline for the next period's rate calculation */
	perf_shm->stat_mcount = count;
}
/**
 * Print the program banner, the instructions text and one line per entry in
 * longopts[] (short option, long option, description from descopts[]).
 * Iteration stops at the first longopts[] entry whose name is NULL.
 */
void usage(void)
{
	int idx = 0;

	APPL_PRINT("scheduling_latency %s\n\n%s", VERSION, instructions);

	while (longopts[idx].name != NULL) {
		APPL_PRINT("-%c or --%-16s %s\n", longopts[idx].val, longopts[idx].name, descopts[idx]);
		idx++;
	}

	APPL_PRINT("\n");
}
/*
 * Parse a complete, non-negative integer (any base strtol() accepts with
 * base 0) from str into *value.
 *
 * Returns 1 on success, 0 when str is NULL or empty, contains no digits, has
 * trailing characters, is negative, or is out of range for long.
 */
static int parse_nonneg_arg(const char *str, int64_t *value)
{
	char *endptr = NULL;
	long parsed;

	if (str == NULL || *str == '\0')
		return 0;

	errno = 0; /* strtol() only sets errno on failure */
	parsed = strtol(str, &endptr, 0);
	if (errno == ERANGE || endptr == str || *endptr != '\0' || parsed < 0)
		return 0;

	*value = parsed;
	return 1;
}

/**
 * Parse the application's own command line options into g_options.
 *
 * @param first  Index in argv after which option parsing starts (main()
 *               passes the index of the "--" separator).
 * @param argc   Argument count as given to main().
 * @param argv   Argument vector as given to main().
 *
 * @return 1 when all options were accepted, 0 on an invalid option or value,
 *         or after printing usage for -h / an unknown option.
 *
 * Numeric option values must be complete, non-negative and in range; an empty
 * value is rejected instead of being read as 0. On success optind is reset
 * to 1 (presumably so that later option parsing starts from the beginning).
 */
int parse_args(int first, int argc, char *argv[])
{
	optind = first + 1;

	while (1) {
		int long_index;
		int64_t num;
		int opt = getopt_long(argc, argv, shortopts, longopts, &long_index);

		if (opt == -1)
			break;

		switch (opt) {
		case 'a':
			g_options.atomic_end = true;
			break;
		case 'r':
			g_options.eo_receive = true;
			break;
		case 'l':
			if (!parse_nonneg_arg(optarg, &num))
				return 0;
			g_options.loops = (uint64_t)num;
			break;
		case 'q':
			if (!parse_nonneg_arg(optarg, &num))
				return 0;
			/*
			 * Only 0, 1 and 2 are accepted.
			 * NOTE(review): the value is validated but nothing is
			 * stored here, although g_options.queue_type is read
			 * elsewhere - confirm whether an assignment is missing.
			 */
			switch (num) {
			case 0:
				break;
			case 1:
				break;
			case 2:
				break;
			default:
				return 0;
			}
			break;
		case 'e':
			if (!parse_nonneg_arg(optarg, &num))
				return 0;
			g_options.lo_events = (uint64_t)num;
			break;
		case 'w':
			if (!parse_nonneg_arg(optarg, &num))
				return 0;
			g_options.work_ns = (uint64_t)num;
			break;
		case 'g':
			num = mask_from_str(optarg, &g_options.lgroup);
			if (!num)
				return 0;
			break;
		case 't':
			num = mask_from_str(optarg, &g_options.hgroup);
			if (!num)
				return 0;
			break;
		case 'h':
		default:
			opterr = 0;
			usage();
			return 0;
		}
	}

	optind = 1;
	return 1;
}
{
return test_error_handler(eo, error, escope, args);
}
/*
 * Test initialisation: obtain the shared memory block used by the test.
 *
 * When core == 0 the block "PerfSharedMem" is reserved (sizeof(perf_shm_t))
 * and mlockall(MCL_FUTURE) is called; otherwise the block is looked up by
 * name. On core 0 the block is then zeroed and its size printed.
 *
 * NOTE(review): `core` is not declared in the visible body, and the statement
 * under `if (perf_shm == NULL)` appears truncated (the call that takes the
 * format string is missing) - confirm against the complete file.
 */
void test_init(const appl_conf_t *appl_conf)
{
(void)appl_conf;
if (core == 0) {
perf_shm = env_shared_reserve("PerfSharedMem", sizeof(perf_shm_t));
/* return value of mlockall() is not checked */
mlockall(MCL_FUTURE);
} else {
perf_shm = env_shared_lookup("PerfSharedMem");
}
if (perf_shm == NULL)
"Test init failed on EM-core: %u\n",
core);
else if (core == 0) {
memset(perf_shm, 0, sizeof(perf_shm_t));
APPL_PRINT("%luB Shared memory initialized\n", sizeof(perf_shm_t));
}
}
/**
 * Estimate the cost of reading the global time.
 *
 * Takes three back-to-back odp_time_global_ns() readings NUM_TS_TRY times and
 * returns the smallest difference (ns) seen between consecutive readings.
 */
uint64_t try_timestamp_overhead(void)
{
#define NUM_TS_TRY 5
	uint64_t best = UINT64_MAX;
	int round = 0;

	while (round < NUM_TS_TRY) {
		const uint64_t first = odp_time_global_ns();
		const uint64_t second = odp_time_global_ns();
		const uint64_t third = odp_time_global_ns();
		const uint64_t gap_a = second - first;
		const uint64_t gap_b = third - second;

		if (gap_a < best)
			best = gap_a;
		if (gap_b < best)
			best = gap_b;

		round++;
	}

	return best;
}
/*
 * Test start: record the core count and the measured timestamp overhead in
 * perf_shm, create the test EO (receive_func, context &perf_shm->eo_ctx),
 * print the selected configuration and fill in the EO context (test_q,
 * loprio_q, stopping = false).
 *
 * NOTE(review): this body appears truncated in several places - braces are
 * unbalanced and q, q2, grp, stat, ret and start_ret are used without a
 * visible declaration or assignment. Confirm against the complete file
 * before changing any code here.
 */
void test_start(const appl_conf_t *appl_conf)
{
perf_shm->core_count = appl_conf->core_count;
perf_shm->ts_overhead = (int64_t)try_timestamp_overhead();
APPL_PRINT("odp_time_global_ns pair overhead seems to be %lu ns\n", perf_shm->ts_overhead);
perf_shm->eo =
em_eo_create(
"perf test eo", start, NULL, stop, NULL, receive_func,
&perf_shm->eo_ctx);
test_fatal_if(perf_shm->eo ==
EM_EO_UNDEF,
"EO create failed");
char buf[32];
APPL_PRINT("Using queue type %d (%s) for timing\n",
(int)g_options.queue_type, queue_type_str(g_options.queue_type));
/* NOTE(review): the argument lists and conditions of the prints below are not visible */
APPL_PRINT("Coremask for hi-prio events: %s (%d cores)\n",
} else {
APPL_PRINT("Using default queue group for hi-prio\n");
}
APPL_PRINT("Coremask for background events: %s (%d cores)\n",
} else {
APPL_PRINT("Using default queue group for background events\n");
}
} else {
APPL_PRINT("Queue groups are overlapping\n");
}
grp, NULL);
if (g_options.lo_events)
APPL_PRINT("Background work: %lu normal priority events with %.2fus work\n",
g_options.lo_events, g_options.work_ns / 1000.0);
if (g_options.atomic_end)
APPL_PRINT("Using atomic_processing_end()\n");
if (g_options.eo_receive) {
/* NOTE(review): the calls that assign `stat` are not visible */
test_fatal_if(stat !=
EM_OK,
"entry_hook() register failed!");
test_fatal_if(stat !=
EM_OK,
"exit_hook() register failed!");
APPL_PRINT("entry/exit hooks registered (expect a bit more latency)\n");
}
/* EO context read by receive_func() */
perf_shm->eo_ctx.test_q = q;
perf_shm->eo_ctx.loprio_q = q2;
perf_shm->eo_ctx.stopping = false;
"EO start:%" PRI_STAT " %" PRI_STAT "", ret, start_ret);
APPL_PRINT("Starting %lu loops\n", g_options.loops);
}
/*
 * Test stop: prints the function name and core, then checks the status of
 * three teardown steps for perf_shm->eo (stop, remove all queues, delete, per
 * the error messages) and treats any status other than EM_OK as fatal.
 *
 * NOTE(review): the calls that assign `ret` are not visible, and `core` is
 * used without a visible declaration - the body appears truncated; confirm
 * against the complete file.
 */
void test_stop(const appl_conf_t *appl_conf)
{
em_eo_t eo;
(void)appl_conf;
APPL_PRINT("%s() on EM-core %d\n", __func__, core);
eo = perf_shm->eo;
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " stop:%" PRI_STAT
"", eo, ret);
eo = perf_shm->eo;
test_fatal_if(ret !=
EM_OK,
"EO remove queue all:%" PRI_STAT
" EO:%" PRI_EO "", ret, eo);
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " delete:%" PRI_STAT
"", eo, ret);
}
/**
 * Test termination: log which EM core runs the call and release the shared
 * memory block referenced by perf_shm.
 *
 * @param appl_conf  Unused.
 */
void test_term(const appl_conf_t *appl_conf)
{
	(void)appl_conf;

	APPL_PRINT("%s() on EM-core %d\n", __func__, em_core_id());

	env_shared_free(perf_shm);
}
{
(void)conf;
APPL_PRINT(
"EO %" PRI_EO " starting\n", eo);
APPL_PRINT("Dispatch timestamps (max_do) NOT available\n");
for (uint64_t i = 0; i < g_options.lo_events; i++) {
msg->count = 0;
msg->work = g_options.work_ns;
msg->magic = BACKGROUND_MAGIC;
test_fatal_if(ret !=
EM_OK,
"Send fail");
}
APPL_PRINT("Sent %lu bg events\n", g_options.lo_events);
perf_shm->times.mint = INT64_MAX;
perf_shm->times.disp_min = INT64_MAX;
msg->count = 0;
msg->magic = TIMING_MAGIC;
msg->ts = odp_time_global_ns();
return em_send(ev, ((app_eo_ctx *)eo_context)->test_q);
}
{
(void)eo_context;
APPL_PRINT(
"EO %" PRI_EO " stopping\n", eo);
}
/*
 * EO receive function for both the timing event and the background events.
 *
 * The arrival timestamp is taken first so that later work in this function
 * does not add to the measured latency. Events arriving on ctx->loprio_q are
 * background load: do_work() is run and the event is sent back to the same
 * queue. Any other event must be the timing event on ctx->test_q; its latency
 * is ts_ns - msg->ts - perf_shm->ts_overhead.
 *
 * NOTE(review): `msg` and `ret` are used without a visible declaration or
 * assignment and the final send call is not visible - the body appears
 * truncated; confirm against the complete file.
 */
void receive_func(
void *eo_context, em_event_t event,
em_event_type_t type,
em_queue_t queue, void *q_context)
{
/* taken before anything else */
uint64_t ts_ns = odp_time_global_strict_ns();
app_eo_ctx *ctx = (app_eo_ctx *)eo_context;
(void)type;
(void)q_context;
/* once stopping is set, events are dropped here without being re-sent */
if (unlikely(ctx->stopping)) {
return;
}
/* background event: burn time, count it, send it round again */
if (unlikely(queue == ctx->loprio_q)) {
do_work(msg);
perf_shm->num_lo++;
ret =
em_send(event, ctx->loprio_q);
test_fatal_if(ret !=
EM_OK,
"Event send fail, ret=%u, #=%lu",
(unsigned int)ret, msg->count);
return;
}
/* from here on the event must be the timing event */
test_fatal_if(msg->magic != TIMING_MAGIC, "Unexpected event, magic fail (%x/%x)",
msg->magic, TIMING_MAGIC);
test_fatal_if(queue != ctx->test_q, "Timing event from wrong Q??");
int64_t diff = ts_ns - msg->ts - perf_shm->ts_overhead;
/* the first START_EVENTS rounds only (re)set the start times; no statistics */
if (unlikely(msg->count < START_EVENTS)) {
perf_shm->start_time = odp_time_global_ns();
perf_shm->loop_start_time = perf_shm->start_time;
} else {
update_stats(ts_ns, diff, msg->count);
/*
 * NOTE(review): as written, this `if` governs the whole report block
 * below, so reports are only printed when atomic_end is set - a
 * statement may be missing after it.
 */
if (g_options.atomic_end)
/* every REPORT_PERIOD timing events (but not at count 0) */
if (unlikely(!(msg->count % REPORT_PERIOD) && msg->count)) {
print_stats(perf_shm->start_time, perf_shm->loop_start_time, msg->count);
perf_shm->loopcount++;
/* loops == 0 disables the stop condition */
if (perf_shm->loopcount >= g_options.loops && g_options.loops) {
ctx->stopping = true;
raise(SIGINT);
return;
}
perf_shm->loop_start_time = odp_time_global_ns();
}
}
/* prepare the timing event for its next round */
msg->count++;
msg->ts = odp_time_global_strict_ns();
test_fatal_if(ret !=
EM_OK,
"Event send fail");
}
/**
 * Program entry point.
 *
 * Looks for a "--" separator in argv. If one is present, the arguments after
 * it are handed to parse_args(); a parse failure ends the program with exit
 * status 1. Otherwise control passes to cm_setup() with the full argument
 * list.
 */
int main(int argc, char *argv[])
{
	int sep = 1;

	/* Find the "--" separator, if any */
	while (sep < argc && strcmp(argv[sep], "--") != 0)
		sep++;

	if (sep < argc && !parse_args(sep, argc, argv)) {
		APPL_PRINT("Invalid application arguments\n");
		return 1;
	}

	return cm_setup(argc, argv);
}