#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include "cm_setup.h"
#include "cm_error_handler.h"
/*
 * Number of EOs in the test. test_start() creates them as NUM_EO / 2 pairs
 * (table slots 2*i and 2*i+1), so an odd value would leave the last slot of
 * the EO tables without an EO.
 */
#define NUM_EO 128
/* Number of events prepared for, and sent to, each queue in test_start(). */
#define NUM_EVENT_PER_QUEUE 32
/* Size in bytes of the data[] payload array in perf_event_t. */
#define DATA_SIZE 250
/*
 * NOTE(review): not referenced anywhere in the visible source; presumably an
 * upper bound on the number of cores - confirm before relying on it.
 */
#define MAX_NBR_OF_CORES 256
/*
 * Value of the per-core event counter at which perf_receive() records the end
 * cycle count and prints a result. It is also the (negated) initial value of
 * the counter, see core_stat.
 */
#define PRINT_EVENT_COUNT 0xff0000
/*
 * NOTE(review): not referenced in the visible source; presumably the queue
 * type passed when the test queues are created - confirm.
 */
#define QUEUE_TYPE EM_QUEUE_TYPE_ATOMIC
/*
 * Batch size used by test_start() to split the initial sends into
 * NUM_EVENT_PER_QUEUE / SEND_MULTI_MAX full rounds plus a left-over round.
 */
#define SEND_MULTI_MAX 32
/* Non-zero selects the index-dependent branch in get_queue_priority(). */
#define USE_DIFF_QUEUE_PRIO_LEVELS 0
/* Non-zero enables the extra per-event branch in perf_receive(). */
#define ALLOC_FREE_PER_EVENT 0
/*
 * printf-style format of one result line, used by print_result(). Expects, in
 * order: two doubles (cycles/event, Mevents/s/core), a double (MHz), an int
 * (core number) and a uint64_t.
 */
#define RESULT_PRINTF_FMT \
"cycles/event:% -8.2f Mevents/s/core: %-6.2f %5.0f MHz core%02d %" PRIu64 "\n"
/**
 * Measurement state kept for one core (see core_stat).
 */
typedef struct {
	/*
	 * Events counted so far. Signed on purpose: the counter starts at a
	 * negative value and perf_receive() begins timing when it reaches 0.
	 */
	int64_t events;
	/* Cycle count captured by perf_receive() when 'events' is 0. */
	uint64_t begin_cycles;
	/* Cycle count captured when 'events' reaches PRINT_EVENT_COUNT. */
	uint64_t end_cycles;
	/* Incremented by perf_receive() each time a result is printed. */
	uint64_t print_count;
} perf_stat_t;
/**
 * Context attached to each EO when it is created in test_start().
 */
typedef struct {
	/*
	 * Queue that perf_receive() uses as its destination. test_start() sets
	 * it to the queue of the other EO in the pair.
	 */
	em_queue_t dest;
} eo_context_t;
/**
 * Event content: a plain byte buffer of DATA_SIZE bytes.
 * NOTE(review): the code that allocates events of this type is not present in
 * the visible source - presumably this is the payload of the test events.
 */
typedef struct {
	uint8_t data[DATA_SIZE];
} perf_event_t;
/**
 * Layout of the "PerfSharedMem" region reserved by test_init().
 */
typedef struct {
	/* One context per EO; pair i uses slots 2*i and 2*i+1. */
	eo_context_t eo_ctx_tbl[NUM_EO];
	/* EO handles, indexed like eo_ctx_tbl; walked by test_stop(). */
	em_eo_t eo_tbl[NUM_EO];
	/* Event pool, taken from appl_conf->pools[0] in test_start(). */
	em_pool_t pool;
} perf_shm_t;
/*
 * Statistics of the executing core (ENV_LOCAL is presumably a core/thread-local
 * storage qualifier - confirm in the env headers).
 * The event counter starts at -PRINT_EVENT_COUNT: perf_receive() records
 * begin_cycles only when the counter is 0, so the first PRINT_EVENT_COUNT
 * events handled by a core are not part of any measurement.
 */
static ENV_LOCAL perf_stat_t core_stat = {.events = -PRINT_EVENT_COUNT};
/*
 * Forward declarations of the local functions defined further down.
 * NOTE(review): this section is truncated. perf_start(), perf_stop() and
 * get_queue_priority() have no return type, and the declaration whose
 * parameter list ends in "em_queue_t queue, void *q_ctx" has lost its name and
 * leading parameters (it is presumably the receive function passed to
 * em_eo_create() as perf_receive). Restore the missing lines from version
 * control before building.
 */
perf_start(
void *eo_context, em_eo_t eo,
const em_eo_conf_t *conf);
perf_stop(void *eo_context, em_eo_t eo);
static void
em_queue_t queue, void *q_ctx);
static void
print_result(perf_stat_t *const perf_stat);
get_queue_priority(const int index);
/**
 * Program entry point: hands the command line to cm_setup() and uses its
 * result as the process exit status.
 */
int main(int argc, char *argv[])
{
	const int setup_status = cm_setup(argc, argv);

	return setup_status;
}
/*
 * Initialisation hook: obtains the shared memory region used by the test.
 *
 * appl_conf is unused.
 *
 * NOTE(review): neither 'core' nor 'perf_shm' is declared in the visible
 * source, and the statement guarded by the NULL check below is missing, so
 * this function does not compile as shown.
 */
void test_init(const appl_conf_t *appl_conf)
{
(void)appl_conf;
/* Core 0 reserves the named region; every other core looks it up by name. */
if (core == 0) {
perf_shm = env_shared_reserve("PerfSharedMem",
sizeof(perf_shm_t));
} else {
perf_shm = env_shared_lookup("PerfSharedMem");
}
/* NOTE(review): the failure handling for a NULL region is missing here. */
if (perf_shm == NULL)
else if (core == 0)
/* Only the core that reserved the region zeroes it. */
memset(perf_shm, 0, sizeof(perf_shm_t));
}
/*
 * Start hook: builds NUM_EO / 2 pairs of EOs, each EO with its own queue and
 * with the partner's queue as destination, then prepares and sends
 * NUM_EVENT_PER_QUEUE events to every queue.
 *
 * appl_conf supplies the event pool (pools[0]) and the values printed in the
 * start-up banner.
 *
 * NOTE(review): many statements are missing from this function as shown - the
 * queue creation calls, the assignments to 'ret' / 'start_ret', the event
 * allocation calls, the send calls that update 'num_sent', and the openings of
 * several test_fatal_if() invocations. The comments below describe only what
 * the remaining lines show; restore the missing lines before building.
 */
void test_start(const appl_conf_t *appl_conf)
{
/* Use the first application pool when at least one is configured. */
if (appl_conf->num_pools >= 1)
perf_shm->pool = appl_conf->pools[0];
else
/*
 * NOTE(review): the else-branch statement is missing; the banner print below
 * and the orphaned "Undefined application event pool!" message have run
 * together as a result.
 */
APPL_PRINT("\n"
"***********************************************************\n"
"EM APPLICATION: '%s' initializing:\n"
" %s: %s() - EM-core:%d\n"
" Application running on %u EM-cores (procs:%u, threads:%u)\n"
"***********************************************************\n"
"\n",
appl_conf->name, NO_PATH(__FILE__), __func__,
em_core_id(),
appl_conf->core_count, appl_conf->num_procs, appl_conf->num_threads,
perf_shm->pool);
"Undefined application event pool!");
/* Queue handles of every pair, kept for the send phase further down. */
em_queue_t queues_a[NUM_EO / 2];
em_queue_t queues_b[NUM_EO / 2];
/* Phase 1: create the queues and EOs of each pair. */
for (int i = 0; i < NUM_EO / 2; i++) {
em_queue_t queue_a, queue_b;
eo_context_t *eo_ctx_a, *eo_ctx_b;
em_eo_t eo;
/*
 * NOTE(review): the calls creating queue_a and queue_b are missing; only
 * their get_queue_priority(i) arguments and the failure message remain.
 */
get_queue_priority(i),
get_queue_priority(i),
"Queue creation failed, round:%d", i);
queues_a[i] = queue_a;
queues_b[i] = queue_b;
/* EO "a" of the pair: context/table slot 2*i, forwards to queue_b. */
eo_ctx_a = &perf_shm->eo_ctx_tbl[2 * i];
eo =
em_eo_create(
"pairs-eo-a", perf_start, NULL, perf_stop,
NULL, perf_receive, eo_ctx_a);
"EO(%d) creation failed!", 2 * i);
perf_shm->eo_tbl[2 * i] = eo;
eo_ctx_a->dest = queue_b;
/* NOTE(review): the calls that add queue_a to the EO and start it are missing. */
test_fatal_if(ret !=
EM_OK,
"EO add queue:%" PRI_STAT "\n"
ret, eo, queue_a);
"EO start:%" PRI_STAT " %" PRI_STAT "",
ret, start_ret);
/* EO "b" of the pair: context/table slot 2*i+1, forwards to queue_a. */
eo_ctx_b = &perf_shm->eo_ctx_tbl[2 * i + 1];
eo =
em_eo_create(
"pairs-eo-b", perf_start, NULL, perf_stop,
NULL, perf_receive, eo_ctx_b);
"EO(%d) creation failed!", 2 * i + 1);
perf_shm->eo_tbl[2 * i + 1] = eo;
eo_ctx_b->dest = queue_a;
/* NOTE(review): the calls that add queue_b to the EO and start it are missing. */
test_fatal_if(ret !=
EM_OK,
"EO add queue:%" PRI_STAT "\n"
ret, eo, queue_b);
"EO start:%" PRI_STAT " %" PRI_STAT "",
ret, start_ret);
}
/* Phase 2: prepare NUM_EVENT_PER_QUEUE events per queue and send them. */
for (int i = 0; i < NUM_EO / 2; i++) {
em_queue_t queue_a = queues_a[i];
em_queue_t queue_b = queues_b[i];
em_event_t events_a[NUM_EVENT_PER_QUEUE];
em_event_t events_b[NUM_EVENT_PER_QUEUE];
for (int j = 0; j < NUM_EVENT_PER_QUEUE; j++) {
em_event_t ev_a, ev_b;
/* NOTE(review): the allocation of ev_a / ev_b is missing. */
"Event allocation failed (%d, %d)", i, j);
events_a[j] = ev_a;
events_b[j] = ev_b;
}
/*
 * Sending is split into full rounds of SEND_MULTI_MAX events followed by one
 * round for the remainder; n is the offset of the current round.
 */
const int send_rounds = NUM_EVENT_PER_QUEUE / SEND_MULTI_MAX;
const int left_over = NUM_EVENT_PER_QUEUE % SEND_MULTI_MAX;
int num_sent = 0;
int m, n;
/* NOTE(review): the send calls that accumulate into num_sent are missing below. */
for (m = 0, n = 0; m < send_rounds; m++, n += SEND_MULTI_MAX) {
queue_a);
}
if (left_over) {
queue_a);
}
/* Every event destined for queue_a must have been sent. */
test_fatal_if(num_sent != NUM_EVENT_PER_QUEUE,
"Event send multi failed:%d (%d)\n"
num_sent, NUM_EVENT_PER_QUEUE, queue_a);
/* Same procedure for queue_b. */
num_sent = 0;
for (m = 0, n = 0; m < send_rounds; m++, n += SEND_MULTI_MAX) {
queue_b);
}
if (left_over) {
queue_b);
}
test_fatal_if(num_sent != NUM_EVENT_PER_QUEUE,
"Event send multi failed:%d (%d)\n"
num_sent, NUM_EVENT_PER_QUEUE, queue_b);
}
env_sync_mem();
}
/*
 * Stop hook: walks all NUM_EO entries of perf_shm->eo_tbl and checks the
 * status of stopping and then deleting each EO.
 *
 * appl_conf is unused.
 *
 * NOTE(review): 'ret' and 'core' are not declared in the visible source, and
 * the calls that stop and delete the EO (and assign 'ret') are missing; only
 * the status checks remain.
 */
void test_stop(const appl_conf_t *appl_conf)
{
em_eo_t eo;
int i;
(void)appl_conf;
APPL_PRINT("%s() on EM-core %d\n", __func__, core);
for (i = 0; i < NUM_EO; i++) {
eo = perf_shm->eo_tbl[i];
/* Status check for the EO stop (the stop call itself is missing). */
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " stop:%" PRI_STAT
"", eo, ret);
/* Status check for the EO delete (the delete call itself is missing). */
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " delete:%" PRI_STAT
"", eo, ret);
}
}
/**
 * Termination hook: logs the call and releases the shared memory region.
 *
 * @param appl_conf  Unused.
 *
 * NOTE(review): 'core' and 'perf_shm' are not declared in the visible source;
 * they are used here exactly as in the rest of the file.
 */
void test_term(const appl_conf_t *appl_conf)
{
	(void)appl_conf;

	APPL_PRINT("%s() on EM-core %d\n", __func__, core);

	/* Only core 0, which reserved the region in test_init(), frees it. */
	if (core != 0)
		return;

	env_shared_free(perf_shm);
}
/*
 * EO start function, passed to em_eo_create() in test_start().
 * All three parameters are unused.
 *
 * NOTE(review): the return type and the return statement are missing from the
 * visible source - presumably this returns an EM status such as EM_OK, like
 * perf_stop() does; confirm and restore.
 */
perf_start(
void *eo_context, em_eo_t eo,
const em_eo_conf_t *conf)
{
(void)eo_context;
(void)eo;
(void)conf;
}
/*
 * EO stop function, passed to em_eo_create() in test_start().
 * Checks the status held in 'ret' against EM_OK and returns it.
 * eo_context is unused.
 *
 * NOTE(review): the return type, the declaration of 'ret' and the call that
 * assigns it are missing; judging by the message text the missing call removes
 * all queues from the EO - confirm and restore.
 */
perf_stop(void *eo_context, em_eo_t eo)
{
(void)eo_context;
test_fatal_if(ret !=
EM_OK,
"EO remove queue all:%" PRI_STAT
" EO:%" PRI_EO "",
ret, eo);
return ret;
}
/*
 * Event receive function (registered as perf_receive in test_start()).
 * Maintains the per-core event counter, takes the begin/end cycle stamps and
 * triggers the result print; the received event is presumably forwarded to
 * eo_ctx->dest (the send call itself is not visible).
 *
 * NOTE(review): the line carrying the function name and its first parameters
 * is missing, as are the declaration of 'ret', the statement inside the
 * ALLOC_FREE_PER_EVENT branch, the send call and the opening of the error
 * report. 'eo_context' and 'type' are used below but not declared in view.
 */
static void
em_queue_t queue, void *queue_context)
{
/* Work on a local copy of the counter; it is written back at the end. */
int64_t events = core_stat.events;
eo_context_t *const eo_ctx = eo_context;
const em_queue_t dst_queue = eo_ctx->dest;
(void)type;
(void)queue;
(void)queue_context;
/* Once the application exit flag is set, return without counting. */
if (unlikely(appl_shm->exit_flag)) {
return;
}
if (unlikely(events == 0)) {
/* Counter at 0: a measurement interval starts here. */
core_stat.begin_cycles = env_get_cycle();
} else if (unlikely(events == PRINT_EVENT_COUNT)) {
/* Interval complete: stamp the end, bump the print count and report. */
core_stat.end_cycles = env_get_cycle();
core_stat.print_count += 1;
print_result(&core_stat);
/* -1 becomes 0 after the increment below, so the next event restarts timing. */
events = -1;
}
if (ALLOC_FREE_PER_EVENT) {
/* NOTE(review): the statement that used perf_shm->pool here is truncated. */
perf_shm->pool);
}
/* A failing status is only reported as fatal while the exit flag is clear. */
if (unlikely(ret !=
EM_OK)) {
test_fatal_if(!appl_shm->exit_flag,
ret, dst_queue);
}
events++;
core_stat.events = events;
}
/*
 * Returns the priority ('prio') to use for the queue with the given index.
 * When USE_DIFF_QUEUE_PRIO_LEVELS is non-zero the choice depends on
 * queue_index % 5, in three bands: remainder 0-1, remainder 2-3, and the rest.
 * Otherwise a single value is used for every index.
 *
 * NOTE(review): the return type, the declaration of 'prio' and every
 * assignment to it are missing from the visible source, so the actual
 * priority values cannot be stated here.
 */
get_queue_priority(const int queue_index)
{
if (USE_DIFF_QUEUE_PRIO_LEVELS) {
int remainder = queue_index % 5;
if (remainder <= 1)
else if (remainder <= 3)
else
} else {
}
return prio;
}
/*
 * Computes and prints the result of one measurement interval.
 *
 * perf_stat: statistics to report. end_cycles, begin_cycles, events and
 * print_count are read. perf_receive() calls this when the stored event count
 * equals PRINT_EVENT_COUNT, so the division by 'events' is by a non-zero value
 * on that path.
 *
 * NOTE(review): the argument list of the final APPL_PRINT() is cut off after
 * events_per_sec; RESULT_PRINTF_FMT expects three further values (MHz, core
 * number and a uint64_t, presumably mhz, the core id and print_count).
 */
static void
print_result(perf_stat_t *const perf_stat)
{
uint64_t diff;
uint32_t hz;
double mhz;
double cycles_per_event, events_per_sec;
uint64_t print_count;
/* Core frequency in Hz, converted to MHz. */
hz = env_core_hz();
mhz = ((double)hz) / 1000000.0;
/* Cycles elapsed between the begin and end stamps. */
diff = env_cycles_diff(perf_stat->end_cycles, perf_stat->begin_cycles);
print_count = perf_stat->print_count;
cycles_per_event = ((double)diff) / ((double)perf_stat->events);
/* MHz divided by cycles/event gives millions of events per second. */
events_per_sec = mhz / cycles_per_event;
APPL_PRINT(RESULT_PRINTF_FMT, cycles_per_event, events_per_sec,
}