#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include "cm_setup.h"
#include "cm_error_handler.h"
/* Number of EOs created by test_start(); also sizes perf_shm_t.eo_tbl. */
#define NUM_EO 128
/* Number of events prepared for each queue in test_start(). */
#define NUM_EVENT_PER_QUEUE 32
/* Size in bytes of the perf_event_t payload array. */
#define DATA_SIZE 250
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define MAX_NBR_OF_CORES 256
/*
 * Event count at which the receive function takes the end timestamp and
 * prints a result. core_stat.events is also initialised to the negative of
 * this value.
 */
#define PRINT_EVENT_COUNT 0xff0000
/*
 * NOTE(review): not referenced in the visible code; presumably the queue type
 * used when the test queues are created - confirm.
 */
#define QUEUE_TYPE EM_QUEUE_TYPE_ATOMIC
/* Chunk size used by test_start() to split the initial events into rounds. */
#define SEND_MULTI_MAX 32
/* Constant tested by `if (ALLOC_FREE_PER_EVENT)` in the receive function. */
#define ALLOC_FREE_PER_EVENT 0
/*
 * Format string used by print_result(). It consumes, in order: three
 * floating-point values (labelled cycles/event, Mevents/s/core and MHz),
 * an int (core number) and a uint64_t.
 */
#define RESULT_PRINTF_FMT \
"cycles/event:% -8.2f Mevents/s/core: %-6.2f %5.0f MHz core%02d %" PRIu64 "\n"
/* Performance counters used for the throughput measurement (see core_stat). */
typedef struct {
/* Running event count; the receive function compares it with 0 and
 * PRINT_EVENT_COUNT and increments it once per handled event. */
int64_t events;
/* Cycle counter value sampled when `events` is 0. */
uint64_t begin_cycles;
/* Cycle counter value sampled when `events` equals PRINT_EVENT_COUNT. */
uint64_t end_cycles;
/* Incremented by one each time a result is about to be printed. */
uint64_t print_count;
} perf_stat_t;
/*
 * Test event layout: an opaque buffer of DATA_SIZE bytes.
 * NOTE(review): no visible code in this file reads or writes the payload, or
 * refers to this type - presumably it only sizes the event allocation; confirm.
 */
typedef struct {
uint8_t data[DATA_SIZE];
} perf_event_t;
/*
 * Layout of the memory block reserved/looked up under the name
 * "PerfSharedMem" in test_init().
 */
typedef struct {
/* EO handles: written by test_start(), read back by test_stop(). */
em_eo_t eo_tbl[NUM_EO];
/* Event pool; test_start() sets it from appl_conf->pools[0] when the
 * application configuration provides at least one pool. */
em_pool_t pool;
} perf_shm_t;
/*
 * Statistics instance updated by the receive function.
 *
 * `events` starts at -PRINT_EVENT_COUNT, so the counter has to climb through
 * that many events before it reaches 0, which is where begin_cycles is
 * sampled; only the following PRINT_EVENT_COUNT events fall between the begin
 * and end timestamps.
 *
 * NOTE(review): ENV_LOCAL is defined outside this file - presumably it makes
 * the variable core/thread local; confirm.
 */
static ENV_LOCAL perf_stat_t core_stat = {.events = -PRINT_EVENT_COUNT};
/*
 * Forward declarations of the EO callbacks and the result printer defined
 * further down in this file.
 *
 * NOTE(review): these declarations look truncated and do not form valid C as
 * shown:
 *  - perf_start and perf_stop have no return type (and no storage class);
 *  - the declaration after the first `static void` has no function name and
 *    only its last two parameters (presumably the receive callback that is
 *    passed to em_eo_create() as perf_receive - confirm).
 * Restore the missing text from the upstream source rather than guessing.
 */
perf_start(
void *eo_context, em_eo_t eo,
const em_eo_conf_t *conf);
perf_stop(void *eo_context, em_eo_t eo);
static void
em_queue_t queue, void *q_ctx);
static void
print_result(perf_stat_t *const perf_stat);
/**
 * Program entry point.
 *
 * Hands the unmodified command line to cm_setup() and uses its return value
 * as the process exit status.
 *
 * @param argc  Number of command-line arguments.
 * @param argv  Command-line argument vector.
 *
 * @return Whatever cm_setup() returns.
 */
int main(int argc, char *argv[])
{
	const int status = cm_setup(argc, argv);

	return status;
}
/**
 * Initialisation hook: obtain the "PerfSharedMem" memory block.
 *
 * Where `core` is 0 the block is reserved with room for one perf_shm_t and
 * afterwards zero-filled; for any other value of `core` an already existing
 * block with the same name is looked up instead.
 *
 * @param appl_conf  Unused (explicitly cast to void).
 *
 * NOTE(review): `core` and `perf_shm` are not declared in this function nor
 * anywhere in the visible part of the file - their declarations appear to be
 * missing.
 * NOTE(review): the `if (perf_shm == NULL)` below has no statement before the
 * `else`, so this does not compile as shown; the error handling for a failed
 * reserve/lookup appears to have been lost. Restore it from upstream.
 */
void test_init(const appl_conf_t *appl_conf)
{
(void)appl_conf;
/* One caller creates the block, every other caller attaches to it by name. */
if (core == 0) {
perf_shm = env_shared_reserve("PerfSharedMem",
sizeof(perf_shm_t));
} else {
perf_shm = env_shared_lookup("PerfSharedMem");
}
/* NOTE(review): the body of this NULL check is missing (see header). */
if (perf_shm == NULL)
else if (core == 0)
memset(perf_shm, 0, sizeof(perf_shm_t));
}
/**
 * Start hook: set up the EOs, their queues and the initial events.
 *
 * What the visible code does:
 *  - stores appl_conf->pools[0] in perf_shm->pool when num_pools >= 1;
 *  - prints a start-up banner with the application name, file, function,
 *    EM-core id and core/process/thread counts;
 *  - loops NUM_EO times, creating an EO named "loop-eo" with perf_start,
 *    perf_stop and perf_receive as callbacks, and records each EO in
 *    perf_shm->eo_tbl and each queue in the local queues[] array;
 *  - loops over the queues again, filling an array of NUM_EVENT_PER_QUEUE
 *    events per queue and splitting it into send rounds of SEND_MULTI_MAX
 *    plus a remainder;
 *  - finishes with env_sync_mem().
 *
 * @param appl_conf  Application configuration (name, pools, core counts).
 *
 * NOTE(review): this function is heavily truncated and does not compile as
 * shown. In particular:
 *  - the `else` branch of the pool selection runs straight into the banner
 *    APPL_PRINT, and the banner passes perf_shm->pool as an extra argument
 *    with no matching conversion in the visible format string;
 *  - several lines are orphaned tails of calls whose beginning is missing
 *    (the string-literal lines ending in `);` and the bare `queue);` lines);
 *  - `ret` and `start_ret` are never declared, and `queue`, `ev` and
 *    `num_sent` are used without any visible assignment from an API call.
 * The queue creation, EO add-queue/start, event allocation and event send
 * calls presumably belong in those gaps - restore them from upstream.
 */
void test_start(const appl_conf_t *appl_conf)
{
/* Prefer the first pool supplied by the application configuration. */
if (appl_conf->num_pools >= 1)
perf_shm->pool = appl_conf->pools[0];
else
APPL_PRINT("\n"
"***********************************************************\n"
"EM APPLICATION: '%s' initializing:\n"
" %s: %s() - EM-core:%d\n"
" Application running on %u EM-cores (procs:%u, threads:%u)\n"
"***********************************************************\n"
"\n",
appl_conf->name, NO_PATH(__FILE__), __func__,
em_core_id(),
appl_conf->core_count, appl_conf->num_procs, appl_conf->num_threads,
perf_shm->pool);
"Undefined application event pool!");
/* Queue handles are kept so the second loop can target each queue. */
em_queue_t queues[NUM_EO];
/* First pass: one queue and one EO per iteration. */
for (int i = 0; i < NUM_EO; i++) {
em_queue_t queue;
em_eo_t eo;
"Queue creation failed, round:%d", i);
queues[i] = queue;
eo =
em_eo_create(
"loop-eo", perf_start, NULL, perf_stop, NULL,
perf_receive, NULL);
"EO(%d) creation failed!", i);
/* Remember the EO so test_stop() can find it again. */
perf_shm->eo_tbl[i] = eo;
test_fatal_if(ret !=
EM_OK,
"EO add queue:%" PRI_STAT "\n"
ret, eo, queue);
"EO start:%" PRI_STAT " %" PRI_STAT "",
ret, start_ret);
}
/* Second pass: prepare NUM_EVENT_PER_QUEUE events for every queue. */
for (int i = 0; i < NUM_EO; i++) {
em_queue_t queue = queues[i];
em_event_t events[NUM_EVENT_PER_QUEUE];
for (int j = 0; j < NUM_EVENT_PER_QUEUE; j++) {
em_event_t ev;
"Event allocation failed (%d, %d)", i, j);
events[j] = ev;
}
/* Full chunks of SEND_MULTI_MAX events, then whatever is left over. */
const int send_rounds = NUM_EVENT_PER_QUEUE / SEND_MULTI_MAX;
const int left_over = NUM_EVENT_PER_QUEUE % SEND_MULTI_MAX;
int num_sent = 0;
int m, n;
/* n is the offset of the current chunk within events[]. */
for (m = 0, n = 0; m < send_rounds; m++, n += SEND_MULTI_MAX) {
queue);
}
if (left_over) {
queue);
}
/* Every prepared event must have been accepted. */
test_fatal_if(num_sent != NUM_EVENT_PER_QUEUE,
"Event send multi failed:%d (%d)\n"
num_sent, NUM_EVENT_PER_QUEUE, queue);
}
/* NOTE(review): presumably a memory barrier publishing the setup - confirm. */
env_sync_mem();
}
/**
 * Stop hook: walk perf_shm->eo_tbl and check the outcome of stopping and
 * deleting each of the NUM_EO EOs.
 *
 * @param appl_conf  Unused (explicitly cast to void).
 *
 * NOTE(review): `core` and `ret` are not declared in this function, and the
 * calls that should produce `ret` before each test_fatal_if() are not
 * present - judging by the error messages they are the EO stop and EO delete
 * calls; restore them from upstream.
 */
void test_stop(const appl_conf_t *appl_conf)
{
em_eo_t eo;
int i;
(void)appl_conf;
APPL_PRINT("%s() on EM-core %d\n", __func__, core);
for (i = 0; i < NUM_EO; i++) {
/* EO handle stored by test_start(). */
eo = perf_shm->eo_tbl[i];
/* Fatal if the (missing) stop step did not yield EM_OK. */
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " stop:%" PRI_STAT
"", eo, ret);
/* Fatal if the (missing) delete step did not yield EM_OK. */
test_fatal_if(ret !=
EM_OK,
"EO:%" PRI_EO " delete:%" PRI_STAT
"", eo, ret);
}
}
/**
 * Termination hook: log the call and release the shared test memory.
 *
 * The function name and `core` are always printed; perf_shm is freed only
 * where `core` is 0.
 *
 * @param appl_conf  Unused (explicitly cast to void).
 *
 * NOTE(review): `core` and `perf_shm` are not declared inside this function;
 * they have to come from an enclosing scope that is not visible here.
 */
void test_term(const appl_conf_t *appl_conf)
{
	(void)appl_conf;

	APPL_PRINT("%s() on EM-core %d\n", __func__, core);

	/* Nothing to release unless core is 0. */
	if (core != 0)
		return;

	env_shared_free(perf_shm);
}
/**
 * EO start callback (passed to em_eo_create() in test_start()).
 *
 * The visible body does nothing except mark all three parameters as unused.
 *
 * @param eo_context  Unused.
 * @param eo          Unused.
 * @param conf        Unused.
 *
 * NOTE(review): the definition has no return type and no return statement -
 * both appear to have been lost. perf_stop() returns a status that is compared
 * with EM_OK, so presumably this callback should return such a status too;
 * confirm against the EM API and fix together with the forward declaration.
 */
perf_start(
void *eo_context, em_eo_t eo,
const em_eo_conf_t *conf)
{
(void)eo_context;
(void)eo;
(void)conf;
}
/**
 * EO stop callback (passed to em_eo_create() in test_start()).
 *
 * Treats any `ret` other than EM_OK as fatal, then returns `ret`.
 *
 * @param eo_context  Unused.
 * @param eo          EO being stopped; only used in the error message here.
 *
 * @return The value of `ret`.
 *
 * NOTE(review): the definition has no return type, and `ret` is neither
 * declared nor assigned - judging by the error message, a call that removes
 * all queues from the EO is missing before the check. Restore from upstream.
 */
perf_stop(void *eo_context, em_eo_t eo)
{
(void)eo_context;
test_fatal_if(ret !=
EM_OK,
"EO remove queue all:%" PRI_STAT
" EO:%" PRI_EO "",
ret, eo);
return ret;
}
/**
 * EO receive callback: counts events and drives the periodic measurement.
 *
 * Visible behaviour per call:
 *  - returns immediately when appl_shm->exit_flag is set;
 *  - when the counter is 0, samples the cycle counter into begin_cycles;
 *  - when the counter equals PRINT_EVENT_COUNT, samples end_cycles, bumps
 *    print_count, calls print_result() and sets the local counter to -1 so
 *    that the increment at the end brings it back to 0;
 *  - increments the counter and writes it back to core_stat.events.
 *
 * core_stat.events is only written back at the very end, so print_result()
 * still sees the value PRINT_EVENT_COUNT stored by the previous call.
 *
 * NOTE(review): the function header is truncated - the name and the leading
 * parameters are missing (the body uses eo_context and type; the callback is
 * registered as perf_receive in test_start()). The statements inside
 * `if (ALLOC_FREE_PER_EVENT)` and the statement that assigns `ret` are
 * missing as well, and the test_fatal_if() call lacks its message argument.
 * `ret` and `appl_shm` are not declared in the visible code.
 */
static void
em_queue_t queue, void *queue_context)
{
/* Work on a local copy; stored back once at the end. */
int64_t events = core_stat.events;
(void)eo_context;
(void)type;
(void)queue_context;
/* Stop processing once the application has flagged exit. */
if (unlikely(appl_shm->exit_flag)) {
return;
}
if (unlikely(events == 0)) {
/* Start of a measurement window. */
core_stat.begin_cycles = env_get_cycle();
} else if (unlikely(events == PRINT_EVENT_COUNT)) {
/* End of a measurement window: report, then restart the count. */
core_stat.end_cycles = env_get_cycle();
core_stat.print_count += 1;
print_result(&core_stat);
/* -1 so that the events++ below yields 0 for the next window. */
events = -1;
}
/* NOTE(review): branch body is truncated; disabled while the macro is 0. */
if (ALLOC_FREE_PER_EVENT) {
perf_shm->pool);
}
/* A failure is only fatal while the application is not exiting. */
if (unlikely(ret !=
EM_OK)) {
test_fatal_if(!appl_shm->exit_flag,
ret, queue);
}
events++;
core_stat.events = events;
}
/**
 * Compute and print the throughput figures for one measurement window.
 *
 * cycles/event is the cycle difference between end_cycles and begin_cycles
 * divided by perf_stat->events; the events-per-second figure is the core
 * frequency in MHz divided by cycles/event, i.e. it is expressed in millions
 * of events per second.
 *
 * @param perf_stat  Counters to report; read only by the visible code.
 *
 * NOTE(review): the APPL_PRINT call at the end is cut off after its third
 * argument - RESULT_PRINTF_FMT expects five values, so the MHz, core number
 * and print_count arguments presumably followed; restore from upstream.
 */
static void
print_result(perf_stat_t *const perf_stat)
{
uint64_t diff;
uint32_t hz;
double mhz;
double cycles_per_event, events_per_sec;
uint64_t print_count;
/* Core frequency, converted from Hz to MHz. */
hz = env_core_hz();
mhz = ((double)hz) / 1000000.0;
/* Cycles spent between the begin and end samples. */
diff = env_cycles_diff(perf_stat->end_cycles, perf_stat->begin_cycles);
print_count = perf_stat->print_count;
cycles_per_event = ((double)diff) / ((double)perf_stat->events);
/* MHz / (cycles per event) = millions of events per second. */
events_per_sec = mhz / cycles_per_event;
APPL_PRINT(RESULT_PRINTF_FMT, cycles_per_event, events_per_sec,
}