#include <inttypes.h>
#include <string.h>
#include <stdio.h>
#include "cm_setup.h"
#include "cm_error_handler.h"
#define NUM_EO 2
#define NUM_EVENT 512
#define DATA_SIZE 512
#define MAX_NBR_OF_CORES 256
#define WORK_LOOPS 40
#define PRINT_EVENT_COUNT 0x20000
#define SEND_MULTI_MAX 32
#define CHECK_SEQ_PER_EVENT 1
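/* Per-core performance statistics, one entry per EM-core */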
typedef union {
	struct {
		uint64_t events;
		uint64_t begin_cycles;
		uint64_t end_cycles;
		uint64_t print_count;
		int atomic_processing_end;
		int rounds;
		int ready;
		double cycles_per_event;
	};
	/* Pad to a full cache line to avoid false sharing between cores */
	uint8_t u8[ENV_CACHE_LINE_SIZE];
} perf_stat_t;

COMPILE_TIME_ASSERT(sizeof(perf_stat_t) == ENV_CACHE_LINE_SIZE,
		    PERF_STAT_T_SIZE_ERROR);
typedef struct {
em_eo_t id;
int next_seq;
int initialize_events;
} eo_context_t;
typedef union {
	eo_context_t eo_ctx;
	/* Pad to a full cache line to avoid false sharing */
	uint8_t u8[ENV_CACHE_LINE_SIZE];
} eo_context_array_elem_t;

COMPILE_TIME_ASSERT(sizeof(eo_context_array_elem_t) == ENV_CACHE_LINE_SIZE,
		    PERF_EO_CONTEXT_SIZE_ERROR);
typedef struct {
em_queue_t dest;
int seq;
uint8_t data[DATA_SIZE];
} perf_event_t;
typedef struct {
	em_pool_t pool;                 /* Event pool used by the test */
	eo_context_array_elem_t perf_eo_context[NUM_EO];
	perf_stat_t core_stat[MAX_NBR_OF_CORES]; /* Per-core statistics */
	env_atomic64_t ready_count;     /* Cores that completed a round */
	env_atomic64_t seen_all_ready;  /* Cores that saw all cores ready */
	uint64_t core_count;            /* Number of EM-cores in use */
} perf_shm_t;

/* EM-core-local pointer to the shared memory */
static ENV_LOCAL perf_shm_t *perf_shm;
static em_status_t
perf_start(void *eo_context, em_eo_t eo, const em_eo_conf_t *conf);

static em_status_t
perf_stop(void *eo_context, em_eo_t eo);
static void
initialize_events(em_queue_t queue_a, em_queue_t queue_b);
static void
perf_receive_a(void *eo_context, em_event_t event, em_event_type_t type,
	       em_queue_t queue, void *q_ctx);
static void
perf_receive_b(void *eo_context, em_event_t event, em_event_type_t type,
	       em_queue_t queue, void *q_ctx);
static void
calc_result(perf_stat_t *const perf_stat, const uint64_t events);
static void
print_result(perf_stat_t *const perf_stat);
static int
get_queue_priority(const int index);
static void
check_seq_per_event(eo_context_t *const eo_ctx, perf_event_t *const perf,
em_queue_t queue);
static void
do_dummy_work(unsigned int work_loops);
int main(int argc, char *argv[])
{
return cm_setup(argc, argv);
}
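/*
 * Test init: reserve the shared memory on EM-core 0, look it up on all
 * other cores.
 */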
void test_init(const appl_conf_t *appl_conf)
{
	const int core = em_core_id();

	(void)appl_conf;

	if (core == 0) {
perf_shm = env_shared_reserve("PerfSharedMem",
sizeof(perf_shm_t));
} else {
perf_shm = env_shared_lookup("PerfSharedMem");
}
	if (perf_shm == NULL) {
		APPL_EXIT_FAILURE("Perf init failed on EM-core:%u\n",
				  em_core_id());
} else if (core == 0) {
memset(perf_shm, 0, sizeof(perf_shm_t));
env_atomic64_init(&perf_shm->ready_count);
env_atomic64_init(&perf_shm->seen_all_ready);
}
}
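/*
 * Test startup: create the EO pairs with one atomic queue each and send
 * the initial start event into queue 'A' of every pair.
 */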
void test_start(const appl_conf_t *appl_conf)
{
	em_eo_t eo;
	em_queue_t queue_a, queue_b;
	em_status_t ret, start_ret = EM_ERROR;
	eo_context_t *eo_ctx;
	int i;

	/* Use the first provided event pool, or the EM default pool */
	if (appl_conf->num_pools >= 1)
		perf_shm->pool = appl_conf->pools[0];
	else
		perf_shm->pool = EM_POOL_DEFAULT;

	perf_shm->core_count = appl_conf->core_count;
APPL_PRINT("\n"
"***********************************************************\n"
"EM APPLICATION: '%s' initializing:\n"
" %s: %s() - EM-core:%d\n"
" Application running on %u EM-cores (procs:%u, threads:%u)\n"
"***********************************************************\n"
"\n",
appl_conf->name, NO_PATH(__FILE__), __func__,
em_core_id(),
appl_conf->core_count, appl_conf->num_procs, appl_conf->num_threads,
perf_shm->pool);
"Undefined application event pool!");
for (i = 0; i < NUM_EO / 2; i++) {
		char eo_name[EM_EO_NAME_LEN];
		char queue_name[EM_QUEUE_NAME_LEN];
		em_event_t start_event;
		perf_event_t *perf;
eo_ctx = &perf_shm->perf_eo_context[2 * i].eo_ctx;
eo_ctx->initialize_events = 1;
eo_ctx->next_seq = 0;
snprintf(eo_name, sizeof(eo_name), "EO-A%i", i);
eo_name[sizeof(eo_name) - 1] = '\0';
eo =
em_eo_create(eo_name, perf_start, NULL, perf_stop, NULL,
perf_receive_a, eo_ctx);
snprintf(queue_name, sizeof(queue_name), "Q-A%i", i);
queue_name[sizeof(queue_name) - 1] = '\0';
		queue_a = em_queue_create(queue_name, EM_QUEUE_TYPE_ATOMIC,
					  get_queue_priority(i),
					  EM_QUEUE_GROUP_DEFAULT, NULL);

		ret = em_eo_add_queue_sync(eo, queue_a);
		test_fatal_if(ret != EM_OK,
			      "EO or Q creation failed:%" PRI_STAT "\n"
			      "EO:%" PRI_EO " queue:%" PRI_QUEUE "",
			      ret, eo, queue_a);

		ret = em_eo_start_sync(eo, &start_ret, NULL);
		test_fatal_if(ret != EM_OK || start_ret != EM_OK,
			      "EO start failed:%" PRI_STAT " %" PRI_STAT "\n"
			      "EO:%" PRI_EO "", ret, start_ret, eo);
eo_ctx = &perf_shm->perf_eo_context[2 * i + 1].eo_ctx;
eo_ctx->next_seq = 0;
snprintf(eo_name, sizeof(eo_name), "EO-B%i", i);
eo_name[sizeof(eo_name) - 1] = '\0';
eo =
em_eo_create(eo_name, perf_start, NULL, perf_stop, NULL,
perf_receive_b, eo_ctx);
snprintf(queue_name, sizeof(queue_name), "Q-B%i", i);
queue_name[sizeof(queue_name) - 1] = '\0';
		queue_b = em_queue_create(queue_name, EM_QUEUE_TYPE_ATOMIC,
					  get_queue_priority(i),
					  EM_QUEUE_GROUP_DEFAULT, NULL);

		ret = em_eo_add_queue_sync(eo, queue_b);
		test_fatal_if(ret != EM_OK,
			      "EO add queue:%" PRI_STAT "\n"
			      "EO:%" PRI_EO " queue:%" PRI_QUEUE "",
			      ret, eo, queue_b);

		ret = em_eo_start_sync(eo, &start_ret, NULL);
		test_fatal_if(ret != EM_OK || start_ret != EM_OK,
			      "EO start failed:%" PRI_STAT " %" PRI_STAT "\n"
			      "EO: %" PRI_EO "", ret, start_ret, eo);

		/* Alloc and send the startup event into queue A */
		start_event = em_alloc(sizeof(perf_event_t), EM_EVENT_TYPE_SW,
				       perf_shm->pool);
		test_fatal_if(start_event == EM_EVENT_UNDEF,
			      "Start event alloc failed");

		perf = em_event_pointer(start_event);
perf->seq = 0;
perf->dest = queue_b;
		ret = em_send(start_event, queue_a);
		test_fatal_if(ret != EM_OK,
			      "Start event send:%" PRI_STAT "\n"
			      "Queue:%" PRI_QUEUE "", ret, queue_a);
}
env_sync_mem();
}
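/*
 * Test stop: stop all EOs, then remove their queues and delete the EOs.
 */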
void test_stop(const appl_conf_t *appl_conf)
{
	const int core = em_core_id();
	em_status_t ret;
	em_eo_t eo;
	int i;

	(void)appl_conf;

	APPL_PRINT("%s() on EM-core %d\n", __func__, core);
for (i = 0; i < NUM_EO; i++) {
		eo = perf_shm->perf_eo_context[i].eo_ctx.id;
		ret = em_eo_stop_sync(eo);
		test_fatal_if(ret != EM_OK,
			      "EO:%" PRI_EO " stop:%" PRI_STAT "", eo, ret);
}
for (i = 0; i < NUM_EO; i++) {
		eo = perf_shm->perf_eo_context[i].eo_ctx.id;

		ret = em_eo_remove_queue_all_sync(eo, 1 /* delete queues */);
		test_fatal_if(ret != EM_OK,
			      "EO rem-Q-all-sync:%" PRI_STAT " EO:%" PRI_EO "",
			      ret, eo);

		ret = em_eo_delete(eo);
		test_fatal_if(ret != EM_OK,
			      "EO:%" PRI_EO " delete:%" PRI_STAT "", eo, ret);
}
}
void test_term(const appl_conf_t *appl_conf)
{
	const int core = em_core_id();

	(void)appl_conf;

	APPL_PRINT("%s() on EM-core %d\n", __func__, core);
if (core == 0) {
env_shared_free(perf_shm);
}
}
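/* EO start function: store the EO id into the EO context */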
static em_status_t
perf_start(void *eo_context, em_eo_t eo, const em_eo_conf_t *conf)
{
	eo_context_t *eo_ctx = eo_context;
	char eo_name[EM_EO_NAME_LEN];

	(void)conf;

	em_eo_get_name(eo, eo_name, sizeof(eo_name));
	APPL_PRINT("%s (id:%" PRI_EO ") starting.\n", eo_name, eo);

	eo_ctx->id = eo;

	return EM_OK;
}
static em_status_t
perf_stop(void *eo_context, em_eo_t eo)
{
	char eo_name[EM_EO_NAME_LEN];

	(void)eo_context;

	em_eo_get_name(eo, eo_name, sizeof(eo_name));
	APPL_PRINT("%s (id:%" PRI_EO ") stopping.\n", eo_name, eo);

	return EM_OK;
}
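/*
 * Allocate the NUM_EVENT test events, set their sequence numbers and send
 * them into queue 'A' in bursts of at most SEND_MULTI_MAX events.
 */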
static void
initialize_events(em_queue_t queue_a, em_queue_t queue_b)
{
em_event_t events[NUM_EVENT];
int i;
for (i = 0; i < NUM_EVENT; i++) {
		perf_event_t *perf;

		events[i] = em_alloc(sizeof(perf_event_t), EM_EVENT_TYPE_SW,
				     perf_shm->pool);
		test_fatal_if(events[i] == EM_EVENT_UNDEF,
			      "Event alloc failed (%d)", i);

		perf = em_event_pointer(events[i]);
		perf->seq = i;
		perf->dest = queue_b;
}
const int send_rounds = NUM_EVENT / SEND_MULTI_MAX;
const int left_over = NUM_EVENT % SEND_MULTI_MAX;
int num_sent = 0;
	for (i = 0; i < send_rounds; i++) {
		num_sent += em_send_multi(&events[num_sent], SEND_MULTI_MAX,
					  queue_a);
	}
	if (left_over) {
		num_sent += em_send_multi(&events[num_sent], left_over,
					  queue_a);
	}
	if (unlikely(num_sent != NUM_EVENT)) {
		test_fatal_if(!appl_shm->exit_flag,
			      "Event send multi failed:%d (%d)\n"
			      "Queue:%" PRI_QUEUE "",
			      num_sent, NUM_EVENT, queue_a);
		/* Free the events that could not be sent */
		for (i = num_sent; i < NUM_EVENT; i++)
			em_free(events[i]);
	}
}
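/*
 * Receive function for EO 'A' of a pair.
 * Forwards every event to the paired queue and keeps the per-core cycle
 * statistics: after PRINT_EVENT_COUNT events the round is closed, the
 * result is calculated and printed, and once all cores are ready the core
 * toggles between normal atomic processing and em_atomic_processing_end().
 */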
static void
perf_receive_a(void *eo_context, em_event_t event, em_event_type_t type,
	       em_queue_t queue, void *q_ctx)
{
	const int core = em_core_id();
	uint64_t events = perf_shm->core_stat[core].events;
	int call_atomic_processing_end =
		perf_shm->core_stat[core].atomic_processing_end;
	int ready = perf_shm->core_stat[core].ready;
	uint64_t ready_count;
	perf_event_t *perf = em_event_pointer(event);
	em_queue_t dest_queue;
	em_status_t ret;

	(void)type;
	(void)q_ctx;
	if (unlikely(appl_shm->exit_flag)) {
		em_free(event);
		return;
	}
if (unlikely(events == 0)) {
eo_context_t *const eo_ctx = eo_context;
		if (unlikely(eo_ctx->initialize_events)) {
			/* Start-up: create and send the actual test events */
			eo_ctx->initialize_events = 0;
			initialize_events(queue, perf->dest);
			/* The start event itself is no longer needed */
			em_free(event);
			return;
		}
perf_shm->core_stat[core].begin_cycles = env_get_cycle();
} else if (unlikely(!ready && events > PRINT_EVENT_COUNT)) {
perf_shm->core_stat[core].end_cycles = env_get_cycle();
int rounds = perf_shm->core_stat[core].rounds++;
if (rounds % 3 == 1) {
calc_result(&perf_shm->core_stat[core], events);
} else if (rounds % 3 == 2) {
print_result(&perf_shm->core_stat[core]);
ready = 1;
perf_shm->core_stat[core].ready = 1;
env_atomic64_inc(&perf_shm->ready_count);
}
}
events++;
if (CHECK_SEQ_PER_EVENT)
check_seq_per_event(eo_context, perf, queue);
dest_queue = perf->dest;
perf->dest = queue;
	perf_shm->core_stat[core].events = events;

	/* Send the event to the queue of the other EO in the pair */
	ret = em_send(event, dest_queue);
	if (unlikely(ret != EM_OK)) {
		em_free(event);
		test_fatal_if(!appl_shm->exit_flag,
			      "EM send:%" PRI_STAT " Queue:%" PRI_QUEUE "",
			      ret, dest_queue);
		return;
	}

	/* Release the atomic context before doing the dummy work */
	if (call_atomic_processing_end)
		em_atomic_processing_end();
if (unlikely(ready)) {
ready_count = env_atomic64_get(&perf_shm->ready_count);
if (ready_count == perf_shm->core_count) {
perf_shm->core_stat[core].atomic_processing_end =
!call_atomic_processing_end;
perf_shm->core_stat[core].ready = 0;
events = 0;
perf_shm->core_stat[core].events = 0;
uint64_t seen_all_ready =
env_atomic64_add_return(&perf_shm->seen_all_ready, 1);
if (seen_all_ready == perf_shm->core_count) {
env_atomic64_set(&perf_shm->ready_count, 0);
env_atomic64_set(&perf_shm->seen_all_ready, 0);
}
}
}
do_dummy_work(WORK_LOOPS);
}
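/*
 * Receive function for EO 'B' of a pair.
 * Forwards every event back to the paired queue and counts the events;
 * the results are calculated and printed by perf_receive_a().
 */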
static void
perf_receive_b(void *eo_context, em_event_t event, em_event_type_t type,
	       em_queue_t queue, void *q_ctx)
{
	const int core = em_core_id();
	const int call_atomic_processing_end =
		perf_shm->core_stat[core].atomic_processing_end;
	uint64_t events = perf_shm->core_stat[core].events;
	perf_event_t *perf = em_event_pointer(event);
	em_queue_t dest_queue;
	em_status_t ret;

	(void)type;
	(void)q_ctx;
	if (unlikely(appl_shm->exit_flag)) {
		em_free(event);
		return;
	}
if (unlikely(events == 0)) {
perf_shm->core_stat[core].begin_cycles = env_get_cycle();
}
events++;
if (CHECK_SEQ_PER_EVENT)
check_seq_per_event(eo_context, perf, queue);
dest_queue = perf->dest;
perf->dest = queue;
	perf_shm->core_stat[core].events = events;

	/* Send the event back to the queue of the other EO in the pair */
	ret = em_send(event, dest_queue);
	if (unlikely(ret != EM_OK)) {
		em_free(event);
		test_fatal_if(!appl_shm->exit_flag,
			      "EM send:%" PRI_STAT " Queue:%" PRI_QUEUE "",
			      ret, dest_queue);
		return;
	}

	/* Release the atomic context before doing the dummy work */
	if (call_atomic_processing_end)
		em_atomic_processing_end();
do_dummy_work(WORK_LOOPS);
}
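/* Verify that events arrive in the expected sequence order */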
static void
check_seq_per_event(eo_context_t *const eo_ctx, perf_event_t *const perf,
em_queue_t queue)
{
int seq = perf->seq;
	if (unlikely(seq != eo_ctx->next_seq)) {
		char eo_name[EM_EO_NAME_LEN];
		char queue_name[EM_QUEUE_NAME_LEN];

		em_eo_get_name(eo_ctx->id, eo_name, sizeof(eo_name));
		em_queue_get_name(queue, queue_name, sizeof(queue_name));

		APPL_PRINT("Bad sequence number. %s(id:%" PRI_EO "),\t"
			   "%s(id:%" PRI_QUEUE ") expected seq %i, event seq %i\n",
			   eo_name, eo_ctx->id, queue_name, queue,
			   eo_ctx->next_seq, seq);
}
if (likely(eo_ctx->next_seq < (NUM_EVENT - 1)))
eo_ctx->next_seq++;
else
eo_ctx->next_seq = 0;
}
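/*
 * Dummy per-event workload: allocate a work buffer and copy half of its
 * data area, repeated 'work_loops' times.
 */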
static void
do_dummy_work(unsigned int work_loops)
{
em_event_t workbuf_event;
perf_event_t *workbuf;
uint8_t *from, *to;
unsigned int i;
for (i = 0; i < work_loops && !appl_shm->exit_flag; i++) {
		workbuf_event = em_alloc(sizeof(perf_event_t),
					 EM_EVENT_TYPE_SW, perf_shm->pool);
		if (unlikely(workbuf_event == EM_EVENT_UNDEF)) {
			test_fatal_if(!appl_shm->exit_flag,
				      "em_alloc(pool:%" PRI_POOL ") of buf:%u of tot:%u failed!",
				      perf_shm->pool, i, work_loops);
			return;
		}
		workbuf = em_event_pointer(workbuf_event);
if (likely(workbuf)) {
from = &workbuf->data[DATA_SIZE / 2];
to = &workbuf->data[0];
memcpy(to, from, DATA_SIZE / 2);
		}
		em_free(workbuf_event);
}
}
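/* Calculate the average number of cycles consumed per event */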
static void
calc_result(perf_stat_t *const perf_stat, const uint64_t events)
{
uint64_t diff;
double cycles_per_event;
diff = env_cycles_diff(perf_stat->end_cycles, perf_stat->begin_cycles);
cycles_per_event = ((double)diff) / ((double)events);
perf_stat->cycles_per_event = cycles_per_event;
}
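/* Spread the queues of the EO pairs over low, normal and high priority */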
static int
get_queue_priority(const int queue_index)
{
int remainder = queue_index % 5;
	if (remainder <= 1)
		return EM_QUEUE_PRIO_LOW;
	else if (remainder <= 3)
		return EM_QUEUE_PRIO_NORMAL;
	else
		return EM_QUEUE_PRIO_HIGH;
}
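/* Print the measurement result for this core and processing mode */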
static void
print_result(perf_stat_t *const perf_stat)
{
const uint32_t hz = env_core_hz();
const double mhz = ((double)hz) / 1000000.0;
const double cycles_per_event = perf_stat->cycles_per_event;
const double events_per_sec = mhz * perf_shm->core_count /
cycles_per_event;
const uint64_t print_count = perf_stat->print_count++;
if (perf_stat->atomic_processing_end) {
APPL_PRINT("em_atomic_processing_end():%10.0f cycles/event\t"
"events/s:%.2f M @%.2f MHz (core-%02i %" PRIu64 ")\n",
			   cycles_per_event, events_per_sec, mhz,
			   em_core_id(), print_count);
} else {
APPL_PRINT("normal atomic processing:%12.0f cycles/event\t"
"events/s:%.2f M @%.2f MHz (core-%02i %" PRIu64 ")\n",
			   cycles_per_event, events_per_sec, mhz,
			   em_core_id(), print_count);
}
}