Skip to content

Commit

Permalink
Merge pull request #368 from JeffersonLab/nbrei_logger_improvements
Browse files Browse the repository at this point in the history
Improve log output
  • Loading branch information
nathanwbrei authored Sep 27, 2024
2 parents 95e6b8d + 9a1d2e1 commit 3a9c913
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 61 deletions.
40 changes: 19 additions & 21 deletions src/libraries/JANA/Engine/JScheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ JArrow* JScheduler::next_assignment(uint32_t worker_id, JArrow* assignment, JArr

std::lock_guard<std::mutex> lock(m_mutex);

LOG_DEBUG(logger) << "Worker " << worker_id << " checking in: "
LOG_TRACE(logger) << "JScheduler: Worker " << worker_id << ": Returned arrow "
<< ((assignment == nullptr) ? "idle" : assignment->get_name()) << " -> " << to_string(last_result) << LOG_END;

// Check latest arrow back in
Expand All @@ -47,8 +47,8 @@ JArrow* JScheduler::next_assignment(uint32_t worker_id, JArrow* assignment, JArr

JArrow* next = checkout_unprotected();

LOG_DEBUG(logger) << "Worker " << worker_id << " assigned: "
<< ((next == nullptr) ? "idle" : next->get_name()) << LOG_END;
LOG_TRACE(logger) << "JScheduler: Worker " << worker_id << " assigned arrow "
<< ((next == nullptr) ? "(idle)" : next->get_name()) << LOG_END;
return next;

}
Expand All @@ -58,8 +58,8 @@ void JScheduler::last_assignment(uint32_t worker_id, JArrow* assignment, JArrowM

std::lock_guard<std::mutex> lock(m_mutex);

LOG_DEBUG(logger) << "Worker " << worker_id << " checking in: "
<< ((assignment == nullptr) ? "idle" : assignment->get_name())
LOG_TRACE(logger) << "JScheduler: Worker " << worker_id << ": returned arrow "
<< ((assignment == nullptr) ? "(idle)" : assignment->get_name())
<< " -> " << to_string(last_result) << "). Shutting down!" << LOG_END;

if (assignment != nullptr) {
Expand Down Expand Up @@ -110,7 +110,7 @@ void JScheduler::checkin_unprotected(JArrow* assignment, JArrowMetrics::Status l
as.status = ArrowStatus::Finalized;
m_topology_state.active_or_draining_arrow_count--;

LOG_DEBUG(logger) << "Deactivated arrow '" << assignment->get_name() << "' (" << m_topology_state.active_or_draining_arrow_count << " remaining)" << LOG_END;
LOG_TRACE(logger) << "JScheduler: Deactivated arrow " << assignment->get_name() << " (" << m_topology_state.active_or_draining_arrow_count << " remaining)" << LOG_END;

for (size_t downstream: m_topology_state.arrow_states[index].downstream_arrow_indices) {
m_topology_state.arrow_states[downstream].active_or_draining_upstream_arrow_count--;
Expand All @@ -119,12 +119,12 @@ void JScheduler::checkin_unprotected(JArrow* assignment, JArrowMetrics::Status l
else if (found_draining_stage_or_sink) {
// Drain arrow
as.status = ArrowStatus::Draining;
LOG_DEBUG(logger) << "Draining arrow '" << assignment->get_name() << "' (" << m_topology_state.active_or_draining_arrow_count << " remaining)" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Draining arrow " << assignment->get_name() << " (" << m_topology_state.active_or_draining_arrow_count << " remaining)" << LOG_END;
}

// Test if this was the last arrow running
if (m_topology_state.active_or_draining_arrow_count == 0) {
LOG_DEBUG(logger) << "All arrows are inactive. Deactivating topology." << LOG_END;
LOG_DEBUG(logger) << "JScheduler: All arrows are inactive. Deactivating topology." << LOG_END;
achieve_topology_pause_unprotected();
}
}
Expand Down Expand Up @@ -189,10 +189,10 @@ void JScheduler::initialize_topology() {
void JScheduler::drain_topology() {
std::lock_guard<std::mutex> lock(m_mutex);
if (m_topology_state.current_topology_status == TopologyStatus::Finalized) {
LOG_DEBUG(logger) << "JScheduler: drain(): Skipping because topology is already Finalized" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Draining topology: Skipping because topology is already Finalized" << LOG_END;
return;
}
LOG_DEBUG(logger) << "JScheduler: drain_topology()" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Draining topology" << LOG_END;

// We pause (as opposed to finish) for two reasons:
// 1. There might be workers in the middle of calling eventSource->GetEvent.
Expand All @@ -213,10 +213,10 @@ void JScheduler::run_topology(int nthreads) {
std::lock_guard<std::mutex> lock(m_mutex);
TopologyStatus current_status = m_topology_state.current_topology_status;
if (current_status == TopologyStatus::Running || current_status == TopologyStatus::Finalized) {
LOG_DEBUG(logger) << "JScheduler: run_topology() : " << current_status << " => " << current_status << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Running topology: " << current_status << " => " << current_status << LOG_END;
return;
}
LOG_DEBUG(logger) << "JScheduler: run_topology() : " << current_status << " => Running" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Running topology: " << current_status << " => Running" << LOG_END;

bool source_found = false;
for (JArrow* arrow : m_topology->arrows) {
Expand Down Expand Up @@ -306,7 +306,7 @@ JScheduler::TopologyState JScheduler::get_topology_state() {

void JScheduler::run_arrow_unprotected(size_t index) {
auto& as = m_topology_state.arrow_states[index];
auto name = as.arrow->get_name();
const auto& name = as.arrow->get_name();
ArrowStatus status = as.status;

// if (status == ArrowStatus::Unopened) {
Expand All @@ -317,7 +317,7 @@ void JScheduler::run_arrow_unprotected(size_t index) {
// LOG_DEBUG(logger) << "Arrow '" << name << "' run() : " << status << " => " << status << LOG_END;
// return;
// }
LOG_DEBUG(logger) << "Arrow '" << name << "' run() : " << status << " => Active" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Activating arrow " << name << " (Previous status was " << status << ")" << LOG_END;

m_topology_state.active_or_draining_arrow_count++;
for (size_t downstream: m_topology_state.arrow_states[index].downstream_arrow_indices) {
Expand All @@ -329,14 +329,13 @@ void JScheduler::run_arrow_unprotected(size_t index) {

void JScheduler::pause_arrow_unprotected(size_t index) {
auto& as = m_topology_state.arrow_states[index];
auto name = as.arrow->get_name();
const auto& name = as.arrow->get_name();
ArrowStatus status = as.status;

LOG_DEBUG(logger) << "JScheduler: Pausing arrow " << name << " (Previous status was " << status << ")" << LOG_END;
if (status != ArrowStatus::Active) {
LOG_DEBUG(logger) << "JArrow '" << name << "' pause() : " << status << " => " << status << LOG_END;
return; // pause() is a no-op unless running
}
LOG_DEBUG(logger) << "JArrow '" << name << "' pause() : " << status << " => Inactive" << LOG_END;
m_topology_state.active_or_draining_arrow_count--;
for (size_t downstream: m_topology_state.arrow_states[index].downstream_arrow_indices) {
m_topology_state.arrow_states[downstream].active_or_draining_upstream_arrow_count--;
Expand All @@ -346,11 +345,10 @@ void JScheduler::pause_arrow_unprotected(size_t index) {

void JScheduler::finish_arrow_unprotected(size_t index) {
auto& as = m_topology_state.arrow_states[index];
auto name = as.arrow->get_name();
ArrowStatus status = as.status;
const auto& name = as.arrow->get_name();

LOG_DEBUG(logger) << "JArrow '" << name << "' finish() : " << status << " => Finalized" << LOG_END;
ArrowStatus old_status = as.status;
LOG_DEBUG(logger) << "JScheduler: Finishing arrow " << name << " (Previous status was " << old_status << ")" << LOG_END;
// if (old_status == ArrowStatus::Unopened) {
// LOG_DEBUG(logger) << "JArrow '" << name << "': Uninitialized!" << LOG_END;
// throw JException("JArrow::finish(): Arrow %s has not been initialized!", name.c_str());
Expand All @@ -362,7 +360,7 @@ void JScheduler::finish_arrow_unprotected(size_t index) {
}
}
if (old_status != ArrowStatus::Finalized) {
LOG_TRACE(logger) << "JArrow '" << name << "': Finalizing (this must only happen once)" << LOG_END;
LOG_DEBUG(logger) << "JScheduler: Finalizing arrow " << name << " (this must only happen once)" << LOG_END;
as.arrow->finalize();
}
m_topology_state.arrow_states[index].status = ArrowStatus::Finalized;
Expand Down
25 changes: 13 additions & 12 deletions src/libraries/JANA/Engine/JWorker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ const JException& JWorker::get_exception() const {
void JWorker::loop() {
using jclock_t = JWorkerMetrics::clock_t;
try {
LOG_DEBUG(logger) << "Worker " << m_worker_id << " has entered loop()." << LOG_END;
LOG_TRACE(logger) << "Worker " << m_worker_id << " has entered loop()." << LOG_END;
JArrowMetrics::Status last_result = JArrowMetrics::Status::NotRunYet;

while (m_run_state == RunState::Running) {
Expand All @@ -193,7 +193,7 @@ void JWorker::loop() {
auto useful_duration = jclock_t::duration::zero();

if (m_assignment == nullptr) {
LOG_DEBUG(logger) << "Worker " << m_worker_id << " shutdown driven by topology pause" << LOG_END;
LOG_TRACE(logger) << "Worker " << m_worker_id << " shutdown driven by topology pause" << LOG_END;
m_run_state = RunState::Stopped;
return;

Expand All @@ -211,7 +211,7 @@ void JWorker::loop() {
(m_run_state == RunState::Running) &&
(jclock_t::now() - start_time) < m_checkin_time) {

LOG_TRACE(logger) << "Worker " << m_worker_id << " is executing "
LOG_TRACE(logger) << "Worker " << m_worker_id << ": Executing "
<< m_assignment->get_name() << LOG_END;
auto before_execute_time = jclock_t::now();
m_assignment->execute(m_arrow_metrics, m_location_id);
Expand All @@ -220,8 +220,8 @@ void JWorker::loop() {


if (last_result == JArrowMetrics::Status::KeepGoing) {
LOG_DEBUG(logger) << "Worker " << m_worker_id << " succeeded at "
<< m_assignment->get_name() << LOG_END;
LOG_TRACE(logger) << "Worker " << m_worker_id << ": Executed "
<< m_assignment->get_name() << " with result KeepGoing" << LOG_END;
current_tries = 0;
backoff_duration = m_initial_backoff_time;
}
Expand All @@ -234,8 +234,9 @@ void JWorker::loop() {
else if (m_backoff_strategy == BackoffStrategy::Exponential) {
backoff_duration *= 2;
}
LOG_TRACE(logger) << "Worker " << m_worker_id << " backing off with "
<< m_assignment->get_name() << ", tries = " << current_tries
LOG_TRACE(logger) << "Worker " << m_worker_id << ": Executed "
<< m_assignment->get_name() << " with result " << to_string(last_result)
<< "; backoff try = " << current_tries
<< LOG_END;

std::this_thread::sleep_for(backoff_duration);
Expand All @@ -255,22 +256,22 @@ void JWorker::loop() {

m_scheduler->last_assignment(m_worker_id, m_assignment, last_result);
m_assignment = nullptr; // Worker has 'handed in' the assignment
LOG_DEBUG(logger) << "Worker " << m_worker_id << " shutdown due to worker->request_stop()." << LOG_END;
LOG_TRACE(logger) << "Worker " << m_worker_id << " shutdown due to worker->request_stop()." << LOG_END;
}
catch (const JException& e) {
// For now the excepting Worker prints the error, and then terminates the whole program.
// Eventually we want to unify error handling across JApplication::Run, and maybe even across the entire JApplication.
// This means that Workers must pass JExceptions back to the master thread.
LOG_INFO(logger) << "Worker " << m_worker_id << " shutdown due to JException: " << e.what() << LOG_END;
LOG_DEBUG(logger) << e << LOG_END;
LOG_ERROR(logger) << "Worker " << m_worker_id << " shutdown due to JException: " << e.what() << LOG_END;
LOG_ERROR(logger) << e << LOG_END;
m_run_state = RunState::Excepted;
m_exception = e;
m_japc->request_pause(); // We aren't going to even try to drain queues.
}
catch (std::runtime_error& e){
// same as above
LOG_INFO(logger) << "Worker " << m_worker_id << " shutdown due to std::runtime_error:" << e.what() << LOG_END;
LOG_DEBUG(logger) << e.what() << LOG_END;
LOG_ERROR(logger) << "Worker " << m_worker_id << " shutdown due to std::runtime_error:" << e.what() << LOG_END;
LOG_ERROR(logger) << e.what() << LOG_END;
m_run_state = RunState::Excepted;
m_exception = JException(e.what());
m_exception.nested_exception = std::current_exception();
Expand Down
1 change: 0 additions & 1 deletion src/libraries/JANA/JApplication.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ void JApplication::Initialize() {

// Set logger on JApplication itself
m_logger = m_params->GetLogger("jana");
m_logger.show_classname = false;

if (m_logger.level > JLogger::Level::INFO) {
std::ostringstream oss;
Expand Down
71 changes: 49 additions & 22 deletions src/libraries/JANA/JLogger.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,48 +4,53 @@

#pragma once

#include <JANA/Compatibility/JStreamLog.h>

#include <iostream>
#include <sstream>
#include <JANA/Compatibility/JStreamLog.h>
#include <chrono>
#include <thread>
#include <iomanip>



struct JLogger {
enum class Level { TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF };
Level level;
std::ostream *destination;
std::string className;
std::string group;
bool show_level = true;
bool show_classname = false;
bool show_timestamp = false;
bool show_group = false;
bool show_timestamp = true;
bool show_threadstamp = false;

explicit JLogger(JLogger::Level level = JLogger::Level::INFO,
std::ostream* destination = &std::cout,
std::string className = "")
: level(level), destination(destination), className(std::move(className)) {};
std::string group = "")
: level(level), destination(destination), group(std::move(group)) {};

JLogger(const JLogger&) = default;
JLogger& operator=(const JLogger&) = default;

void SetTag(std::string tag) {className = tag; }
void SetTimestampFlag() {show_timestamp = true; }
void UnsetTimestampFlag() {show_timestamp = false; }
void SetThreadstampFlag() {show_threadstamp = true; }
void UnsetThreadstampFlag() {show_threadstamp = false; }
void SetGroup(std::string group) {this->group = group; }
void ShowGroup(bool show) {show_group = show; }
void ShowLevel(bool show) {show_level = show; }
void ShowTimestamp(bool show) {show_timestamp = show; }
void ShowThreadstamp(bool show) {show_threadstamp = show; }
};

static JLogger default_cout_logger = JLogger(JLogger::Level::TRACE, &std::cout, "JANA");
static JLogger default_cerr_logger = JLogger(JLogger::Level::TRACE, &std::cerr, "JERR");

inline std::ostream& operator<<(std::ostream& s, JLogger::Level l) {
switch (l) {
case JLogger::Level::TRACE: return s << "TRACE";
case JLogger::Level::DEBUG: return s << "DEBUG";
case JLogger::Level::INFO: return s << "INFO";
case JLogger::Level::WARN: return s << "WARN";
case JLogger::Level::ERROR: return s << "ERROR";
case JLogger::Level::FATAL: return s << "FATAL";
default: return s << "OFF";
case JLogger::Level::TRACE: return s << "trace";
case JLogger::Level::DEBUG: return s << "debug";
case JLogger::Level::INFO: return s << "info";
case JLogger::Level::WARN: return s << "warn ";
case JLogger::Level::ERROR: return s << "error";
case JLogger::Level::FATAL: return s << "fatal";
default: return s << "off";
}
}

Expand All @@ -64,13 +69,35 @@ struct JLogMessage {
JLogger::Level level = JLogger::Level::INFO)
: logger(logger), level(level) {

if (logger.show_timestamp) {
auto now = std::chrono::system_clock::now();
std::time_t current_time = std::chrono::system_clock::to_time_t(now);
std::tm* local_time = std::localtime(&current_time);
char buffer[100];
std::strftime(buffer, sizeof(buffer), "%H:%M:%S", local_time);

// Extract milliseconds by calculating the duration since the last whole second
auto milliseconds = std::chrono::duration_cast<std::chrono::milliseconds>(now.time_since_epoch()) % 1000;
builder << buffer << ".";
builder << std::setfill('0') << std::setw(3) << milliseconds.count() << std::setfill(' ') << " ";
}
if (logger.show_level) {
builder << "[" << level << "] ";
switch (level) {
case JLogger::Level::TRACE: builder << "[trace] "; break;
case JLogger::Level::DEBUG: builder << "[debug] "; break;
case JLogger::Level::INFO: builder << "[info] "; break;
case JLogger::Level::WARN: builder << "[warn] "; break;
case JLogger::Level::ERROR: builder << "[error] "; break;
case JLogger::Level::FATAL: builder << "[fatal] "; break;
default: builder << "[?????] ";
}
}
if (logger.show_threadstamp) {
builder << std::this_thread::get_id() << " ";
}
if (logger.show_classname) {
builder << logger.className << ": ";
if (logger.show_group) {
builder << "[" << logger.group << "] ";
}
// TODO: Re-add thread and timestamp info?
}

// Helper function for truncating long strings to keep our log readable
Expand Down
12 changes: 11 additions & 1 deletion src/libraries/JANA/Services/JParameterManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -396,10 +396,15 @@ void JParameterManager::FilterParameters(std::map<std::string, std::string> &par
JLogger JParameterManager::GetLogger(const std::string& component_prefix) {

JLogger logger;
logger.className = component_prefix;
logger.group = component_prefix;

auto global_log_level = RegisterParameter("jana:global_loglevel", JLogger::Level::INFO, "Global log level");

bool enable_timestamp = RegisterParameter("jana:log:show_timestamp", true, "Show timestamp in log output");
auto enable_threadstamp = RegisterParameter("jana:log:show_threadstamp", false, "Show threadstamp in log output");
auto enable_group = RegisterParameter("jana:log:show_group", false, "Show threadstamp in log output");
auto enable_level = RegisterParameter("jana:log:show_level", true, "Show threadstamp in log output");

if (component_prefix.empty()) {
logger.level = global_log_level;
}
Expand All @@ -408,6 +413,11 @@ JLogger JParameterManager::GetLogger(const std::string& component_prefix) {
os << component_prefix << ":loglevel";
logger.level = RegisterParameter(os.str(), global_log_level, "Component log level");
}
logger.ShowLevel(enable_level);
logger.ShowTimestamp(enable_timestamp);
logger.ShowThreadstamp(enable_threadstamp);
logger.ShowGroup(enable_group);
logger.show_threadstamp = enable_threadstamp;
return logger;
}

Expand Down
Loading

0 comments on commit 3a9c913

Please sign in to comment.