diff --git a/gpu-simulator/trace-parser/trace_parser.cc b/gpu-simulator/trace-parser/trace_parser.cc index 00aa7892b..c126ad38e 100644 --- a/gpu-simulator/trace-parser/trace_parser.cc +++ b/gpu-simulator/trace-parser/trace_parser.cc @@ -11,6 +11,10 @@ #include #include +#include +#include +#include + #include "trace_parser.h" bool is_number(const std::string &s) { @@ -280,19 +284,65 @@ kernel_trace_t *trace_parser::parse_kernel_info( const std::string &kerneltraces_filepath) { kernel_trace_t *kernel_info = new kernel_trace_t; kernel_info->enable_lineinfo = 0; // default disabled - kernel_info->ifs = new std::ifstream; - std::ifstream *ifs = kernel_info->ifs; - ifs->open(kerneltraces_filepath.c_str()); - if (!ifs->is_open()) { - std::cout << "Unable to open file: " << kerneltraces_filepath << std::endl; + std::string read_trace_cmd; + int _l = kerneltraces_filepath.length(); + if(_l > 3 && kerneltraces_filepath.substr(_l-3, 3) == ".xz"){ + // this is xz-compressed trace + read_trace_cmd = "xz -dc " + kerneltraces_filepath; + } else if(_l > 7 && kerneltraces_filepath.substr(_l-7, 7) == ".traceg"){ + // this is plain text trace + read_trace_cmd ="cat " + kerneltraces_filepath; + } else { + std::cerr << "Can't read trace. Only .xz and plain text are supported: " + << kerneltraces_filepath <<"\n"; + exit(1); + } + + // Create an interprocess channel, and fork out a data source process. The + // data source process reads trace from disk, write to the channel, and the + // simulator process read from the channel. + int *pipefd = kernel_info->pipefd; + if(pipe(pipefd) != 0){ + std::cerr << "Failed to create interprocess channel\n"; + perror("pipe"); exit(1); } + pid_t pid = fork(); + if(pid == 0){ + // The child process is the data source. Redirect its + // stdout to the write end of the pipe. + close(pipefd[0]); + dup2(pipefd[1], STDOUT_FILENO); + + // When using GDB, sending Ctrl+C to the simulator will send a SIGINT signal + // to the child process as well, subsequently causing it to terminate. To + // avoid this, we let the child process ignore (SIG_IGN) the SIGINT signal. + // Reference: + // https://stackoverflow.com/questions/38404925/gdb-interrupt-running-process-without-killing-child-processes + signal(SIGINT, SIG_IGN); + + execle("/bin/sh", "sh", "-c", read_trace_cmd.c_str(), NULL, environ); + perror("execle"); // the child process shouldn't reach here if all is well. + exit(1); + } else { + // parent (simulator) + close(pipefd[1]); + dup2(pipefd[0], STDIN_FILENO); + } + + // Parent continues from here. + kernel_info->ifs = &std::cin; + std::istream *ifs = kernel_info->ifs; + std::cout << "Processing kernel " << kerneltraces_filepath << std::endl; std::string line; + // Important to clear the istream. Otherwise, the eofbit from the last + // kernel may be carried over to this kernel + ifs->clear(); while (!ifs->eof()) { getline(*ifs, line); @@ -362,15 +412,19 @@ kernel_trace_t *trace_parser::parse_kernel_info( void trace_parser::kernel_finalizer(kernel_trace_t *trace_info) { assert(trace_info); - assert(trace_info->ifs); - if (trace_info->ifs->is_open()) trace_info->ifs->close(); - delete trace_info->ifs; + + // The pipe read/write end file descriptors held by the child process would + // have been automatically closed when it terminated. But the parent + // process may read an arbitrary amount of trace files, so it has to close + // all file descriptors. + close(trace_info->pipefd[0]); + close(trace_info->pipefd[1]); delete trace_info; } void trace_parser::get_next_threadblock_traces( std::vector *> threadblock_traces, - unsigned trace_version, unsigned enable_lineinfo, std::ifstream *ifs) { + unsigned trace_version, unsigned enable_lineinfo, std::istream *ifs) { for (unsigned i = 0; i < threadblock_traces.size(); ++i) { threadblock_traces[i]->clear(); } diff --git a/gpu-simulator/trace-parser/trace_parser.h b/gpu-simulator/trace-parser/trace_parser.h index 682a7aea0..89f67b5e6 100644 --- a/gpu-simulator/trace-parser/trace_parser.h +++ b/gpu-simulator/trace-parser/trace_parser.h @@ -96,7 +96,10 @@ struct kernel_trace_t { unsigned long long shmem_base_addr; unsigned long long local_base_addr; // Reference to open filestream - std::ifstream *ifs; + std::istream *ifs; + // Anonymous pipe through which the trace is transmitted from a trace reader + // process to the simulator process + int pipefd[2]={}; }; class trace_parser { @@ -112,7 +115,7 @@ class trace_parser { void get_next_threadblock_traces( std::vector *> threadblock_traces, - unsigned trace_version, unsigned enable_lineinfo, std::ifstream *ifs); + unsigned trace_version, unsigned enable_lineinfo, std::istream *ifs); void kernel_finalizer(kernel_trace_t *trace_info); diff --git a/util/tracer_nvbit/tracer_tool/tracer_tool.cu b/util/tracer_nvbit/tracer_tool/tracer_tool.cu index 903cdd530..2b2f78d88 100644 --- a/util/tracer_nvbit/tracer_tool/tracer_tool.cu +++ b/util/tracer_nvbit/tracer_tool/tracer_tool.cu @@ -60,6 +60,9 @@ bool active_region = true; int terminate_after_limit_number_of_kernels_reached = 0; int user_defined_folders = 0; +/* Use xz to compress the *.trace file */ +int xz_compress_trace = 0; + /* opcode to id map and reverse map */ std::map opcode_to_id_map; std::map id_to_opcode_map; @@ -105,6 +108,8 @@ void nvbit_at_init() { "Stop the process once the current kernel > DYNAMIC_KERNEL_LIMIT_END"); GET_VAR_INT(user_defined_folders, "USER_DEFINED_FOLDERS", 0, "Uses the user defined " "folder TRACES_FOLDER path environment"); + GET_VAR_INT(xz_compress_trace, "TRACE_FILE_COMPRESS", 0, "Create xz-compressed trace" + "file"); std::string pad(100, '-'); printf("%s\n", pad.c_str()); @@ -388,9 +393,15 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, sprintf(buffer, std::string(traces_location+"/kernel-%d.trace").c_str(), kernelid); if (!stop_report) { - resultsFile = fopen(buffer, "w"); - - printf("Writing results to %s\n", buffer); + if(!xz_compress_trace){ + resultsFile = fopen(buffer, "w"); + printf("Writing results to %s\n", buffer); + } else { + char cmd_buffer[1039]; + sprintf(cmd_buffer, "xz -1 -T0 > %s.xz", buffer); + resultsFile = popen(cmd_buffer, "w"); + printf("Writing results to %s.xz\n", buffer); + } fprintf(resultsFile, "-kernel name = %s\n", nvbit_get_func_name(ctx, p->f, true)); @@ -421,7 +432,7 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, kernelsFile = fopen(kernelslist_location.c_str(), "a"); // This will be a relative path to the traces file - sprintf(buffer,"kernel-%d.trace", kernelid); + sprintf(buffer,"kernel-%d.trace%s", kernelid, xz_compress_trace?".xz":""); if (!stop_report) { fprintf(kernelsFile, buffer); fprintf(kernelsFile, "\n"); @@ -480,8 +491,10 @@ void nvbit_at_cuda_event(CUcontext ctx, int is_exit, nvbit_api_cuda_t cbid, fprintf(statsFile, "\n"); fclose(statsFile); - if (!stop_report) - fclose(resultsFile); + if (!stop_report){ + if(!xz_compress_trace){fclose(resultsFile);} + else{pclose(resultsFile);} + } if (active_from_start && dynamic_kernel_limit_end && kernelid > dynamic_kernel_limit_end) active_region = false; diff --git a/util/tracer_nvbit/tracer_tool/traces-processing/Makefile b/util/tracer_nvbit/tracer_tool/traces-processing/Makefile index 89d0c9129..87e030d95 100755 --- a/util/tracer_nvbit/tracer_tool/traces-processing/Makefile +++ b/util/tracer_nvbit/tracer_tool/traces-processing/Makefile @@ -1,7 +1,7 @@ TARGET := post-traces-processing $(TARGET): post-traces-processing.cpp - g++ -std=c++11 -o $@ $^ + g++ -std=c++14 -O3 -g -o $@ $^ run: $(TARGET) ./$(TARGET) diff --git a/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing.cpp b/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing.cpp index 95ea5ed62..935dd55e9 100644 --- a/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing.cpp +++ b/util/tracer_nvbit/tracer_tool/traces-processing/post-traces-processing.cpp @@ -5,28 +5,93 @@ #include #include #include +#include +#include +#include + +#include +#include +#include + using namespace std; struct threadblock_info { bool initialized; unsigned tb_id_x, tb_id_y, tb_id_z; - vector> warp_insts_array; + vector> warp_insts_array; threadblock_info() { initialized = false; tb_id_x = tb_id_y = tb_id_z = 0; } }; +/// @brief There exist significant repetition in the trace. The WarpInstLUT +/// registers recurrent trace fragments in a hash map. Strings (trace fragments) +/// are mapped to a pointer to a unique copy of that string, which is guaranteed +/// to live throughout the scope of the lifetime of this WarpInstLUT. +struct WarpInstLUT { + // A mapping from "raw instruction string" to "a pointer to a global copy of + // that string". For any element (x->y) of this map, *y==x holds. + unordered_map> registration_table; + + /// @brief Is a string already registered? + /// @param s The probing string. + /// @return nullptr if the probing string does not exist in the look up table. + /// Otherwise, a const pointer to a unique copy of that string. + const string* lookup_entry(const string s) const{ + const auto it = registration_table.find(s); + + // not registered + if(it == registration_table.end()){ + return nullptr; + } else { + return it->second.get(); + } + } + + /// @brief Add a string to the look up table. + /// @param s The string to be added. + /// @return A const pointer to the unique copy of the string. + const string* register_new_entry(const string s){ + // Check if the string is already in the LUT. + const string *entry_ptr = lookup_entry(s); + if(entry_ptr) { + // just in case a rare hash collision happens, we panic + if(s != *entry_ptr){ + cerr << "FATAL: new string insertion " << s + << "collides with the hash of a different string in the registration table " + << *entry_ptr <<"\n"; + abort(); + } + return entry_ptr; + } + + // Create a new string + auto new_string_ptr = std::make_unique(s); + entry_ptr = new_string_ptr.get(); + registration_table.insert({s, std::move(new_string_ptr)}); + + return entry_ptr; + } +}; + void group_per_block(const char *filepath); void group_per_core(const char *filepath); +// This program works by redirecting the stdin/stdout to child processes. The +// stdin is piped to a process that reads from disk the input trace file. The +// stdout is piped to a process that writes to disk the post-process trace. We +// should preserve the original file descriptors for stdin/stdout before doing +// redirections. +int preserved_stdin_fileno; +int preserved_stdout_fileno; + ////////////////////////////////////////////////////////////////////////////////////////////////////////////////// int main(int argc, char **argv) { - string kernellist_filepath; bool is_per_core; if (argc == 1) { - cout << "File path is missing\n"; + cerr << "File path is missing\n"; return 0; } else if (argc == 2) { kernellist_filepath = argv[1]; @@ -36,7 +101,7 @@ int main(int argc, char **argv) { kernellist_filepath = argv[1]; is_per_core = bool(argv[2]); } else { - cout << "Too Many Arguemnts!\n"; + cerr << "Too Many Arguemnts!\n"; return 0; } @@ -47,7 +112,7 @@ int main(int argc, char **argv) { ofs.open((string(kernellist_filepath) + ".g").c_str()); if (!ifs.is_open()) { - cout << "Unable to open file: " << kernellist_filepath << endl; + cerr << "Unable to open file: " << kernellist_filepath << endl; return 0; } @@ -68,9 +133,15 @@ int main(int argc, char **argv) { } else if (line.substr(0, 6) == "kernel") { filepath = directory + "/" + line; group_per_block(filepath.c_str()); - ofs << line + "g" << endl; + + int _l = line.length(); + if(_l > 3 && line.substr(_l - 3, 3) == ".xz"){ + ofs << line.substr(0, _l-3) << "g.xz" << endl; + } else { + ofs << line + "g" << endl; + } } else { - cout << "Undefined command: " << line << endl; + cerr << "Undefined command: " << line << endl; return 0; } } @@ -80,20 +151,110 @@ int main(int argc, char **argv) { return 0; } +// This function redirects stdin and stdout for trace processing. +// For error/warning/info message to print to the terminal, always use the stderr stream. +// The io redirection will be restored by the time the function returns. void group_per_block(const char *filepath) { + preserved_stdin_fileno = dup(STDIN_FILENO); + preserved_stdout_fileno = dup(STDOUT_FILENO); + + string filepath_str{filepath}; + WarpInstLUT warp_inst_lut; + + pid_t sink_process_pid=0; + string trace_sink_cmd; + int sink_pipe_fd[2]; + + pid_t source_process_pid=0; + string trace_source_cmd; + int source_pipe_fd[2]; + string output_filepath; + + bool input_file_is_xz = false; + int _l = filepath_str.length(); + if(_l > 3 && filepath_str.substr(_l - 3, 3) == ".xz"){ + // kernel-1.trace.xz --(xz -dc)--> f --(xz -1 -T0)--> kernel-1.traceg.xz + input_file_is_xz = true; + output_filepath = filepath_str.substr(0, _l - 3) + "g.xz"; + trace_source_cmd = "xz -dc " + filepath_str; + trace_sink_cmd = "xz -1 -T0 > " + output_filepath; + } else if(_l > 6 && filepath_str.substr(_l - 6, 6) == ".trace"){ + // kernel-2.trace --(cat)--> f --(cat)--> kernel-2.traceg + input_file_is_xz = false; + output_filepath = filepath_str + "g"; + trace_source_cmd = "cat " + filepath_str; + trace_sink_cmd = "cat > " + output_filepath; + } else { + cerr << "Only support xz or raw text format. Unable to process - and skipping - trace file " + << filepath_str << endl; + close(preserved_stdin_fileno); + close(preserved_stdout_fileno); + return; + } - ofstream ofs; - ifstream ifs; - - ifs.open(filepath); + //cerr << "source cmd is "< 0) { + // parent process - the trace post processor + // stdin is now redirected to the read end of the source_pipe + close(source_pipe_fd[1]); + int r = dup2(source_pipe_fd[0], STDIN_FILENO); + } else { + cerr << "Failed to fork data source process\n"; + perror("fork"); + exit(1); + } - if (!ifs.is_open()) { - cout << "Unable to open file: " << filepath << endl; - return; + // fork a child process as the trace sink + if(pipe(sink_pipe_fd)!=0){ + cerr << "Failed to create pipe\n"; + perror("pipe"); + exit(1); + } + sink_process_pid = fork(); + if(sink_process_pid == 0){ + // child process + close(sink_pipe_fd[1]); + dup2(sink_pipe_fd[0], STDIN_FILENO); + signal(SIGINT, SIG_IGN); // ignore SIGINT + execle("/bin/sh", "sh", "-c", trace_sink_cmd.c_str(), NULL, environ); + perror("execle"); // child shouldn't reach here if all is well. + exit(1); + } else if (sink_process_pid > 0){ + // parent process - the trace post processor + // stdout is now redirected to the write end of the sink_pipe + close(sink_pipe_fd[0]); + int r = dup2(sink_pipe_fd[1], STDOUT_FILENO); + } else { + cerr << "Failed to fork data sink process\n"; + perror("fork"); + exit(1); } - cout << "Processing file " << filepath << endl; - ofs.open((string(filepath) + "g").c_str()); + cerr << "Processing file " << filepath << endl; vector insts; unsigned grid_dim_x, grid_dim_y, grid_dim_z, tb_dim_x, tb_dim_y, tb_dim_z; @@ -107,11 +268,14 @@ void group_per_block(const char *filepath) { // Add a flag for LDGSTS instruction to indicate which one to remove vector> ldgsts_flags; // true to remove, false to not - while (!ifs.eof()) { - getline(ifs, line); + // Important... without clear(), cin.eof() may evaluate to true on the second + // kernel + cin.clear(); + while (!cin.eof()) { + getline(cin, line); if (line.length() == 0 || line[0] == '#') { - ofs << line << endl; + cout << line << endl; continue; } @@ -148,7 +312,7 @@ void group_per_block(const char *filepath) { } } } - ofs << line << endl; + cout << line << endl; continue; } else { @@ -181,6 +345,10 @@ void group_per_block(const char *filepath) { } opcode_ss >> opcode; + // Look up the warp inst table to see if this instruction has been + // registered. If yes, we just copy the pointer to that string. + const string *inst_ptr = warp_inst_lut.lookup_entry(rest_of_line); + if(!inst_ptr) inst_ptr = warp_inst_lut.register_new_entry(rest_of_line); // One actual LDGSTS instruction includes 2 LDGSTS instructions in the trace, // because it has two memory references. @@ -188,50 +356,57 @@ void group_per_block(const char *filepath) { if (opcode.find("LDGSTS") != string::npos) { if (!ldgsts_flags[tb_id][warpid_tb]) { - insts[tb_id].warp_insts_array[warpid_tb].push_back(rest_of_line); + insts[tb_id].warp_insts_array[warpid_tb].push_back(inst_ptr); } ldgsts_flags[tb_id][warpid_tb] = !ldgsts_flags[tb_id][warpid_tb]; } else { - insts[tb_id].warp_insts_array[warpid_tb].push_back(rest_of_line); + insts[tb_id].warp_insts_array[warpid_tb].push_back(inst_ptr); } } } for (unsigned i = 0; i < insts.size(); ++i) { - // ofs< 0) { - ofs << endl << "#BEGIN_TB" << endl; - ofs << endl + cout << "\n" << "#BEGIN_TB" << "\n"; + cout << "\n" << "thread block = " << insts[i].tb_id_x << "," << insts[i].tb_id_y - << "," << insts[i].tb_id_z << endl; + << "," << insts[i].tb_id_z << "\n"; } else { - cout << "Warning: Thread block " << insts[i].tb_id_x << "," + cerr << "Warning: Thread block " << insts[i].tb_id_x << "," << insts[i].tb_id_y << "," << insts[i].tb_id_z << " is empty" - << endl; + << "\n"; continue; - // ofs.close(); - // return; } for (unsigned j = 0; j < insts[i].warp_insts_array.size(); ++j) { - ofs << endl << "warp = " << j << endl; - ofs << "insts = " << insts[i].warp_insts_array[j].size() << endl; + cout << "\n" << "warp = " << j << "\n"; + cout << "insts = " << insts[i].warp_insts_array[j].size() << "\n"; if (insts[i].warp_insts_array[j].size() == 0) { - cout << "Warning: Warp " << j << " in thread block" << insts[i].tb_id_x + cerr << "Warning: Warp " << j << " in thread block" << insts[i].tb_id_x << "," << insts[i].tb_id_y << "," << insts[i].tb_id_z - << " is empty" << endl; - // ofs.close(); - // return; + << " is empty" << "\n"; } - for (unsigned k = 0; k < insts[i].warp_insts_array[j].size(); ++k) { - ofs << insts[i].warp_insts_array[j][k] << endl; + for (auto it = insts[i].warp_insts_array[j].cbegin(); + it != insts[i].warp_insts_array[j].cend(); ++it) { + // dereference once: const string* + // dereference twice: const string + cout << **it << "\n"; } } - ofs << endl << "#END_TB" << endl; + cout << endl << "#END_TB" << endl; } - ofs.close(); - ifs.close(); + close(source_pipe_fd[0]); + close(source_pipe_fd[1]); + close(sink_pipe_fd[0]); + close(sink_pipe_fd[1]); + + // restore stdin/stdout file descriptor + dup2(preserved_stdin_fileno, STDIN_FILENO); + dup2(preserved_stdout_fileno, STDOUT_FILENO); + close(preserved_stdin_fileno); + close(preserved_stdout_fileno); } void group_per_core(const char *filepath) {