Skip to content

Commit

Permalink
add shared memory bank conflicts correlation
Browse files Browse the repository at this point in the history
  • Loading branch information
barnes88 committed Jun 12, 2023
1 parent 2b81b27 commit 523e73f
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 1 deletion.
10 changes: 9 additions & 1 deletion util/hw_stats/run_hw.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
parser.add_option("-l", "--limit_kernel_number", dest="kernel_number", type=int, default=-99,
help="Limits the number of profiled kernels (useful in larger applications)")
parser.add_option("-C", "--collect", dest="collect", default="cycles",
help="Pass what you want from the hardware. Options are: \"cycles,other_stats\"")
help="Pass what you want from the hardware. Options are: \"cycles,other_stats,full_set\"")

(options, args) = parser.parse_args()

Expand Down Expand Up @@ -147,6 +147,14 @@
",l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_miss.sum,idc__requests.sum,idc__requests_lookup_hit.sum," +\
"sm__sass_inst_executed_op_shared_ld.sum,sm__sass_inst_executed_op_shared_st.sum,lts__t_sectors_srcunit_tex_op_read_lookup_miss.sum,lts__t_sectors_srcunit_tex_op_write_lookup_miss.sum,sm__pipe_alu_cycles_active.sum,sm__pipe_fma_cycles_active.sum,sm__pipe_fp64_cycles_active.sum,sm__pipe_shared_cycles_active.sum,sm__pipe_tensor_cycles_active.sum,sm__pipe_tensor_op_hmma_cycles_active.sum,sm__cycles_active.sum,sm__cycles_active.avg,sm__cycles_elapsed.avg,sm__sass_thread_inst_executed_op_integer_pred_on.sum,sm__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.sum,sm__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.sum,sm__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.sum,sm__inst_executed_pipe_alu.sum,sm__inst_executed_pipe_fma.sum,sm__inst_executed_pipe_fp16.sum,sm__inst_executed_pipe_fp64.sum,sm__inst_executed_pipe_tensor.sum,sm__inst_executed_pipe_tex.sum,sm__inst_executed_pipe_xu.sum,sm__inst_executed_pipe_lsu.sum," +\
"sm__sass_thread_inst_executed_op_fp16_pred_on.sum,sm__sass_thread_inst_executed_op_fp32_pred_on.sum,sm__sass_thread_inst_executed_op_fp64_pred_on.sum,sm__sass_thread_inst_executed_op_dmul_pred_on.sum,sm__sass_thread_inst_executed_op_dfma_pred_on.sum,sm__sass_inst_executed_op_memory_128b.sum,sm__sass_inst_executed_op_memory_64b.sum,sm__sass_inst_executed_op_memory_32b.sum,sm__sass_inst_executed_op_memory_16b.sum,sm__sass_inst_executed_op_memory_8b.sum,smsp__thread_inst_executed_per_inst_executed.ratio,sm__sass_thread_inst_executed.sum" +\
"l1tex__data_bank_conflicts_pipe_lsu_mem_shared.sum,l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum,l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum" +\
" --csv --page raw --target-processes all " + kernel_number +\
" " + exec_path + " " + str(args) +\
" | tee " + os.path.join(this_run_dir,logfile + ".nsight")
elif "full_set" in options.collect:
if options.nsight_profiler:
sh_contents += "\nexport CUDA_VERSION=\"" + cuda_version + "\"; export CUDA_VISIBLE_DEVICES=\"" + options.device_num +\
"\" ; timeout 30m nv-nsight-cu-cli --set full" +\
" --csv --page raw --target-processes all " + kernel_number +\
" " + exec_path + " " + str(args) +\
" | tee " + os.path.join(this_run_dir,logfile + ".nsight")
Expand Down
7 changes: 7 additions & 0 deletions util/job_launching/apps/define-all-apps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ GPU_Microbenchmark:
- shared_lat:
- args:
accel-sim-mem: 1G
- shared_bank_conflicts:
## argument 1 kernel has conflicts
- args: 1
accel-sim-mem: 1G
## argument 2 kernel doesn't have conflicts
- args: 2
accel-sim-mem: 1G
- MaxFlops:
- args:
accel-sim-mem: 1G
Expand Down
2 changes: 2 additions & 0 deletions util/job_launching/stats/example_stats.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ collect_aggregate:
- 'total dram reads\s*=\s*(.*)'
- 'total dram writes\s*=\s*(.*)'
- 'kernel_launch_uid\s*=\s*(.*)'
- 'gpgpu_n_shmem_bkconflict\s*=\s*(.*)'
- 'gpgpu_n_l1cache_bkconflict\s*=\s*(.*)'


# These stats are reset each kernel and should not be diff'd
Expand Down
11 changes: 11 additions & 0 deletions util/plotting/correl_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,17 @@
drophwnumbelow=0,
plottype="log",
stattype="counter"
),
CorrelStat(chart_name="Shared Memory Bank Conflicts",
plotfile="shmem-bank-conflict",
hw_eval="np.average(hw[\"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum\"])\
+ np.average(hw[\"l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum\"])",
hw_error=None,
sim_eval="float(sim[\"gpgpu_n_shmem_bkconflict\s*=\s*(.*)\"])",
hw_name="all",
drophwnumbelow=0,
plottype="log",
stattype="counter"
),
CorrelStat(chart_name="DRAM Reads",
plotfile="dram-read-transactions",
Expand Down

0 comments on commit 523e73f

Please sign in to comment.