Skip to content

Commit

Permalink
Removed the Python script (matmul/gflops.py)
Browse files (browse the repository at this point in the history)
  • Loading branch information
adeeconometrics committed May 18, 2024
1 parent 595c276 commit ec937c0
Show file tree
Hide file tree
Showing 5 changed files with 10 additions and 28 deletions.
2 changes: 1 addition & 1 deletion matmul/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ BUILD_DIR := build
CXX := g++

# Compiler flags
CXXFLAGS := -O0 -Wall -Wextra -pedantic -std=c++17 -pthread -mfpu=neon
CXXFLAGS := -O3 -Wall -Wextra -pedantic -Xclang -std=c++17 -pthread -mfpu=neon -ffast-math

# Source files
SRCS := $(wildcard src/*.cxx)
Expand Down
17 changes: 0 additions & 17 deletions matmul/gflops.py

This file was deleted.

5 changes: 3 additions & 2 deletions matmul/include/matmul.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@

#include "../include/matmul.hpp"
#include "../include/matrix.hpp"
#include "../include/utils.hpp"

#include <algorithm>
#include <future>
#include <memory>
#include <thread>
#include <type_traits>

Expand All @@ -24,6 +22,7 @@ auto iterative(const Matrix<T, M, N> &t_lhs,
Matrix<T, M, N> result;

for (std::size_t i = 0; i < M; ++i) {
#pragma clang loop vectorize(enable)
for (std::size_t j = 0; j < N; ++j) {
T sum = 0;
for (std::size_t k = 0; k < N; ++k) {
Expand All @@ -43,6 +42,7 @@ auto loop_reorder(const Matrix<T, M, N> &t_lhs,

for (std::size_t i = 0; i < M; ++i) {
for (std::size_t j = 0; j < N; ++j) {
#pragma clang loop vectorize(enable)
for (std::size_t k = 0; k < N; ++k) {
result(i, k) += t_lhs(i, j) * t_rhs(j, k);
}
Expand All @@ -61,6 +61,7 @@ auto gemm(const Matrix<T, N, M> &t_lhs,
// Loop over the blocks
for (std::size_t i = 0; i < N; i += block_size) {
for (std::size_t j = 0; j < N; j += block_size) {
#pragma clang loop vectorize(enable)
for (std::size_t k = 0; k < N; k += block_size) {
// Multiply the blocks
for (std::size_t ii = i; ii < std::min(i + block_size, N); ++ii) {
Expand Down
10 changes: 4 additions & 6 deletions matmul/include/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,10 @@ class Timer {
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time -
start_time)
.count();
const auto mean_duration = total_duration / m_iterations;
const float gflops =
(2 * 1024 * 1024 * 1024 / mean_duration / 1'000'000'000);
std::cout << "mean elapsed time took: " << mean_duration << " or " << gflops
<< "GFlops"
<< " ns for " << m_name << std::endl;
const double mean_duration = total_duration / m_iterations;
const double gflops = (2147483648 / mean_duration);
std::cout << "mean elapsed time took: " << mean_duration << " ns for "
<< m_name << " or " << gflops << "GFlops" << std::endl;
}

auto start() -> void {
Expand Down
4 changes: 2 additions & 2 deletions matmul/src/main.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ auto test_matmul() -> void {
auto loop_reorder_mat =
bench(loop_reorder_func, lhs_matrix, rhs_matrix, "loop_reorder");

// auto blocked = block_multiply<float, Rows, Cols>(lhs_matrix, rhs_matrix);
auto blocked_func = std::function<Matrix<float, Rows, Cols>(
const Matrix<float, Rows, Cols> &, const Matrix<float, Rows, Cols> &)>(
gemm<float, Rows, Cols>);
Expand All @@ -60,7 +59,8 @@ auto test_matmul() -> void {
auto async_gemm_func = std::function<Matrix<float, Rows, Cols>(
const Matrix<float, Rows, Cols> &, const Matrix<float, Rows, Cols> &)>(
gemm_neon<float, Rows, Cols>);
auto async_gemm = bench(async_gemm_func, lhs_matrix, rhs_matrix, "neon", 2);
auto async_gemm =
bench(async_gemm_func, lhs_matrix, rhs_matrix, "async_gemm", 2);

auto neon_gemm_func = std::function<Matrix<float, Rows, Cols>(
const Matrix<float, Rows, Cols> &, const Matrix<float, Rows, Cols> &)>(
Expand Down

0 comments on commit ec937c0

Please sign in to comment.