#include "matrix.h"
#include "mm_cache_omp1.h"

#include <string.h>
#include <iostream>
#include <iomanip>
#include <stdlib.h>

void mm_cache_omp1(const Matrix &a, const Matrix &b, Matrix &product, int threshold) {
  mm_cache_omp1(a,b,product, 0,a.Rows(), 0,b.Cols(), 0,a.Cols(), threshold);
}

// Recursive divide-and-conquer multiply restricted to the half-open index
// ranges i in [i0,i1), j in [j0,j1), k in [k0,k1):
//
//     product(i,j) += a(i,k) * b(k,j)
//
// The largest of the three extents is halved while it is at least `threshold`;
// when no extent qualifies, the sub-block is computed directly with an
// OpenMP-parallel loop over i.
//
// product must be cleared by the caller before the top-level call, because
// every contribution is accumulated with +=.
//
// A threshold below 2 is treated as 2. Splitting an extent of 1 (or 0) would
// produce an empty half plus a half equal to the parent range, so the
// recursion would never terminate.
void mm_cache_omp1(const Matrix &a, const Matrix &b, Matrix &product,
                   int i0, int i1, int j0, int j1, int k0, int k1, int threshold) {

  //std::cout << "i: " << i0 << ".." << i1 << "  j:" << j0 << ".." << j1 <<
  //"  k: " << k0 << ".." << k1 << std::endl;

  // Smallest extent that may still be split; clamped so both halves are
  // non-empty and strictly smaller than the range being split.
  const int split_min = threshold < 2 ? 2 : threshold;

  const int di = i1 - i0;
  const int dj = j1 - j0;
  const int dk = k1 - k0;

  if (di >= dj && di >= dk && di >= split_min) {
    // Rows are the largest extent: split the i range.
    const int mi = i0 + di / 2;
    mm_cache_omp1(a, b, product, i0, mi, j0, j1, k0, k1, split_min);
    mm_cache_omp1(a, b, product, mi, i1, j0, j1, k0, k1, split_min);
  } else if (dj >= dk && dj >= split_min) {
    // Columns are the largest extent: split the j range.
    const int mj = j0 + dj / 2;
    mm_cache_omp1(a, b, product, i0, i1, j0, mj, k0, k1, split_min);
    mm_cache_omp1(a, b, product, i0, i1, mj, j1, k0, k1, split_min);
  } else if (dk >= split_min) {
    // Inner dimension is the largest extent: split the k range. Both halves
    // accumulate into the same product entries, one after the other.
    const int mk = k0 + dk / 2;
    mm_cache_omp1(a, b, product, i0, i1, j0, j1, k0, mk, split_min);
    mm_cache_omp1(a, b, product, i0, i1, j0, j1, mk, k1, split_min);
  } else {
    // Base case: direct triple loop. Only the outer i loop is shared among
    // threads; each i iteration updates only product(i, j) for its own i.
#pragma omp parallel for
    for (int i = i0; i < i1; i++)
      for (int j = j0; j < j1; j++)
        for (int k = k0; k < k1; k++)
          product(i,j) += a(i,k) * b(k,j);
  }
}
