#define __CL_ENABLE_EXCEPTIONS

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <stdexcept>
#include <string>
#include <vector>

#include <omp.h>
#include <CL/cl.hpp>
#include "TGA.h"

// Reads the whole text of an OpenCL kernel source file into a string.
//
// The file is looked up first as `filename` relative to the current
// directory, then as "../" + filename, so the executable can be run from a
// build subdirectory (e.g. Release) of the source tree.
//
// Throws std::runtime_error("File not found") when neither location can be
// opened. std::runtime_error derives from std::exception, so handlers that
// catch std::exception keep working; the standard std::exception itself has
// no constructor taking a message (that is an MSVC-only extension).
std::string LoadKernelSource(std::string filename) {
	std::ifstream file(filename);
	if (!file.is_open()) {
		// Reset the failure state left by the first attempt before retrying.
		file.clear();
		file.open("../" + filename);
		if (!file.is_open())
			throw std::runtime_error("File not found");
	}

	// Slurp the stream from the current position to EOF.
	return std::string((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
}


// Packs three size_t values, in the order (a, b, c), into a cl::size_t<3>
// as taken by the image origin/region arguments of the OpenCL C++ wrapper.
cl::size_t<3> range3(size_t a, size_t b, size_t c) {
	const size_t components[3] = { a, b, c };

	cl::size_t<3> packed;
	for (int i = 0; i < 3; ++i)
		packed.push_back(components[i]);
	return packed;
}

int main(int argc, char *argv[]) {
	size_t width = 512;
	if (argc>=2)
		width = atoi(argv[1]);
	size_t height = width;

	cl::Program program;
	std::vector<cl::Device> devices;
	try {
		// get a platform and device
		std::vector<cl::Platform> platforms;
		cl::Platform::get(&platforms);
		if (platforms.size() == 0) {
			std::cout << "OpenCL not available" << std::endl;
			return 1;
		}

		// create context and queue
		cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[0](), 0 };
		cl::Context context = cl::Context(CL_DEVICE_TYPE_GPU, cprops);

		devices = context.getInfo<CL_CONTEXT_DEVICES>();
		if (devices.size() == 0) {
			std::cout << "GPU device not available" << std::endl;
			return 1;
		}

		cl::CommandQueue queue = cl::CommandQueue(context, devices[0]);

		// compile source, get kernel entry point
		std::string source = LoadKernelSource("Mandelbox.cl");
		cl::Program::Sources sources(1, std::make_pair(source.c_str(), source.size()));
		program = cl::Program(context, sources);
		program.build(devices);
		cl::Kernel kernel = cl::Kernel(program, "Mandelbox");

		// create data buffer
		cl::Image2D img = cl::Image2D(context, CL_MEM_WRITE_ONLY, cl::ImageFormat(CL_RGBA, CL_UNSIGNED_INT8), width, height);

		// start kernel, let OpenCL pick local work size (blank NDRange)
		float scale = 3.0f;
		kernel.setArg(0, scale);
		kernel.setArg(1, img);
		double startTime = omp_get_wtime();
		queue.enqueueNDRangeKernel(kernel, cl::NDRange(), cl::NDRange(width, height), cl::NDRange());
		queue.finish();
		double endTime = omp_get_wtime()-startTime;

		size_t image_row_pitch, image_slice_pitch;
		unsigned char *ptr = (unsigned char *)queue.enqueueMapImage(img, CL_TRUE, CL_MAP_READ, range3(0,0,0), 
			range3(width,height,1), &image_row_pitch, &image_slice_pitch);

		WriteTGA("test.tga", width, height, ptr);
		std::cout << "Calculation time: " << endTime << std::endl;
	} catch (cl::Error &err) {
		std::cout << "Error: " << err.what() << "(" << err.err() << ")" << std:: endl;
		// if it was a compilation error
		if (err.err() == CL_BUILD_PROGRAM_FAILURE)
			std::cout << program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]) << std::endl;
	} catch (std::exception &e) {
		std::cout << "Error: " << e.what() << std::endl;
	}

	return 0;
}

