// LINES demo for @party 2018
// concept & code by Mike Erwin
// Apr-Jun 2018

// $ g++-5 -std=c++11 -o lines6 -mcpu=G3 -Ofast -ffast-math -falign-loops=16 lines6.cpp Vec3.cpp filebits.o randf.o -framework OpenGL -framework QuartzCore -framework ApplicationServices -framework GLUT
// $ g++-5 -std=c++11 -o lines6 -mcpu=G3 -Os lines6.cpp  filebits.o  -framework OpenGL -framework GLUT

// I might have a shot at 64K demo! This code compiles down to 23696 bytes. Could reduce that further:
// - support exactly one 3D model
// - eliminate unused data fields
// - compress used data
// - specialize for spherical models (planet surface locations + elevation)

#include "filebits.h"
#include "Vec3-custom.h"
#include <vector>
#include <chrono>

#define USE_CORE_GFX 0

extern "C" {
#include <unistd.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>

#include <OpenGL/glu.h>
#if USE_CORE_GFX
 #include "coregfx/coregfx.h"
#else
 #include <GLUT/glut.h>
#endif
}

// Cache line size in bytes. load_Mesh() starts every section of a mesh file on
// a multiple of this value, and Mesh::sort_edges() uses it as the stride
// between successive __dcbz calls.
constexpr u32 cache_line_sz = 32; // size of PowerPC G3 cacheline

// Index of a position or normal within a Mesh. load_Mesh() asserts that both
// counts fit in 16 bits, and Mesh::draw() submits these as GL_UNSIGNED_SHORT.
using idx = u16;

// A line segment described by the indices of its two end points.
// Kept as a plain aggregate: it is brace-initialized elsewhere in this file
// and an array of Edge is handed to GL as consecutive pairs of idx values.
//
//   p1 *----------* p2
struct Edge
{
	idx p1; // index of the first end point
	idx p2; // index of the second end point
};

// An Edge that also records the two normals on either side of it, which is
// what Mesh::facing() needs to classify the edge relative to the eye.
//
//           n2
//   p1 *----------* p2
//           n1
//
// Member order (p1, p2 from Edge, then n1, n2) must not change: load_Mesh()
// reinterprets raw file bytes as an array of RichEdge.
struct RichEdge : Edge
{
	idx n1; // index of the normal on one side of the edge
	idx n2; // index of the normal on the other side

	RichEdge(idx ip1, idx ip2, idx in1, idx in2)
		: Edge { ip1, ip2 }, n1(in1), n2(in2)
	{
	}
};

// Lines are the focus of this demo, but surfaces matter as well: rendering
// them into the depth buffer is what would make hidden-line rendering possible.
// A triangle is stored as the indices of its three corner points.
struct Tri
{
	idx p1;
	idx p2;
	idx p3;
//	idx n;
};

// World-space up direction (+Z); draw() passes it to gluLookAt.
const Vec3 up { 0.0f, 0.0f, 1.0f };
// Camera position. draw() rewrites it every frame before sorting edges, and
// Mesh::facing() reads it to decide which way each edge faces.
Vec3 eye { 0.0f, 0.0f, 0.0f };

// How an edge sits relative to the eye, as decided by Mesh::facing().
// The numeric values are significant: they index Mesh::E[] and the color
// table in Mesh::draw(), and they fix the order the sets are drawn in.
enum class EdgeFacing
{
	Front = 0,      // both adjacent normals face the eye
	Back = 1,       // neither adjacent normal faces the eye
	Silhouette = 2  // exactly one adjacent normal faces the eye
};

struct Mesh
{
	File* file { nullptr };

	u32 P_ct;
	u32 N_ct;
	u32 E_ct;
	u32 BE_ct;

	const Vec3* P; // positions
	const Vec3* N; // normals
	const RichEdge* RE; // edges
	const Edge* BE; // border edges

	// edges, ready to draw
	struct EdgeSet
	{
		Edge* edges;
		u32 ct;
	};

	EdgeSet E[3];
	
	Edge* combined_idx_buffer;
	Edge* scratch_idx_buffer;
	
	
	GLuint VAO;
	GLuint VBO[2]; // VBO[0] = vertex buffer, VBO[1] = index buffer
	// could double-buffer the INDEX buffer...
	
	// Remember that VAO doesn't remember bound BUFFERS. Just the vertex array state:
	// Which attributes are enabled, and pointers for each (which might be in a buffer).
	
	void prep()
	{
		glGenVertexArraysAPPLE(1, &VAO);
		glBindVertexArrayAPPLE(VAO);
		glGenBuffersARB(2, VBO);

		glBindBufferARB(GL_ARRAY_BUFFER, VBO[0]);
		glBufferDataARB(GL_ARRAY_BUFFER, 3 * sizeof(f32) * P_ct, P, GL_STATIC_DRAW);
		glEnableClientState(GL_VERTEX_ARRAY);
		glVertexPointer(3, GL_FLOAT, 0, 0);

		glBindBufferARB(GL_ARRAY_BUFFER, 0);

//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, VBO[1]);
//		glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER, 2 * sizeof(idx) * E_ct, NULL, GL_DYNAMIC_DRAW);
//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);

		glBindVertexArrayAPPLE(0);
		
		combined_idx_buffer = (Edge*) malloc((E_ct + BE_ct) * sizeof(Edge));
		scratch_idx_buffer = (Edge*) malloc(E_ct * sizeof(Edge));
		
		memcpy(combined_idx_buffer + E_ct, BE, BE_ct * sizeof(Edge));
	}
	
	void draw() const
	{		
		glBindVertexArrayAPPLE(VAO);

//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, VBO[1]);
		
		u32 offset = 0;
		for (u32 i = 0; i < 3; ++i)
		{
			const f32 colors[3][3] = {
				{ 0.2f, 0.2f, 0.8f },
				{ 0.0f, 0.0f, 0.25f },
				{ 1.0f, 1.0f, 1.0f }
			};

			const EdgeSet& e = E[i];

			if (i == 0)
				glLineWidth(1); // same for Front & Back
			else if (i == 2)
				glLineWidth(2); // thicker Silhouette

			if (e.ct)
			{
				glColor3fv(colors[i]);
//				glDrawRangeElementsEXT(GL_LINES, 0, P_ct - 1, e.ct * 2, GL_UNSIGNED_SHORT, (const void*) offset);
				glDrawRangeElementsEXT(GL_LINES, 0, P_ct - 1, e.ct * 2, GL_UNSIGNED_SHORT, (const byte*)(combined_idx_buffer) + offset);
				u32 sz = e.ct * 2 * sizeof(idx);
				offset += sz;
			}
		}

//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
		glBindVertexArrayAPPLE(0);
	}

	EdgeFacing facing(const RichEdge& e) const
	{
		// PRO TIP - transform the eye, use original normals!
		// work in world coords
		const Vec3 eye_dir = eye.dirTo(P[e.p1]);
		// double-check with p2 ?
		bool f1 = dot(N[e.n1], eye_dir) < 0;
		bool f2 = dot(N[e.n2], eye_dir) < 0;

		if (f1 and f2)
			return EdgeFacing::Front;
		else if (f1 or f2)
			return EdgeFacing::Silhouette;
		else
			return EdgeFacing::Back;
	}
	
	void sort_edges()
	{
		for (EdgeSet& set : E)
			set.ct = 0;
		
		Edge* front = combined_idx_buffer;
		Edge* back = scratch_idx_buffer;
		Edge* silhouette = combined_idx_buffer + E_ct;
		
		Edge* f = front;
		Edge* b = back;
		Edge* s = silhouette;

#if 1
		// assumes BE_ct < E_ct / 2
		const u32 cache_line_half_ct = E_ct * sizeof(Edge) / cache_line_sz / 2;
		for (u32 i = 0; i < cache_line_half_ct; ++i)
		{
			__dcbz((byte*) combined_idx_buffer, i * cache_line_sz);
//			__dcbz((byte*) combined_idx_buffer, 2 * i * cache_line_sz);
			__dcbz((byte*) scratch_idx_buffer, i * cache_line_sz);
		}
#endif

		for (u32 i = 0; i < E_ct; ++i)
		{
			const RichEdge& e = RE[i];
#if 0
			const EdgeFacing f = facing(e);
			EdgeSet& set = E[(int)f];
			set.edges[set.ct++] = { e.p1, e.p2 }; // todo: efficient copy constructor
#else
			switch (facing(e))
			{
				case EdgeFacing::Front:
					*(f++) = { e.p1, e.p2 };
					break;
				case EdgeFacing::Back:
					*(b++) = { e.p1, e.p2 };
					break;
				case EdgeFacing::Silhouette:
					*(--s) = { e.p1, e.p2 };
					break;
			}
#endif
		}

//		E[(int)EdgeFacing::Front].edges = front;
		E[(int)EdgeFacing::Front].ct = f - front;
		
//		E[(int)EdgeFacing::Back].edges = back;
		const u32 back_ct = b - back;
		E[(int)EdgeFacing::Back].ct = back_ct;
		
//		E[(int)EdgeFacing::Silhouette].edges = s;
		E[(int)EdgeFacing::Silhouette].ct = silhouette - s + BE_ct;
		
		// copy scratch index buffer into shared index buffer
		memcpy(f, back, back_ct * 2 * sizeof(idx));
//		bzero(f, back_ct * 2 * sizeof(idx));

//		printf("front %d, back %d, silhouette %d\n",
//				 E[(int)EdgeFacing::Front].ct,
//				 E[(int)EdgeFacing::Back].ct,
//				 E[(int)EdgeFacing::Silhouette].ct);
		
		// transfer index data to GPU
//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, VBO[1]);
#if 1
//		glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER, 2 * sizeof(idx) * (E_ct + BE_ct), combined_idx_buffer, GL_DYNAMIC_DRAW);
#else
		byte* buffer = (byte*) glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY);

		u32 offset = 0;
		for (const EdgeSet& e : E)
		{
			if (e.ct)
			{
				u32 sz = e.ct * 2 * sizeof(idx);
				memcpy(buffer + offset, e.edges, sz);
				offset += sz;
			}
		}
		
		glUnmapBufferARB(GL_ELEMENT_ARRAY_BUFFER);
#endif
//		glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER, 0);
	}
};


// Rounds offset up, in place, to the next multiple of cache_line_sz.
// An offset that already sits on a boundary is left untouched.
void align(u32& offset)
{
	const u32 remainder = offset % cache_line_sz;
	if (remainder == 0)
		return;

	offset += cache_line_sz - remainder;
}

// Opens a mesh file and returns a Mesh whose P/N/RE/BE pointers refer
// directly into the file's data.
//
// File layout as read here (each section starts on a cache_line_sz boundary):
//   header : four u32 counts at byte offsets 0, 4, 8, 12
//            (positions, normals, edges, border edges)
//   P_ct  x Vec3
//   N_ct  x Vec3
//   E_ct  x RichEdge
//   BE_ct x Edge
//
// The File is allocated here and never deleted; the returned Mesh points into
// its data. GL objects and index buffers are not created here - that is
// Mesh::prep().
//
// Bad input is reported through assert(), like the other checks in this
// function. The size checks assume File::size() is the number of bytes
// reachable through File::data().
Mesh load_Mesh(const char* fname)
{
	Mesh m;
	
	m.file = new File(fname);

	File& f = *m.file;

	const u32 file_sz = f.size();
	printf("%s is %u bytes\n", fname, file_sz);

	// a file too short to hold the header cannot be a mesh
	constexpr u32 header_sz = 4 * sizeof(u32);
	assert(file_sz >= header_sz);
	
	m.P_ct = f.get_u32(0);
	m.N_ct = f.get_u32(4);
	m.E_ct = f.get_u32(8);
	m.BE_ct = f.get_u32(12);

	printf("%u verts\n", m.P_ct);
	printf("%u triangles\n", m.N_ct);
	printf("%u + %u edges\n", m.E_ct, m.BE_ct);
	
	// make sure index values fit in u16
	assert(m.P_ct <= 0xFFFF);
	assert(m.N_ct <= 0xFFFF);
	
	const byte* data = f.data();
	
	// size_t rather than u32 so the check also compiles where pointers are wider than 32 bits
	assert(reinterpret_cast<size_t>(data) % cache_line_sz == 0);

	u32 offset = header_sz;

	// Claims the next section: moves to the following cache-line boundary,
	// checks that ct elements of elem_sz bytes really fit in the file, and
	// returns where the section starts. The division keeps the check from
	// overflowing when a count is absurdly large.
	auto take_section = [&](u32 ct, size_t elem_sz) -> const byte*
	{
		align(offset);
		assert(ct == 0 or (offset <= file_sz and ct <= (file_sz - offset) / elem_sz));

		const byte* section = data + offset;
		offset += ct * elem_sz;
		return section;
	};
	
	m.P = (const Vec3*) take_section(m.P_ct, sizeof(Vec3));
	m.N = (const Vec3*) take_section(m.N_ct, sizeof(Vec3));
	m.RE = (const RichEdge*) take_section(m.E_ct, sizeof(RichEdge));
	m.BE = (const Edge*) take_section(m.BE_ct, sizeof(Edge));
	
	// NOTE(review): these per-set arrays are only read by the disabled code
	// paths in Mesh::sort_edges(); they could go if those paths are dropped.
	for (auto& set : m.E)
	{
		set.ct = 0;
		set.edges = new Edge[m.E_ct];
	}
	
	return m;
}

std::vector<Mesh> meshes;

#if USE_CORE_GFX
void draw(float dt)
{
	float angle = dt;
#else
const float dt = 1.0f / 60.0f;
void draw()
{
	static f32 angle = 0.0f;
	angle += 0.5f * dt;
#endif
	eye.x = 2.0f * cosf(angle);
	eye.y = 2.0f * sinf(angle);
	eye.z = 1.0f;

//	static auto prev_time = glutGet(GLUT_ELAPSED_TIME);
//	const auto curr_time = glutGet(GLUT_ELAPSED_TIME);
//	
//	const auto dt = curr_time - prev_time;
//	printf("dt = %d\n", dt);
//	prev_time = curr_time;
	
	for (Mesh& m : meshes)
		m.sort_edges();

	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
	
	glLoadIdentity();
	gluLookAt(eye.x, eye.y, eye.z, 0.0f, 0.0f, 0.0f, up.x, up.y, up.z);
	
	for (const Mesh& m : meshes)
		m.draw();

#if USE_CORE_GFX
	flushOpenGL();
#else
	glutSwapBuffers();
#endif
}

#if !USE_CORE_GFX
// Asks GLUT to redraw the current window.
// NOTE(review): nothing in the visible code registers this function - main()
// hands glutPostRedisplay straight to glutIdleFunc - and its no-argument
// signature would not fit glutTimerFunc, whose callback takes an int.
// It looks unused; confirm before removing.
void timerFunc()
{
	glutPostRedisplay();
}
#endif

// Window-resize handler: makes the viewport cover the whole w x h window and
// rebuilds a 90-degree perspective projection with a matching aspect ratio.
// Leaves GL in modelview matrix mode, which draw() relies on.
void reshape(int w, int h)
{
	glViewport(0, 0, w, h);

	// A window can be resized to zero width or height. Dividing the raw
	// values would then hand gluPerspective an aspect ratio of inf, NaN or 0,
	// so clamp both to at least one pixel for the ratio only.
	const float aspect_w = (w > 0) ? (float)w : 1.0f;
	const float aspect_h = (h > 0) ? (float)h : 1.0f;

	glMatrixMode(GL_PROJECTION);
	glLoadIdentity();
	gluPerspective(90.0, aspect_w / aspect_h, 0.01, 1000.0);

	glMatrixMode(GL_MODELVIEW);
}

// Entry point. Every command-line argument is treated as a mesh file to load;
// the meshes are then drawn either through coregfx (for ten seconds) or
// through a GLUT window, depending on USE_CORE_GFX.
int main(int argc, char* argv[])
{
	// load all meshes first; this needs no GL context
	meshes.reserve(argc - 1);
	for (int arg = 1; arg < argc; ++arg)
		meshes.push_back(load_Mesh(argv[arg]));

	// bring up OpenGL
#if USE_CORE_GFX
	startOpenGL();
#else
	glutInit(&argc, argv);
	glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
	glutCreateWindow("Lines");
#endif

	// global render state
	glClearColor(0, 0, 0.15, 1.0);
	glEnable(GL_DEPTH_TEST);
	glShadeModel(GL_FLAT);

	// per-mesh GL objects and index buffers (issues GL calls, so it comes after the setup above)
	for (size_t i = 0; i < meshes.size(); ++i)
		meshes[i].prep();

#if USE_CORE_GFX
	reshape(1024, 768);
	setDisplayCallback(draw);
	startAnimation();

	// let the animation run for a fixed time, then shut down
	sleep(10);

	stopAnimation();
	stopOpenGL();
#else
	glutReshapeFunc(reshape);
	glutDisplayFunc(draw);
	// redraw continuously whenever GLUT is idle
	glutIdleFunc(glutPostRedisplay);
//	glutFullScreen();

	glutMainLoop();
#endif
}
