#ifndef VECTOR_H
#define VECTOR_H

#include <math.h>
#include <string.h>

// Sets the structure packing alignment to 16 bytes for the declarations that follow.
// NOTE(review): no matching restore of the previous packing is visible in this
// header, so the setting presumably carries over into whatever includes it - confirm.
#pragma pack(16)

// When defined, CVector's +, +=, - and % operators use their inline SSE assembly
// implementations; without it they fall back to plain scalar arithmetic.
#define SIMD

// Forward declaration so CVector can name Matrix as a friend.
class Matrix;


// Shorthand for declaring a CVector with 16-byte alignment (MSVC __declspec syntax).
// The SIMD code paths store results with movaps, which requires an aligned destination.
#define Vector __declspec(align(16)) CVector

//--------------------------------------------
class CVector {
//--------------------------------------------
	// Four-component float vector (x, y, z, w).
	//
	// Dot product, cross product, length, normalisation and scaling operate on
	// x, y, z only. Component-wise + , += and - operate on all four components.
	//
	// When SIMD is defined, + , += , - and % are implemented with x86 inline SSE
	// assembly (MSVC __asm syntax); otherwise equivalent scalar code is used.
	// Both paths are written to produce the same components, including w.
	float x, y, z, w;
 public:
	// Builds the zero vector. Note that w is 0 here, whereas the component
	// constructor below defaults w to 1.
	CVector( ) : x( 0.0f ), y( 0.0f ), z( 0.0f ), w( 0.0f ) { }

	// Builds a vector from its components; w defaults to 1.
	CVector( float x0, float y0, float z0, float w0 = 1.0f ) : x( x0 ), y( y0 ), z( z0 ), w( w0 ) { }

	// Dot product over x, y, z (w is ignored).
	float operator*( const CVector& v ) const { return x * v.x + y * v.y + z * v.z; }

	// Component-wise sum of all four components.
	CVector operator+( const CVector& v ) const {
#ifdef SIMD
		// movaps below needs a 16-byte aligned destination.
		__declspec(align(16)) CVector result;
		__asm {
			mov		esi, this
			mov		edi, v
			movups	xmm0, [esi]
			movups	xmm1, [edi]
			addps	xmm0, xmm1
			movaps	result, xmm0
		}
		return result;
#else
		return CVector( x + v.x, y + v.y, z + v.z, w + v.w );
#endif
	}

	// Returns this vector with x, y, z scaled by f; the result's w is 1.
	CVector operator*( float f ) const {
		return CVector( x * f, y * f, z * f );
	}

	// Adds v to this vector in place, all four components.
	void operator+=( const CVector& v ) {
#ifdef SIMD
		__asm {
			mov		esi, this
			mov		edi, v
			movups	xmm0, [esi]
			movups	xmm1, [edi]
			addps	xmm0, xmm1
			movups	[esi], xmm0
		}
#else
		// w is included so the scalar path matches the SIMD path and operator+.
		x += v.x; y += v.y; z += v.z; w += v.w;
#endif
	}

	// Scales x, y, z in place; w is left untouched.
	void operator*=( float f ) { x *= f; y *= f; z *= f; }

	// Component-wise difference of all four components.
	CVector operator-( const CVector& v ) const {
#ifdef SIMD
		// movaps below needs a 16-byte aligned destination.
		__declspec(align(16)) CVector result;
		__asm {
			mov		esi, this
			mov		edi, v
			movups	xmm0, [esi]
			movups	xmm1, [edi]
			subps	xmm0, xmm1
			movaps	result, xmm0
		}
		return result;
#else
		// w is subtracted as well so the scalar path matches the SIMD path.
		return CVector( x - v.x, y - v.y, z - v.z, w - v.w );
#endif
	}

	// Returns this vector with x, y, z divided by f; the result's w is 1.
	// No check is made for f == 0.
	CVector operator/( float f ) const {
		return CVector( x / f, y / f, z / f );
	}

	// Cross product of the x, y, z parts; the result's w is 0.
	CVector operator%( const CVector& v ) const {
#ifdef SIMD
		// movaps below needs a 16-byte aligned destination.
		__declspec(align(16)) CVector result;
		__asm {
			mov		esi, this
			mov		edi, v
			movups	xmm0, [esi]
			movups	xmm1, [edi]
			movaps	xmm2, xmm0
			movaps	xmm3, xmm1

			// xmm0 = (y, z, x, w), xmm1 = (v.z, v.x, v.y, v.w)
			shufps	xmm0, xmm0, 0xc9
			shufps	xmm1, xmm1,	0xd2
			mulps	xmm0, xmm1

			// xmm2 = (z, x, y, w), xmm3 = (v.y, v.z, v.x, v.w)
			shufps	xmm2, xmm2, 0xd2
			shufps	xmm3, xmm3,	0xc9
			mulps	xmm2, xmm3

			// The w lane becomes w*v.w - w*v.w.
			subps	xmm0, xmm2
			movaps	result, xmm0
		}
		return result;
#else
		return CVector( y * v.z - z * v.y,
		                z * v.x - x * v.z,
		                x * v.y - y * v.x,
		                0.0f );
#endif
	}

	// Euclidean length of the x, y, z part.
	float Length( ) const { return static_cast<float>( sqrt( x * x + y * y + z * z ) ); }

	// Scales x, y, z to unit length in place. A vector shorter than 1e-6 is
	// replaced by (1, 0, 0) instead of dividing by a near-zero length.
	void Normalize( ) {
		const float len = Length( );
		if ( len < 0.000001f ) {
			x = 1; y = 0; z = 0;
		} else {
			x /= len; y /= len; z /= len;
		}
	}

	// Returns a normalised copy; this vector is not modified.
	CVector UnitVector( ) const {
		CVector unit = *this;
		unit.Normalize();
		return unit;
	}

	// Pointer to the first component; x, y, z, w are declared consecutively.
	float * GetArray() { return &x; }
	const float * GetArray() const { return &x; }

	// Mutable component access.
	float& X() { return x; }
	float& Y() { return y; }
	float& Z() { return z; }
	float& W() { return w; }

	// Read-only component access for const vectors.
	float X() const { return x; }
	float Y() const { return y; }
	float Z() const { return z; }
	float W() const { return w; }

	friend class Matrix;
};

// Stream-output operator for vectors. Only declared here; the definition is not
// part of this header.
// NOTE(review): `ostream` is used unqualified and no stream header is included
// above, so the including file presumably has to provide it first - confirm.
ostream& operator<<( ostream& s, Vector& v );

//--------------------------------------------
class Matrix {
//--------------------------------------------
	// 4x4 float matrix stored row-major in m[row][column].
	//
	// All arithmetic here (matrix * vector, matrix * matrix, Transpose) works on
	// the upper-left 3x3 block only; the fourth row and column are never read
	// by these operations and stay zero in the matrices they return.
 public:
	float m[4][4];

	// Builds the all-zero matrix, so every element has a defined value.
	Matrix( ) { Clear(); }

	// Builds a diagonal matrix with d1, d2, d3 on the first three diagonal
	// entries; every other element, including m[3][3], is zero.
	Matrix( float d1, float d2, float d3 ) {
		Clear();
		m[0][0] = d1; m[1][1] = d2; m[2][2] = d3;
	}

	// Sets all sixteen elements to zero.
	void Clear( ) { memset( &m[0][0], 0, sizeof( m ) ); }

	// Multiplies the upper-left 3x3 block with v's x, y, z. The result is built
	// with the three-argument CVector constructor, so its w is that
	// constructor's default.
	CVector operator*( const CVector& v ) const {
		return CVector( m[0][0] * v.x + m[0][1] * v.y + m[0][2] * v.z,
		                m[1][0] * v.x + m[1][1] * v.y + m[1][2] * v.z,
		                m[2][0] * v.x + m[2][1] * v.y + m[2][2] * v.z );
	}

	// Product of the upper-left 3x3 blocks of this matrix and mat.
	Matrix operator*( const Matrix& mat ) const {
		Matrix result;	// starts zeroed, so the += accumulation below is well defined
		for( int row = 0; row < 3; row++ )
			for( int col = 0; col < 3; col++ )
				for( int k = 0; k < 3; k++ )
					result.m[row][col] += m[row][k] * mat.m[k][col];
		return result;
	}

	// Transpose of the upper-left 3x3 block.
	Matrix Transpose( ) const {
		Matrix result;
		for( int row = 0; row < 3; row++ )
			for( int col = 0; col < 3; col++ )
				result.m[col][row] = m[row][col];
		return result;
	}

	// Pointer to the first of the sixteen contiguous elements.
	float * GetArray() { return &m[0][0]; }
	const float * GetArray() const { return &m[0][0]; }
};

// Stream-output operator for CVector; only declared here, defined elsewhere.
// NOTE(review): the Vector macro expands to an aligned CVector, so this appears
// to redeclare the same overload that is declared right after the CVector
// class - confirm whether both declarations are needed.
ostream& operator<<( ostream& s, CVector& v );

#endif