/** Orion Sky Lawlor, olawlor@acm.org, 2005/11/30
 Demonstrates use of a bunch of Intel SSE instructions via xmmintrin.h.
*/
#include <stdio.h>
#include <xmmintrin.h>

/* Print a label, then the four float lanes of an SSE register in memory
   order (lane 0 first), all on one line.
     what: label printed before the colon
     v:    register whose lanes are printed with %f */
void print_mmx(const char *what,__m128 v) {
	float lanes[4];
	int i;
	/* Unaligned store: the local array is not guaranteed to be 16-byte aligned. */
	_mm_storeu_ps(&lanes[0],v);
	printf("%s:",what);
	for (i=0;i<4;i++) {
		printf(" %f",lanes[i]);
	}
	printf("\n");
}

/* Walk through a set of SSE single-precision intrinsics, printing each
   result via print_mmx.  Always returns 0. */
int main() {
	/* Source buffer for the unaligned-load demos; the three slots without an
	   explicit initializer are zero-filled. */
	float ramp[8]={0.0,1.0,2.0,3.0,4.0};
	/* 1/225, used to show the precision of the reciprocal approximations. */
	float inv225=1.0/(15.0*15.0);
	__m128 lo=_mm_setr_ps(1.1,2.2,3.3,4.4);
	__m128 hi=_mm_setr_ps(21.1,22.2,23.3,24.4);
	__m128 splat;

	print_mmx("A",lo);
	print_mmx("B",hi);

	/* Lane-wise arithmetic and comparisons. */
	print_mmx("\nadd",_mm_add_ps(lo,hi));
	print_mmx("sub",_mm_sub_ps(lo,hi));
	print_mmx("mul",_mm_mul_ps(lo,hi));
	print_mmx("div",_mm_div_ps(lo,hi));
	print_mmx("min",_mm_min_ps(lo,hi));
	print_mmx("max",_mm_max_ps(lo,hi));
	/* The _ss form only operates on lane 0; lanes 1..3 come from the first operand. */
	print_mmx("max_ss",_mm_max_ss(lo,hi));

	/* With selector (1,0,0,0) the result is first[0], first[0], second[0], second[1]. */
	print_mmx("\nshuffle",
		_mm_shuffle_ps(lo,hi,_MM_SHUFFLE(1,0,0,0)));

	/* Exact square root, then the fast approximate reciprocal forms. */
	print_mmx("sqrt a",_mm_sqrt_ps(lo));
	print_mmx("rcp a",_mm_rcp_ps(lo));
	print_mmx("rsqrt a",_mm_rsqrt_ps(lo));

	/* Broadcast 1/225 to all four lanes; the approximations land near, but
	   not exactly on, 225 and 15. */
	splat=_mm_load1_ps(&inv225);
	print_mmx("rcp 1/225",_mm_rcp_ps(splat));
	print_mmx("rsqrt 1/(15*15)",_mm_rsqrt_ps(splat));

	/* Unaligned loads work from any float address. */
	print_mmx("\nloadu[0]",_mm_loadu_ps(&ramp[0]));
	print_mmx("loadu[1]",_mm_loadu_ps(&ramp[1]));

	/* The aligned form, _mm_load_ps, is deliberately not exercised here.
	   The original author observed that loading from element 0 segfaults on
	   Windows, where the stack isn't aligned, and that loading from
	   element 1 always segfaults. */
	return 0;
}
/*<@>
<@> ******** Program output: ********
<@> A: 1.100000 2.200000 3.300000 4.400000
<@> B: 21.100000 22.200001 23.299999 24.400000
<@> 
<@> add: 22.200001 24.400002 26.599998 28.799999
<@> sub: -20.000000 -20.000000 -20.000000 -20.000000
<@> mul: 23.210001 48.840004 76.889999 107.360001
<@> div: 0.052133 0.099099 0.141631 0.180328
<@> min: 1.100000 2.200000 3.300000 4.400000
<@> max: 21.100000 22.200001 23.299999 24.400000
<@> max_ss: 21.100000 2.200000 3.300000 4.400000
<@> 
<@> shuffle: 1.100000 1.100000 21.100000 22.200001
<@> sqrt a: 1.048809 1.483240 1.816590 2.097618
<@> rcp a: 0.909180 0.454590 0.302979 0.227295
<@> rsqrt a: 0.953369 0.674194 0.550537 0.476685
<@> rcp 1/225: 224.968750 224.968750 224.968750 224.968750
<@> rsqrt 1/(15*15): 14.998047 14.998047 14.998047 14.998047
<@> 
<@> loadu[0]: 0.000000 1.000000 2.000000 3.000000
<@> loadu[1]: 1.000000 2.000000 3.000000 4.000000
<@> */
