;-----------------------------------------------------------------------
; sse_add_su3_vector( su3_vector *a, su3_vector *b, su3_vector *c )
;
; Purpose: element-wise single-precision add, *c = *a + *b, over the
;          first 24 bytes (six floats) of each operand.
; Syntax:  NASM / Intel, 32-bit code.
; ABI:     stack-passed arguments (cdecl-style frame):
;              [ebp+8]  = a   (source)
;              [ebp+12] = b   (source)
;              [ebp+16] = c   (destination)
; Out:     24 bytes stored at c; no return value is produced (eax is
;          restored to the caller's value before returning).
; Clobb:   xmm0-xmm3.  eax, ebx and ecx are saved and restored; no
;          instruction here modifies EFLAGS.
; Align:   none required -- unaligned loads/stores (movups/movlps).
; Alias:   every load is issued before the first store, so c may be
;          the same object as a or b.
;
; NOTE(review): the layout "three complex floats, c[2] at byte offset
; 16" is inferred from the access pattern and the original c[0]/c[2]
; annotations -- confirm against the C definition of su3_vector.
;-----------------------------------------------------------------------

%define SU3V_C2         16              ; byte offset of c[2] (after c[0], c[1])
%define SHUF_DUP_LOW64  0x44            ; shufps selector: {s0,s1,s0,s1}

        section .text

        global  sse_add_su3_vector

sse_add_su3_vector:
        push    ebp
        mov     ebp, esp
        push    eax                     ; only ebx is callee-saved under cdecl;
        push    ebx                     ; eax/ecx are preserved too so callers
        push    ecx                     ; that relied on that keep working

        mov     eax, [ebp + 8]          ; eax = a
        mov     ebx, [ebp + 12]         ; ebx = b
        mov     ecx, [ebp + 16]         ; ecx = c

        ; Load a: 16 bytes (c[0], c[1]) + 8 bytes (c[2]).
        movups  xmm0, [eax]
        movlps  xmm1, [eax + SU3V_C2]
        ; movlps leaves the upper 64 bits of xmm1 unchanged (stale);
        ; mirror the loaded pair into them so addps works on known data.
        shufps  xmm1, xmm1, SHUF_DUP_LOW64

        ; Load b the same way.
        movups  xmm2, [ebx]
        movlps  xmm3, [ebx + SU3V_C2]
        shufps  xmm3, xmm3, SHUF_DUP_LOW64

        addps   xmm0, xmm2              ; c[0], c[1] sums
        addps   xmm1, xmm3              ; c[2] sum in the low 64 bits

        movups  [ecx], xmm0             ; store c[0], c[1]
        movlps  [ecx + SU3V_C2], xmm1   ; store c[2] (low 64 bits only)

        pop     ecx
        pop     ebx
        pop     eax
        mov     esp, ebp
        pop     ebp
        ret