comment ~
    Copyright (C) 2008 Rouslan Dimitrov

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
~

.686
.model flat
.xmm
.code

matrix_vector_multiply equ ?MatrixVectorBatchMultiply@@YIXPAM00H@Z

;-----------------------------------------------------------------------
; void __fastcall MatrixVectorBatchMultiply(float *dst, float *matrix,
;                                           float *vectors, int byte_size)
;
; Transforms a batch of 4-float vectors by one 4x4 float matrix.
; For every 16-byte vector v = (x, y, z, w) in the batch:
;     dst = x*matrix[0..3] + y*matrix[4..7] + z*matrix[8..11] + w*matrix[12..15]
; i.e. each 16-byte group of the matrix is scaled by one component of v
; (these groups are the columns if the matrix is stored column-major).
;
; ABI:   32-bit __fastcall, callee pops the two stack arguments (ret 8)
; In:    ecx     = dst      (destination array)
;        edx     = matrix   (16 floats)
;        [esp+4] = vectors  (source vector array)
;        [esp+8] = byte_size, size of the batch in bytes (signed int).
;                  One vector is processed per started 16 bytes;
;                  byte_size <= 0 processes nothing.
; Align: all three pointers must be 16-byte aligned (movaps is used for
;        every load and store).
; Clobb: eax, edx, xmm0-xmm7, flags
;
; The whole matrix is loaded into registers before the first store, and
; each vector is fully loaded before its result is stored, so dst may be
; the same pointer as matrix or the same pointer as vectors.
;-----------------------------------------------------------------------

matrix_vector_multiply proc
	cmp			dword ptr [esp+8], 0	; byte_size <= 0 -> empty batch,
	jle			$done					; touch no memory at all

	movaps		xmm4, [edx]			; matrix floats  0..3
	movaps		xmm5, [edx+16]		; matrix floats  4..7
	movaps		xmm6, [edx+32]		; matrix floats  8..11
	movaps		xmm7, [edx+48]		; matrix floats 12..15

	mov			edx, [esp+4]		; matrix pointer is dead now; edx = vectors
	xor			eax, eax			; eax = byte offset of the current vector

$column_loop:
	movaps		xmm0, [edx+eax]		; xmm0 = (x, y, z, w)
	movaps		xmm1, xmm0
	movaps		xmm2, xmm0
	movaps		xmm3, xmm0

	shufps		xmm0, xmm0, 000h	; broadcast x to all four lanes
	shufps		xmm1, xmm1, 055h	; broadcast y
	shufps		xmm2, xmm2, 0AAh	; broadcast z
	shufps		xmm3, xmm3, 0FFh	; broadcast w

	mulps		xmm0, xmm4
	mulps		xmm1, xmm5
	mulps		xmm2, xmm6
	mulps		xmm3, xmm7

	addps		xmm0, xmm1			; pairwise sums, same order as before so
	addps		xmm2, xmm3			; floating-point results stay bit-identical
	addps		xmm0, xmm2
	movaps		[ecx+eax], xmm0

	add			eax, 16
	cmp			eax, [esp+8]		; signed compare: byte_size is an int
	jl			$column_loop

$done:
	ret			8					; callee pops vectors + byte_size

matrix_vector_multiply endp

end
	
	


	
