/************************************************************************
*
*  FREESCALE SEMICONDUCTORS INC.
*  ALL RIGHTS RESERVED
*  (c) Copyright 2003 Freescale semiconductors, Inc.
*
*************************************************************************
*
*  FILE NAME  : dot_prod.h
*
*  PURPOSE    : DOT_PROD_{S,U}L macro definitions for the CW compiler
*
*  AUTHOR     : Igor Drozdinsky, Dmitry Karpenko
*
***********************************************************************/

#ifndef __DOT_PROD_H
#define __DOT_PROD_H



/************************************************************************
* NAME: DOT_PROD_UL
*
* DESCRIPTION: 	Statement-expression macro that evaluates to the dot
*		product of arr1 and arr2, i.e. the sum over i in [0, size)
*		of arr1[i] * arr2[i], accumulated in eMAC accumulator ACC0.
*
* PARAMETERS:	arr1, arr2 - pointers to the first element of each array
*		size       - number of elements to process (stored in an int)
*
* RETURNS:	unsigned long holding the value moved out of ACC0.
*
* NOTE: 	The elements type is assumed to be unsigned long.
*		Each argument is evaluated exactly once.
*		d0-d6/a0-a5 and MACSR are saved on entry and restored on
*		exit; ACC0-ACC3 are zeroed and are not restored.
*		size is shifted with lsr.l (a logical shift), so a negative
*		size is not treated as an empty array; pass size >= 0.
*		All branches use position-relative targets (*+N / *-N), so
*		the expansion defines no label name. The offsets must be
*		recomputed if any instruction between a branch and its
*		target is added, removed or resized.
*		NOTE(review): '%' is written where an immediate operand is
*		expected (e.g. %0x40) - presumably because '#' cannot be
*		used in a function-like macro body; confirm with the build.
*************************************************************************/

#define DOT_PROD_UL(arr1, arr2, size) ({				\
				/* Private copies of the arguments. The dot_prod_*_ names */\
				/* avoid capturing caller identifiers such as a, b or s.  */\
				unsigned long *dot_prod_arr1_ = (arr1);	\
				unsigned long *dot_prod_arr2_ = (arr2);	\
				int dot_prod_size_ = (size);		\
				unsigned long dot_prod_res_ = 0;	\
				asm{					\
				/* Save d0-d6/a0-a5 on the stack and the caller MACSR in d6 */\
				lea	-60(a7), a7			;\
				movem.l	d0-d6/a0-a5, (a7)		;\
				move.l	MACSR, d6			;\
				/* Initialize MACSR register for unsigned operations */\
				move.l	%0x40, d0			;\
				move.l	d0, MACSR			;\
				/* Load function variables: a0 = arr1, a1 = arr2, d0 = size */\
				move.l	dot_prod_arr1_, a0		;\
				move.l	dot_prod_arr2_, a1		;\
				move.l	dot_prod_size_, d0		;\
				/* Initialize eMAC accumulators (only ACC0 is used below) */ \
				move.l	%0, ACC0			;\
				move.l	%0, ACC1			;\
				move.l	%0, ACC2			;\
				move.l	%0, ACC3			;\
				/* d0 = size / 4 = number of 4-element groups; */ \
				/* with no full group, skip to the remainder handling */ \
				lsr.l	%2, d0				;\
				beq	*+66				;\
				/* Preload the first group of arr1 into d1-d4 */  \
				movem.l	(a0), d1-d4			;\
				lea	16(a0),a0			;\
				/* With exactly one group, skip the loop and go to the final group */\
				subq.l	%1, d0				;\
				beq	*+30				;\
				/* Main loop: multiply-accumulate one group of four while */\
				/* loading the next four arr1 elements into d1-d4 */\
				movem.l	(a1), a2-a5			;\
				lea	16(a1),a1			;\
				macl.l	d1, a2, (a0)+, d1, ACC0		;\
				macl.l	d2, a3, (a0)+, d2, ACC0		;\
				macl.l	d3, a4, (a0)+, d3, ACC0		;\
				macl.l	d4, a5, (a0)+, d4, ACC0		;\
				subq.l	%1, d0				;\
				/* Back to the movem.l at the top of the main loop */\
				bne	*-26				;\
				/* Perform the last 4 by 4 operation as Dot Product */\
				/* (no further arr1 preload) */\
				movem.l	(a1), a2-a5			;\
				lea	16(a1),a1			;\
				mac.l	d1, a2, ACC0			;\
				mac.l	d2, a3, ACC0			;\
				mac.l	d3, a4, ACC0			;\
				mac.l	d4, a5, ACC0			;\
				/* d0 = size & 3 = number of elements left over */\
				move.l	dot_prod_size_, d0		;\
				andi.l	%3, d0				;\
				beq	*+14				;\
				/* Remainder loop: one element per iteration */\
				move.l	(a0)+, d1			;\
				move.l	(a1)+, d2			;\
				mac.l	d1, d2, ACC0			;\
				subq.l	%1, d0				;\
				bne	*-10				;\
				/* Store the result */ \
				move.l	ACC0, d0			;\
				move.l	d0, dot_prod_res_		;\
				/* Restore MACSR and the saved registers */  \
				move.l	d6, MACSR			;\
				movem.l	(a7), d0-d6/a0-a5		;\
				lea	60(a7), a7			;\
				};							\
				dot_prod_res_; })
				
/************************************************************************
* NAME: DOT_PROD_SL
*
* DESCRIPTION: 	Statement-expression macro that evaluates to the dot
*		product of arr1 and arr2, i.e. the sum over i in [0, size)
*		of arr1[i] * arr2[i], accumulated in eMAC accumulator ACC0.
*
* PARAMETERS:	arr1, arr2 - pointers to the first element of each array
*		size       - number of elements to process (stored in an int)
*
* RETURNS:	signed long holding the value moved out of ACC0.
*
* NOTE: 	The elements type is assumed to be signed long.
*		Each argument is evaluated exactly once.
*		d0-d6/a0-a5 and MACSR are saved on entry and restored on
*		exit; ACC0-ACC3 are zeroed and are not restored.
*		size is shifted with lsr.l (a logical shift), so a negative
*		size is not treated as an empty array; pass size >= 0.
*		All branches use position-relative targets (*+N / *-N), so
*		the expansion defines no label name. The offsets must be
*		recomputed if any instruction between a branch and its
*		target is added, removed or resized.
*		NOTE(review): '%' is written where an immediate operand is
*		expected (e.g. %0x00) - presumably because '#' cannot be
*		used in a function-like macro body; confirm with the build.
*************************************************************************/

#define DOT_PROD_SL(arr1, arr2, size) ({				\
				/* Private copies of the arguments. The dot_prod_*_ names */\
				/* avoid capturing caller identifiers such as a, b or s.  */\
				signed long *dot_prod_arr1_ = (arr1);	\
				signed long *dot_prod_arr2_ = (arr2);	\
				int dot_prod_size_ = (size);		\
				signed long dot_prod_res_ = 0;		\
				asm{					\
				/* Save d0-d6/a0-a5 on the stack and the caller MACSR in d6 */\
				lea	-60(a7), a7			;\
				movem.l	d0-d6/a0-a5, (a7)		;\
				move.l	MACSR, d6			;\
				/* Initialize MACSR register for signed operations */\
				move.l	%0x00, d0			;\
				move.l	d0, MACSR			;\
				/* Load function variables: a0 = arr1, a1 = arr2, d0 = size */\
				move.l	dot_prod_arr1_, a0		;\
				move.l	dot_prod_arr2_, a1		;\
				move.l	dot_prod_size_, d0		;\
				/* Initialize eMAC accumulators (only ACC0 is used below) */ \
				move.l	%0, ACC0			;\
				move.l	%0, ACC1			;\
				move.l	%0, ACC2			;\
				move.l	%0, ACC3			;\
				/* d0 = size / 4 = number of 4-element groups; */ \
				/* with no full group, skip to the remainder handling */ \
				lsr.l	%2, d0				;\
				beq	*+66				;\
				/* Preload the first group of arr1 into d1-d4 */  \
				movem.l	(a0), d1-d4			;\
				lea	16(a0),a0			;\
				/* With exactly one group, skip the loop and go to the final group */\
				subq.l	%1, d0				;\
				beq	*+30				;\
				/* Main loop: multiply-accumulate one group of four while */\
				/* loading the next four arr1 elements into d1-d4 */\
				movem.l	(a1), a2-a5			;\
				lea	16(a1),a1			;\
				macl.l	d1, a2, (a0)+, d1, ACC0		;\
				macl.l	d2, a3, (a0)+, d2, ACC0		;\
				macl.l	d3, a4, (a0)+, d3, ACC0		;\
				macl.l	d4, a5, (a0)+, d4, ACC0		;\
				subq.l	%1, d0				;\
				/* Back to the movem.l at the top of the main loop */\
				bne	*-26				;\
				/* Perform the last 4 by 4 operation as Dot Product */\
				/* (no further arr1 preload) */\
				movem.l	(a1), a2-a5			;\
				lea	16(a1),a1			;\
				mac.l	d1, a2, ACC0			;\
				mac.l	d2, a3, ACC0			;\
				mac.l	d3, a4, ACC0			;\
				mac.l	d4, a5, ACC0			;\
				/* d0 = size & 3 = number of elements left over */\
				move.l	dot_prod_size_, d0		;\
				andi.l	%3, d0				;\
				beq	*+14				;\
				/* Remainder loop: one element per iteration */\
				move.l	(a0)+, d1			;\
				move.l	(a1)+, d2			;\
				mac.l	d1, d2, ACC0			;\
				subq.l	%1, d0				;\
				bne	*-10				;\
				/* Store the result */ \
				move.l	ACC0, d0			;\
				move.l	d0, dot_prod_res_		;\
				/* Restore MACSR and the saved registers */  \
				move.l	d6, MACSR			;\
				movem.l	(a7), d0-d6/a0-a5		;\
				lea	60(a7), a7			;\
				};							\
				dot_prod_res_; })
				
				
#endif //__DOT_PROD_H
