/************************************************************************
*
*  FREESCALE SEMICONDUCTORS INC.
*  ALL RIGHTS RESERVED
*  (c) Copyright 2003 Freescale semiconductors, Inc.
*
*************************************************************************
*
*  FILE NAME  : ARR1D_MUL.h
*
*  PURPOSE    : ARR1D_MUL{2,3}_{S,U}L macro definitions for the CW compiler
*
*  AUTHOR     :  Andriy Tymkiv, Anatoly Khaynakov
*
***********************************************************************/

#ifndef __ARR1D_MUL_H
#define __ARR1D_MUL_H


/************************************************************************
* NAME: ARR1D_MUL2_SL
*
* DESCRIPTION: 	multiplies two arrays element by element, in place:
*			dest[i] = dest[i] * src[i],   i = 0..size-1
*
* PARAMETERS:	dest - array of signed long, read and overwritten
*		src  - array of signed long, read only
*		size - number of elements (converted to long, not checked;
*		       expected to be >= 0)
*
* NOTES: 	- The elements type is assumed to be signed long.
*		- Full groups of four elements go through eMAC ACC0; the
*		  remaining (size & 3) elements are multiplied with muls.l.
*		- d0-d7/a0-a5 are saved on the stack and restored; MACSR
*		  and ACC0 are overwritten and NOT restored.
*		- The four-element loop preloads the next group of src while
*		  multiplying the current one, so up to four longwords past
*		  the last full group of src are read (and discarded).
*		- Branch targets are hand-computed byte displacements
*		  (*+N / *-N); recompute them whenever an instruction is
*		  added, removed or resized.
*		- Arguments must not be named arr1d_dst, arr1d_src or
*		  arr1d_len (the macro's own locals).
*************************************************************************/

#define ARR1D_MUL2_SL(dest,src,size) do {		\
		long *arr1d_dst = (dest);		\
		long *arr1d_src = (src);		\
		long arr1d_len = (size);		\
		asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d7/a0-a5,(a7)	;\
			/* Initialize MACSR register for signed operations */\
			move.l #0,d0			;\
			move.l d0,MACSR			;\
			/* d0 = 16 = byte size of one group of four longwords */\
			moveq.l %16,d0			;\
			/* Load function variables */\
			move.l arr1d_dst,a0		;\
			move.l arr1d_src,a1		;\
			move.l arr1d_len,d1		;\
			/* Keep a copy of the count for the remainder loop */\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations */\
			asr.l %2,d1			;\
			/* No full group: skip the loop AND the a1 rewind below */\
			beq *+78			;\
			/* Initialize eMAC accumulator */\
			move.l #0,ACC0			;\
			/* Prime the pipeline: first src group, a1 now one group ahead */\
			movem.l (a1),d7/a3-a5		;\
			add.l d0,a1			;\
			/* Loop top (target of bne *-60): load four dest elements */\
			movem.l (a0),d3-d6		;\
			/* Multiply, and preload the matching src element of the next group */\
			macl.l d7,d3,(a1)+,d7,ACC0	;\
			move.l ACC0,d3			;\
			move.l #0,ACC0			;\
			macl.l a3,d4,(a1)+,a3,ACC0	;\
			move.l ACC0,d4			;\
			move.l #0,ACC0			;\
			macl.l a4,d5,(a1)+,a4,ACC0	;\
			move.l ACC0,d5			;\
			move.l #0,ACC0			;\
			macl.l a5,d6,(a1)+,a5,ACC0	;\
			move.l ACC0,d6			;\
			move.l #0,ACC0			;\
			/* Store results */\
			movem.l d3-d6,(a0)		;\
			add.l d0,a0			;\
			subq.l %1,d1			;\
			bne *-60			;\
			/* Undo the one-group lookahead of a1; only reached when the loop ran */\
			sub.l d0,a1			;\
			/* Load in d2 number of operations left */\
			and.l %3,d2			;\
			beq *+14			;\
			/* Perform last operations as MUL (target of bne *-10) */\
			move.l (a0),d3			;\
			muls.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-10			;\
			/* Restore registers */\
			movem.l (a7),d0-d7/a0-a5	;\
			lea 60(a7),a7			;\
			};				\
		} while (0)





/************************************************************************
* NAME: ARR1D_MUL2_UL
*
* DESCRIPTION: 	multiplies two arrays element by element, in place:
*			dest[i] = dest[i] * src[i],   i = 0..size-1
*
* PARAMETERS:	dest - array of unsigned long, read and overwritten
*		src  - array of unsigned long, read only
*		size - number of elements (converted to long, not checked;
*		       expected to be >= 0)
*
* NOTES: 	- The elements type is assumed to be unsigned long.
*		- Full groups of four elements go through eMAC ACC0; the
*		  remaining (size & 3) elements are multiplied with mulu.l.
*		- d0-d7/a0-a5 are saved on the stack and restored; MACSR
*		  and ACC0 are overwritten and NOT restored.
*		- The four-element loop preloads the next group of src while
*		  multiplying the current one, so up to four longwords past
*		  the last full group of src are read (and discarded).
*		- Branch targets are hand-computed byte displacements
*		  (*+N / *-N); recompute them whenever an instruction is
*		  added, removed or resized.
*		- Arguments must not be named arr1d_dst, arr1d_src or
*		  arr1d_len (the macro's own locals).
*************************************************************************/

#define ARR1D_MUL2_UL(dest,src,size) do {		\
		unsigned long *arr1d_dst = (dest);	\
		unsigned long *arr1d_src = (src);	\
		long arr1d_len = (size);		\
		asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d7/a0-a5,(a7)	;\
			/* Initialize MACSR register for unsigned operations */\
			move.l #0x40,d0			;\
			move.l d0,MACSR			;\
			/* d0 = 16 = byte size of one group of four longwords */\
			moveq.l %16,d0			;\
			/* Load function variables */\
			move.l arr1d_dst,a0		;\
			move.l arr1d_src,a1		;\
			move.l arr1d_len,d1		;\
			/* Keep a copy of the count for the remainder loop */\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations */\
			asr.l %2,d1			;\
			/* No full group: skip the loop AND the a1 rewind below */\
			beq *+78			;\
			/* Initialize eMAC accumulator */\
			move.l #0,ACC0			;\
			/* Prime the pipeline: first src group, a1 now one group ahead */\
			movem.l (a1),d7/a3-a5		;\
			add.l d0,a1			;\
			/* Loop top (target of bne *-60): load four dest elements */\
			movem.l (a0),d3-d6		;\
			/* Multiply, and preload the matching src element of the next group */\
			macl.l d7,d3,(a1)+,d7,ACC0	;\
			move.l ACC0,d3			;\
			move.l #0,ACC0			;\
			macl.l a3,d4,(a1)+,a3,ACC0	;\
			move.l ACC0,d4			;\
			move.l #0,ACC0			;\
			macl.l a4,d5,(a1)+,a4,ACC0	;\
			move.l ACC0,d5			;\
			move.l #0,ACC0			;\
			macl.l a5,d6,(a1)+,a5,ACC0	;\
			move.l ACC0,d6			;\
			move.l #0,ACC0			;\
			/* Store results */\
			movem.l d3-d6,(a0)		;\
			add.l d0,a0			;\
			subq.l %1,d1			;\
			bne *-60			;\
			/* Undo the one-group lookahead of a1; only reached when the loop ran */\
			sub.l d0,a1			;\
			/* Load in d2 number of operations left */\
			and.l %3,d2			;\
			beq *+14			;\
			/* Perform last operations as MUL (target of bne *-10) */\
			move.l (a0),d3			;\
			mulu.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-10			;\
			/* Restore registers */\
			movem.l (a7),d0-d7/a0-a5	;\
			lea 60(a7),a7			;\
			};				\
		} while (0)




/************************************************************************
* NAME: ARR1D_MUL3_UL
*
* DESCRIPTION: 	multiplies two arrays element by element into a third:
*			dest[i] = src1[i] * src2[i],   i = 0..size-1
*
* PARAMETERS:	dest - array of unsigned long, overwritten
*		src1 - array of unsigned long, read only
*		src2 - array of unsigned long, read only
*		size - number of elements (converted to long, not checked;
*		       expected to be >= 0)
*
* NOTES: 	- The elements type is assumed to be unsigned long.
*		- Full groups of four elements go through eMAC ACC0; the
*		  remaining (size & 3) elements are multiplied with mulu.l.
*		- d0-d7/a0-a5 are saved on the stack and restored; MACSR
*		  and ACC0 are overwritten and NOT restored.
*		- The four-element loop preloads the next group of src1 while
*		  multiplying the current one, so up to four longwords past
*		  the last full group of src1 are read (and discarded).
*		- Branch targets are hand-computed byte displacements
*		  (*+N / *-N); recompute them whenever an instruction is
*		  added, removed or resized.
*		- Arguments must not be named arr1d_dst, arr1d_src1,
*		  arr1d_src2 or arr1d_len (the macro's own locals).
*************************************************************************/

#define ARR1D_MUL3_UL(dest,src1,src2,size) do {		\
		unsigned long *arr1d_dst = (dest);	\
		unsigned long *arr1d_src2 = (src2);	\
		unsigned long *arr1d_src1 = (src1);	\
		long arr1d_len = (size);		\
		asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d7/a0-a5,(a7)	;\
			/* Initialize MACSR register for unsigned operations */\
			move.l #0x40,d0			;\
			move.l d0,MACSR			;\
			/* d0 = 16 = byte size of one group of four longwords */\
			moveq.l %16,d0			;\
			/* Load function variables */\
			move.l arr1d_dst,a0		;\
			move.l arr1d_src1,a1		;\
			move.l arr1d_src2,a2		;\
			move.l arr1d_len,d1		;\
			/* Keep a copy of the count for the remainder loop */\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations */\
			asr.l %2,d1			;\
			/* No full group: skip the loop AND the a1 rewind below */\
			beq *+80			;\
			/* Initialize eMAC accumulator */\
			move.l #0,ACC0			;\
			/* Prime the pipeline: first src1 group, a1 now one group ahead */\
			movem.l (a1),d7/a3-a5		;\
			add.l d0,a1			;\
			/* Loop top (target of bne *-62): load four src2 elements */\
			movem.l (a2),d3-d6		;\
			/* Multiply, and preload the matching src1 element of the next group */\
			macl.l d7,d3,(a1)+,d7,ACC0	;\
			move.l ACC0,d3			;\
			move.l #0,ACC0			;\
			macl.l a3,d4,(a1)+,a3,ACC0	;\
			move.l ACC0,d4			;\
			move.l #0,ACC0			;\
			macl.l a4,d5,(a1)+,a4,ACC0	;\
			move.l ACC0,d5			;\
			move.l #0,ACC0			;\
			macl.l a5,d6,(a1)+,a5,ACC0	;\
			move.l ACC0,d6			;\
			move.l #0,ACC0			;\
			/* Store results */\
			movem.l d3-d6,(a0)		;\
			add.l d0,a2			;\
			add.l d0,a0			;\
			subq.l %1,d1			;\
			bne *-62			;\
			/* Undo the one-group lookahead of a1; only reached when the loop ran */\
			sub.l d0,a1			;\
			/* Load in d2 number of operations left */\
			and.l %3,d2			;\
			beq *+14			;\
			/* Perform last operations as MUL (target of bne *-10) */\
			move.l (a2)+,d3			;\
			mulu.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-10			;\
			/* Restore registers */\
			movem.l (a7),d0-d7/a0-a5	;\
			lea 60(a7),a7			;\
			};				\
		} while (0)



/************************************************************************
* NAME: ARR1D_MUL3_SL
*
* DESCRIPTION: 	multiplies two arrays element by element into a third:
*			dest[i] = src1[i] * src2[i],   i = 0..size-1
*
* PARAMETERS:	dest - array of signed long, overwritten
*		src1 - array of signed long, read only
*		src2 - array of signed long, read only
*		size - number of elements (converted to long, not checked;
*		       expected to be >= 0)
*
* NOTES: 	- The elements type is assumed to be signed long.
*		- Full groups of four elements go through eMAC ACC0; the
*		  remaining (size & 3) elements are multiplied with muls.l.
*		- d0-d7/a0-a5 are saved on the stack and restored; MACSR
*		  and ACC0 are overwritten and NOT restored.
*		- The four-element loop preloads the next group of src1 while
*		  multiplying the current one, so up to four longwords past
*		  the last full group of src1 are read (and discarded).
*		- Branch targets are hand-computed byte displacements
*		  (*+N / *-N); recompute them whenever an instruction is
*		  added, removed or resized.
*		- Arguments must not be named arr1d_dst, arr1d_src1,
*		  arr1d_src2 or arr1d_len (the macro's own locals).
*************************************************************************/

#define ARR1D_MUL3_SL(dest,src1,src2,size) do {		\
		long *arr1d_dst = (dest);		\
		long *arr1d_src2 = (src2);		\
		long *arr1d_src1 = (src1);		\
		long arr1d_len = (size);		\
		asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d7/a0-a5,(a7)	;\
			/* Initialize MACSR register for signed operations */\
			move.l #0,d0			;\
			move.l d0,MACSR			;\
			/* d0 = 16 = byte size of one group of four longwords */\
			moveq.l %16,d0			;\
			/* Load function variables */\
			move.l arr1d_dst,a0		;\
			move.l arr1d_src1,a1		;\
			move.l arr1d_src2,a2		;\
			move.l arr1d_len,d1		;\
			/* Keep a copy of the count for the remainder loop */\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations */\
			asr.l %2,d1			;\
			/* No full group: skip the loop AND the a1 rewind below */\
			beq *+80			;\
			/* Initialize eMAC accumulator */\
			move.l #0,ACC0			;\
			/* Prime the pipeline: first src1 group, a1 now one group ahead */\
			movem.l (a1),d7/a3-a5		;\
			add.l d0,a1			;\
			/* Loop top (target of bne *-62): load four src2 elements */\
			movem.l (a2),d3-d6		;\
			/* Multiply, and preload the matching src1 element of the next group */\
			macl.l d7,d3,(a1)+,d7,ACC0	;\
			move.l ACC0,d3			;\
			move.l #0,ACC0			;\
			macl.l a3,d4,(a1)+,a3,ACC0	;\
			move.l ACC0,d4			;\
			move.l #0,ACC0			;\
			macl.l a4,d5,(a1)+,a4,ACC0	;\
			move.l ACC0,d5			;\
			move.l #0,ACC0			;\
			macl.l a5,d6,(a1)+,a5,ACC0	;\
			move.l ACC0,d6			;\
			move.l #0,ACC0			;\
			/* Store results */\
			movem.l d3-d6,(a0)		;\
			add.l d0,a2			;\
			add.l d0,a0			;\
			subq.l %1,d1			;\
			bne *-62			;\
			/* Undo the one-group lookahead of a1; only reached when the loop ran */\
			sub.l d0,a1			;\
			/* Load in d2 number of operations left */\
			and.l %3,d2			;\
			beq *+14			;\
			/* Perform last operations as MUL (target of bne *-10) */\
			move.l (a2)+,d3			;\
			muls.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-10			;\
			/* Restore registers */\
			movem.l (a7),d0-d7/a0-a5	;\
			lea 60(a7),a7			;\
			};				\
		} while (0)


#endif //__ARR1D_MUL_H

