/************************************************************************
*
*  FREESCALE SEMICONDUCTORS INC.
*  ALL RIGHTS RESERVED
*  (c) Copyright 2003 Freescale semiconductors, Inc.
*
*************************************************************************
*
*  FILE NAME  : ARR2D_ADD.h
*
*  PURPOSE    : ARR2D_ADD{2,3}_{S,U}L macro definitions for the CW compiler
*
*  AUTHOR     :  Andriy Tymkiv, Anatoly Khaynakov
*
***********************************************************************/

#ifndef __ARR2D_ADD_H
#define __ARR2D_ADD_H


/************************************************************************
* NAME: ARR2D_ADD2_UL
*
* DESCRIPTION: 	adds the src[0..size1-1][0..size2-1] array to the
*		dest[0..size1-1][0..size2-1] array, element by element:
*			dest[i][j] = dest[i][j] + src[i][j]
*
* PARAMETERS:	dest  - array that is both the first operand and the result
*		src   - array that is added to dest
*		size1 - extent of the first index
*		size2 - extent of the second index
*
* NOTE: 	The elements type is assumed to be unsigned long.
*		Each argument is evaluated exactly once.
*		The code walks size1*size2 consecutive elements starting at
*		&dest[0][0] and &src[0][0], so all rows of each array must be
*		contiguous in memory. Elements are processed four at a time;
*		the remaining (size1*size2) & 3 elements are processed one by
*		one. Nothing is read or written when size1*size2 is zero.
*		Registers d0-d6/a0-a1 are saved on the stack on entry and
*		restored on exit.
*		NOTE(review): the branch operands (*+28, *-24, *+12, *-8) look
*		like hard-coded byte displacements from the branch instruction;
*		confirm against the assembler manual and recompute them if any
*		instruction between a branch and its target is changed.
*************************************************************************/


#define ARR2D_ADD2_UL(dest,src,size1,size2) do {	\
		/* Prefixed local names cannot capture the caller's identifiers */ \
		unsigned long *arr2d_dst_ = (&(dest)[0][0]); 	\
		unsigned long *arr2d_src_ = (&(src)[0][0]);	\
	 	long arr2d_n1_ = (size1);		\
		long arr2d_n2_ = (size2);		\
	 	asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d6/a0-a1,(a7)	;\
			/* Load function variables */\
			move.l arr2d_dst_,a0		;\
			move.l arr2d_src_,a1		;\
			move.l arr2d_n1_,d1		;\
			move.l arr2d_n2_,d2		;\
			/* Load counter: d1 = size1 * size2, copy kept in d2 */  \
			mulu.l d2,d1			;\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations; skip the loop if none */ \
			asr.l %2,d1			;\
			beq *+28			;\
			/* Perform 4 by 4 operations as SUM */  \
			movem.l (a0),d3-d6		;\
			add.l (a1)+,d3			;\
			add.l (a1)+,d4			;\
			add.l (a1)+,d5			;\
			add.l (a1)+,d6			;\
			/* Store results and step dest past the 4 elements */ \
			movem.l d3-d6,(a0)		;\
			add.l %16,a0			;\
			subq.l %1,d1			;\
			bne *-24			;\
			/* Load in d2 number of operations left; skip the loop if none */  \
			and.l %3,d2			;\
			beq *+12			;\
			/* Perform last operations as SUM */  \
			move.l (a0),d3			;\
			add.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-8				;\
			/* Restore registers */  \
			movem.l (a7),d0-d6/a0-a1	;\
			lea 60(a7),a7	    		;\
			};				\
		} while (0)



/************************************************************************
* NAME: ARR2D_ADD3_UL
*
* DESCRIPTION: 	calculates the sum of src1[0..size1-1][0..size2-1] and
*		src2[0..size1-1][0..size2-1] arrays in
*		dest[0..size1-1][0..size2-1]:
*			dest[i][j] = src1[i][j] + src2[i][j]
*
* PARAMETERS:	dest  - array that receives the result
*		src1  - first operand array
*		src2  - second operand array
*		size1 - extent of the first index
*		size2 - extent of the second index
*
* NOTE: 	The elements type is assumed to be unsigned long.
*		Each argument is evaluated exactly once.
*		The code walks size1*size2 consecutive elements starting at
*		&dest[0][0], &src1[0][0] and &src2[0][0], so all rows of each
*		array must be contiguous in memory. Elements are processed
*		four at a time; the remaining (size1*size2) & 3 elements are
*		processed one by one. Nothing is read or written when
*		size1*size2 is zero.
*		Registers d0-d6/a0-a2 are saved on the stack on entry and
*		restored on exit.
*		NOTE(review): the branch operands (*+34, *-30, *+12, *-8) look
*		like hard-coded byte displacements from the branch instruction;
*		confirm against the assembler manual and recompute them if any
*		instruction between a branch and its target is changed.
*************************************************************************/

#define ARR2D_ADD3_UL(dest,src1,src2,size1,size2) do {		\
		/* Prefixed local names cannot capture the caller's identifiers */ \
		unsigned long *arr2d_dst_ = (&(dest)[0][0]); 	\
		unsigned long *arr2d_src1_ = (&(src1)[0][0]); 	\
		unsigned long *arr2d_src2_ = (&(src2)[0][0]); 	\
	 	long arr2d_n1_ = (size1);		\
	 	long arr2d_n2_ = (size2);		\
	 	asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d6/a0-a2,(a7)	;\
			/* Load function variables */\
			move.l arr2d_dst_,a0		;\
			move.l arr2d_src1_,a1		;\
			move.l arr2d_src2_,a2		;\
			move.l arr2d_n1_,d1		;\
			move.l arr2d_n2_,d2		;\
			/* Load counter: d1 = size1 * size2, copy kept in d2 */  \
			mulu.l d2,d1			;\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations; skip the loop if none */ \
			asr.l %2,d1			;\
			beq *+34			;\
			/* Perform 4 by 4 operations as SUM */  \
			movem.l (a1),d3-d6		;\
			add.l (a2)+,d3			;\
			add.l (a2)+,d4			;\
			add.l (a2)+,d5			;\
			add.l (a2)+,d6			;\
			/* Store results and step dest and src1 past the 4 elements */ \
			movem.l d3-d6,(a0)		;\
			add.l %16,a0			;\
			add.l %16,a1			;\
			subq.l %1,d1			;\
			bne *-30			;\
			/* Load in d2 number of operations left; skip the loop if none */  \
			and.l %3,d2			;\
			beq *+12			;\
			/* Perform last operations as SUM */  \
			move.l (a1)+,d3			;\
			add.l (a2)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-8				;\
			/* Restore registers */  \
			movem.l (a7),d0-d6/a0-a2	;\
			lea 60(a7),a7	    		;\
			};				\
		} while (0)



/************************************************************************
* NAME: ARR2D_ADD2_SL
*
* DESCRIPTION: 	adds the src[0..size1-1][0..size2-1] array to the
*		dest[0..size1-1][0..size2-1] array, element by element:
*			dest[i][j] = dest[i][j] + src[i][j]
*
* PARAMETERS:	dest  - array that is both the first operand and the result
*		src   - array that is added to dest
*		size1 - extent of the first index
*		size2 - extent of the second index
*
* NOTE: 	The elements type is assumed to be signed long.
*		Each argument is evaluated exactly once.
*		The code walks size1*size2 consecutive elements starting at
*		&dest[0][0] and &src[0][0], so all rows of each array must be
*		contiguous in memory. Elements are processed four at a time;
*		the remaining (size1*size2) & 3 elements are processed one by
*		one. Nothing is read or written when size1*size2 is zero.
*		Registers d0-d6/a0-a1 are saved on the stack on entry and
*		restored on exit.
*		NOTE(review): the branch operands (*+28, *-24, *+12, *-8) look
*		like hard-coded byte displacements from the branch instruction;
*		confirm against the assembler manual and recompute them if any
*		instruction between a branch and its target is changed.
*************************************************************************/

#define ARR2D_ADD2_SL(dest,src,size1,size2) do {	\
		/* Prefixed local names cannot capture the caller's identifiers */ \
		long *arr2d_dst_ = (&(dest)[0][0]);	 	\
		long *arr2d_src_ = (&(src)[0][0]);		\
	 	long arr2d_n1_ = (size1);		\
	 	long arr2d_n2_ = (size2);		\
	 	asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d6/a0-a1,(a7)	;\
			/* Load function variables */\
			move.l arr2d_dst_,a0		;\
			move.l arr2d_src_,a1		;\
			move.l arr2d_n1_,d1		;\
			move.l arr2d_n2_,d2		;\
			/* Load counter: d1 = size1 * size2, copy kept in d2 */  \
			mulu.l d2,d1			;\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations; skip the loop if none */ \
			asr.l %2,d1			;\
			beq *+28			;\
			/* Perform 4 by 4 operations as SUM */  \
			movem.l (a0),d3-d6		;\
			add.l (a1)+,d3			;\
			add.l (a1)+,d4			;\
			add.l (a1)+,d5			;\
			add.l (a1)+,d6			;\
			/* Store results and step dest past the 4 elements */ \
			movem.l d3-d6,(a0)		;\
			add.l %16,a0			;\
			subq.l %1,d1			;\
			bne *-24			;\
			/* Load in d2 number of operations left; skip the loop if none */  \
			and.l %3,d2			;\
			beq *+12			;\
			/* Perform last operations as SUM */  \
			move.l (a0),d3			;\
			add.l (a1)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-8				;\
			/* Restore registers */  \
			movem.l (a7),d0-d6/a0-a1	;\
			lea 60(a7),a7	    		;\
			};				\
		} while (0)



/************************************************************************
* NAME: ARR2D_ADD3_SL
*
* DESCRIPTION: 	calculates the sum of src1[0..size1-1][0..size2-1] and
*		src2[0..size1-1][0..size2-1] arrays in
*		dest[0..size1-1][0..size2-1]:
*			dest[i][j] = src1[i][j] + src2[i][j]
*
* PARAMETERS:	dest  - array that receives the result
*		src1  - first operand array
*		src2  - second operand array
*		size1 - extent of the first index
*		size2 - extent of the second index
*
* NOTE: 	The elements type is assumed to be signed long.
*		Each argument is evaluated exactly once.
*		The code walks size1*size2 consecutive elements starting at
*		&dest[0][0], &src1[0][0] and &src2[0][0], so all rows of each
*		array must be contiguous in memory. Elements are processed
*		four at a time; the remaining (size1*size2) & 3 elements are
*		processed one by one. Nothing is read or written when
*		size1*size2 is zero.
*		Registers d0-d6/a0-a2 are saved on the stack on entry and
*		restored on exit.
*		NOTE(review): the branch operands (*+34, *-30, *+12, *-8) look
*		like hard-coded byte displacements from the branch instruction;
*		confirm against the assembler manual and recompute them if any
*		instruction between a branch and its target is changed.
*************************************************************************/

#define ARR2D_ADD3_SL(dest,src1,src2,size1,size2) do {	\
		/* Prefixed local names cannot capture the caller's identifiers */ \
		long *arr2d_dst_ = (&(dest)[0][0]);	 	\
		long *arr2d_src1_ = (&(src1)[0][0]); 		\
		long *arr2d_src2_ = (&(src2)[0][0]); 		\
	 	long arr2d_n1_ = (size1);		\
	 	long arr2d_n2_ = (size2);		\
	 	asm{					\
			/* Save registers */\
			lea -60(a7),a7			;\
			movem.l d0-d6/a0-a2,(a7)	;\
			/* Load function variables */\
			move.l arr2d_dst_,a0		;\
			move.l arr2d_src1_,a1		;\
			move.l arr2d_src2_,a2		;\
			move.l arr2d_n1_,d1		;\
			move.l arr2d_n2_,d2		;\
			/* Load counter: d1 = size1 * size2, copy kept in d2 */  \
			mulu.l d2,d1			;\
			move.l d1,d2			;\
			/* Load in d1 number of 4 operations; skip the loop if none */ \
			asr.l %2,d1			;\
			beq *+34			;\
			/* Perform 4 by 4 operations as SUM */  \
			movem.l (a1),d3-d6		;\
			add.l (a2)+,d3			;\
			add.l (a2)+,d4			;\
			add.l (a2)+,d5			;\
			add.l (a2)+,d6			;\
			/* Store results and step dest and src1 past the 4 elements */ \
			movem.l d3-d6,(a0)		;\
			add.l %16,a0			;\
			add.l %16,a1			;\
			subq.l %1,d1			;\
			bne *-30			;\
			/* Load in d2 number of operations left; skip the loop if none */  \
			and.l %3,d2			;\
			beq *+12			;\
			/* Perform last operations as SUM */  \
			move.l (a1)+,d3			;\
			add.l (a2)+,d3			;\
			move.l d3,(a0)+			;\
			subq.l %1,d2			;\
			bne *-8				;\
			/* Restore registers */  \
			movem.l (a7),d0-d6/a0-a2	;\
			lea 60(a7),a7	    		;\
			};				\
		} while (0)


#endif // __ARR2D_ADD_H