/************************************************************************
*
*  FREESCALE SEMICONDUCTORS INC.
*  ALL RIGHTS RESERVED
*  (c) Copyright 2003 Freescale semiconductors, Inc.
*
*************************************************************************
*
*  FILE NAME  : RUNN_SUM.h
*
*  PURPOSE    :  RUNN_SUM macro definition for CW
*
*  AUTHOR     :  Andriy Tymkiv, Anatoly Khaynakov
*
***********************************************************************/

#ifndef __RUNN_SUM_H
#define __RUNN_SUM_H


/************************************************************************
* NAME: RUNN_SUM
*
* DESCRIPTION: 	calculates the running (cumulative) sum of the
*		src[0..size-1] array and stores it in the dst[0..size-1] array
*
* NOTE: 	The elements type is assumed to be FRAC32
*************************************************************************/

/*
  src[0]
  src[1]
  src[2]
  src[3]
  ...
  src[n]	

  dst[0] = src[0]+0
  dst[1] = src[1]+dst[0]
  dst[2] = src[2]+dst[1]
  dst[3] = src[3]+dst[2]
  ...
  dst[n] = src[n]+dst[n-1]
  
  	
	
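	d0	y[i-1]	(running sum accumulated so far)
	d1	cntr	(number of 4-element groups, size >> 2)
	d2	cntr	(number of leftover elements, size & 3)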
	d3	
	d4	x[i+1]
	d5	x[i+2]
	d6	x[i+3]
	d7	x[i+4]
	
	a0	dst
	a1	src
	a2	
	a3	
	a4	
	a5


*/


/*
 * RUNN_SUM(dst, src, size)
 *
 * Stores the running sum of src[0..size-1] into dst[0..size-1]:
 *     dst[i] = src[0] + src[1] + ... + src[i]
 *
 * dst  : FRAC32 pointer to the output array (evaluated once)
 * src  : FRAC32 pointer to the input array  (evaluated once)
 * size : element count, converted to long; nothing is done when size <= 0
 *
 * The sum is accumulated with plain add.l instructions; no saturation is
 * applied.  Elements are processed four at a time, then the remaining
 * (size & 3) elements one at a time.  Each source element is loaded before
 * the matching destination element is stored, so dst == src is handled.
 *
 * The macro temporaries carry a runn_sum_ prefix so that caller arguments
 * named a, b or n are not captured by the expansion.
 *
 * The relative branches (*+32, *-28, *+12, *-8) are hand-counted byte
 * offsets: do not insert, remove or resize any instruction between a branch
 * and its target without recomputing them.
 *
 * NOTE(review): the locals are read after a7 has been lowered by 60; this
 * presumably relies on the compiler addressing them through the frame
 * pointer rather than a7 -- confirm for the target build settings.
 * NOTE(review): immediates are written both as %n and #n; the original
 * spelling is kept untouched -- confirm against the assembler in use.
 */
#define RUNN_SUM(dst,src,size) do {			\
		FRAC32 *runn_sum_dst_ = (dst);		\
		FRAC32 *runn_sum_src_ = (src);		\
		long runn_sum_cnt_ = (size);		\
		/* A negative count would make the subq/bne loops run until the */ \
		/* counter wraps around, so only enter the asm for size > 0.    */ \
		if (runn_sum_cnt_ > 0) {		\
	 	asm{					\
	 		/* Save registers: 14 registers = 56 bytes, 60 are reserved */\
			lea -60(a7),a7			;\
			movem.l d0-d7/a0-a5,(a7)	;\
			/* Load function variables: a0 = dst, a1 = src, d1 = size, d0 = sum */\
			move.l runn_sum_dst_,a0		;\
			move.l runn_sum_src_,a1		;\
			move.l runn_sum_cnt_,d1		;\
			move.l #0,d0			;\
			/* Keep a copy of size in d2 for the remainder loop */  \
			move.l d1,d2			;\
			/* d1 = number of 4-element groups; skip the group loop if none */ \
			asr.l %2,d1			;\
			beq *+32			;\
			/* Group loop: load 4 elements, accumulate and store 4 sums */  \
			movem.l (a1),d4-d7		;\
			add.l d4,d0			;\
			move.l d0,(a0)+			;\
			add.l d5,d0			;\
			move.l d0,(a0)+			;\
			add.l d6,d0			;\
			move.l d0,(a0)+			;\
			add.l d7,d0			;\
			move.l d0,(a0)+			;\
			/* movem does not post-increment here: advance src by 16 bytes */  \
			add.l #16,a1			;\
			subq.l %1,d1			;\
			bne *-28			;\
			/* d2 = size & 3 leftover elements; skip the tail loop if none */  \
			and.l %3,d2			;\
			beq *+12			;\
			/* Tail loop: one element per iteration */  \
			move.l (a1)+,d4			;\
			add.l d4,d0			;\
			move.l d0,(a0)+			;\
			subq.l #1,d2			;\
			bne *-8				;\
			/* Restore registers */  \
			movem.l (a7),d0-d7/a0-a5	;\
			lea 60(a7),a7	    		;\
			};				\
		}					\
		} while (0)



#endif // __RUNN_SUM_H