/**************************************************************

File Name:	fir_blk.asm
		
Date Modified:	2/16/99	RFG
				7/17/00 PPG

Purpose:	Subroutine that implements a Block FIR Filter given
			coefficients and samples.

Equation:	y(n) = Summation from k=0 to M of h(k)*x(n-k)

Calling Parameters:
			b0,i0 = address of delay line buffer
			l0 = length of delay line buffer
			b1,i1 = address of input samples buffer
			b8,i8 = address of coefficients buffer
			l8 = length of coefficients buffer
			b9,i9 = address of output buffer
			r1 = number of taps in the filter divided by 2
			r2 = number of samples
			r3 = (number of taps in filter - 6) / 2
			s0,m0,l1,l9 = 0
			m1,m10 = 1
			m2=-1
			m3,m9 = 2

Assumptions:
			All arrays must start on even address boundaries.
			All arrays must have an even length (zero-pad
		    if necessary).

Return Values:
			i9=OUTPUT

Registers Affected:
			f0,s0,f4,s4,f8,s8,f12,s12
			i0,i8,i9

Cycle Count:
			9 + taps/2 + samples(6 + taps/2) + 9 cache misses
			
Number of PM Locations:
			21 instruction words
			2 * Number of taps locations for coefficients
			Number of samples + 1 locations for the output buffer
			
Number of DM Locations:
			Number of taps locations for the delay line buffer
			Number of samples locations for the input buffer

Circular buffer notes: 
Because SIMD and long-word accesses each transfer two 32-bit words, programs 
must take care when using these accesses with circular buffers.  A SIMD or 
long-word access must not cross a circular buffer boundary.  If a SIMD-mode 
access uses a circular buffer index register that points to the last location 
in the circular buffer (end of buffer), the access transfers the last location 
in the circular buffer and the first location outside the buffer 
(end of buffer + 1).  

**************************************************************/

#include    "def21160.h"

.global fir;

/* program memory code */
.section/pm seg_pmco;

/*-------------------------------------------------------------
  fir -- entry point.

  What the body does, in order:
    1. Enables circular buffering and SIMD (PEy) in MODE1.
    2. Loads a value through i3 into f9 and a value through i1
       into f0, shuffles them between the PEx (f) and PEy (s)
       register files, and stores f0 through i0.
    3. Runs a software-pipelined multiply/accumulate over
       dm(i2) and pm(i8) into f11 (loop "main_fir", r2 trips
       plus one drained product after the loop).
    4. Adds the PEy accumulator s11 into f11, subtracts the sum
       from f9, and stores the difference through i14.
    5. Scales the difference by f6 and, for r1 trips, adds
       (dm(i4) * f9) to the values read through i10 and writes
       the result through i11 (loop "adapt").
    6. Advances i4 and i2 by m3, disables SIMD, stores f11
       through i9 and returns.

  Registers read without being set here (must come from the
  caller): i0, i1, i2, i3, i4, i8, i9, i10, i11, i14,
  m3, m4, m9, m10, r1, r2, f6.
  Registers written: f0, f3, f4, f5, f8, f9, f10, f11, f12 and
  explicitly s0, s9, s11; every index register used with a
  dm()/pm() access or modify() above is post-modified.

  NOTE(review): the file header documents a block FIR that takes
  i0/i1/i8/i9/r1/r2/r3 only.  This body additionally uses i2, i3,
  i4, i10, i11, i14, m4 and f6, never reads r3, and rewrites a
  PM table in the "adapt" loop, which looks like an LMS-style
  adaptive filter rather than a plain FIR -- confirm the intended
  calling convention against the caller before relying on the
  header.
-------------------------------------------------------------*/
fir:
    
    bit set MODE1 CBUFEN | PEYEN;   /* enable circular buffers and the PEy (SIMD) element */
    nop;                            /* presumably covers the MODE1 effect latency -- confirm */
    f9 = dm(i3,m3);                 /* load f9 through i3, then i3 += m3 */
    f9 <-> s9;                      /* exchange f9 (PEx) with s9 (PEy) */
    f5 = f5-f5,f0 = dm(i1,m3);					/* clear f5; load f0 through i1, then i1 += m3 */
    f10 = f9;                       /* copy f9 into f10 */
    s9 <-> f10;                     /* exchange s9 (PEy) with f10 (PEx) */
    f0 <-> s0;                      /* exchange f0 (PEx) with s0 (PEy) */
    dm(i0,m3) = f0;                 /* store f0 through i0, then i0 += m3 */
    /* NOTE(review): the net effect of the copies/swaps above depends on the
       implicit PEy operations performed in SIMD mode -- verify against the
       ADSP-21160 register-move rules before changing their order. */
       
    /* Prime the pipeline: clear the accumulator and fetch the first operand pair. */
    f11 = f11-f11,f0 = dm(i2,m4),f4 = pm(i8,m9);
    /* MAC loop, r2 iterations: multiply the operands fetched on the previous
       step while fetching the next pair, then accumulate into f11. */
 lcntr = r2, do main_fir until lce; 
     f5 = f0*f4,f0 = dm(i2,m4),f4 = pm(i8,m9);
main_fir:    f11= f11+f5;
    /* Drain the pipeline: the last fetched pair has not been multiplied yet. */
    f5 = f0*f4;
    f11 = f11+f5; 
    /* Fold the PEy accumulator into the PEx accumulator and write the result
       back to s11.  NOTE(review): presumably this leaves the same total in
       both f11 and s11 -- confirm against SIMD cross-element move semantics. */
    f8 = s11;
    f11 = f11+f8;
    f8  = f11;
    s11 = f8;
    
    f9  = f9-f11;                   /* f9 = (value loaded via i3) - accumulated sum; presumably the error term */
    
    pm(i14,m10)=f9;                 /* store the difference through i14, then i14 += m10 */
	
    f9=f9*f6;                       /* scale by f6 (not set in this routine; presumably a step size -- confirm) */
    /* Update loop, r1 iterations: read a value through i10 and a sample through
       i4, add sample * f9 to the value, and write the result through i11.
       NOTE(review): i10 and i11 are separate index registers; presumably they
       address the same table offset so the update lands in place -- confirm. */
lcntr = r1, do adapt until lce;   
    f12 = pm(i10,m9),f3 = dm(i4,m4); 
    f3 = f3*f9;
    f12 = f12+f3;
adapt:    pm(i11,m9) = f12;
  
        modify(i4,m3);              /* i4 += m3 */
        modify(i2,m3);              /* i2 += m3 */
bit clr MODE1 PEYEN;                /* disable SIMD; CBUFEN is left set on return */
		nop;                        /* presumably covers the MODE1 effect latency -- confirm */
		pm(i9,m9)=f11;              /* store the accumulated sum through i9 with PEYEN cleared, then i9 += m9 */
		rts;                        /* return to caller */

fir.end:
