/* LMS.ASM    performs the following LMS algorithm implemented with a transversal
             FIR filter structure

           ******************************************************************
	   * 1)  y(n)= w.u  ( . = dot_product), y(n)= FIR filter output     *
           * where  w= [w0(n) w1(n) ... wN-1(n)]= filter weights            *
	   * and  u= [u(n) u(n-1) ... u(n-N+1)]= input samples in delay line*
	   * n= time index, N= number of filter weights (taps)              *
           * 2)  e(n)= d(n)-y(n), e(n)= error signal & d(n)= desired output *
           * 3)  wi(n+1)= wi(n)+STEPSIZE*e(n)*u(n-i),  0 <= i <= N-1        *
	   ******************************************************************

     Written by: Wassim G. Najm , Analog Devices, DSP division, April 2 1991

     Calling parameters (inputs):
     f0= u(n) = input sample
     f1= d(n) = desired output

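     Altered registers:
     f0, f1, f4, f6, f7, f8, f12, f13
     lms_init also loads b0 (and thus i0), m0, l0, b8 (i8), b9 (i9), m8, l8,
     l9 and lcntr; lms_alg post-modifies i0, i8, i9 and uses lcntr.
     f7 (STEPSIZE) and these DAG registers must be preserved between
     lms_init and successive calls to lms_alg.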

     Computation cycles:
     lms_alg: 3N+8 per iteration, lms_init: 12+N

     Results (outputs):
     f13= y(n)= filter output
     f6= e(n)= filter error signal
     i8 -> Program Memory Data buffer of the filter weights

     Memory usage:
     pm code= 29 words, pm data= N words, dm data= N words
*/

// N, the number of filter weights; also the length of both circular buffers.
// lms_alg loads a loop counter with TAPS-3, so TAPS is expected to be >= 4.
#define	TAPS		5
// Adaptation step size; lms_init loads it into f7 and lms_alg multiplies
// the error e(n) by it in the weight update.
#define	STEPSIZE	0.01

// Entry points exported to other modules.
.GLOBAL	lms_init, lms_alg;

// Delay line u(n) .. u(n-N+1). It is addressed through dm(i0,...), so it
// belongs in the data-memory data segment.
.SEGMENT/DM    seg_dmda;
.VAR	deline_data[TAPS];
.ENDSEG;

// Filter weights w0 .. wN-1. They are addressed through pm(i8,...) and
// pm(i9,...), so they belong in the program-memory data segment; this lets
// one sample and one weight be fetched in the same instruction.
.SEGMENT/PM     seg_pmda;
.VAR	weights[TAPS];
.ENDSEG;

.SEGMENT/PM     seg_pmco;
/* lms_init: one-time setup for lms_alg.
   - i0/b0/l0/m0 describe the circular delay line, stepped by -1.
   - i8/b8/l8 (weight read pointer) and i9/b9/l9 (weight write pointer)
     describe the circular weight buffer, both stepped by m8 = +1.
   - f7 is loaded with STEPSIZE and must stay intact for lms_alg.
   - Both buffers are cleared to 0.0; f0 is left holding 0.0.
   On return every pointer has wrapped back to the start of its buffer. */
lms_init:	f0=0.0; 		//fill value for both buffers
		f7=STEPSIZE; 		//step size, consumed by lms_alg
		m0=-1; 			//delay line is walked backwards
		m8=1; 			//weights are walked forwards
		b0=deline_data; 	//delay line base (loads i0 too)
		l0=TAPS; 		//circular, TAPS samples long
		b8=weights; 		//weight read pointer base
		l8=TAPS; 		//circular, TAPS weights long
		b9=weights; 		//weight write pointer: same buffer
		l9=TAPS;
		lcntr=TAPS, do clear_bufs until lce;
clear_bufs:	  dm(i0,m0)=f0, pm(i8,m8)=f0; 	//zero one sample and one weight
		rts;
lms_init.end:


/* lms_alg: run one sample through the adaptive FIR filter, then update
   every weight with the LMS rule wi(n+1)= wi(n)+STEPSIZE*e(n)*u(n-i).
   Inputs:   f0= u(n), f1= d(n); f7= STEPSIZE and the i0/i8/i9 buffer
             pointers as left by lms_init or by the previous lms_alg call.
   Outputs:  f13= y(n), f6= e(n); the weight buffer holds w(n+1).
   Clobbers: f0, f1, f4, f8, f12, lcntr; i0, i8 and i9 are post-modified.
   The multiply, the add and the two memory fetches in each instruction
   are software-pipelined (a product formed in one instruction is summed
   in the next), so the instruction order must not be changed. */
lms_alg:	dm(i0,m0)=f0, f4=pm(i8,m8); 	//store u(n) in delay line, f4=w0(n)
		f8=f0*f4, f0=dm(i0,m0), f4=pm(i8,m8); //f8= u(n)*w0(n)
						      //f0= u(n-1), f4= w1(n)
		f12=f0*f4, f0=dm(i0,m0), f4= pm(i8,m8);
				//f12= u(n-1)*w1(n), f0= u(n-2), f4= w2(n)
		// TAPS-3 passes: the first two products were started above and
		// the last one is finished below the loop.
		lcntr=TAPS-3, do macs until lce;
macs:		  f12=f0*f4, f8=f8+f12, f0=dm(i0,m0), f4= pm(i8,m8);
				//f12= u(n-i)*wi(n), f8= sum of prod, f0= u(n-i-1), f4= wi+1(n)
		f12=f0*f4, f8=f8+f12; 	//f12= u(n-N+1)*wN-1(n)
		f13=f8+f12; 			//f13= y(n)
		f6=f1-f13; 				//f6= e(n)
		// After TAPS delay-line accesses and TAPS weight reads, i0 is back
		// at u(n) and i8 is back at w0, ready for the update pass.
		f1=f6*f7, f4=dm(i0,m0); //f1= STEPSIZE*e(n), f4= u(n)
		f0=f1*f4, f12=pm(i8,m8);//f0= STEPSIZE*e(n)*u(n), f12= w0(n)
		// TAPS-1 passes update w0 .. wN-2 through the write pointer i9;
		// the last weight is finished in the two rts(db) delay slots.
		lcntr=TAPS-1, do update_weights until lce;
		  f8=f0+f12, f4=dm(i0,m0), f12=pm(i8,m8); 	//f8= wi(n+1)
					           	//f4= u(n-i-1), f12= wi+1(n)
update_weights:   f0=f1*f4, pm(i9,m8)=f8; //f0= STEPSIZE*e(n)*u(n-i-1)
					  			//store wi(n+1)
		rts(db); 			//delayed return: next two instructions still execute
		f8=f0+f12, f0=dm(i0,1); //f8= wN-1(n+1)
				//the read is only there to step i0 by +1, so that
				//i0 -> u(n+1) location in delay line
		pm(i9,m8)=f8; 			//store wN-1(n+1)

lms_alg.end:
.ENDSEG;

