/**************************************************************

File Name:      vecprod.asm

Date Modified:  2/10/99         RFG

Purpose:        Subroutine that computes the dot product of two
                equal-length floating-point vectors. The routine
                switches the core into SIMD mode (PEYEN), so every
                dm/pm access in the loop feeds both processing
                elements and each multiply/accumulate instruction
                handles two vector elements.

Equation:       SUM += A(i) * B(i)

Calling Parameters:
                b0,i0 = address of vector A (read through the dm bus)
                b8,i8 = address of vector B (read through the pm bus)
                r1    = (number of elements in vectors - 6) / 2
                        The code performs three fetches before the
                        loop and one fetch per loop iteration; with
                        m1 = m9 = 2 each fetch advances the pointers
                        by two elements, hence N = 2 * (r1 + 3).
                l0,l8 = 0
                m1,m9 = 2

Assumptions:
                All arrays must start on even address boundaries.
                All arrays must have an even number length
                (zero pad if necessary).
                NOTE(review): nothing in this routine guards against
                r1 = 0 (N = 6) or shorter vectors; confirm how the
                counter-based loop behaves for a zero count on the
                target before calling with N < 8.

Return Values:  f12 = SUM

Registers Affected:
                f0,f4,f8,f12
                s0,s4,s8,s12 (PEy counterparts, overwritten while
                              SIMD mode is enabled)
                i0,i8
                LCNTR
                MODE1 (RND32 is set and left set; PEYEN is set on
                       entry and always cleared before returning,
                       whatever its state was in the caller)

Cycle Count:    12 + (N-6)/2 + 4 cache misses

Number of PM Locations:
                13 instruction words
                N for the Number of elements for the vector B

Number of DM Locations:
                N for the Number of elements for the vector A
                1 for SUM
                NOTE(review): this routine returns SUM in f12 and
                never stores it to memory; presumably this location
                belongs to the caller - confirm.

**************************************************************/

#include "def21161.h"

.global vector_dot_product;

/* program memory code */
.section/pm seg_pmco;
vector_dot_product:
        /* Select 32-bit rounding (RND32) for the ALU/multiplier and
           enable the second processing element (PEYEN = SIMD mode). */
        bit set MODE1 RND32 | PEYEN;
        /* Presumably covers the latency of the MODE1 write so that the
           first fetch below already runs in SIMD mode - TODO confirm
           against the hardware reference. */
        nop;

        /* Pipeline prologue: the multiply always works on the operands
           fetched by the previous instruction. */
        /* fetch pair 0 */
        f0=dm(i0,m1), f4=pm(i8,m9);
        /* f8 = product of pair 0, fetch pair 1 */
        f8=f0*f4, f0=dm(i0,m1), f4=pm(i8,m9);
        /* f12 = product of pair 1 (this initialises the accumulator, so
           the caller does not need to clear f12), fetch pair 2 */
        f12=f0*f4, f0=dm(i0,m1), f4=pm(i8,m9);

        /* Single-instruction hardware loop executed r1 times. */
        lcntr=r1, do vecprod until lce;
        /* Steady state: multiply the current operands, add the previous
           product into the running sum, and fetch the next operands. */
vecprod: f8=f0*f4, f12=f8+f12, f0=dm(i0,m1), f4=pm(i8,m9);

        /* Pipeline drain: multiply the last fetched operands and add the
           product still pending in f8. No further fetch is issued. */
        f8=f0*f4, f12=f8+f12;
        /* Request the switch back to single-PE operation. */
        bit clr MODE1 PEYEN;
        /* Last accumulate in SIMD: per the original note this instruction
           still executes on both processing elements, leaving the PEx
           partial sum in f12 and the PEy partial sum in s12. */
        f12=f8+f12;
        /* Delayed return: the two instructions that follow are executed
           before control goes back to the caller. */
        rts (db);
        /* Move the PEy partial sum into r8 (the same register as f8). */
        r8=s12;
        /* Combine both partial sums; f12 now holds the complete SUM. */
        f12=f8+f12;

/* End label marking the extent of the routine. */
vector_dot_product.end: