Page 288 - ARM 64 Bit Assembly Language
P. 288

Non-integral mathematics 277


                   96         .word 0x94603063,  26  // -1/15! as an S(-40,32)
                   97         .word 0x654B1DC1,  32  //  1/17! as an S(-48,32)
                   98         .equ tablen,(.-sintab)  // set tablen to size of table
                   99         // The '.' refers to the current address counter value.
                   100        // Subtracting the address of sintab from the current
                   101        // address gives the size of the table.
                   102
                   103        .text
                   104  //-------------------------------------------------------------
                   105  // sinq(x)
                   106  // input: x -> S(1,30) s.t. 0 <= x <= pi/2
                   107  // returns sin(x) -> S(3,28)
                   108  // x0 : Sum of terms
                   109  // x1 : pointer to table
                   110  // x2 : next coefficient
                   111  // x3 : next shift
                   112  // x4 : x^2
                   113  // x5 : x^(2n-1)
                   114  // x6 : next term
                   115  // x7 : pointer to end of table
                   116  sinq:  smull  x4,w0,w0         // w4 will hold x^2
                   117        // x^2 is now an S(3,60) in x4 (0<= x^2 <= 2.467)
                   118        mov    x5,x0             // x5 will keep x^(2n-1). Start with x
                   119        // x5 now contains x as an S(1,30)
                   120        // The first term in the Taylor series is simply x, so
                   121        // convert x to an S(2,61) by shifting it left
                   122        lsl    x0,x0,#31         // x0 holds the sum
                   123        ldr    x1,=(sintab+8)    // get pointer to beginning of table
                   124                                 // but skip first entry
                   125        mov    w2,#0xAAAAAAAA    // Since first coefficient is a pattern
                   126        mov    x3,#0             // we can load it more quickly this way
                   127        asr    x4,x4,#31         // convert x^2 to an S(2,29)
                   128        add    x7,x1,#(tablen-8) // get pointer to end of table
                   129        b      firstmul          // skip the first load
                   130
                   131        // We know that we will always execute the loop 6 times,
                   132        // so we use a post-test loop.
                   133  sloop:  ldpsw  x2,x3,[x1],#8   // Load two values from the table
                   134        // x2 now has 1/(2n+1)! sign extended to 64 bits
                   135        // x3 contains the correcting shift  sign extended to 64 bits
                   136        //  The multiply will take time, so start it now
                   137  firstmul:
                   138        smull  x5,w4,w5        // x5 <- x^(2n+1) as an S(4,59)
                   139        cmp    x1, x7          // performance: do loop test early
                   140        asr    x5,x5,#31       // convert x^(2n+1) to S(3,28)
                   141        smull  x6,w5,w2        // multiply by value from the table
                   142        add    x6,x6,x6,lsr #63// if the result is negative, then add one
                   143        asr    x6,x6,x3        // apply shift to make an S(2,61)
                   144        add    x0,x0,x6        // add to running total
   283   284   285   286   287   288   289   290   291   292   293