CS 301 - Homework 7 (KEY)

Here are a bunch of programs that are much slower than they could be.  Using the "Time" mode of NetRun, speed each one up by a factor of 5 or more.  Be sure to keep the output exactly the same.
  1. Change the function "clear_img" to do exactly the same thing* to the "img" array, but faster. You should be able to get down to at least 5ns/pixel. 
    enum {h=1024}; /* image height-- number of rows */
    enum {w=2048}; /* image width-- number of columns */
    enum {channels=3}; /* color channels (e.g., RGB) per pixel*/

    /// Image pixel storage
    unsigned char img[h][w][channels];

    /// Set the left half of the image to 128 (middle grey)
    int clear_img(void) {
    for (int y=0;y<h;y++) /* FIX: Y-loop first for smoother memory */
    for (int x=0;x<w/2;x++)
    for (int c=0;c<channels;c++) /* FIX: could unroll this too... */
    img[y][x][c]=128;
    return 0;
    }

    int foo(void) {
    double t=time_function(clear_img);
    int n_pixels=h*(w/2);

    printf(" clear_img: %.3f ns/pixel\n", t/n_pixels*1.0e9);
    return img[0][0][0];
    }
  2. Make the function "mutate_stuff" compute the same thing*, but faster. Don't worry about roundoff changing the result due to the order of operations. You should be able to get down to at least 5ns/stuff. 
    enum {n_stuff=1324567}; /* amount of stuff */
    double stuff[n_stuff];

    /// Compute stuff[i]=(stuff[i]*a+b)/c
    int mutate_stuff(double a,double b,double c) {
    int i;
    for (i=0;i<n_stuff;i++) /* FIX: 1 loop instead of 3 */
    stuff[i]=stuff[i]*(a/c)+(b/c);
    /* FIX: parenthesis above allow compiler to optimize out divide */
    return 0;
    }
    int time_mutate(void) {
    return mutate_stuff(1.5,2.0,3.0);
    }

    int foo(void) {
    double t=time_function(time_mutate);
    printf(" mutate_stuff: %.3f ns/stuff\n", t/n_stuff*1.0e9);
    stuff[0]=1234.0; time_mutate();
    return (int)stuff[0];
    }
  3. Make the function "set_img" compute the same thing*, but faster. I'll give partial credit for anything under 2000ns/pixel, but for full credit you need to get below 500ns/pixel. (Executable NetRun Link)
    #include <complex> /* standard library complex number class */

    enum {h=20}; /* image height-- number of rows */
    enum {w=30}; /* image width-- number of columns */

    /// Image pixel storage
    typedef std::complex<float> myType;
    myType pos[w][h]; // Complex variable storing current position FIX: not needed
    char state[w][h]; // if 0, in progress. Otherwise, an output character.

    /// Set up the "state" image
    int set_img(void) {
    /* iteration loop */
    for (int y=0;y<h;y++)
    for (int x=0;x<w;x++)
    {
    myType t=0;
    myType off(-x*(2.0/w),y*(2.0/h)); /* FIX: parenthesis! */
    state[x][y]=0; /* iteration 0 work */
    for (int it=0;it<100;it++) { /* FIX: loop interchange for cache */
    t=t*t+off;
    /* FIX: replace "abs" with faster squared-magnitude test */
    if ((t.real()*t.real()+t.imag()*t.imag())>2.0*2.0) {
    state[x][y]=" .,:-+*#"[it&7];
    break;
    }
    }
    }
    return 0;
    }

    int foo(void) {
    double t=time_function(set_img);
    int n_pixels=h*w;

    printf(" set_img: %.3f ns/pixel\n", t/n_pixels*1.0e9);
    /* Print the image as ASCII art */
    for (int y=0;y<h;y++) {
    for (int x=0;x<w;x++)
    printf("%c",state[x][y]?state[x][y]:'X');
    printf("\n");
    }
    return 0;
    }
*: "do the same thing" means "compute the same output", *not* "use the exact same set of operations". I don't care about floating-point roundoff, as long as it doesn't affect the final answer, so you're free to use mathematical transformations (hint, hint!).

As usual, you'll turn these problem in by just naming them HW7_1, HW7_2, etc. in NetRun

Problems are due Friday, November 18, at Midnight. 


O. Lawlor, ffosl@uaf.edu
Up to: Class Site, CS, UAF