1 Reply Latest reply on Sep 23, 2014 7:42 AM by DiegoV_Intel

    Galileo CPU benchmarking results

    742002

      First, a caveat about the results herein: this benchmarking is pretty rudimentary, and I've performed it just to get a rough comparison between the Galileo running Quark and the other microcontrollers I have some experience with.  I do find figures such as these useful when designing applications, and I'm sharing my methodology and results in case anybody else might benefit from them as well.

       

      I'm running a Gen 1 Galileo off the SD card.  The image on the SD card came from http://downloadmirror.intel.com/24272/eng/SDCard.1.0.3.zip.

       

      The script I used to compile the program and the program itself are included below.

       

      Compiler optimization is turned off, but of course, whatever CPU optimizations occur once the program is running are all affecting these results.  I'm just days into my Galileo/Quark education and don't know what the CPU is doing behind the scenes vis-a-vis pipelining, etc.

       

      This program was cross-compiled in my Ubuntu 12 VM with the 1.5.3 IDE installed.

       

      There are many options for timing cpu operations, and some are far superior to my technique, but be that as it may....

       

      The measurements were taken by looking at the period of a full square-wave cycle generated on pin 3.  I used my Tektronix 2465B oscilloscope's measurement function to obtain the timings.

       

      The "measurement cost" is the period of the square wave itself; thus the program spends about 1.4 microseconds just to cycle pin 3 high and low.  Note that if you are reading the source code, two iterations of the "for (i=0;;i++)" loop occur for a full square wave; thus, although the source code contains 100 operations in the for loop block, twice that many operations occur between one rising edge of pin 3 and the next rising edge.  That's why you see "200" in the "Count" results column.

       

      Here are the results (the figure of interest is the last column, nanoseconds per operation).

              

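      | Operation          | Count | usec   | less measurement cost | ns per op |
      |--------------------|-------|--------|-----------------------|-----------|
      | Measurement cost   | 1     | 1.420  | 0.000                 | NA        |
      | for(j=0;j<100;j++) | 2     | 6.569  | 5.149                 | 2,574.50  |
      | 16 bit add         | 200   | 3.681  | 2.261                 | 11.31     |
      | 16 bit mul         | 200   | 8.664  | 7.244                 | 36.22     |
      | 16 bit div         | 200   | 15.750 | 14.330                | 71.65     |
      | 32 bit add         | 200   | 3.156  | 1.736                 | 8.68      |
      | 32 bit mul         | 200   | 5.166  | 3.746                 | 18.73     |
      | 32 bit div         | 200   | 22.640 | 21.220                | 106.10    |
      | 64 bit add         | 200   | 5.208  | 3.788                 | 18.94     |
      | 64 bit mul         | 200   | 22.720 | 21.300                | 106.50    |
      | 64 bit div         | 200   | 51.980 | 50.560                | 252.80    |
      | float add          | 200   | 10.575 | 9.155                 | 45.78     |
      | double add         | 200   | 11.160 | 9.740                 | 48.70     |
      | float mul          | 200   | 11.675 | 10.255                | 51.28     |
      | double mul         | 200   | 13.780 | 12.360                | 61.80     |
      | float div          | 200   | 43.460 | 42.040                | 210.20    |
      | double div         | 200   | 43.500 | 42.080                | 210.40    |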

       

      >>>>>>>>>>>>>>>> Start of source code >>>>>>>>>>>>>>>>>

      #include <stdio.h>

      #include <sys/types.h>

      #include <sys/stat.h>

      #include <fcntl.h>

      #include <unistd.h>

      #include <sys/mman.h>

      #include <sys/types.h>

       

       

      /*
       * Build-time configuration for the pin-toggle benchmark.
       *
       * FASTPIN selects which Arduino header pin is toggled.  Only 2 and 3
       * are supported; any other value stops the build with #error below.
       */
      #define FASTPIN 3

      /* Permission bits and open() flags used for the sysfs GPIO files. */
      #define PERMS (S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)
      #define FMODE (O_CREAT|O_WRONLY)

      /*
       * Per-pin settings:
       *   MAPPED_PIN - bit mask for the pin inside the memory-mapped register
       *   GPIONAME   - sysfs gpio entry whose "direction" file is written
       *   MUXNAME    - sysfs gpio entry of the mux that assigns the Arduino
       *                pin to the header
       */
      #if FASTPIN==2
      #define MAPPED_PIN 0x40 // IO2
      #define GPIONAME "gpio14" // IO2
      #define MUXNAME "gpio31" // Mux used to assign Arduino IO2 to header
      #elif FASTPIN==3
      #define MAPPED_PIN 0x80 // IO3
      #define GPIONAME "gpio15" // IO3
      #define MUXNAME "gpio30" // Mux used to assign Arduino IO3 to header
      #else
      #error "FASTPIN must be 2 or 3"
      #endif

      /* Size of the mmap() window and the UIO device that provides it. */
      #define MAPSIZE 4096
      #define UIODEVICE "/dev/uio1"

      /* sysfs files, built at compile time from the per-pin names above. */
      static const char direction_fn[]="/sys/class/gpio/" GPIONAME "/direction";
      static const char mux_fn[]="/sys/class/gpio/" MUXNAME "/value";

      /*
       * fastWrite(p, v): set (v != 0) or clear (v == 0) the MAPPED_PIN bit in
       * the 32-bit word that p points at.  p is accessed through a volatile
       * pointer so every call performs a real read-modify-write.
       * The continuation lines must directly follow each backslash; a blank
       * line in between would terminate the macro early.
       */
      #define fastWrite(p,v) ((v)?\
        (*(volatile unsigned int*)(p) |= MAPPED_PIN):\
        (*(volatile unsigned int*)(p) &= ~MAPPED_PIN))

       

       

      int main(int argc, char* argv[])

      {

        const char dir_str[]="out";

       

       

        int i=0;

        int fd;

        unsigned char *pMap;

       

       

        printf("Fast toggle via " UIODEVICE "\n");

        printf("sizeof(short) = %u bits\n",sizeof(short)*8);

        printf("sizeof(int) = %u bits\n",sizeof(int)*8);

        printf("sizeof(long long) = %u bits\n",sizeof(long long)*8);

        printf("sizeof(float) = %u bits\n",sizeof(float)*8);

        printf("sizeof(double) = %u bits\n",sizeof(double)*8);

       

       

        fd = open(mux_fn,FMODE,PERMS);

        write(fd,"0",1);

        close(fd);

       

       

        fd = open(direction_fn,FMODE,PERMS);

        write(fd,dir_str,sizeof(dir_str)-1);

        close(fd);

       

       

        fd = open(UIODEVICE,O_RDWR);

        if (fd < 0)

        {

        printf("Unable to open " UIODEVICE "\n");

        return(1);

        }

       

       

        pMap = (unsigned char*)mmap(NULL, MAPSIZE,

                       PROT_READ|PROT_WRITE,

                       MAP_FILE|MAP_SHARED,

                       fd, 0);

       

       

        if ((pMap == 0) || ((unsigned int)pMap == -1))

        {

        printf("Unable to memory map device " UIODEVICE " to %u\n",pMap);

        return(2);

        }

       

       

        printf("Memory mapped address is %lx\n",pMap);

       

       

        for (i=0;;i++)

        {

        fastWrite(pMap, i&1);

        {

        /*

        Set TESTTYPE to the type being benchmarked:

        unsigned short

        unsigned int

        unsigned long long

        double

        float

       

       

        Set OP to the operation being benchmarked:

        +

        *

        /

        */

      #define TESTTYPE unsigned short

      #define OP /

       

       

        TESTTYPE a=20;

        TESTTYPE b=9;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        a = a OP b;

        }

        }

        munmap(pMap,MAPSIZE);

       

       

        close(fd);

       

       

        return(0);

      }

      <<<<<<<<<<<<<<<< End of source code <<<<<<<<<<<<<<<<<

       

      >>>>>>>>>>>>>>>> Start of source code >>>>>>>>>>>>>>>>>

      #
      # Cross-compiles the benchmark program.
      # Assumes the cross-compiler is in the PATH
      #
      # NOTE: the compiler driver is g++, which treats the .c input as C++.
      #

      export G_CC=i586-poky-linux-uclibc-g++

      SYSROOT=/opt/arduino/hardware/tools/sysroots/i586-poky-linux-uclibc

      BASENAME=bench3

      # -O0 keeps the compiler from folding the repeated arithmetic statements
      # that the benchmark times.  Expansions are quoted so that a sysroot or
      # file name containing spaces is passed as a single argument.
      "${G_CC}" -O0 --sysroot="${SYSROOT}" "${BASENAME}.c" -o "${BASENAME}"

      <<<<<<<<<<<<<<<< End of source code <<<<<<<<<<<<<<<<<

       

      Message was edited by: F Shah Pasted source code in again.  Previously, I seemed to have managed to paste the make script into the middle of the source code, and that would not have compiled had anyone tried.