• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/dct-test.c

Go to the documentation of this file.
00001 /*
00002  * (c) 2001 Fabrice Bellard
00003  *     2007 Marc Hoffman <marc.hoffman@analog.com>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <sys/time.h>
00032 #include <unistd.h>
00033 #include <math.h>
00034 
00035 #include "libavutil/cpu.h"
00036 #include "libavutil/common.h"
00037 #include "libavutil/lfg.h"
00038 
00039 #include "simple_idct.h"
00040 #include "aandcttab.h"
00041 #include "faandct.h"
00042 #include "faanidct.h"
00043 #include "x86/idct_xvid.h"
00044 #include "dctref.h"
00045 
00046 #undef printf
00047 
00048 void ff_mmx_idct(DCTELEM *data);
00049 void ff_mmxext_idct(DCTELEM *data);
00050 
00051 void odivx_idct_c(short *block);
00052 
00053 // BFIN
00054 void ff_bfin_idct(DCTELEM *block);
00055 void ff_bfin_fdct(DCTELEM *block);
00056 
00057 // ALTIVEC
00058 void fdct_altivec(DCTELEM *block);
00059 //void idct_altivec(DCTELEM *block);?? no routine
00060 
00061 // ARM
00062 void ff_j_rev_dct_arm(DCTELEM *data);
00063 void ff_simple_idct_arm(DCTELEM *data);
00064 void ff_simple_idct_armv5te(DCTELEM *data);
00065 void ff_simple_idct_armv6(DCTELEM *data);
00066 void ff_simple_idct_neon(DCTELEM *data);
00067 
00068 void ff_simple_idct_axp(DCTELEM *data);
00069 
00070 struct algo {
00071     const char *name;
00072     void (*func)(DCTELEM *block);
00073     enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
00074                      SSE2_PERM, PARTTRANS_PERM } format;
00075     int mm_support;
00076     int nonspec;
00077 };
00078 
00079 #ifndef FAAN_POSTSCALE
00080 #define FAAN_SCALE SCALE_PERM
00081 #else
00082 #define FAAN_SCALE NO_PERM
00083 #endif
00084 
00085 static int cpu_flags;
00086 
00087 static const struct algo fdct_tab[] = {
00088     { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
00089     { "FAAN",           ff_faandct,            FAAN_SCALE },
00090     { "IJG-AAN-INT",    fdct_ifast,            SCALE_PERM },
00091     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
00092 
00093 #if HAVE_MMX
00094     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
00095     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    },
00096     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
00097 #endif
00098 
00099 #if HAVE_ALTIVEC
00100     { "altivecfdct",    fdct_altivec,          NO_PERM,   AV_CPU_FLAG_ALTIVEC },
00101 #endif
00102 
00103 #if ARCH_BFIN
00104     { "BFINfdct",       ff_bfin_fdct,          NO_PERM  },
00105 #endif
00106 
00107     { 0 }
00108 };
00109 
00110 static const struct algo idct_tab[] = {
00111     { "FAANI",          ff_faanidct,           NO_PERM  },
00112     { "REF-DBL",        ff_ref_idct,           NO_PERM  },
00113     { "INT",            j_rev_dct,             MMX_PERM },
00114     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
00115 
00116 #if HAVE_MMX
00117 #if CONFIG_GPL
00118     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
00119     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
00120 #endif
00121     { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
00122     { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
00123     { "XVID-MMX2",      ff_idct_xvid_mmx2,     NO_PERM,   AV_CPU_FLAG_MMX2, 1 },
00124     { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
00125 #endif
00126 
00127 #if ARCH_BFIN
00128     { "BFINidct",       ff_bfin_idct,          NO_PERM  },
00129 #endif
00130 
00131 #if ARCH_ARM
00132     { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
00133     { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
00134 #endif
00135 #if HAVE_ARMV5TE
00136     { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM  },
00137 #endif
00138 #if HAVE_ARMV6
00139     { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM },
00140 #endif
00141 #if HAVE_NEON
00142     { "SIMPLE-NEON",    ff_simple_idct_neon,   PARTTRANS_PERM },
00143 #endif
00144 
00145 #if ARCH_ALPHA
00146     { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM },
00147 #endif
00148 
00149     { 0 }
00150 };
00151 
00152 #define AANSCALE_BITS 12
00153 
/* Current wall-clock time from gettimeofday(), expressed in microseconds. */
static int64_t gettime(void)
{
    struct timeval now;
    int64_t usec;

    gettimeofday(&now, NULL);

    /* widen the seconds before scaling so the product is computed in 64 bits */
    usec  = (int64_t) now.tv_sec * 1000000;
    usec += now.tv_usec;
    return usec;
}
00160 
00161 #define NB_ITS 20000
00162 #define NB_ITS_SPEED 50000
00163 
/* Coefficient reordering for MMX_PERM entries; filled in by idct_mmx_init(). */
static short idct_mmx_perm[64];

/*
 * Fixed coefficient reordering for MMX_SIMPLE_PERM entries: source index i
 * is stored at destination index idct_simple_mmx_perm[i].  The table is
 * only ever read, so it is const.
 */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Reordering of the low three index bits (position within a group of 8) for SSE2_PERM entries. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };
00178 
00179 static void idct_mmx_init(void)
00180 {
00181     int i;
00182 
00183     /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
00184     for (i = 0; i < 64; i++) {
00185         idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
00186     }
00187 }
00188 
00189 DECLARE_ALIGNED(16, static DCTELEM, block)[64];
00190 DECLARE_ALIGNED(8,  static DCTELEM, block1)[64];
00191 
/*
 * Execute the x86 "emms" instruction when the build has MMX support and the
 * running CPU reports MMX; otherwise do nothing.
 */
static inline void mmx_emms(void)
{
#if HAVE_MMX
    if (!(cpu_flags & AV_CPU_FLAG_MMX))
        return;

    __asm__ volatile ("emms\n\t");
#endif
}
00199 
00200 static void init_block(DCTELEM block[64], int test, int is_idct, AVLFG *prng)
00201 {
00202     int i, j;
00203 
00204     memset(block, 0, 64 * sizeof(*block));
00205 
00206     switch (test) {
00207     case 0:
00208         for (i = 0; i < 64; i++)
00209             block[i] = (av_lfg_get(prng) % 512) - 256;
00210         if (is_idct) {
00211             ff_ref_fdct(block);
00212             for (i = 0; i < 64; i++)
00213                 block[i] >>= 3;
00214         }
00215         break;
00216     case 1:
00217         j = av_lfg_get(prng) % 10 + 1;
00218         for (i = 0; i < j; i++)
00219             block[av_lfg_get(prng) % 64] = av_lfg_get(prng) % 512 - 256;
00220         break;
00221     case 2:
00222         block[ 0] = av_lfg_get(prng) % 4096 - 2048;
00223         block[63] = (block[0] & 1) ^ 1;
00224         break;
00225     }
00226 }
00227 
00228 static void permute(DCTELEM dst[64], const DCTELEM src[64], int perm)
00229 {
00230     int i;
00231 
00232     if (perm == MMX_PERM) {
00233         for (i = 0; i < 64; i++)
00234             dst[idct_mmx_perm[i]] = src[i];
00235     } else if (perm == MMX_SIMPLE_PERM) {
00236         for (i = 0; i < 64; i++)
00237             dst[idct_simple_mmx_perm[i]] = src[i];
00238     } else if (perm == SSE2_PERM) {
00239         for (i = 0; i < 64; i++)
00240             dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
00241     } else if (perm == PARTTRANS_PERM) {
00242         for (i = 0; i < 64; i++)
00243             dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
00244     } else {
00245         for (i = 0; i < 64; i++)
00246             dst[i] = src[i];
00247     }
00248 }
00249 
00250 static int dct_error(const struct algo *dct, int test, int is_idct, int speed)
00251 {
00252     void (*ref)(DCTELEM *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
00253     int it, i, scale;
00254     int err_inf, v;
00255     int64_t err2, ti, ti1, it1, err_sum = 0;
00256     int64_t sysErr[64], sysErrMax = 0;
00257     int maxout = 0;
00258     int blockSumErrMax = 0, blockSumErr;
00259     AVLFG prng;
00260     double omse, ome;
00261     int spec_err;
00262 
00263     av_lfg_init(&prng, 1);
00264 
00265     err_inf = 0;
00266     err2 = 0;
00267     for (i = 0; i < 64; i++)
00268         sysErr[i] = 0;
00269     for (it = 0; it < NB_ITS; it++) {
00270         init_block(block1, test, is_idct, &prng);
00271         permute(block, block1, dct->format);
00272 
00273         dct->func(block);
00274         mmx_emms();
00275 
00276         if (dct->format == SCALE_PERM) {
00277             for (i = 0; i < 64; i++) {
00278                 scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
00279                 block[i] = (block[i] * scale) >> AANSCALE_BITS;
00280             }
00281         }
00282 
00283         ref(block1);
00284 
00285         blockSumErr = 0;
00286         for (i = 0; i < 64; i++) {
00287             int err = block[i] - block1[i];
00288             err_sum += err;
00289             v = abs(err);
00290             if (v > err_inf)
00291                 err_inf = v;
00292             err2 += v * v;
00293             sysErr[i] += block[i] - block1[i];
00294             blockSumErr += v;
00295             if (abs(block[i]) > maxout)
00296                 maxout = abs(block[i]);
00297         }
00298         if (blockSumErrMax < blockSumErr)
00299             blockSumErrMax = blockSumErr;
00300     }
00301     for (i = 0; i < 64; i++)
00302         sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
00303 
00304     for (i = 0; i < 64; i++) {
00305         if (i % 8 == 0)
00306             printf("\n");
00307         printf("%7d ", (int) sysErr[i]);
00308     }
00309     printf("\n");
00310 
00311     omse = (double) err2 / NB_ITS / 64;
00312     ome  = (double) err_sum / NB_ITS / 64;
00313 
00314     spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
00315 
00316     printf("%s %s: ppe=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
00317            is_idct ? "IDCT" : "DCT", dct->name, err_inf,
00318            omse, ome, (double) sysErrMax / NB_ITS,
00319            maxout, blockSumErrMax);
00320 
00321     if (spec_err && !dct->nonspec)
00322         return 1;
00323 
00324     if (!speed)
00325         return 0;
00326 
00327     /* speed test */
00328     init_block(block, test, is_idct, &prng);
00329     permute(block1, block, dct->format);
00330 
00331     ti = gettime();
00332     it1 = 0;
00333     do {
00334         for (it = 0; it < NB_ITS_SPEED; it++) {
00335             memcpy(block, block1, sizeof(block));
00336             dct->func(block);
00337         }
00338         it1 += NB_ITS_SPEED;
00339         ti1 = gettime() - ti;
00340     } while (ti1 < 1000000);
00341     mmx_emms();
00342 
00343     printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
00344            (double) it1 * 1000.0 / (double) ti1);
00345 
00346     return 0;
00347 }
00348 
00349 DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
00350 DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
00351 
00352 static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
00353 {
00354     static int init;
00355     static double c8[8][8];
00356     static double c4[4][4];
00357     double block1[64], block2[64], block3[64];
00358     double s, sum, v;
00359     int i, j, k;
00360 
00361     if (!init) {
00362         init = 1;
00363 
00364         for (i = 0; i < 8; i++) {
00365             sum = 0;
00366             for (j = 0; j < 8; j++) {
00367                 s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
00368                 c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
00369                 sum += c8[i][j] * c8[i][j];
00370             }
00371         }
00372 
00373         for (i = 0; i < 4; i++) {
00374             sum = 0;
00375             for (j = 0; j < 4; j++) {
00376                 s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
00377                 c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
00378                 sum += c4[i][j] * c4[i][j];
00379             }
00380         }
00381     }
00382 
00383     /* butterfly */
00384     s = 0.5 * sqrt(2.0);
00385     for (i = 0; i < 4; i++) {
00386         for (j = 0; j < 8; j++) {
00387             block1[8 * (2 * i) + j] =
00388                 (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
00389             block1[8 * (2 * i + 1) + j] =
00390                 (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
00391         }
00392     }
00393 
00394     /* idct8 on lines */
00395     for (i = 0; i < 8; i++) {
00396         for (j = 0; j < 8; j++) {
00397             sum = 0;
00398             for (k = 0; k < 8; k++)
00399                 sum += c8[k][j] * block1[8 * i + k];
00400             block2[8 * i + j] = sum;
00401         }
00402     }
00403 
00404     /* idct4 */
00405     for (i = 0; i < 8; i++) {
00406         for (j = 0; j < 4; j++) {
00407             /* top */
00408             sum = 0;
00409             for (k = 0; k < 4; k++)
00410                 sum += c4[k][j] * block2[8 * (2 * k) + i];
00411             block3[8 * (2 * j) + i] = sum;
00412 
00413             /* bottom */
00414             sum = 0;
00415             for (k = 0; k < 4; k++)
00416                 sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
00417             block3[8 * (2 * j + 1) + i] = sum;
00418         }
00419     }
00420 
00421     /* clamp and store the result */
00422     for (i = 0; i < 8; i++) {
00423         for (j = 0; j < 8; j++) {
00424             v = block3[8 * i + j];
00425             if      (v < 0)   v = 0;
00426             else if (v > 255) v = 255;
00427             dest[i * linesize + j] = (int) rint(v);
00428         }
00429     }
00430 }
00431 
00432 static void idct248_error(const char *name,
00433                           void (*idct248_put)(uint8_t *dest, int line_size,
00434                                               int16_t *block),
00435                           int speed)
00436 {
00437     int it, i, it1, ti, ti1, err_max, v;
00438     AVLFG prng;
00439 
00440     av_lfg_init(&prng, 1);
00441 
00442     /* just one test to see if code is correct (precision is less
00443        important here) */
00444     err_max = 0;
00445     for (it = 0; it < NB_ITS; it++) {
00446         /* XXX: use forward transform to generate values */
00447         for (i = 0; i < 64; i++)
00448             block1[i] = av_lfg_get(&prng) % 256 - 128;
00449         block1[0] += 1024;
00450 
00451         for (i = 0; i < 64; i++)
00452             block[i] = block1[i];
00453         idct248_ref(img_dest1, 8, block);
00454 
00455         for (i = 0; i < 64; i++)
00456             block[i] = block1[i];
00457         idct248_put(img_dest, 8, block);
00458 
00459         for (i = 0; i < 64; i++) {
00460             v = abs((int) img_dest[i] - (int) img_dest1[i]);
00461             if (v == 255)
00462                 printf("%d %d\n", img_dest[i], img_dest1[i]);
00463             if (v > err_max)
00464                 err_max = v;
00465         }
00466     }
00467     printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
00468 
00469     if (!speed)
00470         return;
00471 
00472     ti = gettime();
00473     it1 = 0;
00474     do {
00475         for (it = 0; it < NB_ITS_SPEED; it++) {
00476             for (i = 0; i < 64; i++)
00477                 block[i] = block1[i];
00478             idct248_put(img_dest, 8, block);
00479         }
00480         it1 += NB_ITS_SPEED;
00481         ti1 = gettime() - ti;
00482     } while (ti1 < 1000000);
00483     mmx_emms();
00484 
00485     printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
00486            (double) it1 * 1000.0 / (double) ti1);
00487 }
00488 
/* Print the command-line usage summary on stdout. */
static void help(void)
{
    static const char usage[] =
        "dct-test [-i] [<test-number>]\n"
        "test-number 0 -> test with random matrixes\n"
        "            1 -> test with random sparse matrixes\n"
        "            2 -> do 3. test from mpeg4 std\n"
        "-i          test IDCT implementations\n"
        "-4          test IDCT248 implementations\n"
        "-t          speed test\n";

    printf("%s", usage);
}
00499 
00500 int main(int argc, char **argv)
00501 {
00502     int test_idct = 0, test_248_dct = 0;
00503     int c, i;
00504     int test = 1;
00505     int speed = 0;
00506     int err = 0;
00507 
00508     cpu_flags = av_get_cpu_flags();
00509 
00510     ff_ref_dct_init();
00511     idct_mmx_init();
00512 
00513     for (;;) {
00514         c = getopt(argc, argv, "ih4t");
00515         if (c == -1)
00516             break;
00517         switch (c) {
00518         case 'i':
00519             test_idct = 1;
00520             break;
00521         case '4':
00522             test_248_dct = 1;
00523             break;
00524         case 't':
00525             speed = 1;
00526             break;
00527         default:
00528         case 'h':
00529             help();
00530             return 0;
00531         }
00532     }
00533 
00534     if (optind < argc)
00535         test = atoi(argv[optind]);
00536 
00537     printf("Libav DCT/IDCT test\n");
00538 
00539     if (test_248_dct) {
00540         idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
00541     } else {
00542         const struct algo *algos = test_idct ? idct_tab : fdct_tab;
00543         for (i = 0; algos[i].name; i++)
00544             if (!(~cpu_flags & algos[i].mm_support)) {
00545                 err |= dct_error(&algos[i], test, test_idct, speed);
00546             }
00547     }
00548 
00549     return err;
00550 }
Generated on Sat Mar 17 2012 12:57:43 for Libav by doxygen 1.7.1