• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavfilter/x86/yadif_template.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of Libav.
00005  *
00006  * Libav is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * Libav is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License along
00017  * with Libav; if not, write to the Free Software Foundation, Inc.,
00018  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020 
/*
 * Instruction-set parameterisation for the filter in this file.
 *
 * With COMPILE_TEMPLATE_SSE defined the asm uses the XMM registers and
 * STEP is 8; otherwise it uses the MMX registers and STEP is 4. STEP is
 * the number of bytes by which the loop in FILTER advances x and the
 * dst, prev, cur and next pointers on each iteration.
 *
 *   MM     register name prefix; the register number is appended by the user.
 *   MOV    STEP-byte move (movq is 8 bytes, movd is 4 bytes).
 *   MOVQ   full-register move (movdqa for XMM, movq for MMX).
 *   MOVQU  full-register move without an alignment requirement
 *          (movdqu for XMM, movq for MMX).
 *   LOAD   MOV from mem into dst, then punpcklbw with register 7.
 *          FILTER clears register 7 with pxor before the first LOAD, so
 *          the interleave widens each loaded byte to a 16-bit word.
 *   PSRL1  shift the whole register right by one byte.
 *   PSRL2  shift the whole register right by two bytes.
 */
00021 #ifdef COMPILE_TEMPLATE_SSE
00022 #define MM "%%xmm"
00023 #define MOV  "movq"
00024 #define MOVQ "movdqa"
00025 #define MOVQU "movdqu"
00026 #define STEP 8
00027 #define LOAD(mem,dst) \
00028             MOV"       "mem", "dst" \n\t"\
00029             "punpcklbw "MM"7, "dst" \n\t"
00030 #define PSRL1(reg) "psrldq $1, "reg" \n\t"
00031 #define PSRL2(reg) "psrldq $2, "reg" \n\t"
/*
 * NOTE(review): the PSHUF parameter names are the reverse of what they do.
 * In AT&T operand order the first operand is read and the second is
 * written, so the register passed as dst is the input and the register
 * passed as src receives the result: a copy of the input shifted right by
 * two bytes. The input register is left unchanged.
 */
00032 #define PSHUF(src,dst) "movdqa "dst", "src" \n\t"\
00033                        "psrldq $2, "src"     \n\t"
00034 #else
00035 #define MM "%%mm"
00036 #define MOV  "movd"
00037 #define MOVQ "movq"
00038 #define MOVQU "movq"
00039 #define STEP 4
00040 #define LOAD(mem,dst) \
00041             MOV"       "mem", "dst" \n\t"\
00042             "punpcklbw "MM"7, "dst" \n\t"
00043 #define PSRL1(reg) "psrlq $8, "reg" \n\t"
00044 #define PSRL2(reg) "psrlq $16, "reg" \n\t"
/*
 * MMX form of PSHUF (same reversed parameter naming as the SSE form).
 * pshufw with immediate 9 writes words 1, 2, 0, 0 of the input, so only
 * the two low words match a two-byte right shift. The one user in FILTER
 * follows it with punpcklbw, which consumes just those low four bytes.
 */
00045 #define PSHUF(src,dst) "pshufw $9, "dst", "src" \n\t"
00046 #endif
00047 
/*
 * PABS(tmp,dst): replace each signed 16-bit word of dst with its absolute
 * value.
 *
 * With COMPILE_TEMPLATE_SSSE3 defined this is a single pabsw and tmp is
 * not touched. Otherwise tmp is overwritten: it is zeroed, dst is
 * subtracted from it (giving -dst), and dst becomes the signed word
 * maximum of dst and -dst. Callers therefore have to pass a tmp register
 * whose contents are dead.
 */
00048 #ifdef COMPILE_TEMPLATE_SSSE3
00049 #define PABS(tmp,dst) \
00050             "pabsw     "dst", "dst" \n\t"
00051 #else
00052 #define PABS(tmp,dst) \
00053             "pxor     "tmp", "tmp" \n\t"\
00054             "psubw    "dst", "tmp" \n\t"\
00055             "pmaxsw   "tmp", "dst" \n\t"
00056 #endif
00057 
/*
 * CHECK(pj,mj): evaluate one diagonal interpolation direction.
 *
 * pj and mj are stringized and used as byte displacements: a full
 * register is loaded (MOVQU) from pj(cur + mrefs) and from mj(cur + prefs).
 * Going by the in-line comments, pj corresponds to -1+j and mj to -1-j
 * for a direction j.
 *
 * Results:
 *   register 5  per-pixel truncating average of the two rows, taken one
 *               byte into the loaded data and widened to words. pavgb
 *               rounds up, so the low bit of the xor of the two inputs is
 *               subtracted afterwards to get a plain (a+b)>>1.
 *   register 2  score: the sum, as words, of the byte-wise absolute
 *               differences at byte offsets 0, 1 and 2 of the loaded data.
 *
 * Registers 3 and 4 are used as scratch. Register 7 must be zero, since it
 * is the second operand of every punpcklbw here. Registers 0, 1 and 6 are
 * not written.
 *
 * pb_1 is referenced through MANGLE() and defined outside this file;
 * presumably it holds the value 1 in every byte (TODO confirm).
 */
00058 #define CHECK(pj,mj) \
00059             MOVQU" "#pj"(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1+j] */\
00060             MOVQU" "#mj"(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1-j] */\
00061             MOVQ"      "MM"2, "MM"4 \n\t"\
00062             MOVQ"      "MM"2, "MM"5 \n\t"\
00063             "pxor      "MM"3, "MM"4 \n\t"\
00064             "pavgb     "MM"3, "MM"5 \n\t"\
00065             "pand     "MANGLE(pb_1)", "MM"4 \n\t"\
00066             "psubusb   "MM"4, "MM"5 \n\t"\
00067             PSRL1(MM"5")                 \
00068             "punpcklbw "MM"7, "MM"5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
00069             MOVQ"      "MM"2, "MM"4 \n\t"\
00070             "psubusb   "MM"3, "MM"2 \n\t"\
00071             "psubusb   "MM"4, "MM"3 \n\t"\
00072             "pmaxub    "MM"3, "MM"2 \n\t"\
00073             MOVQ"      "MM"2, "MM"3 \n\t"\
00074             MOVQ"      "MM"2, "MM"4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
00075             PSRL1(MM"3")                  /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
00076             PSRL2(MM"4")                  /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
00077             "punpcklbw "MM"7, "MM"2 \n\t"\
00078             "punpcklbw "MM"7, "MM"3 \n\t"\
00079             "punpcklbw "MM"7, "MM"4 \n\t"\
00080             "paddw     "MM"3, "MM"2 \n\t"\
00081             "paddw     "MM"4, "MM"2 \n\t" /* score */
00082 
/*
 * CHECK1: accept the direction just evaluated by CHECK wherever it scores
 * better than the best so far.
 *
 * Inputs:  register 0 = best spatial_score, register 1 = best spatial_pred,
 *          register 2 = new score, register 5 = new prediction.
 * Outputs: register 0 = word-wise signed minimum of old and new score,
 *          register 1 = new prediction where old score > new score,
 *                       otherwise unchanged,
 *          register 6 = the per-word comparison mask (all ones where the
 *                       new score won). CHECK2 consumes this mask.
 *
 * Register 3 is scratch, and register 5 is overwritten with its masked
 * value.
 */
00083 #define CHECK1 \
00084             MOVQ"      "MM"0, "MM"3 \n\t"\
00085             "pcmpgtw   "MM"2, "MM"3 \n\t" /* if(score < spatial_score) */\
00086             "pminsw    "MM"2, "MM"0 \n\t" /* spatial_score= score; */\
00087             MOVQ"      "MM"3, "MM"6 \n\t"\
00088             "pand      "MM"3, "MM"5 \n\t"\
00089             "pandn     "MM"1, "MM"3 \n\t"\
00090             "por       "MM"5, "MM"3 \n\t"\
00091             MOVQ"      "MM"3, "MM"1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
00092 
/*
 * CHECK2: same selection as CHECK1, but for the second step in a
 * direction, which is only allowed to win where the first step won.
 *
 * Register 6 holds the mask left by CHECK1 (all ones where the first step
 * won, zero elsewhere). pw_1 is added to it and the result is shifted left
 * by 14, then added with signed saturation to the new score in register 2.
 * pw_1 is defined outside this file; assuming it holds 1 in every word
 * (TODO confirm), the mask becomes 0 where the first step won and 0x4000
 * elsewhere, i.e. a large penalty on the score of pixels where it lost.
 *
 * After that, registers 0 (best score) and 1 (best prediction) are updated
 * exactly as in CHECK1. Register 3 is scratch, and registers 2, 5 and 6
 * are overwritten. Unlike CHECK1, no new mask is saved in register 6.
 */
00093 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
00094                   hurts both quality and speed, but matches the C version. */\
00095             "paddw    "MANGLE(pw_1)", "MM"6 \n\t"\
00096             "psllw     $14,   "MM"6 \n\t"\
00097             "paddsw    "MM"6, "MM"2 \n\t"\
00098             MOVQ"      "MM"0, "MM"3 \n\t"\
00099             "pcmpgtw   "MM"2, "MM"3 \n\t"\
00100             "pminsw    "MM"2, "MM"0 \n\t"\
00101             "pand      "MM"3, "MM"5 \n\t"\
00102             "pandn     "MM"1, "MM"3 \n\t"\
00103             "por       "MM"5, "MM"3 \n\t"\
00104             MOVQ"      "MM"3, "MM"1 \n\t"
00105 
/*
 * Filter one line with the yadif algorithm, STEP pixels at a time, using
 * MMX or SSE inline assembly (selected by the macros defined above).
 *
 * RENAME() is not defined in this file; it is supplied by whatever
 * includes this template.
 *
 * Parameters:
 *   dst    output line. STEP bytes are stored per loop iteration.
 *   prev, cur, next
 *          the three input lines at the position being filtered
 *          (presumably previous, current and next picture; confirm with
 *          the caller). Neighbouring rows are addressed relative to these
 *          pointers through mrefs and prefs.
 *   w      number of pixels to produce. The loop runs while x < w in
 *          increments of STEP.
 *   prefs  byte offset added to a line pointer to reach the row the
 *          in-line comments call x+refs.
 *   mrefs  byte offset added to a line pointer to reach the row the
 *          in-line comments call x-refs.
 *   parity selects which two lines act as prev2 and next2: non-zero uses
 *          prev and cur, zero uses cur and next.
 *   mode   when mode >= 2 the extra step that widens diff to
 *          MAX3(diff, min, -max) is skipped (cmpl $2 then jge 1f). That
 *          step also reads prev2 and next2 at 2*mrefs and 2*prefs.
 *
 * Per pixel the result is spatial_pred clipped to [d-diff, d+diff], packed
 * back to bytes with unsigned saturation.
 *
 * NOTE(review): things a caller or maintainer has to be aware of, all
 * visible in the code below:
 *   - The loop always handles whole groups of STEP pixels, so when w is
 *     not a multiple of STEP the last store writes past dst + w.
 *   - The MOVQU loads from cur + mrefs and cur + prefs use displacements
 *     from -3 to +1 and load a full register each, so they read bytes
 *     before and after the STEP pixels being produced.
 *   - The main asm statement has no clobber list although it overwrites
 *     vector registers 0 to 7 and the flags.
 *   - The result is handed from the main asm statement to the separate
 *     store statement in register 1, which relies on the compiler not
 *     touching that register in between.
 *   - The store operand is "=m"(*dst), a single uint8_t, while the
 *     instruction writes STEP bytes.
 *   - No emms is issued in this function for the MMX variant.
 */
00106 void RENAME(ff_yadif_filter_line)(uint8_t *dst,
00107                                   uint8_t *prev, uint8_t *cur, uint8_t *next,
00108                                   int w, int prefs, int mrefs, int parity, int mode)
00109 {
/* 16-byte aligned spill slots used by the asm: c, d, e and diff. */
00110     DECLARE_ALIGNED(16, uint8_t, tmp0[16]);
00111     DECLARE_ALIGNED(16, uint8_t, tmp1[16]);
00112     DECLARE_ALIGNED(16, uint8_t, tmp2[16]);
00113     DECLARE_ALIGNED(16, uint8_t, tmp3[16]);
00114     int x;
00115 
/*
 * FILTER expands to the whole processing loop. It expects prev2 and next2
 * to be defined as string literals naming the asm operands to use for
 * those two lines; they are pasted into the operand references below.
 */
00116 #define FILTER\
00117     for(x=0; x<w; x+=STEP){\
00118         __asm__ volatile(\
00119             "pxor      "MM"7, "MM"7 \n\t"\
00120             LOAD("(%[cur],%[mrefs])", MM"0") /* c = cur[x-refs] */\
00121             LOAD("(%[cur],%[prefs])", MM"1") /* e = cur[x+refs] */\
00122             LOAD("(%["prev2"])", MM"2") /* prev2[x] */\
00123             LOAD("(%["next2"])", MM"3") /* next2[x] */\
00124             MOVQ"      "MM"3, "MM"4 \n\t"\
00125             "paddw     "MM"2, "MM"3 \n\t"\
00126             "psraw     $1,    "MM"3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
00127             MOVQ"      "MM"0, %[tmp0] \n\t" /* c */\
00128             MOVQ"      "MM"3, %[tmp1] \n\t" /* d */\
00129             MOVQ"      "MM"1, %[tmp2] \n\t" /* e */\
00130             "psubw     "MM"4, "MM"2 \n\t"\
00131             PABS(      MM"4", MM"2") /* temporal_diff0 */\
00132             LOAD("(%[prev],%[mrefs])", MM"3") /* prev[x-refs] */\
00133             LOAD("(%[prev],%[prefs])", MM"4") /* prev[x+refs] */\
00134             "psubw     "MM"0, "MM"3 \n\t"\
00135             "psubw     "MM"1, "MM"4 \n\t"\
00136             PABS(      MM"5", MM"3")\
00137             PABS(      MM"5", MM"4")\
00138             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff1 */\
00139             "psrlw     $1,    "MM"2 \n\t"\
00140             "psrlw     $1,    "MM"3 \n\t"\
00141             "pmaxsw    "MM"3, "MM"2 \n\t"\
00142             LOAD("(%[next],%[mrefs])", MM"3") /* next[x-refs] */\
00143             LOAD("(%[next],%[prefs])", MM"4") /* next[x+refs] */\
00144             "psubw     "MM"0, "MM"3 \n\t"\
00145             "psubw     "MM"1, "MM"4 \n\t"\
00146             PABS(      MM"5", MM"3")\
00147             PABS(      MM"5", MM"4")\
00148             "paddw     "MM"4, "MM"3 \n\t" /* temporal_diff2 */\
00149             "psrlw     $1,    "MM"3 \n\t"\
00150             "pmaxsw    "MM"3, "MM"2 \n\t"\
00151             MOVQ"      "MM"2, %[tmp3] \n\t" /* diff */\
00152 \
00153             "paddw     "MM"0, "MM"1 \n\t"\
00154             "paddw     "MM"0, "MM"0 \n\t"\
00155             "psubw     "MM"1, "MM"0 \n\t"\
00156             "psrlw     $1,    "MM"1 \n\t" /* spatial_pred */\
00157             PABS(      MM"2", MM"0")      /* ABS(c-e) */\
00158 \
00159             MOVQU" -1(%[cur],%[mrefs]), "MM"2 \n\t" /* cur[x-refs-1] */\
00160             MOVQU" -1(%[cur],%[prefs]), "MM"3 \n\t" /* cur[x+refs-1] */\
00161             MOVQ"      "MM"2, "MM"4 \n\t"\
00162             "psubusb   "MM"3, "MM"2 \n\t"\
00163             "psubusb   "MM"4, "MM"3 \n\t"\
00164             "pmaxub    "MM"3, "MM"2 \n\t"\
00165             PSHUF(MM"3", MM"2") \
00166             "punpcklbw "MM"7, "MM"2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
00167             "punpcklbw "MM"7, "MM"3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
00168             "paddw     "MM"2, "MM"0 \n\t"\
00169             "paddw     "MM"3, "MM"0 \n\t"\
00170             "psubw    "MANGLE(pw_1)", "MM"0 \n\t" /* spatial_score */\
00171 \
00172             CHECK(-2,0)\
00173             CHECK1\
00174             CHECK(-3,1)\
00175             CHECK2\
00176             CHECK(0,-2)\
00177             CHECK1\
00178             CHECK(1,-3)\
00179             CHECK2\
00180 \
00181             /* if(p->mode<2) ... */\
00182             MOVQ"    %[tmp3], "MM"6 \n\t" /* diff */\
00183             "cmpl      $2, %[mode] \n\t"\
00184             "jge       1f \n\t"\
00185             LOAD("(%["prev2"],%[mrefs],2)", MM"2") /* prev2[x-2*refs] */\
00186             LOAD("(%["next2"],%[mrefs],2)", MM"4") /* next2[x-2*refs] */\
00187             LOAD("(%["prev2"],%[prefs],2)", MM"3") /* prev2[x+2*refs] */\
00188             LOAD("(%["next2"],%[prefs],2)", MM"5") /* next2[x+2*refs] */\
00189             "paddw     "MM"4, "MM"2 \n\t"\
00190             "paddw     "MM"5, "MM"3 \n\t"\
00191             "psrlw     $1,    "MM"2 \n\t" /* b */\
00192             "psrlw     $1,    "MM"3 \n\t" /* f */\
00193             MOVQ"    %[tmp0], "MM"4 \n\t" /* c */\
00194             MOVQ"    %[tmp1], "MM"5 \n\t" /* d */\
00195             MOVQ"    %[tmp2], "MM"7 \n\t" /* e */\
00196             "psubw     "MM"4, "MM"2 \n\t" /* b-c */\
00197             "psubw     "MM"7, "MM"3 \n\t" /* f-e */\
00198             MOVQ"      "MM"5, "MM"0 \n\t"\
00199             "psubw     "MM"4, "MM"5 \n\t" /* d-c */\
00200             "psubw     "MM"7, "MM"0 \n\t" /* d-e */\
00201             MOVQ"      "MM"2, "MM"4 \n\t"\
00202             "pminsw    "MM"3, "MM"2 \n\t"\
00203             "pmaxsw    "MM"4, "MM"3 \n\t"\
00204             "pmaxsw    "MM"5, "MM"2 \n\t"\
00205             "pminsw    "MM"5, "MM"3 \n\t"\
00206             "pmaxsw    "MM"0, "MM"2 \n\t" /* max */\
00207             "pminsw    "MM"0, "MM"3 \n\t" /* min */\
00208             "pxor      "MM"4, "MM"4 \n\t"\
00209             "pmaxsw    "MM"3, "MM"6 \n\t"\
00210             "psubw     "MM"2, "MM"4 \n\t" /* -max */\
00211             "pmaxsw    "MM"4, "MM"6 \n\t" /* diff= MAX3(diff, min, -max); */\
00212             "1: \n\t"\
00213 \
00214             MOVQ"    %[tmp1], "MM"2 \n\t" /* d */\
00215             MOVQ"      "MM"2, "MM"3 \n\t"\
00216             "psubw     "MM"6, "MM"2 \n\t" /* d-diff */\
00217             "paddw     "MM"6, "MM"3 \n\t" /* d+diff */\
00218             "pmaxsw    "MM"2, "MM"1 \n\t"\
00219             "pminsw    "MM"3, "MM"1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
00220             "packuswb  "MM"1, "MM"1 \n\t"\
00221 \
00222             :[tmp0]"=m"(tmp0),\
00223              [tmp1]"=m"(tmp1),\
00224              [tmp2]"=m"(tmp2),\
00225              [tmp3]"=m"(tmp3)\
00226             :[prev] "r"(prev),\
00227              [cur]  "r"(cur),\
00228              [next] "r"(next),\
00229              [prefs]"r"((x86_reg)prefs),\
00230              [mrefs]"r"((x86_reg)mrefs),\
00231              [mode] "g"(mode)\
00232         );\
00233         __asm__ volatile(MOV" "MM"1, %0" :"=m"(*dst));\
00234         dst += STEP;\
00235         prev+= STEP;\
00236         cur += STEP;\
00237         next+= STEP;\
00238     }
00239 
/*
 * Instantiate the loop twice, once per parity. prev2 and next2 name the
 * asm operands that FILTER reads as the two temporally adjacent lines:
 * prev and cur when parity is non-zero, cur and next otherwise.
 */
00240     if (parity) {
00241 #define prev2 "prev"
00242 #define next2 "cur"
00243         FILTER
00244 #undef prev2
00245 #undef next2
00246     } else {
00247 #define prev2 "cur"
00248 #define next2 "next"
00249         FILTER
00250 #undef prev2
00251 #undef next2
00252     }
00253 }
/*
 * Remove every helper macro defined in this file. Presumably this lets the
 * template be included again with different COMPILE_TEMPLATE_* settings
 * without redefinition clashes (TODO confirm against the including file).
 * RENAME and the COMPILE_TEMPLATE_* switches are not defined here and are
 * not undefined here.
 */
00254 #undef STEP
00255 #undef MM
00256 #undef MOV
00257 #undef MOVQ
00258 #undef MOVQU
00259 #undef PSHUF
00260 #undef PSRL1
00261 #undef PSRL2
00262 #undef LOAD
00263 #undef PABS
00264 #undef CHECK
00265 #undef CHECK1
00266 #undef CHECK2
00267 #undef FILTER
00268 
Generated on Sat Mar 17 2012 12:57:52 for Libav by doxygen 1.7.1