/* * Copyright (c) 1998, 2003, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. 
*/ #include "vis_proto.h" #include "mlib_image.h" #include "mlib_ImageColormap.h" #include "mlib_ImageAffine.h" #include "mlib_v_ImageFilters.h" /* MLIB_LIMIT: when max_xsize exceeds this value the functions below allocate the intermediate row buffer on the heap instead of using their on-stack array. MLIB_SHIFT: right shift applied to the X and Y coordinates to obtain the integer source column and row. */ #define MLIB_LIMIT 512 #define MLIB_SHIFT 16 /* DECLAREVAR: locals shared by every function below. Expands DECLAREVAR0() (defined elsewhere) and adds copies of param->warp_tbl, param->srcYStride, param->filter and param->max_xsize, the source and destination index pointers (MLIB_TYPE), the write pointer into the intermediate pixel row, and the counter i. */ #undef DECLAREVAR #define DECLAREVAR() \ DECLAREVAR0(); \ mlib_s32 *warp_tbl = param -> warp_tbl; \ mlib_s32 xSrc, ySrc; \ mlib_s32 srcYStride = param -> srcYStride; \ mlib_s32 filter = param -> filter; \ mlib_s32 max_xsize = param -> max_xsize; \ MLIB_TYPE *srcIndexPtr; \ MLIB_TYPE *dstIndexPtr; \ mlib_d64 *dstPixelPtr; \ mlib_s32 i /* DECLAREVAR_U8: temporaries for the U8 macros (LUT entries read as mlib_f32): filter table offsets, one yFilter and four xFilter vectors, the 4x4 neighbourhood in hi_rowRC and lo_rowRC, partial products v and d, the pixel count cols and the packed result res. */ #define DECLAREVAR_U8() \ mlib_s32 filterposx, filterposy; \ mlib_d64 sum0, sum1, sum2, sum3; \ mlib_f32 hi_row00, hi_row10, hi_row20, hi_row30; \ mlib_f32 hi_row01, hi_row11, hi_row21, hi_row31; \ mlib_f32 lo_row00, lo_row10, lo_row20, lo_row30; \ mlib_f32 lo_row01, lo_row11, lo_row21, lo_row31; \ mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3, yFilter; \ mlib_d64 v00, v10, v20, v30; \ mlib_d64 v01, v11, v21, v31; \ mlib_d64 v02, v12, v22, v32; \ mlib_d64 v03, v13, v23, v33; \ mlib_d64 d0, d1, d2, d3; \ mlib_d64 d00, d10, d20, d30; \ mlib_d64 d01, d11, d21, d31; \ mlib_s32 cols; \ mlib_d64 res, *xPtr /* DECLAREVAR_S16: temporaries for the S16 macros (LUT entries read as mlib_d64): the 4x4 neighbourhood in rowRC, four x and four y filter vectors, partial products u, v and d, the pixel count cols, the packed result res, and the constant f_x01000100 that is passed to vis_fmuld8sux16 just before vis_fpackfix_pair. */ #define DECLAREVAR_S16() \ mlib_s32 filterposx, filterposy; \ mlib_d64 sum0, sum1, sum2, sum3; \ mlib_d64 row00, row10, row20, row30; \ mlib_d64 row01, row11, row21, row31; \ mlib_d64 row02, row12, row22, row32; \ mlib_d64 row03, row13, row23, row33; \ mlib_d64 xFilter0, xFilter1, xFilter2, xFilter3; \ mlib_d64 yFilter0, yFilter1, yFilter2, yFilter3; \ mlib_d64 v00, v01, v02, v03, v10, v11, v12, v13; \ mlib_d64 v20, v21, v22, v23, v30, v31, v32, v33; \ mlib_d64 u00, u01, u10, u11, u20, u21, u30, u31; \ mlib_d64 d0, d1, d2, d3; \ mlib_d64 *yPtr, *xPtr; \ mlib_s32 cols; \ mlib_d64 res; \ 
mlib_f32 f_x01000100 = vis_to_float(0x01000100) /* CLIP: per-row setup. It must be expanded inside the j loop because it executes continue when the row is empty (xLeft > xRight). Advances dstData by dstYStride, reads leftEdges, rightEdges, xStarts and yStarts for row j, expands PREPARE_DELTAS (defined elsewhere), points dstIndexPtr at the first destination index of the row and rewinds dstPixelPtr to the start of the intermediate row buffer. */ #undef CLIP #define CLIP() \ dstData += dstYStride; \ xLeft = leftEdges[j]; \ xRight = rightEdges[j]; \ X = xStarts[j]; \ Y = yStarts[j]; \ PREPARE_DELTAS \ if (xLeft > xRight) \ continue; \ dstIndexPtr = (MLIB_TYPE *)dstData + xLeft; \ dstPixelPtr = dstRowPtr /* FADD_4BC_U8: adds the four column products of the pixel with suffix 0 (d00..d30) and of the pixel with suffix 1 (d01..d31), then packs both sums into res with vis_fpack16_pair, so one mlib_d64 store carries two output pixels. */ #define FADD_4BC_U8() \ d0 = vis_fpadd16(d00, d10); \ d1 = vis_fpadd16(d20, d30); \ d0 = vis_fpadd16(d0, d1); \ d2 = vis_fpadd16(d01, d11); \ d3 = vis_fpadd16(d21, d31); \ d2 = vis_fpadd16(d2, d3); \ res = vis_fpack16_pair(d0, d2) /* LOAD_BC_U8_4CH_1PIXEL: non-pipelined load for one output pixel. Derives byte offsets into the two filter tables from Y and X (FILTER_SHIFT, FILTER_MASK), reads yFilter and xFilter0..3, steps X and Y by dX and dY, then fetches the 4x4 block of LUT entries addressed through srcIndexPtr, moving down one srcYStride per row. srcIndexPtr is left on the fourth row. */ #define LOAD_BC_U8_4CH_1PIXEL(mlib_filters_u8, mlib_filters_u8_4) \ filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ yFilter = *((mlib_d64 *) ((mlib_u8 *)mlib_filters_u8 + filterposy)); \ filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx)); \ xFilter0 = xPtr[0]; \ xFilter1 = xPtr[1]; \ xFilter2 = xPtr[2]; \ xFilter3 = xPtr[3]; \ X += dX; \ Y += dY; \ hi_row00 = flut[srcIndexPtr[0]]; \ lo_row00 = flut[srcIndexPtr[1]]; \ hi_row01 = flut[srcIndexPtr[2]]; \ lo_row01 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ hi_row10 = flut[srcIndexPtr[0]]; \ lo_row10 = flut[srcIndexPtr[1]]; \ hi_row11 = flut[srcIndexPtr[2]]; \ lo_row11 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ hi_row20 = flut[srcIndexPtr[0]]; \ lo_row20 = flut[srcIndexPtr[1]]; \ hi_row21 = flut[srcIndexPtr[2]]; \ lo_row21 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ hi_row30 = flut[srcIndexPtr[0]]; \ lo_row30 = flut[srcIndexPtr[1]]; \ hi_row31 = flut[srcIndexPtr[2]]; \ lo_row31 = flut[srcIndexPtr[3]] /* NEXT_PIXEL_4BC: points srcIndexPtr at the top-left sample of the 4x4 neighbourhood, one row above and one column left of the integer part of (X, Y). */ #define NEXT_PIXEL_4BC() \ xSrc = (X >> MLIB_SHIFT)-1; \ ySrc = (Y >> MLIB_SHIFT)-1; \ srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + 
xSrc /* RESULT_4BC_U8_1PIXEL(ind): arithmetic only, for the pixel currently held in hi_rowRC and lo_rowRC. Rows 0 and 1 are scaled with the high half of yFilter and rows 2 and 3 with the low half (vis_fmul8x16au / vis_fmul8x16al) and accumulated into sum0..sum3, one per column. Each column sum is then multiplied by its xFilter through the vis_fmul8sux16 + vis_fmul8ulx16 pair, leaving d0<ind>..d3<ind> for FADD_4BC_U8. */ #define RESULT_4BC_U8_1PIXEL(ind) \ v00 = vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \ v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \ v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \ v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \ v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \ v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \ sum0 = vis_fpadd16(v00, v10); \ v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \ sum1 = vis_fpadd16(v01, v11); \ v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \ sum2 = vis_fpadd16(v02, v12); \ v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \ sum3 = vis_fpadd16(v03, v13); \ v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \ sum0 = vis_fpadd16(sum0, v20); \ v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \ sum1 = vis_fpadd16(sum1, v21); \ v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \ sum2 = vis_fpadd16(sum2, v22); \ v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \ sum3 = vis_fpadd16(sum3, v23); \ v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \ sum0 = vis_fpadd16(sum0, v30); \ v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \ sum1 = vis_fpadd16(sum1, v31); \ v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \ sum2 = vis_fpadd16(sum2, v32); \ v00 = vis_fmul8sux16(sum0, xFilter0); \ sum3 = vis_fpadd16(sum3, v33); \ v01 = vis_fmul8ulx16(sum0, xFilter0); \ v10 = vis_fmul8sux16(sum1, xFilter1); \ d0##ind = vis_fpadd16(v00, v01); \ v11 = vis_fmul8ulx16(sum1, xFilter1); \ v20 = vis_fmul8sux16(sum2, xFilter2); \ d1##ind = vis_fpadd16(v10, v11); \ v21 = vis_fmul8ulx16(sum2, xFilter2); \ v30 = vis_fmul8sux16(sum3, xFilter3); \ d2##ind = vis_fpadd16(v20, v21); \ v31 = vis_fmul8ulx16(sum3, xFilter3); \ d3##ind = vis_fpadd16(v30, v31) /* BC_U8_4CH(ind, y table, x table): software-pipelined step. Performs the same arithmetic as RESULT_4BC_U8_1PIXEL on the pixel already held in registers while interleaving the loads for the next pixel (LUT rows, yFilter, xFilter0..3), stepping X and Y, and recomputing srcIndexPtr for the pixel after that. Every row and filter register must be reloaded only after its last use for the current pixel; keep that ordering when editing. */ #define BC_U8_4CH(ind, mlib_filters_u8, mlib_filters_u8_4) \ v00 = 
vis_fmul8x16au(hi_row00, vis_read_hi(yFilter)); \ v01 = vis_fmul8x16au(lo_row00, vis_read_hi(yFilter)); \ v02 = vis_fmul8x16au(hi_row01, vis_read_hi(yFilter)); \ v03 = vis_fmul8x16au(lo_row01, vis_read_hi(yFilter)); \ hi_row00 = flut[srcIndexPtr[0]]; \ filterposy = (Y >> FILTER_SHIFT); \ v10 = vis_fmul8x16al(hi_row10, vis_read_hi(yFilter)); \ lo_row00 = flut[srcIndexPtr[1]]; \ v11 = vis_fmul8x16al(lo_row10, vis_read_hi(yFilter)); \ sum0 = vis_fpadd16(v00, v10); \ hi_row01 = flut[srcIndexPtr[2]]; \ v12 = vis_fmul8x16al(hi_row11, vis_read_hi(yFilter)); \ lo_row01 = flut[srcIndexPtr[3]]; \ filterposx = (X >> FILTER_SHIFT); \ v13 = vis_fmul8x16al(lo_row11, vis_read_hi(yFilter)); \ srcIndexPtr += srcYStride; \ hi_row10 = flut[srcIndexPtr[0]]; \ v20 = vis_fmul8x16au(hi_row20, vis_read_lo(yFilter)); \ sum1 = vis_fpadd16(v01, v11); \ lo_row10 = flut[srcIndexPtr[1]]; \ X += dX; \ hi_row11 = flut[srcIndexPtr[2]]; \ v21 = vis_fmul8x16au(lo_row20, vis_read_lo(yFilter)); \ sum2 = vis_fpadd16(v02, v12); \ lo_row11 = flut[srcIndexPtr[3]]; \ v22 = vis_fmul8x16au(hi_row21, vis_read_lo(yFilter)); \ srcIndexPtr += srcYStride; \ hi_row20 = flut[srcIndexPtr[0]]; \ v23 = vis_fmul8x16au(lo_row21, vis_read_lo(yFilter)); \ sum3 = vis_fpadd16(v03, v13); \ Y += dY; \ xSrc = (X >> MLIB_SHIFT)-1; \ v30 = vis_fmul8x16al(hi_row30, vis_read_lo(yFilter)); \ sum0 = vis_fpadd16(sum0, v20); \ lo_row20 = flut[srcIndexPtr[1]]; \ ySrc = (Y >> MLIB_SHIFT)-1; \ hi_row21 = flut[srcIndexPtr[2]]; \ v31 = vis_fmul8x16al(lo_row30, vis_read_lo(yFilter)); \ sum1 = vis_fpadd16(sum1, v21); \ filterposy &= FILTER_MASK; \ lo_row21 = flut[srcIndexPtr[3]]; \ v32 = vis_fmul8x16al(hi_row31, vis_read_lo(yFilter)); \ srcIndexPtr += srcYStride; \ filterposx &= FILTER_MASK; \ v33 = vis_fmul8x16al(lo_row31, vis_read_lo(yFilter)); \ sum2 = vis_fpadd16(sum2, v22); \ hi_row30 = flut[srcIndexPtr[0]]; \ sum3 = vis_fpadd16(sum3, v23); \ sum0 = vis_fpadd16(sum0, v30); \ lo_row30 = flut[srcIndexPtr[1]]; \ sum1 = vis_fpadd16(sum1, 
v31); \ v00 = vis_fmul8sux16(sum0, xFilter0); \ hi_row31 = flut[srcIndexPtr[2]]; \ sum2 = vis_fpadd16(sum2, v32); \ v01 = vis_fmul8ulx16(sum0, xFilter0); \ sum3 = vis_fpadd16(sum3, v33); \ lo_row31 = flut[srcIndexPtr[3]]; \ v10 = vis_fmul8sux16(sum1, xFilter1); \ d0##ind = vis_fpadd16(v00, v01); \ yFilter = *((mlib_d64 *)((mlib_u8 *)mlib_filters_u8 + filterposy)); \ v11 = vis_fmul8ulx16(sum1, xFilter1); \ xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_u8_4+4*filterposx)); \ xFilter0 = xPtr[0]; \ v20 = vis_fmul8sux16(sum2, xFilter2); \ d1##ind = vis_fpadd16(v10, v11); \ xFilter1 = xPtr[1]; \ v21 = vis_fmul8ulx16(sum2, xFilter2); \ xFilter2 = xPtr[2]; \ v30 = vis_fmul8sux16(sum3, xFilter3); \ d2##ind = vis_fpadd16(v20, v21); \ v31 = vis_fmul8ulx16(sum3, xFilter3); \ /* reload xFilter3 only after both halves (sux and ulx) of the sum3 product have used the current pixel's coefficient */ xFilter3 = xPtr[3]; \ srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc; \ d3##ind = vis_fpadd16(v30, v31) /* LOAD_BC_S16_4CH_1PIXEL: non-pipelined load for one output pixel in the S16 path. Fetches the 4x4 block of mlib_d64 LUT entries addressed through srcIndexPtr (one srcYStride per row), then reads yFilter0..3 and xFilter0..3 from the same table at byte offset filterpos*4 and steps X and Y by dX and dY. */ #define LOAD_BC_S16_4CH_1PIXEL(mlib_filters_s16_4) \ row00 = flut[srcIndexPtr[0]]; \ row01 = flut[srcIndexPtr[1]]; \ row02 = flut[srcIndexPtr[2]]; \ row03 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ row10 = flut[srcIndexPtr[0]]; \ row11 = flut[srcIndexPtr[1]]; \ row12 = flut[srcIndexPtr[2]]; \ row13 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ row20 = flut[srcIndexPtr[0]]; \ row21 = flut[srcIndexPtr[1]]; \ row22 = flut[srcIndexPtr[2]]; \ row23 = flut[srcIndexPtr[3]]; \ srcIndexPtr += srcYStride; \ row30 = flut[srcIndexPtr[0]]; \ row31 = flut[srcIndexPtr[1]]; \ row32 = flut[srcIndexPtr[2]]; \ row33 = flut[srcIndexPtr[3]]; \ filterposy = (Y >> FILTER_SHIFT) & FILTER_MASK; \ yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ yFilter0 = yPtr[0]; \ yFilter1 = yPtr[1]; \ yFilter2 = yPtr[2]; \ yFilter3 = yPtr[3]; \ filterposx = (X >> FILTER_SHIFT) & FILTER_MASK; \ xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ xFilter0 = xPtr[0]; \ xFilter1 = xPtr[1]; \ xFilter2 = xPtr[2]; \ 
xFilter3 = xPtr[3]; \ X += dX; \ Y += dY /* RESULT_4BC_S16_1PIXEL: arithmetic only, for the pixel currently held in row00..row33. Each LUT entry is multiplied by the y filter vector of its row (vis_fmul8sux16 + vis_fmul8ulx16 pair), the four rows are summed per column into sum0..sum3, each column sum is multiplied by xFilter0..3 the same way, the four products are added, and the total is turned into the packed result res through vis_fmuld8sux16 and vis_fpackfix_pair. The u temporaries are reused row after row, so each v sum must be formed before its u inputs are overwritten. */ #define RESULT_4BC_S16_1PIXEL() \ u00 = vis_fmul8sux16(row00, yFilter0); \ u01 = vis_fmul8ulx16(row00, yFilter0); \ u10 = vis_fmul8sux16(row01, yFilter0); \ u11 = vis_fmul8ulx16(row01, yFilter0); \ v00 = vis_fpadd16(u00, u01); \ u20 = vis_fmul8sux16(row02, yFilter0); \ v01 = vis_fpadd16(u10, u11); \ u21 = vis_fmul8ulx16(row02, yFilter0); \ u30 = vis_fmul8sux16(row03, yFilter0); \ u31 = vis_fmul8ulx16(row03, yFilter0); \ v02 = vis_fpadd16(u20, u21); \ u00 = vis_fmul8sux16(row10, yFilter1); \ u01 = vis_fmul8ulx16(row10, yFilter1); \ v03 = vis_fpadd16(u30, u31); \ u10 = vis_fmul8sux16(row11, yFilter1); \ u11 = vis_fmul8ulx16(row11, yFilter1); \ v10 = vis_fpadd16(u00, u01); \ u20 = vis_fmul8sux16(row12, yFilter1); \ v11 = vis_fpadd16(u10, u11); \ u21 = vis_fmul8ulx16(row12, yFilter1); \ u30 = vis_fmul8sux16(row13, yFilter1); \ u31 = vis_fmul8ulx16(row13, yFilter1); \ u00 = vis_fmul8sux16(row20, yFilter2); \ v12 = vis_fpadd16(u20, u21); \ u01 = vis_fmul8ulx16(row20, yFilter2); \ v13 = vis_fpadd16(u30, u31); \ u10 = vis_fmul8sux16(row21, yFilter2); \ u11 = vis_fmul8ulx16(row21, yFilter2); \ v20 = vis_fpadd16(u00, u01); \ u20 = vis_fmul8sux16(row22, yFilter2); \ sum0 = vis_fpadd16(v00, v10); \ u21 = vis_fmul8ulx16(row22, yFilter2); \ u30 = vis_fmul8sux16(row23, yFilter2); \ u31 = vis_fmul8ulx16(row23, yFilter2); \ u00 = vis_fmul8sux16(row30, yFilter3); \ u01 = vis_fmul8ulx16(row30, yFilter3); \ v21 = vis_fpadd16(u10, u11); \ sum1 = vis_fpadd16(v01, v11); \ u10 = vis_fmul8sux16(row31, yFilter3); \ sum2 = vis_fpadd16(v02, v12); \ sum3 = vis_fpadd16(v03, v13); \ v22 = vis_fpadd16(u20, u21); \ u11 = vis_fmul8ulx16(row31, yFilter3); \ sum0 = vis_fpadd16(sum0, v20); \ u20 = vis_fmul8sux16(row32, yFilter3); \ u21 = vis_fmul8ulx16(row32, yFilter3); \ v23 = vis_fpadd16(u30, u31); \ v30 = vis_fpadd16(u00, u01); \ sum1 = vis_fpadd16(sum1, v21); \ u30 = vis_fmul8sux16(row33, 
yFilter3); \ u31 = vis_fmul8ulx16(row33, yFilter3); \ v31 = vis_fpadd16(u10, u11); \ sum2 = vis_fpadd16(sum2, v22); \ sum3 = vis_fpadd16(sum3, v23); \ v32 = vis_fpadd16(u20, u21); \ sum0 = vis_fpadd16(sum0, v30); \ v33 = vis_fpadd16(u30, u31); \ v00 = vis_fmul8sux16(sum0, xFilter0); \ sum1 = vis_fpadd16(sum1, v31); \ sum2 = vis_fpadd16(sum2, v32); \ v01 = vis_fmul8ulx16(sum0, xFilter0); \ v10 = vis_fmul8sux16(sum1, xFilter1); \ sum3 = vis_fpadd16(sum3, v33); \ v11 = vis_fmul8ulx16(sum1, xFilter1); \ d0 = vis_fpadd16(v00, v01); \ v20 = vis_fmul8sux16(sum2, xFilter2); \ v21 = vis_fmul8ulx16(sum2, xFilter2); \ d1 = vis_fpadd16(v10, v11); \ v30 = vis_fmul8sux16(sum3, xFilter3); \ v31 = vis_fmul8ulx16(sum3, xFilter3); \ d2 = vis_fpadd16(v20, v21); \ d3 = vis_fpadd16(v30, v31); \ d0 = vis_fpadd16(d0, d1); \ d2 = vis_fpadd16(d2, d3); \ d0 = vis_fpadd16(d0, d2); \ d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ res = vis_fpackfix_pair(d2, d3) /* BC_S16_4CH(table): software-pipelined step for the S16 path. Performs the arithmetic of RESULT_4BC_S16_1PIXEL up to the four column products d0..d3 (FADD_4BC_S16 finishes the pixel) while interleaving the loads of rows and of the y and x filter vectors for the next pixel, stepping X and Y, and recomputing srcIndexPtr for the pixel after that. Every row and filter register is reloaded only after its last use for the current pixel; keep that ordering when editing. */ #define BC_S16_4CH(mlib_filters_s16_4) \ u00 = vis_fmul8sux16(row00, yFilter0); \ u01 = vis_fmul8ulx16(row00, yFilter0); \ u10 = vis_fmul8sux16(row01, yFilter0); \ u11 = vis_fmul8ulx16(row01, yFilter0); \ v00 = vis_fpadd16(u00, u01); \ u20 = vis_fmul8sux16(row02, yFilter0); \ v01 = vis_fpadd16(u10, u11); \ u21 = vis_fmul8ulx16(row02, yFilter0); \ u30 = vis_fmul8sux16(row03, yFilter0); \ u31 = vis_fmul8ulx16(row03, yFilter0); \ v02 = vis_fpadd16(u20, u21); \ row00 = flut[srcIndexPtr[0]]; \ u00 = vis_fmul8sux16(row10, yFilter1); \ u01 = vis_fmul8ulx16(row10, yFilter1); \ filterposy = (Y >> FILTER_SHIFT); \ v03 = vis_fpadd16(u30, u31); \ row01 = flut[srcIndexPtr[1]]; \ u10 = vis_fmul8sux16(row11, yFilter1); \ u11 = vis_fmul8ulx16(row11, yFilter1); \ v10 = vis_fpadd16(u00, u01); \ row02 = flut[srcIndexPtr[2]]; \ u20 = vis_fmul8sux16(row12, yFilter1); \ v11 = vis_fpadd16(u10, u11); \ u21 = vis_fmul8ulx16(row12, 
yFilter1); \ u30 = vis_fmul8sux16(row13, yFilter1); \ row03 = flut[srcIndexPtr[3]]; \ u31 = vis_fmul8ulx16(row13, yFilter1); \ u00 = vis_fmul8sux16(row20, yFilter2); \ filterposx = (X >> FILTER_SHIFT); \ srcIndexPtr += srcYStride; \ v12 = vis_fpadd16(u20, u21); \ u01 = vis_fmul8ulx16(row20, yFilter2); \ v13 = vis_fpadd16(u30, u31); \ row10 = flut[srcIndexPtr[0]]; \ u10 = vis_fmul8sux16(row21, yFilter2); \ X += dX; \ u11 = vis_fmul8ulx16(row21, yFilter2); \ v20 = vis_fpadd16(u00, u01); \ row11 = flut[srcIndexPtr[1]]; \ u20 = vis_fmul8sux16(row22, yFilter2); \ sum0 = vis_fpadd16(v00, v10); \ u21 = vis_fmul8ulx16(row22, yFilter2); \ row12 = flut[srcIndexPtr[2]]; \ u30 = vis_fmul8sux16(row23, yFilter2); \ u31 = vis_fmul8ulx16(row23, yFilter2); \ row13 = flut[srcIndexPtr[3]]; \ u00 = vis_fmul8sux16(row30, yFilter3); \ srcIndexPtr += srcYStride; \ u01 = vis_fmul8ulx16(row30, yFilter3); \ v21 = vis_fpadd16(u10, u11); \ Y += dY; \ xSrc = (X >> MLIB_SHIFT)-1; \ sum1 = vis_fpadd16(v01, v11); \ row20 = flut[srcIndexPtr[0]]; \ u10 = vis_fmul8sux16(row31, yFilter3); \ sum2 = vis_fpadd16(v02, v12); \ sum3 = vis_fpadd16(v03, v13); \ ySrc = (Y >> MLIB_SHIFT)-1; \ row21 = flut[srcIndexPtr[1]]; \ v22 = vis_fpadd16(u20, u21); \ u11 = vis_fmul8ulx16(row31, yFilter3); \ sum0 = vis_fpadd16(sum0, v20); \ u20 = vis_fmul8sux16(row32, yFilter3); \ row22 = flut[srcIndexPtr[2]]; \ u21 = vis_fmul8ulx16(row32, yFilter3); \ v23 = vis_fpadd16(u30, u31); \ v30 = vis_fpadd16(u00, u01); \ filterposy &= FILTER_MASK; \ sum1 = vis_fpadd16(sum1, v21); \ u30 = vis_fmul8sux16(row33, yFilter3); \ row23 = flut[srcIndexPtr[3]]; \ u31 = vis_fmul8ulx16(row33, yFilter3); \ srcIndexPtr += srcYStride; \ filterposx &= FILTER_MASK; \ v31 = vis_fpadd16(u10, u11); \ row30 = flut[srcIndexPtr[0]]; \ sum2 = vis_fpadd16(sum2, v22); \ sum3 = vis_fpadd16(sum3, v23); \ row31 = flut[srcIndexPtr[1]]; \ v32 = vis_fpadd16(u20, u21); \ sum0 = vis_fpadd16(sum0, v30); \ row32 = flut[srcIndexPtr[2]]; \ v33 = vis_fpadd16(u30, u31); 
\ row33 = flut[srcIndexPtr[3]]; \ v00 = vis_fmul8sux16(sum0, xFilter0); \ yPtr = ((mlib_d64 *) ((mlib_u8 *)mlib_filters_s16_4 + filterposy*4)); \ sum1 = vis_fpadd16(sum1, v31); \ yFilter0 = yPtr[0]; \ sum2 = vis_fpadd16(sum2, v32); \ v01 = vis_fmul8ulx16(sum0, xFilter0); \ yFilter1 = yPtr[1]; \ v10 = vis_fmul8sux16(sum1, xFilter1); \ sum3 = vis_fpadd16(sum3, v33); \ yFilter2 = yPtr[2]; \ v11 = vis_fmul8ulx16(sum1, xFilter1); \ d0 = vis_fpadd16(v00, v01); \ yFilter3 = yPtr[3]; \ xPtr = ((mlib_d64 *)((mlib_u8 *)mlib_filters_s16_4 + filterposx*4)); \ v20 = vis_fmul8sux16(sum2, xFilter2); \ xFilter0 = xPtr[0]; \ v21 = vis_fmul8ulx16(sum2, xFilter2); \ d1 = vis_fpadd16(v10, v11); \ xFilter1 = xPtr[1]; \ v30 = vis_fmul8sux16(sum3, xFilter3); \ v31 = vis_fmul8ulx16(sum3, xFilter3); \ d2 = vis_fpadd16(v20, v21); \ xFilter2 = xPtr[2]; \ d3 = vis_fpadd16(v30, v31); \ xFilter3 = xPtr[3]; \ srcIndexPtr = (MLIB_TYPE *)lineAddr[ySrc] + xSrc /* FADD_4BC_S16: final reduction after BC_S16_4CH. Adds the column products d0..d3 and produces res through vis_fmuld8sux16 and vis_fpackfix_pair, the same statements that end RESULT_4BC_S16_1PIXEL. */ #define FADD_4BC_S16() \ d0 = vis_fpadd16(d0, d1); \ d2 = vis_fpadd16(d2, d3); \ d0 = vis_fpadd16(d0, d2); \ d2 = vis_fmuld8sux16(f_x01000100, vis_read_hi(d0)); \ d3 = vis_fmuld8sux16(f_x01000100, vis_read_lo(d0)); \ res = vis_fpackfix_pair(d2, d3) /* MLIB_TYPE: element type of the source and destination index pointers for the functions that follow (8-bit indices until it is redefined). */ #undef MLIB_TYPE #define MLIB_TYPE mlib_u8 /* Filter addressing for the mlib_f32 (U8) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 255*8 and is used as a byte offset into the filter tables. */ #undef FILTER_SHIFT #define FILTER_SHIFT 5 #undef FILTER_MASK #define FILTER_MASK (((1 << 8) - 1) << 3) /* mlib_ImageAffineIndex_U8_U8_3CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_u8 indices whose LUT entries are read as mlib_f32. param->filter selects the mlib_filters_u8_bc tables for MLIB_BICUBIC and the bc2 tables otherwise. For each destination row (after CLIP) the interpolated pixels are written to dstRowPtr, two pixels per mlib_d64; the buffer is on the stack unless max_xsize > MLIB_LIMIT. Rows of 6 or more pixels run the BC_U8_4CH software pipeline two pixels per step; shorter rows and the odd last pixel fall through the 4-, 2- and 1-pixel blocks. mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4 then converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. NOTE(review): GSR is written once before the row loop here, whereas the mlib_d64-LUT variants rewrite it on every row; confirm that the index conversion call preserves GSR. */ mlib_status mlib_ImageAffineIndex_U8_U8_3CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_U8(); mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT/2]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; if (filter == MLIB_BICUBIC) { 
mlib_filters_table_u8 = mlib_filters_u8_bc; mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; } else { mlib_filters_table_u8 = mlib_filters_u8_bc2; mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; } if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); if (dstRowPtr == NULL) return MLIB_FAILURE; } vis_write_gsr(3 << 3); for (j = yStart; j <= yFinish; j++) { CLIP(); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 6) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); #pragma pipeloop(0) for (; i <= cols-8; i += 2) { *dstPixelPtr++ = res; FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); } *dstPixelPtr++ = res; FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 6; } if (i <= cols-4) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 4; } if (i <= cols-2) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); 
*dstPixelPtr++ = res; i += 2; } if (i < cols) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); d0 = vis_fpadd16(d00, d10); d1 = vis_fpadd16(d20, d30); d0 = vis_fpadd16(d0, d1); res = vis_fpack16_pair(d0, d0); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_U8_U8_3_in_4((mlib_u8 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /* Filter addressing for the mlib_d64 (S16) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 511*8; the S16 macros multiply it by 4 to form the byte offset into the filter table. */ #undef FILTER_SHIFT #define FILTER_SHIFT 4 #undef FILTER_MASK #define FILTER_MASK (((1 << 9) - 1) << 3) /* mlib_ImageAffineIndex_U8_S16_3CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_u8 indices whose LUT entries are read as mlib_d64. param->filter selects mlib_filters_s16_bc_4 for MLIB_BICUBIC and mlib_filters_s16_bc2_4 otherwise. For each destination row GSR is rewritten after CLIP; rows of 4 or more pixels run the BC_S16_4CH software pipeline one pixel per step followed by a 4-pixel epilogue, and rows shorter than 4 pixels are computed one pixel at a time. Each pixel occupies one mlib_d64 of dstRowPtr (on the stack unless max_xsize > MLIB_LIMIT), and mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4 converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. */ mlib_status mlib_ImageAffineIndex_U8_S16_3CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_S16(); mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_s16_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; } else { mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; } if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); if (dstRowPtr == NULL) return MLIB_FAILURE; } for (j = yStart; j <= yFinish; j++) { CLIP(); vis_write_gsr(10 << 3); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 4) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); NEXT_PIXEL_4BC(); BC_S16_4CH(mlib_filters_table_s16_4); FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); #pragma pipeloop(0) for (; i < cols-4; i++) { *dstPixelPtr++ = res; FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); } *dstPixelPtr++ = res; FADD_4BC_S16(); *dstPixelPtr++ = res; RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; i += 4; } #pragma pipeloop(0) 
for (; i < cols; i++) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_S16_U8_3_in_4((mlib_s16 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /* Filter addressing for the mlib_f32 (U8) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 255*8 and is used as a byte offset into the filter tables. */ #undef FILTER_SHIFT #define FILTER_SHIFT 5 #undef FILTER_MASK #define FILTER_MASK (((1 << 8) - 1) << 3) /* mlib_ImageAffineIndex_U8_U8_4CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_u8 indices whose LUT entries are read as mlib_f32. param->filter selects the mlib_filters_u8_bc tables for MLIB_BICUBIC and the bc2 tables otherwise. For each destination row (after CLIP) the interpolated pixels are written to dstRowPtr, two pixels per mlib_d64; the buffer is on the stack unless max_xsize > MLIB_LIMIT. Rows of 6 or more pixels run the BC_U8_4CH software pipeline two pixels per step; shorter rows and the odd last pixel fall through the 4-, 2- and 1-pixel blocks. mlib_ImageColorTrue2IndexLine_U8_U8_4 then converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. NOTE(review): GSR is written once before the row loop here, whereas the mlib_d64-LUT variants rewrite it on every row; confirm that the index conversion call preserves GSR. */ mlib_status mlib_ImageAffineIndex_U8_U8_4CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_U8(); mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT/2]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_u8 = mlib_filters_u8_bc; mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; } else { mlib_filters_table_u8 = mlib_filters_u8_bc2; mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; } if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); if (dstRowPtr == NULL) return MLIB_FAILURE; } vis_write_gsr(3 << 3); for (j = yStart; j <= yFinish; j++) { CLIP(); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 6) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); #pragma pipeloop(0) for (; i <= cols-8; i += 2) { *dstPixelPtr++ = res; FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, 
mlib_filters_table_u8_4); } *dstPixelPtr++ = res; FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 6; } if (i <= cols-4) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 4; } if (i <= cols-2) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 2; } if (i < cols) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); d0 = vis_fpadd16(d00, d10); d1 = vis_fpadd16(d20, d30); d0 = vis_fpadd16(d0, d1); res = vis_fpack16_pair(d0, d0); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_U8_U8_4((mlib_u8 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /* Filter addressing for the mlib_d64 (S16) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 511*8; the S16 macros multiply it by 4 to form the byte offset into the filter table. */ #undef FILTER_SHIFT #define FILTER_SHIFT 4 #undef FILTER_MASK #define FILTER_MASK (((1 << 9) - 1) << 3) /* mlib_ImageAffineIndex_U8_S16_4CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_u8 indices whose LUT entries are read as mlib_d64. param->filter selects mlib_filters_s16_bc_4 for MLIB_BICUBIC and mlib_filters_s16_bc2_4 otherwise. For each destination row GSR is rewritten after CLIP; rows of 4 or more pixels run the BC_S16_4CH software pipeline one pixel per step followed by a 4-pixel epilogue, and rows shorter than 4 pixels are computed one pixel at a time. Each pixel occupies one mlib_d64 of dstRowPtr (on the stack unless max_xsize > MLIB_LIMIT), and mlib_ImageColorTrue2IndexLine_S16_U8_4 converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. */ mlib_status mlib_ImageAffineIndex_U8_S16_4CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_S16(); mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT]; mlib_d64 *dstRowPtr = dstRowData; const 
mlib_s16 *mlib_filters_table_s16_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; } else { mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; } if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); if (dstRowPtr == NULL) return MLIB_FAILURE; } for (j = yStart; j <= yFinish; j++) { CLIP(); vis_write_gsr(10 << 3); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 4) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); NEXT_PIXEL_4BC(); BC_S16_4CH(mlib_filters_table_s16_4); FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); #pragma pipeloop(0) for (; i < cols-4; i++) { *dstPixelPtr++ = res; FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); } *dstPixelPtr++ = res; FADD_4BC_S16(); *dstPixelPtr++ = res; RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; i += 4; } #pragma pipeloop(0) for (; i < cols; i++) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_S16_U8_4((mlib_s16 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /* MLIB_TYPE: element type of the source and destination index pointers for the functions that follow (16-bit indices from here on). */ #undef MLIB_TYPE #define MLIB_TYPE mlib_s16 /* Filter addressing for the mlib_f32 (U8) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 255*8 and is used as a byte offset into the filter tables. */ #undef FILTER_SHIFT #define FILTER_SHIFT 5 #undef FILTER_MASK #define FILTER_MASK (((1 << 8) - 1) << 3) /* mlib_ImageAffineIndex_S16_U8_3CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_s16 indices whose LUT entries are read as mlib_f32. param->filter selects the mlib_filters_u8_bc tables for MLIB_BICUBIC and the bc2 tables otherwise. srcYStride is halved so that it can be added to the mlib_s16 source pointer (presumably param stores it in bytes - confirm). For each destination row (after CLIP) the interpolated pixels are written to dstRowPtr, two pixels per mlib_d64; the buffer is on the stack unless max_xsize > MLIB_LIMIT. Rows of 6 or more pixels run the BC_U8_4CH software pipeline two pixels per step; shorter rows and the odd last pixel fall through the 4-, 2- and 1-pixel blocks. mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4 then converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. NOTE(review): GSR is written once before the row loop here, whereas the mlib_d64-LUT variants rewrite it on every row; confirm that the index conversion call preserves GSR. */ mlib_status mlib_ImageAffineIndex_S16_U8_3CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_U8(); mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT/2]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_u8, 
*mlib_filters_table_u8_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_u8 = mlib_filters_u8_bc; mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; } else { mlib_filters_table_u8 = mlib_filters_u8_bc2; mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; } srcYStride >>= 1; if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); if (dstRowPtr == NULL) return MLIB_FAILURE; } vis_write_gsr(3 << 3); for (j = yStart; j <= yFinish; j++) { CLIP(); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 6) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); #pragma pipeloop(0) for (; i <= cols-8; i += 2) { *dstPixelPtr++ = res; FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); } *dstPixelPtr++ = res; FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 6; } if (i <= cols-4) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 4; } if (i <= cols-2) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, 
mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 2; } if (i < cols) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); d0 = vis_fpadd16(d00, d10); d1 = vis_fpadd16(d20, d30); d0 = vis_fpadd16(d0, d1); res = vis_fpack16_pair(d0, d0); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_U8_S16_3_in_4((mlib_u8 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /* Filter addressing for the mlib_d64 (S16) LUT path: (coordinate >> FILTER_SHIFT) & FILTER_MASK is a multiple of 8 between 0 and 511*8; the S16 macros multiply it by 4 to form the byte offset into the filter table. */ #undef FILTER_SHIFT #define FILTER_SHIFT 4 #undef FILTER_MASK #define FILTER_MASK (((1 << 9) - 1) << 3) /* mlib_ImageAffineIndex_S16_S16_3CH_BC: bicubic (4x4 neighbourhood) affine resampling of an image of mlib_s16 indices whose LUT entries are read as mlib_d64. param->filter selects mlib_filters_s16_bc_4 for MLIB_BICUBIC and mlib_filters_s16_bc2_4 otherwise. srcYStride is halved so that it can be added to the mlib_s16 source pointer (presumably param stores it in bytes - confirm). For each destination row GSR is rewritten after CLIP; rows of 4 or more pixels run the BC_S16_4CH software pipeline one pixel per step followed by a 4-pixel epilogue, and rows shorter than 4 pixels are computed one pixel at a time. Each pixel occupies one mlib_d64 of dstRowPtr (on the stack unless max_xsize > MLIB_LIMIT), and mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4 converts the row back to indices at dstIndexPtr. Returns MLIB_FAILURE only when the heap row buffer cannot be allocated, otherwise MLIB_SUCCESS. */ mlib_status mlib_ImageAffineIndex_S16_S16_3CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_S16(); mlib_d64 *flut = (mlib_d64 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_s16_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; } else { mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; } srcYStride >>= 1; if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); if (dstRowPtr == NULL) return MLIB_FAILURE; } for (j = yStart; j <= yFinish; j++) { CLIP(); vis_write_gsr(10 << 3); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 4) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); NEXT_PIXEL_4BC(); BC_S16_4CH(mlib_filters_table_s16_4); FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); #pragma pipeloop(0) for (; i < cols-4; i++) { *dstPixelPtr++ = res; FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); } *dstPixelPtr++ = res; FADD_4BC_S16(); *dstPixelPtr++ = res; RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; 
LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; i += 4; } #pragma pipeloop(0) for (; i < cols; i++) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_S16_S16_3_in_4((mlib_s16 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /***************************************************************/ #undef FILTER_SHIFT #define FILTER_SHIFT 5 #undef FILTER_MASK #define FILTER_MASK (((1 << 8) - 1) << 3) /***************************************************************/ mlib_status mlib_ImageAffineIndex_S16_U8_4CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_U8(); mlib_f32 *flut = (mlib_f32 *)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT/2]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_u8, *mlib_filters_table_u8_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_u8 = mlib_filters_u8_bc; mlib_filters_table_u8_4 = mlib_filters_u8_bc_4; } else { mlib_filters_table_u8 = mlib_filters_u8_bc2; mlib_filters_table_u8_4 = mlib_filters_u8_bc2_4; } srcYStride >>= 1; if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * ((max_xsize + 1) >> 1)); if (dstRowPtr == NULL) return MLIB_FAILURE; } vis_write_gsr(3 << 3); for (j = yStart; j <= yFinish; j++) { CLIP(); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 6) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); #pragma pipeloop(0) for (; i <= cols-8; i += 2) { 
*dstPixelPtr++ = res; FADD_4BC_U8(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); } *dstPixelPtr++ = res; FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 6; } if (i <= cols-4) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); NEXT_PIXEL_4BC(); BC_U8_4CH(0, mlib_filters_table_u8, mlib_filters_table_u8_4); BC_U8_4CH(1, mlib_filters_table_u8, mlib_filters_table_u8_4); FADD_4BC_U8(); *dstPixelPtr++ = res; RESULT_4BC_U8_1PIXEL(0); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 4; } if (i <= cols-2) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(1); FADD_4BC_U8(); *dstPixelPtr++ = res; i += 2; } if (i < cols) { NEXT_PIXEL_4BC(); LOAD_BC_U8_4CH_1PIXEL(mlib_filters_table_u8, mlib_filters_table_u8_4); RESULT_4BC_U8_1PIXEL(0); d0 = vis_fpadd16(d00, d10); d1 = vis_fpadd16(d20, d30); d0 = vis_fpadd16(d0, d1); res = vis_fpack16_pair(d0, d0); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_U8_S16_4((mlib_u8 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /***************************************************************/ #undef FILTER_SHIFT #define FILTER_SHIFT 4 #undef FILTER_MASK #define FILTER_MASK (((1 << 9) - 1) << 3) /***************************************************************/ mlib_status mlib_ImageAffineIndex_S16_S16_4CH_BC(mlib_affine_param *param, const void *colormap) { DECLAREVAR(); DECLAREVAR_S16(); mlib_d64 *flut = (mlib_d64 
*)mlib_ImageGetLutNormalTable(colormap) - mlib_ImageGetLutOffset(colormap); mlib_d64 dstRowData[MLIB_LIMIT]; mlib_d64 *dstRowPtr = dstRowData; const mlib_s16 *mlib_filters_table_s16_4; if (filter == MLIB_BICUBIC) { mlib_filters_table_s16_4 = mlib_filters_s16_bc_4; } else { mlib_filters_table_s16_4 = mlib_filters_s16_bc2_4; } srcYStride >>= 1; if (max_xsize > MLIB_LIMIT) { dstRowPtr = mlib_malloc(sizeof(mlib_d64) * max_xsize); if (dstRowPtr == NULL) return MLIB_FAILURE; } for (j = yStart; j <= yFinish; j++) { CLIP(); vis_write_gsr(10 << 3); cols = xRight - xLeft + 1; i = 0; if (i <= cols - 4) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); NEXT_PIXEL_4BC(); BC_S16_4CH(mlib_filters_table_s16_4); FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); #pragma pipeloop(0) for (; i < cols-4; i++) { *dstPixelPtr++ = res; FADD_4BC_S16(); BC_S16_4CH(mlib_filters_table_s16_4); } *dstPixelPtr++ = res; FADD_4BC_S16(); *dstPixelPtr++ = res; RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; i += 4; } #pragma pipeloop(0) for (; i < cols; i++) { NEXT_PIXEL_4BC(); LOAD_BC_S16_4CH_1PIXEL(mlib_filters_table_s16_4); RESULT_4BC_S16_1PIXEL(); *dstPixelPtr++ = res; } mlib_ImageColorTrue2IndexLine_S16_S16_4((mlib_s16 *)dstRowPtr, dstIndexPtr, xRight - xLeft + 1, colormap); } if (dstRowPtr != dstRowData) mlib_free(dstRowPtr); return MLIB_SUCCESS; } /***************************************************************/