19 #ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
20 #define _MAGICKCORE_ACCELERATE_PRIVATE_H
22 #if defined(__cplusplus) || defined(c_plusplus)
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
/*
  Helpers for writing OpenCL source as C string literals.

  Each OPENCL_* macro expands to a string literal that holds one preprocessor
  directive ("#define", "#elif", "#else", "#endif", "#if") preceded and
  followed by a newline, with the macro arguments stringified after the
  directive name.  The "#" and the directive name are written as two adjacent
  string literals, which the compiler concatenates.

  STRINGIFY turns its whole argument list into a string literal followed by a
  newline.
*/
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
42 typedef struct _FloatPixelPacket
44 #ifdef MAGICK_PIXEL_RGBA
51 #ifdef MAGICK_PIXEL_BGRA
60 const char* accelerateKernels =
/*
  ScaleCharToQuantum(): converts an 8-bit value to a CLQuantum.  Three
  alternative definitions follow; the conditions that select between them
  are not visible in this excerpt.
*/
/* Variant 1: the value is returned unchanged apart from the cast. */
294 inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
296 return((CLQuantum) value);
/* Variant 2: multiplies by 257.0f, so 255 maps to 65535. */
303 inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
305 return((CLQuantum) (257.0f*value));
/*
  Variant 3: multiplies by 16843009.0, so 255 maps to 4294967295.
  NOTE(review): unlike variant 2 the literal has no f suffix; confirm how the
  target OpenCL compiler treats an unsuffixed floating-point constant.
*/
312 inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
314 return((CLQuantum) (16843009.0*value));
/*
  ClampToCanvas(): limits offset to the inclusive range [0, range-1] using
  the built-in clamp() and returns the result.
*/
321 inline
int ClampToCanvas(const
int offset, const
int range)
323 return clamp(offset, (
int)0, range - 1);
/*
  ClampToCanvasWithHalo(): clamps offset like ClampToCanvas() but widens the
  allowed range by edge on one side, chosen by section:

    section != 0:  [-edge, range-1]          (extra room below zero)
    section == 0:  [0, range-1+edge]         (extra room above range-1)
*/
328 inline int ClampToCanvasWithHalo(
const int offset,
const int range,
const int edge,
const int section)
330 return clamp(offset, section ? (
int)(0 - edge) : (
int)0, section ? (range - 1) : (range - 1 + edge));
337 return (CLQuantum)(clamp(value, 0.0f, (
float)
QuantumRange) + 0.5f);
342 inline uint ScaleQuantumToMap(CLQuantum value)
344 if (value >= (CLQuantum)
MaxMap)
347 return ((uint)value);
354 float sign = x < (float) 0.0 ? (
float)-1.0 : (float) 1.0;
362 return clamp(value, 0.0f, 1.0f);
/*
  Channel accessors for this pixel layout: blue is component x, green is y,
  red is z and opacity is w.  The *F4 variants do the same on a float4.
*/

/* Blue: component x. */
368 inline CLQuantum getBlue(CLPixelType p)
{
  return p.x;
}
369 inline void setBlue(CLPixelType* p, CLQuantum value)
{
  (*p).x = value;
}
370 inline float getBlueF4(float4 p)
{
  return p.x;
}
371 inline void setBlueF4(float4* p, float value)
{
  (*p).x = value;
}

/* Green: component y. */
373 inline CLQuantum getGreen(CLPixelType p)
{
  return p.y;
}
374 inline void setGreen(CLPixelType* p, CLQuantum value)
{
  (*p).y = value;
}
375 inline float getGreenF4(float4 p)
{
  return p.y;
}
376 inline void setGreenF4(float4* p, float value)
{
  (*p).y = value;
}

/* Red: component z. */
378 inline CLQuantum getRed(CLPixelType p)
{
  return p.z;
}
379 inline void setRed(CLPixelType* p, CLQuantum value)
{
  (*p).z = value;
}
380 inline float getRedF4(float4 p)
{
  return p.z;
}
381 inline void setRedF4(float4* p, float value)
{
  (*p).z = value;
}

/* Opacity: component w. */
383 inline CLQuantum getOpacity(CLPixelType p)
{
  return p.w;
}
384 inline void setOpacity(CLPixelType* p, CLQuantum value)
{
  (*p).w = value;
}
385 inline float getOpacityF4(float4 p)
{
  return p.w;
}
386 inline void setOpacityF4(float4* p, float value)
{
  (*p).w = value;
}

/* Writes the same value to the three colour components; w is untouched. */
388 inline void setGray(CLPixelType* p, CLQuantum value)
{
  (*p).x = value;
  (*p).y = value;
  (*p).z = value;
}
390 inline float GetPixelIntensity(
const int method,
const int colorspace, CLPixelType p)
392 float red = getRed(p);
393 float green = getGreen(p);
394 float blue = getBlue(p);
405 intensity = (red + green + blue) / 3.0;
410 intensity = max(max(red, green), blue);
415 intensity = (min(min(red, green), blue) +
416 max(max(red, green), blue)) / 2.0;
421 intensity = (float)(((
float)red*red + green*green + blue*blue) /
435 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
448 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
462 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
475 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
480 intensity = (float)(sqrt((
float)red*red + green*green + blue*blue) /
513 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
517 if( (v>=M) || (convert_float(v) < convert_float(a)) )
528 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
533 r=MWC_AddMod64(r,b,M);
534 b=MWC_AddMod64(b,b,M);
545 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
550 acc=MWC_MulMod64(acc,sqr,M);
551 sqr=MWC_MulMod64(sqr,sqr,M);
/*
  MWC_SkipImpl_Mod64(): returns the generator state reached from curr after
  skipping ahead by distance.

  The two 32-bit halves of the state are packed into one 64-bit value
  (curr.x*A + curr.y), multiplied modulo M by the factor returned from
  MWC_PowMod64(A, distance, M) (presumably A raised to distance modulo M;
  only part of that helper is visible here), and unpacked again as the
  quotient and remainder of a division by A.
*/
557 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
559 ulong m=MWC_PowMod64(A, distance, M);
/* The cast widens the product to 64 bits before curr.y is added. */
560 ulong x=curr.x*(ulong)A+curr.y;
561 x=MWC_MulMod64(x, m, M);
562 return (uint2)((uint)(x/A), (uint)(x%A));
565 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
572 enum{ MWC_BASEID = 4077358422479273989UL };
574 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
575 ulong m=MWC_PowMod64(A, dist, M);
577 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
578 return (uint2)((uint)(x/A), (uint)(x%A));
/* Generator state: two 32-bit words, x and c. */
582 typedef struct{ uint x; uint c; } mwc64x_state_t;
/* Multiplier used by MWC64X_Step and passed as A to the skip/seed helpers. */
584 enum{ MWC64X_A = 4294883355U };
/* Modulus passed as M to the skip/seed helpers; equals MWC64X_A * 2^32 - 1. */
585 enum{ MWC64X_M = 18446383549859758079UL };
/*
  MWC64X_Step(): advances the generator state *s by one step.

  The declarations of X and C are not visible in this excerpt; presumably
  they hold the current s->x and s->c.
*/
587 void MWC64X_Step(mwc64x_state_t *s)
/* Low 32 bits of A*X + C; the uint arithmetic wraps on overflow. */
591 uint Xn=MWC64X_A*X+C;
/* 1 when the addition of C wrapped around, otherwise 0. */
592 uint carry=(uint)(Xn<C);
/* High 32 bits of A*X plus the carry (mad_hi(a,b,c) is mul_hi(a,b)+c). */
593 uint Cn=mad_hi(MWC64X_A,X,carry);
599 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
601 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
606 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
608 uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
614 uint MWC64X_NextUint(mwc64x_state_t *s)
616 uint res=s->x ^ s->c;
/*
  mwcReadPseudoRandomValue(): draws the next 32-bit value from rng via
  MWC64X_NextUint() and scales it to a float by dividing by
  (float) 0xffffffff, giving a value between 0.0 and 1.0.
*/
625 float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
626 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
630 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel,
NoiseType noise_type,
float attenuate) {
639 alpha=mwcReadPseudoRandomValue(r);
655 beta=mwcReadPseudoRandomValue(r);
656 gamma=sqrt(-2.0f*log(alpha));
657 sigma=gamma*cospi((2.0f*beta));
658 tau=gamma*sinpi((2.0f*beta));
697 if (alpha > MagickEpsilon)
698 sigma=sqrt(-2.0f*log(alpha));
699 beta=mwcReadPseudoRandomValue(r);
701 cospi((
float) (2.0f*beta))/2.0f);
710 for (i=0; alpha > poisson; i++)
712 beta=mwcReadPseudoRandomValue(r);
/*
  AddNoise(): reads pixels from inputImage, replaces the red, green, blue and
  opacity values with ClampToQuantum(mwcGenerateDifferentialNoise(...)) of the
  original value, and writes the result to the same index of filteredImage.

  Only part of the kernel body is visible in this excerpt; in particular the
  loop that consumes count and any use of seed0/seed1 are not shown.
*/
729 void AddNoise(
const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
730 ,
const unsigned int inputPixelCount,
const unsigned int pixelsPerWorkItem
732 ,
const NoiseType noise_type,
const float attenuate
733 ,
const unsigned int seed0,
const unsigned int seed1
734 ,
const unsigned int numRandomNumbersPerPixel) {
/* Random numbers budgeted per work-item. */
740 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
/* Stream offset of this work-group: span times the id of its first work-item. */
741 uint offset = span * get_local_size(0) * get_group_id(0);
743 MWC64X_SeedStreams(&rng, offset, span);
/* First pixel handled by this work-item. */
745 uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
747 uint count = pixelsPerWorkItem;
/* Positions at or beyond inputPixelCount are skipped. */
750 if (pos < inputPixelCount) {
751 CLPixelType p = inputImage[pos];
754 setRed(&p,
ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
758 setGreen(&p,
ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
762 setBlue(&p,
ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
766 setOpacity(&p,
ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
769 filteredImage[pos] = p;
/* The next pixel of this work-item is one work-group width further on. */
772 pos += get_local_size(0);
/*
  BlurRow(): horizontal pass of a separable filter.  Each work-group copies
  the span of row y it needs into the __local buffer temp, then every
  work-item accumulates filter[i] * temp[i + local id] over the width filter
  taps and stores the float4 sum in filtered_im.

  Several lines of the kernel (including the declaration of i used by the
  accumulation loops) are not visible in this excerpt.
*/
798 __kernel
void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
799 const ChannelType channel, __constant
float *filter,
800 const unsigned int width,
801 const unsigned int imageColumns,
const unsigned int imageRows,
802 __local CLPixelType *temp)
804 const int x = get_global_id(0);
805 const int y = get_global_id(1);
807 const int columns = imageColumns;
809 const unsigned int radius = (width-1)/2;
810 const int wsize = get_local_size(0);
/* Number of temp entries filled per work-group: group width plus filter width. */
811 const unsigned int loadSize = wsize+width;
/* Global coordinates of the first work-item of this work-group. */
839 const int groupX=get_local_size(0)*get_group_id(0);
840 const int groupY=get_local_size(1)*get_group_id(1);
/* Cooperative load: work-items stride through temp by the group width. */
843 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
/*
  Source column is shifted left by radius and clamped to the row.
  NOTE(review): radius is unsigned, so i+groupX-radius is evaluated in
  unsigned arithmetic before being converted to the int parameter.
*/
846 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
/* All loads into temp must finish before any work-item reads it. */
855 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items beyond the last column produce no output. */
858 if (get_global_id(0) < columns)
861 float4 result = (float4) 0;
/* UFACTOR is the unroll factor; 8 unless already defined. */
865 \n #ifndef UFACTOR \n
866 \n #define UFACTOR 8 \n
/* Main loop: handles UFACTOR taps per iteration. */
869 for ( ; i+UFACTOR < width; )
871 \n #pragma unroll UFACTOR\n
872 for (
int j=0; j < UFACTOR; j++, i++)
874 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
/* Tail loop: remaining taps, one at a time. */
878 for ( ; i < width; i++)
880 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
889 filtered_im[y*columns+x] = result;
902 __kernel
void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
903 const ChannelType channel, __constant
float *filter,
904 const unsigned int width,
905 const unsigned int imageColumns,
const unsigned int imageRows,
906 __local CLPixelType *temp,
907 const unsigned int offsetRows,
const unsigned int section)
909 const int x = get_global_id(0);
910 const int y = get_global_id(1);
912 const int columns = imageColumns;
914 const unsigned int radius = (width-1)/2;
915 const int wsize = get_local_size(0);
916 const unsigned int loadSize = wsize+width;
919 const int groupX=get_local_size(0)*get_group_id(0);
920 const int groupY=get_local_size(1)*get_group_id(1);
923 im += imageColumns * (offsetRows - radius * section);
926 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
929 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
938 barrier(CLK_LOCAL_MEM_FENCE);
941 if (get_global_id(0) < columns)
944 float4 result = (float4) 0;
948 \n #ifndef UFACTOR \n
949 \n #define UFACTOR 8 \n
952 for ( ; i+UFACTOR < width; )
954 \n #pragma unroll UFACTOR\n
955 for (
int j=0; j < UFACTOR; j++, i++)
957 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
961 for ( ; i < width; i++)
963 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
972 filtered_im[y*columns+x] = result;
986 __kernel
void BlurColumn(
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
987 const ChannelType channel, __constant
float *filter,
988 const unsigned int width,
989 const unsigned int imageColumns,
const unsigned int imageRows,
990 __local float4 *temp)
992 const int x = get_global_id(0);
993 const int y = get_global_id(1);
997 const int columns = imageColumns;
998 const int rows = imageRows;
1000 unsigned int radius = (width-1)/2;
1001 const int wsize = get_local_size(1);
1002 const unsigned int loadSize = wsize+width;
1005 const int groupX=get_local_size(0)*get_group_id(0);
1006 const int groupY=get_local_size(1)*get_group_id(1);
1011 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1013 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
1017 barrier(CLK_LOCAL_MEM_FENCE);
1020 if (get_global_id(1) < rows)
1023 float4 result = (float4) 0;
1027 \n #ifndef UFACTOR \n
1028 \n #define UFACTOR 8 \n
1031 for ( ; i+UFACTOR < width; )
1033 \n #pragma unroll UFACTOR \n
1034 for (
int j=0; j < UFACTOR; j++, i++)
1036 result+=filter[i]*temp[i+get_local_id(1)];
1040 for ( ; i < width; i++)
1042 result+=filter[i]*temp[i+get_local_id(1)];
1051 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
/*
  BlurColumnSection(): vertical pass of a separable filter over a section of
  the image.  Each work-group copies a column span of blurRowData into the
  __local buffer temp, then every work-item accumulates
  filter[i] * temp[i + local id(1)] over the width filter taps and writes
  the result, converted to CLPixelType, to filtered_im shifted down by
  offsetRows rows.

  Several lines of the kernel (including the declaration of i used by the
  accumulation loops) are not visible in this excerpt.
*/
1066 __kernel
void BlurColumnSection(
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1067 const ChannelType channel, __constant
float *filter,
1068 const unsigned int width,
1069 const unsigned int imageColumns,
const unsigned int imageRows,
1070 __local float4 *temp,
1071 const unsigned int offsetRows,
const unsigned int section)
1073 const int x = get_global_id(0);
1074 const int y = get_global_id(1);
1078 const int columns = imageColumns;
1079 const int rows = imageRows;
1081 unsigned int radius = (width-1)/2;
1082 const int wsize = get_local_size(1);
/* Number of temp entries filled per work-group: group height plus filter width. */
1083 const unsigned int loadSize = wsize+width;
/* Global coordinates of the first work-item of this work-group. */
1086 const int groupX=get_local_size(0)*get_group_id(0);
1087 const int groupY=get_local_size(1)*get_group_id(1);
/* When section is non-zero the input pointer is advanced by radius rows. */
1092 blurRowData += imageColumns * radius * section;
/* Cooperative load: work-items stride through temp by the group height. */
1095 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
/*
  The row index may extend radius rows past one side of [0, rows-1],
  selected by section (see ClampToCanvasWithHalo).
  NOTE(review): the column used is groupX, not x, and temp is indexed by the
  dimension-1 local id only; presumably the local size in dimension 0 is 1.
*/
1097 int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
1098 temp[i] = *(blurRowData+pos);
/* All loads into temp must finish before any work-item reads it. */
1102 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items beyond the last row produce no output. */
1105 if (get_global_id(1) < rows)
1108 float4 result = (float4) 0;
/* UFACTOR is the unroll factor; 8 unless already defined. */
1112 \n #ifndef UFACTOR \n
1113 \n #define UFACTOR 8 \n
/* Main loop: handles UFACTOR taps per iteration. */
1116 for ( ; i+UFACTOR < width; )
1118 \n #pragma unroll UFACTOR \n
1119 for (
int j=0; j < UFACTOR; j++, i++)
1121 result+=filter[i]*temp[i+get_local_id(1)];
/* Tail loop: remaining taps, one at a time. */
1124 for ( ; i < width; i++)
1126 result+=filter[i]*temp[i+get_local_id(1)];
/* Output rows of this section start offsetRows rows into filtered_im. */
1135 filtered_im += imageColumns * offsetRows;
1138 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1158 const float Sa,
const float Dca,
const float Da)
1163 if ((Sca*Da+Dca*Sa) >= Sa*Da)
1164 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
1165 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
1199 const float4 *q,float4 *composite) {
1210 gamma=
QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
1212 getRedF4(*q)*Da,Da));
1214 getGreenF4(*q)*Da,Da));
1216 getBlueF4(*q)*Da,Da));
1222 const float alpha,
const float4 *q,
1223 const float beta,float4 *composite)
1237 setOpacityF4(composite,(
float)
QuantumRange*(1.0-gamma));
1239 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
1240 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
1241 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
1247 const float alpha,
const float4 *q,
1248 const float beta,float4 *composite)
1258 void Composite(__global CLPixelType *image,
1259 const unsigned int imageWidth,
1260 const unsigned int imageHeight,
1261 const unsigned int imageMatte,
1262 const __global CLPixelType *compositeImage,
1263 const unsigned int compositeWidth,
1264 const unsigned int compositeHeight,
1265 const unsigned int compositeMatte,
1266 const unsigned int compose,
1268 const float destination_dissolve,
1269 const float source_dissolve) {
1272 index.x = get_global_id(0);
1273 index.y = get_global_id(1);
1276 if (index.x >= imageWidth
1277 || index.y >= imageHeight) {
1280 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
1282 setRedF4(&destination,getRed(inputPixel));
1283 setGreenF4(&destination,getGreen(inputPixel));
1284 setBlueF4(&destination,getBlue(inputPixel));
1287 const CLPixelType compositePixel
1288 = compositeImage[index.y*imageWidth+index.x];
1290 setRedF4(&source,getRed(compositePixel));
1291 setGreenF4(&source,getGreen(compositePixel));
1292 setBlueF4(&source,getBlue(compositePixel));
1294 if (imageMatte != 0) {
1295 setOpacityF4(&destination,getOpacity(inputPixel));
1298 setOpacityF4(&destination,0.0f);
1301 if (compositeMatte != 0) {
1302 setOpacityF4(&source,getOpacity(compositePixel));
1305 setOpacityF4(&source,0.0f);
1308 float4 composite=destination;
1317 destination_dissolve,&composite);
1324 CLPixelType outputPixel;
1328 setOpacity(&outputPixel,
ClampToQuantum(getOpacityF4(composite)));
1329 image[index.y*imageWidth+index.x] = outputPixel;
/*
  Body fragment of an RGB to hue/saturation/brightness conversion (the
  function header is not visible in this excerpt).  The result is returned
  as a float3 with hue in x, saturation in y and brightness in z; all three
  start at 0.
*/
1348 float3 HueSaturationBrightness;
1349 HueSaturationBrightness.x = 0.0f;
1350 HueSaturationBrightness.y = 0.0f;
1351 HueSaturationBrightness.z = 0.0f;
1353 float r=(float) getRed(pixel);
1354 float g=(float) getGreen(pixel);
1355 float b=(float) getBlue(pixel);
1357 float tmin=min(min(r,g),b);
1358 float tmax=max(max(r,g),b);
1361 float delta=tmax-tmin;
/*
  Saturation.  NOTE(review): this divides by tmax; no guard against
  tmax == 0 is visible in this excerpt - confirm one exists.
*/
1362 HueSaturationBrightness.y=delta/tmax;
/* Hue is only computed when the channels are not all equal. */
1365 if (delta != 0.0f) {
/* Sector base 0, 2 or 4 depending on which channel holds the maximum. */
1366 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1367 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1368 HueSaturationBrightness.x/=6.0f;
/*
  Adds 1.0 when the hue is non-negative and nothing when it is negative.
  NOTE(review): a wrap into [0,1) would add 1.0 to negative values instead;
  confirm the intent.  The HSB-to-RGB code later in this file uses
  hue-floor(hue), which cancels a whole-number offset either way.
*/
1369 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1372 return HueSaturationBrightness;
1377 float hue = HueSaturationBrightness.x;
1378 float brightness = HueSaturationBrightness.z;
1379 float saturation = HueSaturationBrightness.y;
1383 if (saturation == 0.0f) {
1385 setGreen(&rgb,getRed(rgb));
1386 setBlue(&rgb,getRed(rgb));
1390 float h=6.0f*(hue-floor(hue));
1392 float p=brightness*(1.0f-saturation);
1393 float q=brightness*(1.0f-saturation*f);
1394 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1401 setRed(&rgb, (ih == 1)?clamped_q:
1402 (ih == 2 || ih == 3)?clamped_p:
1403 (ih == 4)?clamped_t:
1406 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1407 (ih == 3)?clamped_q:
1408 (ih == 4 || ih == 5)?clamped_p:
1411 setBlue(&rgb, (ih == 2)?clamped_t:
1412 (ih == 3 || ih == 4)?clampedBrightness:
1413 (ih == 5)?clamped_q:
/*
  Contrast(): adjusts the brightness component of one pixel of im in place.
  sharpen != 0 pushes brightness along a sine-shaped curve; sharpen == 0
  pushes it the opposite way.  The w component of the pixel is carried over
  unchanged.

  The conversions between the pixel and HueSaturationBrightness are not
  visible in this excerpt.
*/
1419 __kernel
void Contrast(__global CLPixelType *im,
const unsigned int sharpen)
1422 const int sign = sharpen!=0?1:-1;
1423 const int x = get_global_id(0);
1424 const int y = get_global_id(1);
/* The global size in dimension 0 is used as the row stride of im. */
1425 const int columns = get_global_size(0);
1426 const int c = x + y * columns;
1428 CLPixelType pixel = im[c];
1430 float brightness = HueSaturationBrightness.z;
/* Moves brightness halfway towards (or away from) 0.5*(sinpi(b-0.5)+1). */
1431 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1432 brightness = clamp(brightness,0.0f,1.0f);
1433 HueSaturationBrightness.z = brightness;
1436 filteredPixel.w = pixel.w;
1437 im[c] = filteredPixel;
1456 __kernel
void Histogram(__global CLPixelType * restrict im,
1459 const int colorspace,
1460 __global uint4 * restrict histogram)
1462 const int x = get_global_id(0);
1463 const int y = get_global_id(1);
1464 const int columns = get_global_size(0);
1465 const int c = x + y * columns;
1470 atomic_inc((__global uint *)(&(histogram[pos]))+2);
1483 __kernel
void ContrastStretch(__global CLPixelType * restrict im,
1485 __global CLPixelType * restrict stretch_map,
1486 const float4 white,
const float4 black)
1488 const int x = get_global_id(0);
1489 const int y = get_global_id(1);
1490 const int columns = get_global_size(0);
1491 const int c = x + y * columns;
1494 CLPixelType oValue, eValue;
1495 CLQuantum red, green, blue, opacity;
1500 if ((channel & RedChannel) != 0)
1502 if (getRedF4(white) != getRedF4(black))
1504 ePos = ScaleQuantumToMap(getRed(oValue));
1505 eValue = stretch_map[ePos];
1506 red = getRed(eValue);
1510 if ((channel & GreenChannel) != 0)
1512 if (getGreenF4(white) != getGreenF4(black))
1514 ePos = ScaleQuantumToMap(getGreen(oValue));
1515 eValue = stretch_map[ePos];
1516 green = getGreen(eValue);
1520 if ((channel & BlueChannel) != 0)
1522 if (getBlueF4(white) != getBlueF4(black))
1524 ePos = ScaleQuantumToMap(getBlue(oValue));
1525 eValue = stretch_map[ePos];
1526 blue = getBlue(eValue);
1530 if ((channel & OpacityChannel) != 0)
1532 if (getOpacityF4(white) != getOpacityF4(black))
1534 ePos = ScaleQuantumToMap(getOpacity(oValue));
1535 eValue = stretch_map[ePos];
1536 opacity = getOpacity(eValue);
1541 im[c]=(CLPixelType)(blue, green, red, opacity);
1560 void ConvolveOptimized(
const __global CLPixelType *input, __global CLPixelType *output,
1561 const unsigned int imageWidth,
const unsigned int imageHeight,
1562 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1563 const uint matte,
const ChannelType channel, __local CLPixelType *pixelLocalCache, __local
float* filterCache) {
1566 blockID.x = get_group_id(0);
1567 blockID.y = get_group_id(1);
1571 imageAreaOrg.x = blockID.x * get_local_size(0);
1572 imageAreaOrg.y = blockID.y * get_local_size(1);
1574 int2 midFilterDimen;
1575 midFilterDimen.x = (filterWidth-1)/2;
1576 midFilterDimen.y = (filterHeight-1)/2;
1578 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1581 int2 cachedAreaDimen;
1582 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1583 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1586 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1587 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1588 int groupSize = get_local_size(0) * get_local_size(1);
1589 for (
int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1591 int2 cachedAreaIndex;
1592 cachedAreaIndex.x = i % cachedAreaDimen.x;
1593 cachedAreaIndex.y = i / cachedAreaDimen.x;
1595 int2 imagePixelIndex;
1596 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1600 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1601 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1603 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1607 for (
int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1608 filterCache[i] = filter[i];
1610 barrier(CLK_LOCAL_MEM_FENCE);
1614 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1615 imageIndex.y = imageAreaOrg.y + get_local_id(1);
1618 if (imageIndex.x >= imageWidth
1619 || imageIndex.y >= imageHeight) {
1623 int filterIndex = 0;
1624 float4 sum = (float4)0.0f;
1626 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1627 int cacheIndexY = get_local_id(1);
1628 for (
int j = 0; j < filterHeight; j++) {
1629 int cacheIndexX = get_local_id(0);
1630 for (
int i = 0; i < filterWidth; i++) {
1631 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1632 float f = filterCache[filterIndex];
1647 int cacheIndexY = get_local_id(1);
1648 for (
int j = 0; j < filterHeight; j++) {
1649 int cacheIndexX = get_local_id(0);
1650 for (
int i = 0; i < filterWidth; i++) {
1652 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1654 float f = filterCache[filterIndex];
1655 float g = alpha * f;
1669 sum.xyz = gamma*sum.xyz;
1671 CLPixelType outputPixel;
1677 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1683 void Convolve(
const __global CLPixelType *input, __global CLPixelType *output,
1684 const uint imageWidth,
const uint imageHeight,
1685 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1689 imageIndex.x = get_global_id(0);
1690 imageIndex.y = get_global_id(1);
1696 if (imageIndex.x >= imageWidth
1697 || imageIndex.y >= imageHeight)
1700 int2 midFilterDimen;
1701 midFilterDimen.x = (filterWidth-1)/2;
1702 midFilterDimen.y = (filterHeight-1)/2;
1704 int filterIndex = 0;
1705 float4 sum = (float4)0.0f;
1707 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1708 for (
int j = 0; j < filterHeight; j++) {
1709 int2 inputPixelIndex;
1710 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1711 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1712 for (
int i = 0; i < filterWidth; i++) {
1713 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1714 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1716 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1717 float f = filter[filterIndex];
1732 for (
int j = 0; j < filterHeight; j++) {
1733 int2 inputPixelIndex;
1734 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1735 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1736 for (
int i = 0; i < filterWidth; i++) {
1737 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1738 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1740 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1742 float f = filter[filterIndex];
1743 float g = alpha * f;
1757 sum.xyz = gamma*sum.xyz;
1760 CLPixelType outputPixel;
1766 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1784 __kernel
void HullPass1(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1785 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1786 ,
const int2 offset,
const int polarity,
const int matte) {
1788 int x = get_global_id(0);
1789 int y = get_global_id(1);
1791 CLPixelType v = inputImage[y*imageWidth+x];
1794 neighbor.y = y + offset.y;
1795 neighbor.x = x + offset.x;
1797 int2 clampedNeighbor;
1798 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1799 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1801 CLPixelType r = (clampedNeighbor.x == neighbor.x
1802 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1818 \n #pragma unroll 4\n
1819 for (
unsigned int i = 0; i < 4; i++) {
1820 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1824 \n #pragma unroll 4\n
1825 for (
unsigned int i = 0; i < 4; i++) {
1826 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1831 v.x = (CLQuantum)sv[0];
1832 v.y = (CLQuantum)sv[1];
1833 v.z = (CLQuantum)sv[2];
1836 v.w = (CLQuantum)sv[3];
1838 outputImage[y*imageWidth+x] = v;
1849 __kernel
void HullPass2(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1850 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1851 ,
const int2 offset,
const int polarity,
const int matte) {
1853 int x = get_global_id(0);
1854 int y = get_global_id(1);
1856 CLPixelType v = inputImage[y*imageWidth+x];
1858 int2 neighbor, clampedNeighbor;
1860 neighbor.y = y + offset.y;
1861 neighbor.x = x + offset.x;
1862 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1863 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1865 CLPixelType r = (clampedNeighbor.x == neighbor.x
1866 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1870 neighbor.y = y - offset.y;
1871 neighbor.x = x - offset.x;
1872 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1873 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1875 CLPixelType s = (clampedNeighbor.x == neighbor.x
1876 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1899 \n #pragma unroll 4\n
1900 for (
unsigned int i = 0; i < 4; i++) {
1905 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1909 \n #pragma unroll 4\n
1910 for (
unsigned int i = 0; i < 4; i++) {
1914 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1918 v.x = (CLQuantum)sv[0];
1919 v.y = (CLQuantum)sv[1];
1920 v.z = (CLQuantum)sv[2];
1923 v.w = (CLQuantum)sv[3];
1925 outputImage[y*imageWidth+x] = v;
1947 __kernel
void Equalize(__global CLPixelType * restrict im,
1949 __global CLPixelType * restrict equalize_map,
1950 const float4 white,
const float4 black)
1952 const int x = get_global_id(0);
1953 const int y = get_global_id(1);
1954 const int columns = get_global_size(0);
1955 const int c = x + y * columns;
1958 CLPixelType oValue, eValue;
1959 CLQuantum red, green, blue, opacity;
1964 if ((channel & SyncChannels) != 0)
1966 if (getRedF4(white) != getRedF4(black))
1968 ePos = ScaleQuantumToMap(getRed(oValue));
1969 eValue = equalize_map[ePos];
1970 red = getRed(eValue);
1971 ePos = ScaleQuantumToMap(getGreen(oValue));
1972 eValue = equalize_map[ePos];
1973 green = getRed(eValue);
1974 ePos = ScaleQuantumToMap(getBlue(oValue));
1975 eValue = equalize_map[ePos];
1976 blue = getRed(eValue);
1977 ePos = ScaleQuantumToMap(getOpacity(oValue));
1978 eValue = equalize_map[ePos];
1979 opacity = getRed(eValue);
1982 im[c]=(CLPixelType)(blue, green, red, opacity);
2011 const unsigned int number_parameters,
2012 __constant
float *parameters)
2014 float4 result = (float4) 0.0f;
2019 for (
unsigned int i=0; i < number_parameters; i++)
2020 result = result*(float4)
QuantumScale*convert_float4(pixel) + parameters[i];
2026 float freq,phase,ampl,bias;
2027 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2028 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
2029 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
2030 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2032 (freq*
QuantumScale*(
float)pixel.x + phase/360.0f)) + bias);
2034 (freq*
QuantumScale*(
float)pixel.y + phase/360.0f)) + bias);
2036 (freq*
QuantumScale*(
float)pixel.z + phase/360.0f)) + bias);
2038 (freq*
QuantumScale*(
float)pixel.w + phase/360.0f)) + bias);
2043 float width,range,center,bias;
2044 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2045 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
2046 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
2047 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2049 result.x = 2.0f/width*(
QuantumScale*(float)pixel.x - center);
2050 result.x = range/
MagickPI*asin(result.x)+bias;
2051 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
2052 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
2054 result.y = 2.0f/width*(
QuantumScale*(float)pixel.y - center);
2055 result.y = range/
MagickPI*asin(result.y)+bias;
2056 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
2057 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
/*
  Arcsin function, z component: normalise the pixel value around center,
  apply range/MagickPI*asin()+bias, then substitute the range limits when
  the result is at or beyond -1.0 / 1.0.  The fall-through operand of both
  range tests must be result.z; falling back to result.x would replace the
  z result with the x channel's result.
*/
2059 result.z = 2.0f/width*(
QuantumScale*(float)pixel.z - center);
2060 result.z = range/
MagickPI*asin(result.z)+bias;
2061 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.z;
2062 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.z;
2065 result.w = 2.0f/width*(
QuantumScale*(float)pixel.w - center);
2066 result.w = range/
MagickPI*asin(result.w)+bias;
2067 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
2068 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
2075 float slope,range,center,bias;
2076 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2077 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
2078 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
2079 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2080 result = (float4)
MagickPI*(float4)slope*((float4)
QuantumScale*convert_float4(pixel)-(float4)center);
2100 __kernel
void ComputeFunction(__global CLPixelType *im,
2102 const unsigned int number_parameters, __constant
float *parameters)
2104 const int x = get_global_id(0);
2105 const int y = get_global_id(1);
2106 const int columns = get_global_size(0);
2107 const int c = x + y * columns;
2108 im[c] =
ApplyFunction(im[c],
function, number_parameters, parameters);
2125 __kernel
void Grayscale(__global CLPixelType *im,
2126 const int method,
const int colorspace)
2129 const int x = get_global_id(0);
2130 const int y = get_global_id(1);
2131 const int columns = get_global_size(0);
2132 const int c = x + y * columns;
2134 CLPixelType pixel = im[c];
2142 red=(float)getRed(pixel);
2143 green=(float)getGreen(pixel);
2144 blue=(float)getBlue(pixel);
2148 CLPixelType filteredPixel;
2154 intensity=(red+green+blue)/3.0;
2159 intensity=max(max(red,green),blue);
2164 intensity=(min(min(red,green),blue)+
2165 max(max(red,green),blue))/2.0;
2170 intensity=(float) (((
float) red*red+green*green+
2184 intensity=0.298839*red+0.586811*green+0.114350*blue;
2197 intensity=0.298839*red+0.586811*green+0.114350*blue;
2211 intensity=0.212656*red+0.715158*green+0.072186*blue;
2224 intensity=0.212656*red+0.715158*green+0.072186*blue;
2229 intensity=(float) (sqrt((
float) red*red+green*green+
2230 blue*blue)/sqrt(3.0));
2238 filteredPixel.w = pixel.w;
2240 im[c] = filteredPixel;
/*
  mirrorBottom(): reflects a negative index about zero (returns -value);
  non-negative values are returned unchanged.  Index -1 maps to 1.
*/
2257 inline int mirrorBottom(
int value)
2259 return (value < 0) ? - (value) : value;
/*
  mirrorTop(): reflects an index at or beyond width back into range
  (returns 2*width - value - 1); smaller values are returned unchanged.
  Index width maps to width-1.
*/
2261 inline int mirrorTop(
int value,
int width)
2263 return (value >= width) ? (2 * width - value - 1) : value;
2266 __kernel
void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *tmpImage,
2268 const int imageWidth,
2269 const int imageHeight)
2271 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
2273 int x = get_local_id(0);
2274 int y = get_global_id(1);
2276 global CLPixelType *src = srcImage + y * imageWidth;
2278 for (
int i = x; i < imageWidth; i += get_local_size(0)) {
2280 float weight = 1.0f;
2283 while ((j + 7) < i) {
2284 for (
int k = 0; k < 8; ++k)
2285 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
2290 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
2295 while ((j + 7) < radius + i) {
2296 for (
int k = 0; k < 8; ++k)
2297 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
2301 while (j < radius + i) {
2302 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
2307 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
/*
  LocalContrastBlurApplyColumn(): for the pixel at (x, y), accumulates a
  weighted sum of blurImage values along column x (indices outside the image
  are mirrored), compares the normalised sum with the pixel's own weighted
  RGB value, and derives a multiplier scaled by strength/100.  The pixel is
  then written to dstImage.

  Several lines (the declarations of radius, sum and j, the weight updates
  and the use of mult) are not visible in this excerpt.
*/
2313 __kernel
void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *blurImage,
2315 const float strength,
2316 const int imageWidth,
2317 const int imageHeight)
/* Per-channel weights applied with dot(); the fourth component is ignored. */
2319 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
2321 int x = get_global_id(0);
2322 int y = get_global_id(1);
/* Work-items outside the image are filtered by this test. */
2324 if ((x >= imageWidth) || (y >= imageHeight))
/* src points at column x of the first row; rows are imageWidth apart. */
2327 global
float *src = blurImage + x;
2330 float weight = 1.0f;
/* Rows before y, eight per iteration, with weights increasing by k. */
2333 while ((j + 7) < y) {
2334 for (
int k = 0; k < 8; ++k)
2335 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
2340 sum += weight * src[mirrorBottom(j) * imageWidth];
/* Rows up to radius + y, eight per iteration, with weights decreasing by k. */
2345 while ((j + 7) < radius + y) {
2346 for (
int k = 0; k < 8; ++k)
2347 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
/* Remaining rows, one at a time. */
2351 while (j < radius + y) {
2352 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
2357 CLPixelType pixel = srcImage[x + y * imageWidth];
2358 float srcVal = dot(RGB, convert_float4(pixel));
/* Difference between the pixel and the normalised blur, scaled by strength. */
2359 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
/*
  NOTE(review): divides by srcVal, which is 0 when the red, green and blue
  values are all 0; confirm how that case is meant to behave.
*/
2360 mult = (srcVal + mult) / srcVal;
2366 dstImage[x + y * imageWidth] = pixel;
/* Writes hue, saturation and lightness for the given red/green/blue values
   through the three output pointers. Only the lightness and saturation
   formulas are visible in this listing.
   NOTE(review): tmax, tmin and c are used but their definitions, and the hue
   computation, are not shown - confirm against the original header. */
2384 inline void ConvertRGBToHSL(
const CLQuantum red,
const CLQuantum green,
const CLQuantum blue,
2385 float *hue,
float *saturation,
float *lightness)
/* Lightness is the midpoint of tmax and tmin. */
2400 *lightness=(tmax+tmin)/2.0;
/* Saturation divides c by 2L for L <= 0.5 and by 2 - 2L otherwise. */
2421 if (*lightness <= 0.5)
2422 *saturation=c/(2.0*(*lightness));
2424 *saturation=c/(2.0-2.0*(*lightness));
/* Converts hue/saturation/lightness to red/green/blue written through the
   output pointers. The visible lines compute c from lightness and
   saturation, a base value tmin, wrap h into [0, 360) and then switch on
   floor(h).
   NOTE(review): the declarations of c, tmin, h and x, the line between the
   wrap and the x computation, and the switch cases are not present in this
   listing. */
2427 inline void ConvertHSLToRGB(
const float hue,
const float saturation,
const float lightness,
2428 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
/* Mirror image of the saturation formula in the RGB-to-HSL direction. */
2443 if (lightness <= 0.5)
2444 c=2.0*lightness*saturation;
2446 c=(2.0-2.0*lightness)*saturation;
2447 tmin=lightness-0.5*c;
/* Remove whole multiples of 360 from h. */
2448 h-=360.0*floor(h/360.0);
/* x scales c by 1 - |(h mod 2) - 1|. */
2450 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2451 switch ((
int) floor(h))
/* Adjusts a colour in place by percentages: hue is shifted by
   0.5 * (percent_hue / 100 - 1), saturation and lightness are multiplied by
   percent / 100.
   NOTE(review): the declarations of hue, saturation and lightness and the
   conversion calls around these three statements are not present in this
   listing. */
2507 inline void ModulateHSL(
const float percent_hue,
const float percent_saturation,
const float percent_lightness,
2508 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
/* percent_hue == 100 leaves hue unchanged. */
2519 hue+=0.5*(0.01*percent_hue-1.0);
2524 saturation*=0.01*percent_saturation;
2525 lightness*=0.01*percent_lightness;
/* In-place modulate kernel. Each work-item reads the pixel at
   x + y * get_global_size(0), passes its colour channels through
   ModulateHSL with (percent_hue, percent_saturation, percent_brightness)
   and writes the result back with the w component copied from the input.
   NOTE(review): the declarations of red/green/blue, the assignment of red,
   and any use of colorspace are not present in this listing. */
2529 __kernel
void Modulate(__global CLPixelType *im,
2530 const float percent_brightness,
2531 const float percent_hue,
2532 const float percent_saturation,
2533 const int colorspace)
2536 const int x = get_global_id(0);
2537 const int y = get_global_id(1);
/* The row length is taken from the launch size, not from a parameter. */
2538 const int columns = get_global_size(0);
2539 const int c = x + y * columns;
2541 CLPixelType pixel = im[c];
2549 green=getGreen(pixel);
2550 blue=getBlue(pixel);
/* percent_brightness is passed in the percent_lightness position. */
2557 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2558 &red, &green, &blue);
2563 CLPixelType filteredPixel;
2565 setRed(&filteredPixel, red);
2566 setGreen(&filteredPixel, green);
2567 setBlue(&filteredPixel, blue);
/* The w component is carried over unmodified. */
2568 filteredPixel.w = pixel.w;
2570 im[c] = filteredPixel;
/* Motion blur: for the pixel at (get_global_id(0), get_global_id(1)),
   accumulates width samples taken at currentPixel + offset[i], clamped to
   the image, each weighted by filter[i], and stores one output pixel.
   Two accumulation paths are visible: a plain weighted sum used when the
   opacity channel is not selected or matte is 0, and an alpha-weighted sum
   otherwise.
   NOTE(review): bias, currentPixel, pixel, alpha and gamma are used but
   their declarations (and the conversion into outputPixel) are not present
   in this listing. */
2588 void MotionBlur(
const __global CLPixelType *input, __global CLPixelType *output,
2589 const unsigned int imageWidth,
const unsigned int imageHeight,
2590 const __global
float *filter,
const unsigned int width,
const __global int2* offset,
2592 const ChannelType channel,
const unsigned int matte) {
2595 currentPixel.x = get_global_id(0);
2596 currentPixel.y = get_global_id(1);
/* Guard for work-items outside the image (the guarded statement is not shown). */
2598 if (currentPixel.x >= imageWidth
2599 || currentPixel.y >= imageHeight)
/* The accumulator starts from bias. */
2603 pixel.x = (float)bias.x;
2604 pixel.y = (
float)bias.y;
2605 pixel.z = (float)bias.z;
2606 pixel.w = (
float)bias.w;
/* Path 1: all four components are weighted by filter[i] only. */
2608 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2610 for (
int i = 0; i < width; i++) {
/* Sample position, clamped to the image on both axes. */
2613 int2 samplePixel = currentPixel + offset[i];
2614 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2615 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2616 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2618 pixel.x += (filter[i] * (float)samplePixelValue.x);
2619 pixel.y += (filter[i] * (float)samplePixelValue.y);
2620 pixel.z += (filter[i] * (float)samplePixelValue.z);
2621 pixel.w += (filter[i] * (float)samplePixelValue.w);
2624 CLPixelType outputPixel;
2629 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/* Path 2: every component is additionally weighted by alpha. */
2634 for (
int i = 0; i < width; i++) {
2637 int2 samplePixel = currentPixel + offset[i];
2638 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2639 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2641 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2644 float k = filter[i];
2645 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2646 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2647 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2649 pixel.w += k * alpha * samplePixelValue.w;
/* The colour components are rescaled by gamma after the loop. */
2654 pixel.xyz = gamma*pixel.xyz;
2656 CLPixelType outputPixel;
2661 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/* Radial (rotational) blur. For the pixel at (x, y) the kernel rotates the
   offset from blurCenter by each tabulated angle (cos_theta[i],
   sin_theta[i]), clamps the rotated position to the launch grid, and
   accumulates the sampled pixels. The table is walked with a stride step
   derived from blur_radius / radius. Image size is taken from
   get_global_size(), not from parameters.
   NOTE(review): bias, result, alpha and gamma are used but not declared in
   this listing, and the updates of normalize inside the loops are not
   shown. */
2679 __kernel
void RadialBlur(
const __global CLPixelType *im, __global CLPixelType *filtered_im,
2681 const unsigned int channel,
const unsigned int matte,
2682 const float2 blurCenter,
2683 __constant
float *cos_theta, __constant
float *sin_theta,
2684 const unsigned int cossin_theta_size)
2686 const int x = get_global_id(0);
2687 const int y = get_global_id(1);
2688 const int columns = get_global_size(0);
2689 const int rows = get_global_size(1);
2690 unsigned int step = 1;
/* Offset of this pixel from the blur centre and its length. */
2691 float center_x = (float) x - blurCenter.x;
2692 float center_y = (
float) y - blurCenter.y;
2693 float radius = hypot(center_x, center_y);
2696 float blur_radius = hypot(blurCenter.x, blurCenter.y);
/* Pixels further from the centre use a smaller stride through the table.
   NOTE(review): the loops below advance by step, so step must not become 0;
   lines between the assignment and the upper clamp are missing from this
   listing - confirm a lower bound exists there, and that
   cossin_theta_size - 1 cannot be 0. */
2698 if (radius > MagickEpsilon)
2700 step = (
unsigned int) (blur_radius / radius);
2703 if (step >= cossin_theta_size)
2704 step = cossin_theta_size-1;
/* The accumulator starts from bias. */
2708 result.x = (float)bias.x;
2709 result.y = (
float)bias.y;
2710 result.z = (float)bias.z;
2711 result.w = (
float)bias.w;
2712 float normalize = 0.0f;
/* Path 1: unweighted accumulation of all four components. */
2714 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2715 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
/* Rotate the offset by angle i, add 0.5f, clamp to the grid. */
2717 result += convert_float4(im[
2718 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2719 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2723 result = result * normalize;
/* Path 2: colour components are weighted by alpha per sample. */
2727 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
2729 float4 p = convert_float4(im[
2730 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2731 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2734 result.x += alpha * p.x;
2735 result.y += alpha * p.y;
2736 result.z += alpha * p.z;
/* Colour is rescaled by gamma, the w component by normalize. */
2743 result.x = gamma*result.x;
2744 result.y = gamma*result.y;
2745 result.z = gamma*result.z;
2746 result.w = normalize*result.w;
2770 unsigned int alpha = (
unsigned int)(s.y ^ (s.y << 11));
2774 s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8));
2775 }
while (s.x == ~0UL);
2777 return (normalizeRand*s.x);
/* Fills randomNumbers with values of the form normalizeRand * seed[0].
   Each work-item keeps a private four-word state loaded from
   seeds[id * 4 ...], produces ceil(numRandomNumbers / get_global_size(0))
   values, and writes its state back to seeds at the end. Two state-loading
   paths are visible: one takes only word 0 from seeds and uses three fixed
   constants for the rest, the other loads all four words.
   NOTE(review): the condition selecting between the two paths (presumably
   on init) and the statements that rotate seed[1..3] inside the do-loop are
   not present in this listing. */
2780 __kernel
void RandomNumberGenerator(__global uint* seeds,
const float normalizeRand
2781 , __global
float* randomNumbers,
const uint init
2782 ,
const uint numRandomNumbers) {
2784 unsigned int id = get_global_id(0);
2785 unsigned int seed[4];
/* Path A: word 0 from seeds, words 1..3 fixed. */
2788 seed[0] = seeds[
id * 4];
2789 seed[1] = 0x50a7f451;
2790 seed[2] = 0x5365417e;
2791 seed[3] = 0xc3a4171a;
/* Path B: the full state comes from seeds. */
2794 seed[0] = seeds[
id * 4];
2795 seed[1] = seeds[
id * 4 + 1];
2796 seed[2] = seeds[
id * 4 + 2];
2797 seed[3] = seeds[
id * 4 + 3];
/* Values per work-item, rounded up. */
2800 unsigned int numRandomNumbersPerItem = (numRandomNumbers + get_global_size(0) - 1) / get_global_size(0);
2801 for (
unsigned int i = 0; i < numRandomNumbersPerItem; i++) {
/* Shift/xor state update.
   NOTE(review): seed[0] is a 32-bit unsigned int, while an unsigned long
   literal is 64 bits wide in OpenCL C, so seed[0] == ~0UL cannot hold after
   promotion and the do-loop body would run exactly once - confirm whether
   ~0U was intended. */
2804 unsigned int alpha = (
unsigned int)(seed[1] ^ (seed[1] << 11));
2808 seed[0] = (seed[0] ^ (seed[0] >> 19)) ^ (alpha ^ (alpha >> 8));
2809 }
while (seed[0] == ~0UL);
/* Output slot: work-group base, then one slice of get_local_size(0) slots
   per iteration i, then this item's local id. */
2810 unsigned int pos = (get_group_id(0)*get_local_size(0)*numRandomNumbersPerItem)
2811 + get_local_size(0) * i + get_local_id(0);
/* Slots past the requested count are not written (guarded statement not shown). */
2813 if (pos >= numRandomNumbers)
2815 randomNumbers[pos] = normalizeRand*seed[0];
/* Persist the state back to seeds. */
2819 seeds[
id * 4] = seed[0];
2820 seeds[
id * 4 + 1] = seed[1];
2821 seeds[
id * 4 + 2] = seed[2];
2822 seeds[
id * 4 + 3] = seed[3];
/* Box resize weighting function of x.
   NOTE(review): only the signature is present in this listing; the body is
   not shown, so its return value is not documented here. */
2840 float BoxResizeFilter(
const float x)
/* Piecewise cubic weighting function evaluated from seven coefficients in
   resizeFilterCoefficients. Two return paths are visible:
     c[0] + x*x*(c[1] + x*c[2])            (no linear term)
     c[3] + x*(c[4] + x*(c[5] + x*c[6]))
   NOTE(review): the conditions on x that select each path, and any further
   return, are not present in this listing. */
2848 float CubicBC(
const float x,
const __global
float* resizeFilterCoefficients)
2880 return(resizeFilterCoefficients[0]+x*(x*
2881 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2883 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2884 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
/* Returns sinpi(x) / (MagickPI * x).
   NOTE(review): the visible return divides by alpha, which is 0 for x == 0;
   a guard for that case is presumably on a line missing from this listing -
   confirm. */
2890 float Sinc(
const float x)
2894 const float alpha=(float) (
MagickPI*x);
2895 return sinpi(x)/alpha;
2909 return ((x<1.0f)?(1.0f-x):0.0f);
2921 const float cosine=cos((
MagickPI*x));
2922 return(0.5f+0.5f*cosine);
2933 const float cosine=cos((
MagickPI*x));
2934 return(0.54f+0.46f*cosine);
2948 const float cosine=cos((
MagickPI*x));
2949 return(0.34f+cosine*(0.5f+cosine*0.16f));
/* Dispatches x to a weighting function chosen by filterType. The visible
   targets are CubicBC (which also receives filterCoefficients) and
   BoxResizeFilter.
   NOTE(review): the switch/case structure and any other targets are not
   present in this listing. */
2957 inline float applyResizeFilter(
const float x,
const ResizeWeightingFunctionType filterType,
const __global
float* filterCoefficients)
2967 return CubicBC(x,filterCoefficients);
2969 return BoxResizeFilter(x);
/* Computes the resize weight for distance x. x is first divided by
   resizeFilterBlur and made non-negative; a window factor scale is obtained
   by applying the window function to xBlur * resizeFilterScale, and the
   weight is scale times the main filter function applied to xBlur.
   NOTE(review): resizeWindowType is used below but its parameter
   declaration, the rest of the resizeWindowSupport test, the declaration of
   scale and the return statement are not present in this listing. */
2987 inline float getResizeFilterWeight(
const __global
float* resizeFilterCubicCoefficients,
const ResizeWeightingFunctionType resizeFilterType
2989 ,
const float resizeFilterScale,
const float resizeWindowSupport,
const float resizeFilterBlur,
const float x)
2992 float xBlur = fabs(x/resizeFilterBlur);
/* A window support below MagickEpsilon is treated as a special case
   (the branch body is not shown). */
2993 if (resizeWindowSupport < MagickEpsilon
3000 scale = resizeFilterScale;
3001 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
3003 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
3010 const char* accelerateKernels2 =
/* Returns how many work-items share one pixel: the integer quotient
   numWorkItems / pixelPerWorkgroup. The result is 0 when pixelPerWorkgroup
   exceeds numWorkItems.
   NOTE(review): pixelPerWorkgroup == 0 would divide by zero - callers
   presumably never pass 0; confirm. */
3014 inline unsigned int getNumWorkItemsPerPixel(
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
3015 return (numWorkItems/pixelPerWorkgroup);
/* Maps a work-item id to the index of the pixel it contributes to:
   itemID / (numWorkItems / pixelPerWorkgroup), replaced by -1 when that
   index is not below pixelPerWorkgroup.
   NOTE(review): the return statement is not present in this listing
   (presumably it returns pixelIndex). The division is by zero when
   pixelPerWorkgroup exceeds numWorkItems - confirm callers rule that out. */
3020 inline int pixelToCompute(
const unsigned itemID,
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
3021 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
3022 int pixelIndex = itemID/numWorkItemsPerPixel;
/* Work-items left over after the even split get no pixel. */
3023 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
/* Horizontal resize pass; declared with a required work-group size of
   256 x 1 x 1. A work-group produces output columns [startX, stopX) of row
   y = get_global_id(1):
     1. the needed span of the input row is copied into inputImageCache;
     2. output pixels are processed in chunks of pixelChunkSize; inside a
        chunk the work-items are divided among the pixels and each one
        accumulates a partial weighted sum over its share of the taps;
     3. partial sums are combined in outputPixelCache / densityCache /
        gammaCache, and the first actualNumPixelInThisChunk work-items write
        the finished pixels to filteredImage.
   Two accumulation and two write-out variants are visible, one plain and
   one that also tracks alpha/gamma.
   NOTE(review): bisect, weight, alpha, gamma and fp are used but not
   declared in this listing, and the condition selecting between the two
   variants (presumably matte) is not shown. */
3030 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
3031 void ResizeHorizontalFilter(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
3032 ,
const float xFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
3033 ,
const int resizeFilterType,
const int resizeWindowType
3034 ,
const __global
float* resizeFilterCubicCoefficients
3035 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
3036 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
3037 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output columns owned by this work-group, clipped to the image width. */
3041 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
3042 const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
3043 const unsigned int actualNumPixelToCompute = stopX - startX;
/* scale is at least 1; support is at least 0.5. */
3046 float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
3047 const float support = max(scale*resizeFilterSupport,0.5f);
/* Input span cached for this work-group, limited to numCachedPixels
   columns and to the input width. */
3050 const int cacheRangeStartX = max((
int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(
int)(0));
3051 const int cacheRangeEndX = min((
int)(cacheRangeStartX + numCachedPixels), (
int)inputColumns);
/* Copy that span of row y into local memory and wait for completion. */
3054 const unsigned int y = get_global_id(1);
3055 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
3056 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
3058 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
3059 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
3062 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
3063 const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
3064 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
/* numItems work-items cooperate on each pixel of the chunk; pixelIndex is
   -1 for work-items that get no pixel. */
3067 const unsigned int itemID = get_local_id(0);
3068 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
3070 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
3072 float4 filteredPixel = (float4)0.0f;
3073 float density = 0.0f;
3076 if (pixelIndex != -1) {
3079 const int x = chunkStartX + pixelIndex;
/* Tap range [start, stop) for this output pixel, clipped to the input row. */
3083 const unsigned int start = (
unsigned int)max(bisect-support+0.5f,0.0f);
3084 const unsigned int stop = (
unsigned int)min(bisect+support+0.5f,(
float)inputColumns);
3085 const unsigned int n = stop - start;
/* Taps per cooperating work-item, rounded up. */
3088 unsigned int numStepsPerWorkItem = n / numItems;
3089 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
/* This work-item handles taps [startStep, stopStep). */
3091 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3092 if (startStep < n) {
3093 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
/* Index of the first tap relative to the cached span. */
3095 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
/* Variant 1: every component is weighted by the filter weight. */
3098 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3099 float4 cp = convert_float4(inputImageCache[cacheIndex]);
3103 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3105 filteredPixel += ((float4)weight)*cp;
/* Variant 2: colour is weighted by alpha, the w component by weight. */
3112 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3113 CLPixelType p = inputImageCache[cacheIndex];
3117 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3120 float4 cp = convert_float4(p);
3122 filteredPixel.x += alpha * cp.x;
3123 filteredPixel.y += alpha * cp.y;
3124 filteredPixel.z += alpha * cp.z;
3125 filteredPixel.w += weight * cp.w;
/* Clear the per-pixel accumulators before combining partial sums. */
3135 if (itemID < actualNumPixelInThisChunk) {
3136 outputPixelCache[itemID] = (float4)0.0f;
3137 densityCache[itemID] = 0.0f;
3139 gammaCache[itemID] = 0.0f;
3141 barrier(CLK_LOCAL_MEM_FENCE);
/* Combine partial sums in numItems rounds: in round i only the work-items
   with itemID % numItems == i add into their pixel's slot. */
3144 for (
unsigned int i = 0; i < numItems; i++) {
3145 if (pixelIndex != -1) {
3146 if (itemID%numItems == i) {
3147 outputPixelCache[pixelIndex]+=filteredPixel;
3148 densityCache[pixelIndex]+=density;
3150 gammaCache[pixelIndex]+=gamma;
3154 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per pixel finalises and stores the result. */
3157 if (itemID < actualNumPixelInThisChunk) {
/* Write-out variant 1: rescale by density unless it is 0 or 1. */
3159 float density = densityCache[itemID];
3160 float4 filteredPixel = outputPixelCache[itemID];
3161 if (density!= 0.0f && density != 1.0)
3164 filteredPixel *= (float4)density;
3166 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (
ClampToQuantum(filteredPixel.x)
/* Write-out variant 2: also reads the accumulated gamma. */
3172 float density = densityCache[itemID];
3173 float gamma = gammaCache[itemID];
3174 float4 filteredPixel = outputPixelCache[itemID];
3176 if (density!= 0.0f && density != 1.0) {
3178 filteredPixel *= (float4)density;
3189 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
/* Separately named kernel entry point with the same parameter list and
   required work-group size (256 x 1 x 1) as ResizeHorizontalFilter; its
   visible body forwards the arguments to ResizeHorizontalFilter.
   NOTE(review): the forwarded argument list as shown skips the
   resizeFilterType / resizeWindowType pair - one line of the call is
   missing from this listing, so confirm what is passed there. */
3201 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
3202 void ResizeHorizontalFilterSinc(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
3203 ,
const float xFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
3204 ,
const int resizeFilterType,
const int resizeWindowType
3205 ,
const __global
float* resizeFilterCubicCoefficients
3206 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
3207 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
3208 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
3210 ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
3211 ,xFactor, filteredImage, filteredColumns, filteredRows
3213 ,resizeFilterCubicCoefficients
3214 ,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
3215 ,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
3216 ,outputPixelCache, densityCache, gammaCache);
/* Vertical resize pass; declared with a required work-group size of
   1 x 256 x 1. A work-group produces output rows [startY, stopY) of column
   x = get_global_id(0). The structure mirrors the horizontal pass with the
   axes swapped: the needed span of the input column is gathered into
   inputImageCache with a strided copy (stride inputColumns), output pixels
   are processed in chunks of pixelChunkSize with the work-items divided
   among the pixels, partial sums are combined in the local caches, and the
   first actualNumPixelInThisChunk work-items store the results.
   NOTE(review): bisect, weight, alpha, gamma and fp are used but not
   declared in this listing, and the condition selecting between the two
   accumulation / write-out variants (presumably matte) is not shown. */
3223 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
3224 void ResizeVerticalFilter(const __global CLPixelType* inputImage, const
unsigned int inputColumns, const
unsigned int inputRows, const
unsigned int matte
3225 , const
float yFactor, __global CLPixelType* filteredImage, const
unsigned int filteredColumns, const
unsigned int filteredRows
3226 , const
int resizeFilterType, const
int resizeWindowType
3227 , const __global
float* resizeFilterCubicCoefficients
3228 , const
float resizeFilterScale, const
float resizeFilterSupport, const
float resizeFilterWindowSupport, const
float resizeFilterBlur
3229 , __local CLPixelType* inputImageCache, const
int numCachedPixels, const
unsigned int pixelPerWorkgroup, const
unsigned int pixelChunkSize
3230 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output rows owned by this work-group, clipped to the image height. */
3234 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
3235 const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
3236 const unsigned int actualNumPixelToCompute = stopY - startY;
/* scale is at least 1; support is at least 0.5. */
3239 float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
3240 const float support = max(scale*resizeFilterSupport,0.5f);
/* Input rows cached for this work-group, limited to numCachedPixels rows
   and to the input height. */
3243 const int cacheRangeStartY = max((
int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(
int)(0));
3244 const int cacheRangeEndY = min((
int)(cacheRangeStartY + numCachedPixels), (
int)inputRows);
/* Gather column x of those rows into local memory and wait for completion. */
3247 const unsigned int x = get_global_id(0);
3248 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
3249 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
3251 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
3252 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
3255 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
3256 const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
3257 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
/* numItems work-items cooperate on each pixel of the chunk; pixelIndex is
   -1 for work-items that get no pixel. */
3260 const unsigned int itemID = get_local_id(1);
3261 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
3263 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
3265 float4 filteredPixel = (float4)0.0f;
3266 float density = 0.0f;
3269 if (pixelIndex != -1) {
3272 const int y = chunkStartY + pixelIndex;
/* Tap range [start, stop) for this output pixel, clipped to the input column. */
3276 const unsigned int start = (
unsigned int)max(bisect-support+0.5f,0.0f);
3277 const unsigned int stop = (
unsigned int)min(bisect+support+0.5f,(
float)inputRows);
3278 const unsigned int n = stop - start;
/* Taps per cooperating work-item, rounded up. */
3281 unsigned int numStepsPerWorkItem = n / numItems;
3282 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
/* This work-item handles taps [startStep, stopStep). */
3284 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3285 if (startStep < n) {
3286 const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
/* Index of the first tap relative to the cached span. */
3288 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
/* Variant 1: every component is weighted by the filter weight. */
3291 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3292 float4 cp = convert_float4(inputImageCache[cacheIndex]);
3296 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3298 filteredPixel += ((float4)weight)*cp;
/* Variant 2: colour is weighted by alpha, the w component by weight. */
3305 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3306 CLPixelType p = inputImageCache[cacheIndex];
3310 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3313 float4 cp = convert_float4(p);
3315 filteredPixel.x += alpha * cp.x;
3316 filteredPixel.y += alpha * cp.y;
3317 filteredPixel.z += alpha * cp.z;
3318 filteredPixel.w += weight * cp.w;
/* Clear the per-pixel accumulators before combining partial sums. */
3328 if (itemID < actualNumPixelInThisChunk) {
3329 outputPixelCache[itemID] = (float4)0.0f;
3330 densityCache[itemID] = 0.0f;
3332 gammaCache[itemID] = 0.0f;
3334 barrier(CLK_LOCAL_MEM_FENCE);
/* Combine partial sums in numItems rounds: in round i only the work-items
   with itemID % numItems == i add into their pixel's slot. */
3337 for (
unsigned int i = 0; i < numItems; i++) {
3338 if (pixelIndex != -1) {
3339 if (itemID%numItems == i) {
3340 outputPixelCache[pixelIndex]+=filteredPixel;
3341 densityCache[pixelIndex]+=density;
3343 gammaCache[pixelIndex]+=gamma;
3347 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per pixel finalises and stores the result. */
3350 if (itemID < actualNumPixelInThisChunk) {
/* Write-out variant 1: rescale by density unless it is 0 or 1. */
3352 float density = densityCache[itemID];
3353 float4 filteredPixel = outputPixelCache[itemID];
3354 if (density!= 0.0f && density != 1.0)
3357 filteredPixel *= (float4)density;
3359 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (
ClampToQuantum(filteredPixel.x)
/* Write-out variant 2: also reads the accumulated gamma. */
3365 float density = densityCache[itemID];
3366 float gamma = gammaCache[itemID];
3367 float4 filteredPixel = outputPixelCache[itemID];
3369 if (density!= 0.0f && density != 1.0) {
3371 filteredPixel *= (float4)density;
3382 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
/* Separately named kernel entry point with the same parameter list and
   required work-group size (1 x 256 x 1) as ResizeVerticalFilter; its
   visible body forwards the arguments to ResizeVerticalFilter.
   NOTE(review): the forwarded argument list as shown skips the
   resizeFilterType / resizeWindowType pair - one line of the call is
   missing from this listing, so confirm what is passed there. */
3394 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
3395 void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const
unsigned int inputColumns, const
unsigned int inputRows, const
unsigned int matte
3396 , const
float yFactor, __global CLPixelType* filteredImage, const
unsigned int filteredColumns, const
unsigned int filteredRows
3397 , const
int resizeFilterType, const
int resizeWindowType
3398 , const __global
float* resizeFilterCubicCoefficients
3399 , const
float resizeFilterScale, const
float resizeFilterSupport, const
float resizeFilterWindowSupport, const
float resizeFilterBlur
3400 , __local CLPixelType* inputImageCache, const
int numCachedPixels, const
unsigned int pixelPerWorkgroup, const
unsigned int pixelChunkSize
3401 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
3402 ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
3403 ,yFactor,filteredImage,filteredColumns,filteredRows
3405 ,resizeFilterCubicCoefficients
3406 ,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
3407 ,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
3408 ,outputPixelCache,densityCache,gammaCache);
/* Column pass of the unsharp mask. A work-group covers a run of rows in
   column groupX = get_group_id(0). It loads the row-blurred data for those
   rows plus radius rows on either side into cachedData, copies the filter
   taps into cachedFilter, convolves, and then sharpens: where
   |2 * (input - blurred)| is below quantumThreshold the input pixel is
   kept, otherwise input + (input - blurred) * gain is used.
   NOTE(review): the declarations of i and quantumThreshold and the final
   store into filtered_im are not present in this listing; channel is not
   referenced in the visible lines. */
3425 __kernel
void UnsharpMaskBlurColumn(
const __global CLPixelType* inputImage,
3426 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3427 const unsigned int imageColumns,
const unsigned int imageRows,
3428 __local float4* cachedData, __local
float* cachedFilter,
3429 const ChannelType channel,
const __global
float *filter,
const unsigned int width,
3430 const float gain,
const float threshold)
3432 const unsigned int radius = (width-1)/2;
/* Rows needed by this work-group, including the radius-row halo. */
3435 const int groupX = get_group_id(0);
3436 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3437 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Interior groups: the whole span is inside the image, so use one strided
   copy (stride imageColumns). */
3439 if (groupStartY >= 0
3440 && groupStopY < imageRows) {
3441 event_t e = async_work_group_strided_copy(cachedData
3442 ,blurRowData+groupStartY*imageColumns+groupX
3443 ,groupStopY-groupStartY,imageColumns,0);
3444 wait_group_events(1,&e);
/* Border groups: load element by element with the row index clamped to
   the image. */
3447 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3448 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
3450 barrier(CLK_LOCAL_MEM_FENCE);
/* Copy the width filter taps into local memory. */
3453 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3454 wait_group_events(1,&e);
3458 const int cy = get_global_id(1);
3460 if (cy < imageRows) {
3461 float4 blurredPixel = (float4) 0.0f;
/* UFACTOR is the unroll factor; it defaults to 8 unless already defined
   when the kernel source is built. */
3465 \n #ifndef UFACTOR \n
3466 \n #define UFACTOR 8 \n
/* Main loop, UFACTOR taps per iteration. */
3469 for ( ; i+UFACTOR < width; )
3471 \n #pragma unroll UFACTOR \n
3472 for (
int j=0; j < UFACTOR; j++, i++)
3474 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Remaining taps. */
3478 for ( ; i < width; i++)
3480 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* outputPixel first holds the difference input - blurred. */
3486 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3487 float4 outputPixel = inputImagePixel - blurredPixel;
/* Per component: keep the input where the doubled difference is below the
   threshold, otherwise add the gained difference. */
3491 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3492 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
/* Sectioned variant of the unsharp-mask column pass. It follows the same
   steps as the non-sectioned kernel but first advances blurRowData by
   imageColumns * radius * section, loads border rows through
   ClampToCanvasWithHalo, and offsets inputImage and filtered_im by
   imageColumns * offsetRows before reading and writing.
   NOTE(review): the declarations of i and quantumThreshold and the final
   store into filtered_im are not present in this listing; channel is not
   referenced in the visible lines. */
3501 __kernel
void UnsharpMaskBlurColumnSection(
const __global CLPixelType* inputImage,
3502 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3503 const unsigned int imageColumns,
const unsigned int imageRows,
3504 __local float4* cachedData, __local
float* cachedFilter,
3505 const ChannelType channel,
const __global
float *filter,
const unsigned int width,
3506 const float gain,
const float threshold,
3507 const unsigned int offsetRows,
const unsigned int section)
3509 const unsigned int radius = (width-1)/2;
/* Rows needed by this work-group, including the radius-row halo. */
3512 const int groupX = get_group_id(0);
3513 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3514 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Skip radius rows at the start of the blurred buffer when section is
   non-zero (the product is 0 for section == 0). */
3517 blurRowData += imageColumns * radius * section;
/* Interior groups: one strided copy (stride imageColumns). */
3519 if (groupStartY >= 0
3520 && groupStopY < imageRows) {
3521 event_t e = async_work_group_strided_copy(cachedData
3522 ,blurRowData+groupStartY*imageColumns+groupX
3523 ,groupStopY-groupStartY,imageColumns,0);
3524 wait_group_events(1,&e);
/* Border groups: element-wise load; the row index is clamped to a range
   that is extended by radius on one side, chosen by section. */
3527 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3528 int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
3529 cachedData[i] = *(blurRowData + pos);
3531 barrier(CLK_LOCAL_MEM_FENCE);
/* Copy the width filter taps into local memory. */
3534 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3535 wait_group_events(1,&e);
3539 const int cy = get_global_id(1);
3541 if (cy < imageRows) {
3542 float4 blurredPixel = (float4) 0.0f;
/* UFACTOR is the unroll factor; it defaults to 8 unless already defined
   when the kernel source is built. */
3546 \n #ifndef UFACTOR \n
3547 \n #define UFACTOR 8 \n
/* Main loop, UFACTOR taps per iteration. */
3550 for ( ; i+UFACTOR < width; )
3552 \n #pragma unroll UFACTOR \n
3553 for (
int j=0; j < UFACTOR; j++, i++)
3555 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Remaining taps. */
3559 for ( ; i < width; i++)
3561 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Move both image pointers to the first row of this section. */
3568 inputImage += imageColumns * offsetRows;
3569 filtered_im += imageColumns * offsetRows;
/* outputPixel first holds the difference input - blurred. */
3571 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3572 float4 outputPixel = inputImagePixel - blurredPixel;
/* Per component: keep the input where the doubled difference is below the
   threshold, otherwise add the gained difference. */
3576 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3577 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
/* Single-kernel unsharp mask (or plain blur when justBlur is non-zero).
   Phase 1: each work-item convolves rows horizontally, reading im with
   reflected indices at the image borders, and stores the results in the
   local buffer pixels (row stride get_local_size(0)); the rows covered run
   from baseRow to endRow, i.e. the work-group rows plus radius on each
   side.
   Phase 2: after a local barrier, each work-item convolves vertically over
   pixels. When justBlur == 0 the result is sharpened: the source pixel is
   kept where |2 * diff| is below quantumThreshold, otherwise
   src + diff * gain is used.
   NOTE(review): the declarations of i, srcx (inside the unrolled loop) and
   quantumThreshold, the loop counter updates, and the final store into
   filtered_im are not present in this listing. */
3591 __kernel
void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
3592 __constant
float *filter,
3593 const unsigned int width,
3594 const unsigned int imageColumns,
const unsigned int imageRows,
3595 __local float4 *pixels,
3596 const float gain,
const float threshold,
const unsigned int justBlur)
3598 const int x = get_global_id(0);
3599 const int y = get_global_id(1);
3601 const unsigned int radius = (width - 1) / 2;
/* Row range staged in local memory, including the radius-row halo. */
3603 int row = y - radius;
3604 int baseRow = get_group_id(1) * get_local_size(1) - radius;
3605 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
3607 while (row < endRow) {
/* Reflect the row index at the top and bottom image borders. */
3608 int srcy = (row < 0) ? -row : row;
3609 srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
3611 float4 value = 0.0f;
3613 int ix = x - radius;
/* Horizontal taps, eight per iteration, with reflected column indices. */
3616 while (i + 7 < width) {
3617 for (
int j = 0; j < 8; ++j) {
3619 srcx = (srcx < 0) ? -srcx : srcx;
3620 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3621 value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
/* Remaining horizontal taps, one at a time. */
3628 int srcx = (ix < 0) ? -ix : ix;
3629 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3630 value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
/* Store the row result and move on by one work-group height. */
3634 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
3635 row += get_local_size(1);
/* All horizontal results must be visible before the vertical pass. */
3639 barrier(CLK_LOCAL_MEM_FENCE);
3642 const int px = get_local_id(0);
3643 const int py = get_local_id(1);
3644 const int prp = get_local_size(0);
3645 float4 value = (float4)(0.0f);
/* Vertical taps, eight per iteration.
   NOTE(review): all eight statements multiply by filter[i] while the row
   offset advances from i to i + 7, whereas the horizontal pass indexes
   filter[i + j]. This looks like filter[i + 1] ... filter[i + 7] was
   intended - confirm before relying on the result for non-constant
   filters. */
3648 while (i + 7 < width) {
3649 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3650 value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
3651 value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
3652 value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
3653 value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
3654 value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
3655 value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
3656 value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
/* Remaining vertical taps, one at a time. */
3660 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
/* Sharpening step, skipped when only the blur is requested. */
3664 if (justBlur == 0) {
3665 float4 srcPixel = convert_float4(im[x + y * imageColumns]);
3666 float4 diff = srcPixel - value;
3670 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3671 value = select(srcPixel + diff * gain, srcPixel, mask);
/* Only work-items inside the image write a result (store not shown). */
3674 if ((x < imageColumns) && (y < imageRows))
3681 #endif // MAGICKCORE_OPENCL_SUPPORT
3683 #if defined(__cplusplus) || defined(c_plusplus)
3687 #endif // _MAGICKCORE_ACCELERATE_PRIVATE_H
Definition: composite.h:91
Definition: composite.h:94
Definition: composite.h:65
Definition: colorspace.h:44
Definition: resize-private.h:31
Definition: colorspace.h:36
Definition: resize-private.h:37
Definition: statistic.h:116
Definition: resize-private.h:33
Definition: magick-type.h:198
Definition: composite.h:75
Definition: colorspace.h:40
static void MagickPixelCompositeBlend(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:138
Definition: composite.h:31
Definition: composite.h:93
Definition: colorspace.h:45
Definition: colorspace.h:33
Definition: composite.h:80
Definition: composite.h:33
Definition: resize-private.h:40
Definition: composite.h:90
Definition: resize-private.h:29
static MagickRealType ColorDodge(const MagickRealType Sca, const MagickRealType Sa, const MagickRealType Dca, const MagickRealType Da)
Definition: composite.c:293
PixelIntensityMethod
Definition: pixel.h:67
Definition: magick-type.h:187
Definition: composite.h:95
Definition: colorspace.h:59
Definition: magick-type.h:193
Definition: composite.h:59
Definition: composite.h:89
Definition: magick-type.h:182
Definition: composite.h:27
Definition: colorspace.h:41
Definition: colorspace.h:37
static MagickRealType RoundToUnity(const MagickRealType value)
Definition: composite-private.h:33
Definition: composite.h:35
Definition: composite.h:87
#define MagickPI
Definition: image-private.h:36
Definition: colorspace.h:58
Definition: colorspace.h:50
static MagickRealType Hanning(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:287
Definition: colorspace.h:47
float MagickRealType
Definition: magick-type.h:76
Definition: statistic.h:115
Definition: colorspace.h:31
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
Definition: composite.h:53
Definition: colorspace.h:35
Definition: resize-private.h:38
#define MagickEpsilon
Definition: magick-type.h:139
MagickExport void ConvertRGBToHSL(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1127
Definition: magick-type.h:188
Definition: colorspace.h:48
Definition: statistic.h:117
Definition: magick-type.h:200
NoiseType
Definition: fx.h:27
Definition: colorspace.h:52
Definition: composite.h:47
static MagickRealType Hamming(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:301
Definition: resize-private.h:41
Definition: composite.h:73
Definition: composite.h:29
Definition: composite.h:72
Definition: composite.h:42
Definition: colorspace.h:43
Definition: composite.h:97
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:3550
Definition: colorspace.h:34
Definition: colorspace.h:57
Definition: resize-private.h:30
static double PerceptibleReciprocal(const double x)
Definition: pixel-accessor.h:124
Definition: composite.h:54
#define GetPixelAlpha(pixel)
Definition: pixel-accessor.h:36
Definition: composite.h:38
Definition: composite.h:68
Definition: composite.h:96
Definition: magick-type.h:184
Definition: composite.h:71
Definition: resize-private.h:32
Definition: composite.h:55
Definition: composite.h:56
Definition: composite.h:69
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:941
Definition: colorspace.h:38
Definition: composite.h:86
Definition: resize-private.h:36
Definition: colorspace.h:30
#define SigmaMultiplicativeGaussian
Definition: composite.h:49
Definition: composite.h:44
MagickExport void ConvertRGBToHSB(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *brightness)
Definition: gem.c:994
Definition: magick-type.h:186
static void Contrast(const int sign, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:913
Definition: magick-type.h:201
Definition: composite.h:46
Definition: statistic.h:113
Definition: composite.h:28
Definition: magick-type.h:181
Definition: magick-type.h:190
Definition: colorspace.h:54
Definition: magick-type.h:189
Definition: resize-private.h:39
Definition: composite.h:78
Definition: resize-private.h:34
#define QuantumScale
Definition: magick-type.h:142
Definition: colorspace.h:55
MagickExport double GetPseudoRandomValue(RandomInfo *random_info)
Definition: random.c:612
Definition: composite.h:62
Definition: colorspace.h:39
#define MaxMap
Definition: magick-type.h:70
Definition: magick-type.h:197
Definition: composite.h:98
Definition: composite.h:39
static void CompositeColorDodge(const MagickPixelPacket *p, const MagickPixelPacket *q, MagickPixelPacket *composite)
Definition: composite.c:330
MagickExport void ConvertHSBToRGB(const double hue, const double saturation, const double brightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:284
Definition: composite.h:45
ChannelType
Definition: magick-type.h:177
Definition: composite.h:70
Definition: colorspace.h:46
Definition: resize-private.h:28
Definition: composite.h:81
Definition: composite.h:41
Definition: composite.h:52
Definition: colorspace.h:49
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:460
Definition: composite.h:77
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:87
Definition: colorspace.h:53
Definition: composite.h:61
Definition: magick-type.h:183
static void MagickPixelCompositePlus(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:111
Definition: composite.h:76
Definition: magick-type.h:179
Definition: colorspace.h:28
Definition: resize-private.h:42
Definition: composite.h:50
Definition: composite.h:36
Definition: composite.h:43
MagickExport MagickRealType GetPixelIntensity(const Image *image, const PixelPacket *magick_restrict pixel)
Definition: pixel.c:2285
static MagickRealType Sinc(const MagickRealType, const ResizeFilter *)
Definition: composite.h:37
Definition: composite.h:60
Definition: statistic.h:114
ResizeWeightingFunctionType
Definition: resize-private.h:25
static MagickRealType Blackman(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:148
Definition: colorspace.h:56
ColorspaceType
Definition: colorspace.h:25
Definition: composite.h:32
Definition: colorspace.h:29
Definition: composite.h:88
Definition: colorspace.h:42
Definition: composite.h:48
Definition: composite.h:64
Definition: magick-type.h:185
Definition: colorspace.h:51
CompositeOperator
Definition: composite.h:25
Definition: composite.h:79
Definition: magick-type.h:192
Definition: colorspace.h:32
Definition: composite.h:66
Definition: composite.h:30
Definition: colorspace.h:60
Definition: magick-type.h:180
Definition: composite.h:63
Definition: composite.h:58
Definition: composite.h:92
Definition: magick-type.h:199
Definition: composite.h:34
static MagickRealType CubicBC(const MagickRealType x, const ResizeFilter *resize_filter)
Definition: resize.c:210
Definition: resize-private.h:27
Definition: composite.h:74
Definition: colorspace.h:27
MagickFunction
Definition: statistic.h:111
Definition: composite.h:40
Definition: composite.h:67
Definition: resize-private.h:35
#define QuantumRange
Definition: magick-type.h:94
static MagickRealType Triangle(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:514
Definition: composite.h:51
Definition: magick-type.h:191
Definition: composite.h:57