MagickCore  6.9.2
accelerate-private.h
Go to the documentation of this file.
1 /*
2  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization
3  dedicated to making software imaging solutions freely available.
4 
5  You may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.imagemagick.org/script/license.php
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  MagickCore private methods for accelerated functions.
17 */
18 
19 #ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
20 #define _MAGICKCORE_ACCELERATE_PRIVATE_H
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
27 
28 /*
29  Define declarations.
30 */
/*
  Helpers for assembling OpenCL program text out of adjacent C string
  literals.  Each OPENCL_* macro expands to a newline-delimited preprocessor
  directive ("#define", "#elif", "#else", "#endif", "#if") followed by its
  stringized arguments; the directive keyword is spelled as two adjacent
  literals ("#" "define") that the compiler concatenates.
*/
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
/* Stringizes its arguments as written and appends a newline. */
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
37 
38 /*
39  Typedef declarations.
40 */
41 
42 typedef struct _FloatPixelPacket
43 {
44 #ifdef MAGICK_PIXEL_RGBA
46  red,
47  green,
48  blue,
49  opacity;
50 #endif
51 #ifdef MAGICK_PIXEL_BGRA
53  blue,
54  green,
55  red,
56  opacity;
57 #endif
58 } FloatPixelPacket;
59 
60 const char* accelerateKernels =
61 
62 /*
63  Define declarations.
64 */
/*
  Macros emitted into the OpenCL program text.  GetPixelAlpha() yields
  QuantumRange minus the .w component of a pixel.  The Sigma and Tau macros
  scale an identifier named `attenuate`, so they can only be expanded where
  such an identifier is in scope (mwcGenerateDifferentialNoise() has a
  parameter of that name).
*/
65  OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
66  OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
67  OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
68  OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
69  OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
70  OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
71  OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
72  OPENCL_DEFINE(SigmaRandom, (attenuate))
73  OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
74 
75 /*
76  Typedef declarations.
77 */
/* Colorspace identifiers emitted into the OpenCL program text. */
78  STRINGIFY(
79  typedef enum
80  {
82  RGBColorspace, /* Linear RGB colorspace */
83  GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */
93  CMYKColorspace, /* negated linear RGB with black separated */
94  sRGBColorspace, /* Default: non-linear sRGB colorspace */
103  CMYColorspace, /* negated linear RGB colorspace */
106  LCHColorspace, /* alias for LCHuv */
108  LCHabColorspace, /* Cylindrical (Polar) Lab */
109  LCHuvColorspace, /* Cylindrical (Polar) Luv */
112  HSVColorspace, /* alias for HSB */
115  } ColorspaceType;
116  )
117 
118  STRINGIFY(
119  typedef enum
120  {
176  /* These are new operators, added after the above was last sorted.
177  * The list should be re-sorted only when a new library version is
178  * created.
179  */
194  )
195 
196  STRINGIFY(
197  typedef enum
198  {
204  } MagickFunction;
205  )
206 
207  STRINGIFY(
208  typedef enum
209  {
211  UniformNoise,
214  ImpulseNoise,
216  PoissonNoise,
218  } NoiseType;
219  )
220 
221  STRINGIFY(
222  typedef enum
223  {
235  )
236 
237  STRINGIFY(
238  typedef enum {
256  )
257 
258  STRINGIFY(
259  typedef enum
260  {
262  RedChannel = 0x0001,
263  GrayChannel = 0x0001,
264  CyanChannel = 0x0001,
265  GreenChannel = 0x0002,
266  MagentaChannel = 0x0002,
267  BlueChannel = 0x0004,
268  YellowChannel = 0x0004,
269  AlphaChannel = 0x0008,
270  OpacityChannel = 0x0008,
271  MatteChannel = 0x0008, /* deprecated */
272  BlackChannel = 0x0020,
273  IndexChannel = 0x0020,
274  CompositeChannels = 0x002F,
275  AllChannels = 0x7ffffff,
276  /*
277  Special purpose channel types.
278  */
279  TrueAlphaChannel = 0x0040, /* extract actual alpha channel from opacity */
280  RGBChannels = 0x0080, /* set alpha from grayscale mask in RGB */
281  GrayChannels = 0x0080,
282  SyncChannels = 0x0100, /* channels should be modified equally */
284  } ChannelType;
285  )
286 
287 /*
288  Helper functions.
289 */
290 
291 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
292 
293  STRINGIFY(
294  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
295  {
296  return((CLQuantum) value);
297  }
298  )
299 
300 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
301 
302  STRINGIFY(
303  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
304  {
305  return((CLQuantum) (257.0f*value));
306  }
307  )
308 
309 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
310 
311  STRINGIFY(
312  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
313  {
314  return((CLQuantum) (16843009.0*value));
315  }
316  )
317 
318 OPENCL_ENDIF()
319 
320 STRINGIFY(
321  inline int ClampToCanvas(const int offset, const int range)
322  {
323  return clamp(offset, (int)0, range - 1);
324  }
325  )
326 
327  STRINGIFY(
328  inline int ClampToCanvasWithHalo(const int offset, const int range, const int edge, const int section)
329  {
330  return clamp(offset, section ? (int)(0 - edge) : (int)0, section ? (range - 1) : (range - 1 + edge));
331  }
332  )
333 
334  STRINGIFY(
335  inline CLQuantum ClampToQuantum(const float value)
336  {
337  return (CLQuantum)(clamp(value, 0.0f, (float)QuantumRange) + 0.5f);
338  }
339  )
340 
341  STRINGIFY(
342  inline uint ScaleQuantumToMap(CLQuantum value)
343  {
344  if (value >= (CLQuantum)MaxMap)
345  return ((uint)MaxMap);
346  else
347  return ((uint)value);
348  }
349  )
350 
351  STRINGIFY(
352  inline float PerceptibleReciprocal(const float x)
353  {
354  float sign = x < (float) 0.0 ? (float)-1.0 : (float) 1.0;
355  return((sign*x) >= MagickEpsilon ? (float) 1.0 / x : sign*((float) 1.0 / MagickEpsilon));
356  }
357  )
358 
359  STRINGIFY(
360  inline float RoundToUnity(const float value)
361  {
362  return clamp(value, 0.0f, 1.0f);
363  }
364  )
365 
366  STRINGIFY(
367 
368  inline CLQuantum getBlue(CLPixelType p) { return p.x; }
369  inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
370  inline float getBlueF4(float4 p) { return p.x; }
371  inline void setBlueF4(float4* p, float value) { (*p).x = value; }
372 
373  inline CLQuantum getGreen(CLPixelType p) { return p.y; }
374  inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
375  inline float getGreenF4(float4 p) { return p.y; }
376  inline void setGreenF4(float4* p, float value) { (*p).y = value; }
377 
378  inline CLQuantum getRed(CLPixelType p) { return p.z; }
379  inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
380  inline float getRedF4(float4 p) { return p.z; }
381  inline void setRedF4(float4* p, float value) { (*p).z = value; }
382 
383  inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
384  inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
385  inline float getOpacityF4(float4 p) { return p.w; }
386  inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
387 
388  inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
389 
390  inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
391  {
392  float red = getRed(p);
393  float green = getGreen(p);
394  float blue = getBlue(p);
395 
396  float intensity;
397 
398  if (colorspace == GRAYColorspace)
399  return red;
400 
401  switch (method)
402  {
404  {
405  intensity = (red + green + blue) / 3.0;
406  break;
407  }
409  {
410  intensity = max(max(red, green), blue);
411  break;
412  }
414  {
415  intensity = (min(min(red, green), blue) +
416  max(max(red, green), blue)) / 2.0;
417  break;
418  }
420  {
421  intensity = (float)(((float)red*red + green*green + blue*blue) /
422  (3.0*QuantumRange));
423  break;
424  }
426  {
427  /*
428  if (image->colorspace == RGBColorspace)
429  {
430  red=EncodePixelGamma(red);
431  green=EncodePixelGamma(green);
432  blue=EncodePixelGamma(blue);
433  }
434  */
435  intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
436  break;
437  }
439  {
440  /*
441  if (image->colorspace == sRGBColorspace)
442  {
443  red=DecodePixelGamma(red);
444  green=DecodePixelGamma(green);
445  blue=DecodePixelGamma(blue);
446  }
447  */
448  intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
449  break;
450  }
452  default:
453  {
454  /*
455  if (image->colorspace == RGBColorspace)
456  {
457  red=EncodePixelGamma(red);
458  green=EncodePixelGamma(green);
459  blue=EncodePixelGamma(blue);
460  }
461  */
462  intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
463  break;
464  }
466  {
467  /*
468  if (image->colorspace == sRGBColorspace)
469  {
470  red=DecodePixelGamma(red);
471  green=DecodePixelGamma(green);
472  blue=DecodePixelGamma(blue);
473  }
474  */
475  intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
476  break;
477  }
479  {
480  intensity = (float)(sqrt((float)red*red + green*green + blue*blue) /
481  sqrt(3.0));
482  break;
483  }
484  }
485 
486  return intensity;
487 
488  }
489  )
490 
491 /*
492 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
493 % %
494 % %
495 % %
496 % A d d N o i s e %
497 % %
498 % %
499 % %
500 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
501 */
502 
503 STRINGIFY(
504 
505 /*
506 Part of MWC64X by David Thomas, dt10@imperial.ac.uk
507 This is provided under BSD, full license is with the main package.
508 See http://www.doc.ic.ac.uk/~dt10/research
509 */
510 
511 // Pre: a<M, b<M
512 // Post: r=(a+b) mod M
513 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
514 {
515  ulong v=a+b;
516  //if( (v>=M) || (v<a) )
517  if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
518  v=v-M;
519  return v;
520 }
521 
522 // Pre: a<M,b<M
523 // Post: r=(a*b) mod M
524 // This could be done more efficently, but it is portable, and should
525 // be easy to understand. It can be replaced with any of the better
526 // modular multiplication algorithms (for example if you know you have
527 // double precision available or something).
528 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
529 {
530  ulong r=0;
531  while(a!=0){
532  if(a&1)
533  r=MWC_AddMod64(r,b,M);
534  b=MWC_AddMod64(b,b,M);
535  a=a>>1;
536  }
537  return r;
538 }
539 
540 
541 // Pre: a<M, e>=0
542 // Post: r=(a^b) mod M
543 // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
544 // most architectures
545 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
546 {
547  ulong sqr=a, acc=1;
548  while(e!=0){
549  if(e&1)
550  acc=MWC_MulMod64(acc,sqr,M);
551  sqr=MWC_MulMod64(sqr,sqr,M);
552  e=e>>1;
553  }
554  return acc;
555 }
556 
557 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
558 {
559  ulong m=MWC_PowMod64(A, distance, M);
560  ulong x=curr.x*(ulong)A+curr.y;
561  x=MWC_MulMod64(x, m, M);
562  return (uint2)((uint)(x/A), (uint)(x%A));
563 }
564 
565 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
566 {
567  // This is an arbitrary constant for starting LCG jumping from. I didn't
568  // want to start from 1, as then you end up with the two or three first values
569  // being a bit poor in ones - once you've decided that, one constant is as
570  // good as any another. There is no deep mathematical reason for it, I just
571  // generated a random number.
572  enum{ MWC_BASEID = 4077358422479273989UL };
573 
574  ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
575  ulong m=MWC_PowMod64(A, dist, M);
576 
577  ulong x=MWC_MulMod64(MWC_BASEID, m, M);
578  return (uint2)((uint)(x/A), (uint)(x%A));
579 }
580 
582 typedef struct{ uint x; uint c; } mwc64x_state_t;
583 
584 enum{ MWC64X_A = 4294883355U };
585 enum{ MWC64X_M = 18446383549859758079UL };
586 
587 void MWC64X_Step(mwc64x_state_t *s)
588 {
589  uint X=s->x, C=s->c;
590 
591  uint Xn=MWC64X_A*X+C;
592  uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
593  uint Cn=mad_hi(MWC64X_A,X,carry);
594 
595  s->x=Xn;
596  s->c=Cn;
597 }
598 
599 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
600 {
601  uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
602  s->x=tmp.x;
603  s->c=tmp.y;
604 }
605 
606 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
607 {
608  uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
609  s->x=tmp.x;
610  s->c=tmp.y;
611 }
612 
614 uint MWC64X_NextUint(mwc64x_state_t *s)
615 {
616  uint res=s->x ^ s->c;
617  MWC64X_Step(s);
618  return res;
619 }
620 
621 //
622 // End of MWC64X excerpt
623 //
624 
625  float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
626  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
627  }
628 
629 
630  float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
631 
632  float
633  alpha,
634  beta,
635  noise,
636  sigma;
637 
638  noise = 0.0f;
639  alpha=mwcReadPseudoRandomValue(r);
640  switch(noise_type) {
641  case UniformNoise:
642  default:
643  {
644  noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
645  break;
646  }
647  case GaussianNoise:
648  {
649  float
650  gamma,
651  tau;
652 
653  if (alpha == 0.0f)
654  alpha=1.0f;
655  beta=mwcReadPseudoRandomValue(r);
656  gamma=sqrt(-2.0f*log(alpha));
657  sigma=gamma*cospi((2.0f*beta));
658  tau=gamma*sinpi((2.0f*beta));
659  noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
661  break;
662  }
663 
664 
665  case ImpulseNoise:
666  {
667  if (alpha < (SigmaImpulse/2.0f))
668  noise=0.0f;
669  else
670  if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
671  noise=(float)QuantumRange;
672  else
673  noise=(float)pixel;
674  break;
675  }
676  case LaplacianNoise:
677  {
678  if (alpha <= 0.5f)
679  {
680  if (alpha <= MagickEpsilon)
681  noise=(float) (pixel-QuantumRange);
682  else
683  noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
684  0.5f);
685  break;
686  }
687  beta=1.0f-alpha;
688  if (beta <= (0.5f*MagickEpsilon))
689  noise=(float) (pixel+QuantumRange);
690  else
691  noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
692  break;
693  }
695  {
696  sigma=1.0f;
697  if (alpha > MagickEpsilon)
698  sigma=sqrt(-2.0f*log(alpha));
699  beta=mwcReadPseudoRandomValue(r);
700  noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
701  cospi((float) (2.0f*beta))/2.0f);
702  break;
703  }
704  case PoissonNoise:
705  {
706  float
707  poisson;
708  unsigned int i;
709  poisson=exp(-SigmaPoisson*QuantumScale*pixel);
710  for (i=0; alpha > poisson; i++)
711  {
712  beta=mwcReadPseudoRandomValue(r);
713  alpha*=beta;
714  }
715  noise=(float) (QuantumRange*i/SigmaPoisson);
716  break;
717  }
718  case RandomNoise:
719  {
720  noise=(float) (QuantumRange*SigmaRandom*alpha);
721  break;
722  }
723 
724  };
725  return noise;
726  }
727 
728  __kernel
729  void AddNoise(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
730  ,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
731  ,const ChannelType channel
732  ,const NoiseType noise_type, const float attenuate
733  ,const unsigned int seed0, const unsigned int seed1
734  ,const unsigned int numRandomNumbersPerPixel) {
735 
736  mwc64x_state_t rng;
737  rng.x = seed0;
738  rng.c = seed1;
739 
740  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
741  uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
742 
743  MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
744 
745  uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0); // pixel to process
746 
747  uint count = pixelsPerWorkItem;
748 
749  while (count > 0) {
750  if (pos < inputPixelCount) {
751  CLPixelType p = inputImage[pos];
752 
753  if ((channel&RedChannel)!=0) {
754  setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
755  }
756 
757  if ((channel&GreenChannel)!=0) {
758  setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
759  }
760 
761  if ((channel&BlueChannel)!=0) {
762  setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
763  }
764 
765  if ((channel & OpacityChannel) != 0) {
766  setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
767  }
768 
769  filteredImage[pos] = p;
770  //filteredImage[pos] = (CLPixelType)(MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, 255);
771  }
772  pos += get_local_size(0);
773  --count;
774  }
775  }
776  )
777 
778 /*
779 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
780 % %
781 % %
782 % %
783 % B l u r %
784 % %
785 % %
786 % %
787 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
788 */
789 
790  STRINGIFY(
791  /*
792  Reduce image noise and reduce detail levels by row
793  im: input pixels filtered_in filtered_im: output pixels
794  filter : convolve kernel width: convolve kernel size
795  channel : define which channel is blured
796  is_RGBA_BGRA : define the input is RGBA or BGRA
797  */
798  __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
799  const ChannelType channel, __constant float *filter,
800  const unsigned int width,
801  const unsigned int imageColumns, const unsigned int imageRows,
802  __local CLPixelType *temp)
803  {
804  const int x = get_global_id(0);
805  const int y = get_global_id(1);
806 
807  const int columns = imageColumns;
808 
809  const unsigned int radius = (width-1)/2;
810  const int wsize = get_local_size(0);
811  const unsigned int loadSize = wsize+width;
812 
813  //load chunk only for now
814  //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
815  //wait_group_events(1,&e);
816 
817  //parallel load and clamp
818  /*
819  int count = 0;
820  for (int i=0; i < loadSize; i=i+wsize)
821  {
822  int currentX = x + wsize*(count++);
823 
824  int localId = get_local_id(0);
825 
826  if ((localId+i) > loadSize)
827  break;
828 
829  temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
830 
831  if (y==0 && get_group_id(0) == 0)
832  {
833  printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
834  }
835  }
836  */
837 
838  //group coordinate
839  const int groupX=get_local_size(0)*get_group_id(0);
840  const int groupY=get_local_size(1)*get_group_id(1);
841 
842  //parallel load and clamp
843  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
844  {
845  //int cx = ClampToCanvas(groupX+i, columns);
846  temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
847 
848  /*if (0 && y==0 && get_group_id(1) == 0)
849  {
850  printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
851  }*/
852  }
853 
854  // barrier
855  barrier(CLK_LOCAL_MEM_FENCE);
856 
857  // only do the work if this is not a patched item
858  if (get_global_id(0) < columns)
859  {
860  // compute
861  float4 result = (float4) 0;
862 
863  int i = 0;
864 
865  \n #ifndef UFACTOR \n
866  \n #define UFACTOR 8 \n
867  \n #endif \n
868 
869  for ( ; i+UFACTOR < width; )
870  {
871  \n #pragma unroll UFACTOR\n
872  for (int j=0; j < UFACTOR; j++, i++)
873  {
874  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
875  }
876  }
877 
878  for ( ; i < width; i++)
879  {
880  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
881  }
882 
883  result.x = ClampToQuantum(result.x);
884  result.y = ClampToQuantum(result.y);
885  result.z = ClampToQuantum(result.z);
886  result.w = ClampToQuantum(result.w);
887 
888  // write back to global
889  filtered_im[y*columns+x] = result;
890  }
891  }
892  )
893 
894  STRINGIFY(
895  /*
896  Reduce image noise and reduce detail levels by row
897  im: input pixels filtered_in filtered_im: output pixels
898  filter : convolve kernel width: convolve kernel size
899  channel : define which channel is blured
900  is_RGBA_BGRA : define the input is RGBA or BGRA
901  */
902  __kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
903  const ChannelType channel, __constant float *filter,
904  const unsigned int width,
905  const unsigned int imageColumns, const unsigned int imageRows,
906  __local CLPixelType *temp,
907  const unsigned int offsetRows, const unsigned int section)
908  {
909  const int x = get_global_id(0);
910  const int y = get_global_id(1);
911 
912  const int columns = imageColumns;
913 
914  const unsigned int radius = (width-1)/2;
915  const int wsize = get_local_size(0);
916  const unsigned int loadSize = wsize+width;
917 
918  //group coordinate
919  const int groupX=get_local_size(0)*get_group_id(0);
920  const int groupY=get_local_size(1)*get_group_id(1);
921 
922  //offset the input data, assuming section is 0, 1
923  im += imageColumns * (offsetRows - radius * section);
924 
925  //parallel load and clamp
926  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
927  {
928  //int cx = ClampToCanvas(groupX+i, columns);
929  temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
930 
931  /*if (0 && y==0 && get_group_id(1) == 0)
932  {
933  printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
934  }*/
935  }
936 
937  // barrier
938  barrier(CLK_LOCAL_MEM_FENCE);
939 
940  // only do the work if this is not a patched item
941  if (get_global_id(0) < columns)
942  {
943  // compute
944  float4 result = (float4) 0;
945 
946  int i = 0;
947 
948  \n #ifndef UFACTOR \n
949  \n #define UFACTOR 8 \n
950  \n #endif \n
951 
952  for ( ; i+UFACTOR < width; )
953  {
954  \n #pragma unroll UFACTOR\n
955  for (int j=0; j < UFACTOR; j++, i++)
956  {
957  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
958  }
959  }
960 
961  for ( ; i < width; i++)
962  {
963  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
964  }
965 
966  result.x = ClampToQuantum(result.x);
967  result.y = ClampToQuantum(result.y);
968  result.z = ClampToQuantum(result.z);
969  result.w = ClampToQuantum(result.w);
970 
971  // write back to global
972  filtered_im[y*columns+x] = result;
973  }
974 
975  }
976  )
977 
978  STRINGIFY(
979  /*
980  Reduce image noise and reduce detail levels by line
981  im: input pixels filtered_in filtered_im: output pixels
982  filter : convolve kernel width: convolve kernel size
983  channel : define which channel is blured\
984  is_RGBA_BGRA : define the input is RGBA or BGRA
985  */
986  __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
987  const ChannelType channel, __constant float *filter,
988  const unsigned int width,
989  const unsigned int imageColumns, const unsigned int imageRows,
990  __local float4 *temp)
991  {
992  const int x = get_global_id(0);
993  const int y = get_global_id(1);
994 
995  //const int columns = get_global_size(0);
996  //const int rows = get_global_size(1);
997  const int columns = imageColumns;
998  const int rows = imageRows;
999 
1000  unsigned int radius = (width-1)/2;
1001  const int wsize = get_local_size(1);
1002  const unsigned int loadSize = wsize+width;
1003 
1004  //group coordinate
1005  const int groupX=get_local_size(0)*get_group_id(0);
1006  const int groupY=get_local_size(1)*get_group_id(1);
1007  //notice that get_local_size(0) is 1, so
1008  //groupX=get_group_id(0);
1009 
1010  //parallel load and clamp
1011  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1012  {
1013  temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
1014  }
1015 
1016  // barrier
1017  barrier(CLK_LOCAL_MEM_FENCE);
1018 
1019  // only do the work if this is not a patched item
1020  if (get_global_id(1) < rows)
1021  {
1022  // compute
1023  float4 result = (float4) 0;
1024 
1025  int i = 0;
1026 
1027  \n #ifndef UFACTOR \n
1028  \n #define UFACTOR 8 \n
1029  \n #endif \n
1030 
1031  for ( ; i+UFACTOR < width; )
1032  {
1033  \n #pragma unroll UFACTOR \n
1034  for (int j=0; j < UFACTOR; j++, i++)
1035  {
1036  result+=filter[i]*temp[i+get_local_id(1)];
1037  }
1038  }
1039 
1040  for ( ; i < width; i++)
1041  {
1042  result+=filter[i]*temp[i+get_local_id(1)];
1043  }
1044 
1045  result.x = ClampToQuantum(result.x);
1046  result.y = ClampToQuantum(result.y);
1047  result.z = ClampToQuantum(result.z);
1048  result.w = ClampToQuantum(result.w);
1049 
1050  // write back to global
1051  filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1052  }
1053 
1054  }
1055  )
1056 
1057 
1058  STRINGIFY(
1059  /*
1060  Reduce image noise and reduce detail levels by line
1061  im: input pixels filtered_in filtered_im: output pixels
1062  filter : convolve kernel width: convolve kernel size
1063  channel : define which channel is blured\
1064  is_RGBA_BGRA : define the input is RGBA or BGRA
1065  */
1066  __kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1067  const ChannelType channel, __constant float *filter,
1068  const unsigned int width,
1069  const unsigned int imageColumns, const unsigned int imageRows,
1070  __local float4 *temp,
1071  const unsigned int offsetRows, const unsigned int section)
1072  {
1073  const int x = get_global_id(0);
1074  const int y = get_global_id(1);
1075 
1076  //const int columns = get_global_size(0);
1077  //const int rows = get_global_size(1);
1078  const int columns = imageColumns;
1079  const int rows = imageRows;
1080 
1081  unsigned int radius = (width-1)/2;
1082  const int wsize = get_local_size(1);
1083  const unsigned int loadSize = wsize+width;
1084 
1085  //group coordinate
1086  const int groupX=get_local_size(0)*get_group_id(0);
1087  const int groupY=get_local_size(1)*get_group_id(1);
1088  //notice that get_local_size(0) is 1, so
1089  //groupX=get_group_id(0);
1090 
1091  // offset the input data
1092  blurRowData += imageColumns * radius * section;
1093 
1094  //parallel load and clamp
1095  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1096  {
1097  int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
1098  temp[i] = *(blurRowData+pos);
1099  }
1100 
1101  // barrier
1102  barrier(CLK_LOCAL_MEM_FENCE);
1103 
1104  // only do the work if this is not a patched item
1105  if (get_global_id(1) < rows)
1106  {
1107  // compute
1108  float4 result = (float4) 0;
1109 
1110  int i = 0;
1111 
1112  \n #ifndef UFACTOR \n
1113  \n #define UFACTOR 8 \n
1114  \n #endif \n
1115 
1116  for ( ; i+UFACTOR < width; )
1117  {
1118  \n #pragma unroll UFACTOR \n
1119  for (int j=0; j < UFACTOR; j++, i++)
1120  {
1121  result+=filter[i]*temp[i+get_local_id(1)];
1122  }
1123  }
1124  for ( ; i < width; i++)
1125  {
1126  result+=filter[i]*temp[i+get_local_id(1)];
1127  }
1128 
1129  result.x = ClampToQuantum(result.x);
1130  result.y = ClampToQuantum(result.y);
1131  result.z = ClampToQuantum(result.z);
1132  result.w = ClampToQuantum(result.w);
1133 
1134  // offset the output data
1135  filtered_im += imageColumns * offsetRows;
1136 
1137  // write back to global
1138  filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1139  }
1140 
1141  }
1142  )
1143 
1144 /*
1145 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1146 % %
1147 % %
1148 % %
1149 % C o m p o s i t e %
1150 % %
1151 % %
1152 % %
1153 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1154 */
1155 
1156  STRINGIFY(
/*
  ColorDodge() blends one premultiplied channel using the Oct 2004 SVG
  specification formula.

    Sca, Sa   source channel (premultiplied) and source alpha
    Dca, Da   destination channel (premultiplied) and destination alpha

  Later SVG drafts and a first-principles derivation, f(Sc,Dc) = Dc/(1-Sc),
  were evaluated as alternatives; the 2004 form below is the one in use.
*/
inline float ColorDodge(const float Sca,
  const float Sa,const float Dca,const float Da)
{
  if ((Sca*Da+Dca*Sa) >= Sa*Da)
  {
    /* saturated case */
    return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
  }
  else
  {
    return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
  }
}
1197 
1198  inline void CompositeColorDodge(const float4 *p,
1199  const float4 *q,float4 *composite) {
1200 
1201  float
1202  Da,
1203  gamma,
1204  Sa;
1205 
1206  Sa=1.0f-QuantumScale*getOpacityF4(*p); /* simplify and speed up equations */
1207  Da=1.0f-QuantumScale*getOpacityF4(*q);
1208  gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */
1209  setOpacityF4(composite, QuantumRange*(1.0-gamma));
1210  gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
1211  setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
1212  getRedF4(*q)*Da,Da));
1213  setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
1214  getGreenF4(*q)*Da,Da));
1215  setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
1216  getBlueF4(*q)*Da,Da));
1217  }
1218  )
1219 
1220  STRINGIFY(
1221  inline void MagickPixelCompositePlus(const float4 *p,
1222  const float alpha,const float4 *q,
1223  const float beta,float4 *composite)
1224  {
1225  float
1226  gamma;
1227 
1228  float
1229  Da,
1230  Sa;
1231  /*
1232  Add two pixels with the given opacities.
1233  */
1234  Sa=1.0-QuantumScale*alpha;
1235  Da=1.0-QuantumScale*beta;
1236  gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */
1237  setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
1238  gamma=PerceptibleReciprocal(gamma);
1239  setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
1240  setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
1241  setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
1242  }
1243  )
1244 
1245  STRINGIFY(
1246  inline void MagickPixelCompositeBlend(const float4 *p,
1247  const float alpha,const float4 *q,
1248  const float beta,float4 *composite)
1249  {
1250  MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
1251  (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
1252  (QuantumRange-getOpacityF4(*q))),composite);
1253  }
1254  )
1255 
1256  STRINGIFY(
1257  __kernel
1258  void Composite(__global CLPixelType *image,
1259  const unsigned int imageWidth,
1260  const unsigned int imageHeight,
1261  const unsigned int imageMatte,
1262  const __global CLPixelType *compositeImage,
1263  const unsigned int compositeWidth,
1264  const unsigned int compositeHeight,
1265  const unsigned int compositeMatte,
1266  const unsigned int compose,
1267  const ChannelType channel,
1268  const float destination_dissolve,
1269  const float source_dissolve) {
1270 
1271  uint2 index;
1272  index.x = get_global_id(0);
1273  index.y = get_global_id(1);
1274 
1275 
1276  if (index.x >= imageWidth
1277  || index.y >= imageHeight) {
1278  return;
1279  }
1280  const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
1281  float4 destination;
1282  setRedF4(&destination,getRed(inputPixel));
1283  setGreenF4(&destination,getGreen(inputPixel));
1284  setBlueF4(&destination,getBlue(inputPixel));
1285 
1286 
1287  const CLPixelType compositePixel
1288  = compositeImage[index.y*imageWidth+index.x];
1289  float4 source;
1290  setRedF4(&source,getRed(compositePixel));
1291  setGreenF4(&source,getGreen(compositePixel));
1292  setBlueF4(&source,getBlue(compositePixel));
1293 
1294  if (imageMatte != 0) {
1295  setOpacityF4(&destination,getOpacity(inputPixel));
1296  }
1297  else {
1298  setOpacityF4(&destination,0.0f);
1299  }
1300 
1301  if (compositeMatte != 0) {
1302  setOpacityF4(&source,getOpacity(compositePixel));
1303  }
1304  else {
1305  setOpacityF4(&source,0.0f);
1306  }
1307 
1308  float4 composite=destination;
1309 
1310  CompositeOperator op = (CompositeOperator)compose;
1311  switch (op) {
1312  case ColorDodgeCompositeOp:
1313  CompositeColorDodge(&source,&destination,&composite);
1314  break;
1315  case BlendCompositeOp:
1316  MagickPixelCompositeBlend(&source,source_dissolve,&destination,
1317  destination_dissolve,&composite);
1318  break;
1319  default:
1320  // unsupported operators
1321  break;
1322  };
1323 
1324  CLPixelType outputPixel;
1325  setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
1326  setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
1327  setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
1328  setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
1329  image[index.y*imageWidth+index.x] = outputPixel;
1330  }
1331  )
1332 
1333 /*
1334 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1335 % %
1336 % %
1337 % %
1338 % C o n t r a s t %
1339 % %
1340 % %
1341 % %
1342 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1343 */
1344 
1345  STRINGIFY(
1346 
1347  inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1348  float3 HueSaturationBrightness;
1349  HueSaturationBrightness.x = 0.0f; // Hue
1350  HueSaturationBrightness.y = 0.0f; // Saturation
1351  HueSaturationBrightness.z = 0.0f; // Brightness
1352 
1353  float r=(float) getRed(pixel);
1354  float g=(float) getGreen(pixel);
1355  float b=(float) getBlue(pixel);
1356 
1357  float tmin=min(min(r,g),b);
1358  float tmax=max(max(r,g),b);
1359 
1360  if (tmax!=0.0f) {
1361  float delta=tmax-tmin;
1362  HueSaturationBrightness.y=delta/tmax;
1363  HueSaturationBrightness.z=QuantumScale*tmax;
1364 
1365  if (delta != 0.0f) {
1366  HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1367  HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1368  HueSaturationBrightness.x/=6.0f;
1369  HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1370  }
1371  }
1372  return HueSaturationBrightness;
1373  }
1374 
1375  inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1376 
1377  float hue = HueSaturationBrightness.x;
1378  float brightness = HueSaturationBrightness.z;
1379  float saturation = HueSaturationBrightness.y;
1380 
1381  CLPixelType rgb;
1382 
1383  if (saturation == 0.0f) {
1384  setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1385  setGreen(&rgb,getRed(rgb));
1386  setBlue(&rgb,getRed(rgb));
1387  }
1388  else {
1389 
1390  float h=6.0f*(hue-floor(hue));
1391  float f=h-floor(h);
1392  float p=brightness*(1.0f-saturation);
1393  float q=brightness*(1.0f-saturation*f);
1394  float t=brightness*(1.0f-(saturation*(1.0f-f)));
1395 
1396  float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1397  float clamped_t = ClampToQuantum(QuantumRange*t);
1398  float clamped_p = ClampToQuantum(QuantumRange*p);
1399  float clamped_q = ClampToQuantum(QuantumRange*q);
1400  int ih = (int)h;
1401  setRed(&rgb, (ih == 1)?clamped_q:
1402  (ih == 2 || ih == 3)?clamped_p:
1403  (ih == 4)?clamped_t:
1404  clampedBrightness);
1405 
1406  setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1407  (ih == 3)?clamped_q:
1408  (ih == 4 || ih == 5)?clamped_p:
1409  clamped_t);
1410 
1411  setBlue(&rgb, (ih == 2)?clamped_t:
1412  (ih == 3 || ih == 4)?clampedBrightness:
1413  (ih == 5)?clamped_q:
1414  clamped_p);
1415  }
1416  return rgb;
1417  }
1418 
1419  __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
1420  {
1421 
1422  const int sign = sharpen!=0?1:-1;
1423  const int x = get_global_id(0);
1424  const int y = get_global_id(1);
1425  const int columns = get_global_size(0);
1426  const int c = x + y * columns;
1427 
1428  CLPixelType pixel = im[c];
1429  float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1430  float brightness = HueSaturationBrightness.z;
1431  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1432  brightness = clamp(brightness,0.0f,1.0f);
1433  HueSaturationBrightness.z = brightness;
1434 
1435  CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1436  filteredPixel.w = pixel.w;
1437  im[c] = filteredPixel;
1438  }
1439  )
1440 
1441 /*
1442 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1443 % %
1444 % %
1445 % %
1446 % C o n t r a s t S t r e t c h %
1447 % %
1448 % %
1449 % %
1450 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1451 */
1452 
1453  STRINGIFY(
1454  /*
1455  */
1456  __kernel void Histogram(__global CLPixelType * restrict im,
1457  const ChannelType channel,
1458  const int method,
1459  const int colorspace,
1460  __global uint4 * restrict histogram)
1461  {
1462  const int x = get_global_id(0);
1463  const int y = get_global_id(1);
1464  const int columns = get_global_size(0);
1465  const int c = x + y * columns;
1466  if ((channel & SyncChannels) != 0)
1467  {
1468  float intensity = GetPixelIntensity(method, colorspace,im[c]);
1469  uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
1470  atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
1471  }
1472  else
1473  {
1474  // for equalizing, we always need all channels?
1475  // otherwise something more
1476  }
1477  }
1478  )
1479 
1480  STRINGIFY(
1481  /*
1482  */
1483  __kernel void ContrastStretch(__global CLPixelType * restrict im,
1484  const ChannelType channel,
1485  __global CLPixelType * restrict stretch_map,
1486  const float4 white, const float4 black)
1487  {
1488  const int x = get_global_id(0);
1489  const int y = get_global_id(1);
1490  const int columns = get_global_size(0);
1491  const int c = x + y * columns;
1492 
1493  uint ePos;
1494  CLPixelType oValue, eValue;
1495  CLQuantum red, green, blue, opacity;
1496 
1497  //read from global
1498  oValue=im[c];
1499 
1500  if ((channel & RedChannel) != 0)
1501  {
1502  if (getRedF4(white) != getRedF4(black))
1503  {
1504  ePos = ScaleQuantumToMap(getRed(oValue));
1505  eValue = stretch_map[ePos];
1506  red = getRed(eValue);
1507  }
1508  }
1509 
1510  if ((channel & GreenChannel) != 0)
1511  {
1512  if (getGreenF4(white) != getGreenF4(black))
1513  {
1514  ePos = ScaleQuantumToMap(getGreen(oValue));
1515  eValue = stretch_map[ePos];
1516  green = getGreen(eValue);
1517  }
1518  }
1519 
1520  if ((channel & BlueChannel) != 0)
1521  {
1522  if (getBlueF4(white) != getBlueF4(black))
1523  {
1524  ePos = ScaleQuantumToMap(getBlue(oValue));
1525  eValue = stretch_map[ePos];
1526  blue = getBlue(eValue);
1527  }
1528  }
1529 
1530  if ((channel & OpacityChannel) != 0)
1531  {
1532  if (getOpacityF4(white) != getOpacityF4(black))
1533  {
1534  ePos = ScaleQuantumToMap(getOpacity(oValue));
1535  eValue = stretch_map[ePos];
1536  opacity = getOpacity(eValue);
1537  }
1538  }
1539 
1540  //write back
1541  im[c]=(CLPixelType)(blue, green, red, opacity);
1542 
1543  }
1544  )
1545 
1546 /*
1547 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1548 % %
1549 % %
1550 % %
1551 % C o n v o l v e %
1552 % %
1553 % %
1554 % %
1555 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1556 */
1557 
1558  STRINGIFY(
  /*
    Convolve an image with a filterWidth x filterHeight kernel using
    work-group local memory.

    Each work-group first copies the image area it needs (its own tile
    plus a border of filterWidth-1 by filterHeight-1 pixels) into
    pixelLocalCache and the filter coefficients into filterCache.  After a
    local barrier every work-item computes one output pixel from the
    cached data.

      input, output            source and destination buffers.
      imageWidth, imageHeight  image dimensions in pixels.
      filter                   filter coefficients, row by row.
      matte, channel           when the opacity channel is selected and
                               matte is non-zero, colour is weighted by
                               alpha and normalised; otherwise a plain
                               weighted sum is used.
      pixelLocalCache          local scratch of at least
                               (local_size(0)+filterWidth-1) *
                               (local_size(1)+filterHeight-1) pixels.
      filterCache              local scratch of filterWidth*filterHeight
                               floats.
  */
1559  __kernel
1560  void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
1561  const unsigned int imageWidth, const unsigned int imageHeight,
1562  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1563  const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
1564
1565  int2 blockID;
1566  blockID.x = get_group_id(0);
1567  blockID.y = get_group_id(1);
1568
1569  // image area processed by this workgroup
1570  int2 imageAreaOrg;
1571  imageAreaOrg.x = blockID.x * get_local_size(0);
1572  imageAreaOrg.y = blockID.y * get_local_size(1);
1573
  // offset of the filter centre from its top-left coefficient
1574  int2 midFilterDimen;
1575  midFilterDimen.x = (filterWidth-1)/2;
1576  midFilterDimen.y = (filterHeight-1)/2;
1577
  // top-left image coordinate of the cached area (may be negative)
1578  int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1579
1580  // dimension of the local cache
1581  int2 cachedAreaDimen;
1582  cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1583  cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1584
1585  // cache the pixels accessed by this workgroup in local memory
  // (work-items take cache slots localID, localID+groupSize, ...)
1586  int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1587  int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1588  int groupSize = get_local_size(0) * get_local_size(1);
1589  for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1590
1591  int2 cachedAreaIndex;
1592  cachedAreaIndex.x = i % cachedAreaDimen.x;
1593  cachedAreaIndex.y = i / cachedAreaDimen.x;
1594
1595  int2 imagePixelIndex;
1596  imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1597
1598  // only support EdgeVirtualPixelMethod through ClampToCanvas
1599  // TODO: implement other virtual pixel method
1600  imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1601  imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1602
1603  pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1604  }
1605
1606  // cache the filter
1607  for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1608  filterCache[i] = filter[i];
1609  }
  // every work-item must reach this barrier, so the out-of-range test
  // below is deliberately placed after it
1610  barrier(CLK_LOCAL_MEM_FENCE);
1611
1612
1613  int2 imageIndex;
1614  imageIndex.x = imageAreaOrg.x + get_local_id(0);
1615  imageIndex.y = imageAreaOrg.y + get_local_id(1);
1616
1617  // if out-of-range, stops here and quit
1618  if (imageIndex.x >= imageWidth
1619  || imageIndex.y >= imageHeight) {
1620  return;
1621  }
1622
1623  int filterIndex = 0;
1624  float4 sum = (float4)0.0f;
1625  float gamma = 0.0f;
  // plain weighted sum of all four channels; gamma is accumulated here
  // but not applied to the result in this branch
1626  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1627  int cacheIndexY = get_local_id(1);
1628  for (int j = 0; j < filterHeight; j++) {
1629  int cacheIndexX = get_local_id(0);
1630  for (int i = 0; i < filterWidth; i++) {
1631  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1632  float f = filterCache[filterIndex];
1633
1634  sum.x += f * p.x;
1635  sum.y += f * p.y;
1636  sum.z += f * p.z;
1637  sum.w += f * p.w;
1638
1639  gamma += f;
1640  filterIndex++;
1641  cacheIndexX++;
1642  }
1643  cacheIndexY++;
1644  }
1645  }
  // alpha-weighted sum: the first three channels are weighted by
  // alpha*f and normalised afterwards, the fourth channel by f only
1646  else {
1647  int cacheIndexY = get_local_id(1);
1648  for (int j = 0; j < filterHeight; j++) {
1649  int cacheIndexX = get_local_id(0);
1650  for (int i = 0; i < filterWidth; i++) {
1651
1652  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1653  float alpha = QuantumScale*(QuantumRange-p.w);
1654  float f = filterCache[filterIndex];
1655  float g = alpha * f;
1656
1657  sum.x += g*p.x;
1658  sum.y += g*p.y;
1659  sum.z += g*p.z;
1660  sum.w += f*p.w;
1661
1662  gamma += g;
1663  filterIndex++;
1664  cacheIndexX++;
1665  }
1666  cacheIndexY++;
1667  }
1668  gamma = PerceptibleReciprocal(gamma);
1669  sum.xyz = gamma*sum.xyz;
1670  }
1671  CLPixelType outputPixel;
1672  outputPixel.x = ClampToQuantum(sum.x);
1673  outputPixel.y = ClampToQuantum(sum.y);
1674  outputPixel.z = ClampToQuantum(sum.z);
  // the fourth channel is filtered only when the opacity channel is
  // selected, otherwise it is copied from the input pixel
1675  outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1676
1677  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1678  }
1679  )
1680 
1681  STRINGIFY(
1682  __kernel
1683  void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
1684  const uint imageWidth, const uint imageHeight,
1685  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
1686  const uint matte, const ChannelType channel) {
1687 
1688  int2 imageIndex;
1689  imageIndex.x = get_global_id(0);
1690  imageIndex.y = get_global_id(1);
1691 
1692  /*
1693  unsigned int imageWidth = get_global_size(0);
1694  unsigned int imageHeight = get_global_size(1);
1695  */
1696  if (imageIndex.x >= imageWidth
1697  || imageIndex.y >= imageHeight)
1698  return;
1699 
1700  int2 midFilterDimen;
1701  midFilterDimen.x = (filterWidth-1)/2;
1702  midFilterDimen.y = (filterHeight-1)/2;
1703 
1704  int filterIndex = 0;
1705  float4 sum = (float4)0.0f;
1706  float gamma = 0.0f;
1707  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1708  for (int j = 0; j < filterHeight; j++) {
1709  int2 inputPixelIndex;
1710  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1711  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1712  for (int i = 0; i < filterWidth; i++) {
1713  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1714  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1715 
1716  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1717  float f = filter[filterIndex];
1718 
1719  sum.x += f * p.x;
1720  sum.y += f * p.y;
1721  sum.z += f * p.z;
1722  sum.w += f * p.w;
1723 
1724  gamma += f;
1725 
1726  filterIndex++;
1727  }
1728  }
1729  }
1730  else {
1731 
1732  for (int j = 0; j < filterHeight; j++) {
1733  int2 inputPixelIndex;
1734  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1735  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1736  for (int i = 0; i < filterWidth; i++) {
1737  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1738  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1739 
1740  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1741  float alpha = QuantumScale*(QuantumRange-p.w);
1742  float f = filter[filterIndex];
1743  float g = alpha * f;
1744 
1745  sum.x += g*p.x;
1746  sum.y += g*p.y;
1747  sum.z += g*p.z;
1748  sum.w += f*p.w;
1749 
1750  gamma += g;
1751 
1752 
1753  filterIndex++;
1754  }
1755  }
1756  gamma = PerceptibleReciprocal(gamma);
1757  sum.xyz = gamma*sum.xyz;
1758  }
1759 
1760  CLPixelType outputPixel;
1761  outputPixel.x = ClampToQuantum(sum.x);
1762  outputPixel.y = ClampToQuantum(sum.y);
1763  outputPixel.z = ClampToQuantum(sum.z);
1764  outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1765 
1766  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1767  }
1768  )
1769 
1770 /*
1771 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1772 % %
1773 % %
1774 % %
1775 % D e s p e c k l e %
1776 % %
1777 % %
1778 % %
1779 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1780 */
1781 
1782  STRINGIFY(
1783
  /*
    First hull pass of the despeckle filter, one work-item per pixel.

    The pixel at (x,y) is compared, channel by channel, with its neighbour
    at (x,y)+offset.  A neighbour outside the canvas is treated as an
    all-zero pixel.

      polarity > 0   a channel is raised by ScaleCharToQuantum(1) when the
                     neighbour is at least ScaleCharToQuantum(2) above it.
      otherwise      a channel is lowered by ScaleCharToQuantum(1) when the
                     neighbour is at least ScaleCharToQuantum(2) below it.

    The fourth channel is written back only when matte is non-zero.
    NOTE(review): x and y are not checked against imageWidth/imageHeight,
    so the launch grid presumably matches the image size -- confirm.
  */
1784  __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1785  , const unsigned int imageWidth, const unsigned int imageHeight
1786  , const int2 offset, const int polarity, const int matte) {
1787
1788  int x = get_global_id(0);
1789  int y = get_global_id(1);
1790
1791  CLPixelType v = inputImage[y*imageWidth+x];
1792
1793  int2 neighbor;
1794  neighbor.y = y + offset.y;
1795  neighbor.x = x + offset.x;
1796
1797  int2 clampedNeighbor;
1798  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1799  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1800
  // if clamping moved the coordinate the neighbour lies outside the
  // canvas and a zero pixel is used instead
1801  CLPixelType r = (clampedNeighbor.x == neighbor.x
1802  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1803  :(CLPixelType)0;
1804
  // work on signed integers so the +/- comparisons below cannot wrap
1805  int sv[4];
1806  sv[0] = (int)v.x;
1807  sv[1] = (int)v.y;
1808  sv[2] = (int)v.z;
1809  sv[3] = (int)v.w;
1810
1811  int sr[4];
1812  sr[0] = (int)r.x;
1813  sr[1] = (int)r.y;
1814  sr[2] = (int)r.z;
1815  sr[3] = (int)r.w;
1816
  // the \n sequences put the pragma on a line of its own in the
  // stringified kernel source
1817  if (polarity > 0) {
1818  \n #pragma unroll 4\n
1819  for (unsigned int i = 0; i < 4; i++) {
1820  sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1821  }
1822  }
1823  else {
1824  \n #pragma unroll 4\n
1825  for (unsigned int i = 0; i < 4; i++) {
1826  sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1827  }
1828
1829  }
1830
1831  v.x = (CLQuantum)sv[0];
1832  v.y = (CLQuantum)sv[1];
1833  v.z = (CLQuantum)sv[2];
1834
1835  if (matte!=0)
1836  v.w = (CLQuantum)sv[3];
1837
1838  outputImage[y*imageWidth+x] = v;
1839
1840  }
1841
1842
1843  )
1844 
1845 
1846 
1847  STRINGIFY(
1848
  /*
    Second hull pass of the despeckle filter, one work-item per pixel.

    The pixel at (x,y) is compared, channel by channel, with the
    neighbour r at (x,y)+offset and the neighbour s at (x,y)-offset.
    Neighbours outside the canvas are treated as all-zero pixels.

      polarity > 0   a channel is raised by ScaleCharToQuantum(1) when s is
                     at least ScaleCharToQuantum(2) above it and r is
                     above it.
      otherwise      a channel is lowered by ScaleCharToQuantum(1) when s
                     is at least ScaleCharToQuantum(2) below it and r is
                     below it.

    The conditions are written in negated, additive form (see the
    commented-out variants kept next to them).  The fourth channel is
    written back only when matte is non-zero.
    NOTE(review): x and y are not checked against imageWidth/imageHeight,
    so the launch grid presumably matches the image size -- confirm.
  */
1849  __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1850  , const unsigned int imageWidth, const unsigned int imageHeight
1851  , const int2 offset, const int polarity, const int matte) {
1852
1853  int x = get_global_id(0);
1854  int y = get_global_id(1);
1855
1856  CLPixelType v = inputImage[y*imageWidth+x];
1857
1858  int2 neighbor, clampedNeighbor;
1859
  // neighbour r: one step along +offset
1860  neighbor.y = y + offset.y;
1861  neighbor.x = x + offset.x;
1862  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1863  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1864
1865  CLPixelType r = (clampedNeighbor.x == neighbor.x
1866  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1867  :(CLPixelType)0;
1868
1869
  // neighbour s: one step along -offset
1870  neighbor.y = y - offset.y;
1871  neighbor.x = x - offset.x;
1872  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1873  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1874
1875  CLPixelType s = (clampedNeighbor.x == neighbor.x
1876  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1877  :(CLPixelType)0;
1878
1879
  // work on signed integers so the +/- comparisons below cannot wrap
1880  int sv[4];
1881  sv[0] = (int)v.x;
1882  sv[1] = (int)v.y;
1883  sv[2] = (int)v.z;
1884  sv[3] = (int)v.w;
1885
1886  int sr[4];
1887  sr[0] = (int)r.x;
1888  sr[1] = (int)r.y;
1889  sr[2] = (int)r.z;
1890  sr[3] = (int)r.w;
1891
1892  int ss[4];
1893  ss[0] = (int)s.x;
1894  ss[1] = (int)s.y;
1895  ss[2] = (int)s.z;
1896  ss[3] = (int)s.w;
1897
1898  if (polarity > 0) {
1899  \n #pragma unroll 4\n
1900  for (unsigned int i = 0; i < 4; i++) {
1901  //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1902  //
1903  //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1904  //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
  // the sum of the two comparison results is non-zero when either holds
1905  sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1906  }
1907  }
1908  else {
1909  \n #pragma unroll 4\n
1910  for (unsigned int i = 0; i < 4; i++) {
1911  //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1912  //
1913  //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1914  sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1915  }
1916  }
1917
1918  v.x = (CLQuantum)sv[0];
1919  v.y = (CLQuantum)sv[1];
1920  v.z = (CLQuantum)sv[2];
1921
1922  if (matte!=0)
1923  v.w = (CLQuantum)sv[3];
1924
1925  outputImage[y*imageWidth+x] = v;
1926
1927  }
1928
1929
1930  )
1931 
1932 /*
1933 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1934 % %
1935 % %
1936 % %
1937 % E q u a l i z e %
1938 % %
1939 % %
1940 % %
1941 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1942 */
1943 
1944  STRINGIFY(
1945  /*
1946  */
1947  __kernel void Equalize(__global CLPixelType * restrict im,
1948  const ChannelType channel,
1949  __global CLPixelType * restrict equalize_map,
1950  const float4 white, const float4 black)
1951  {
1952  const int x = get_global_id(0);
1953  const int y = get_global_id(1);
1954  const int columns = get_global_size(0);
1955  const int c = x + y * columns;
1956 
1957  uint ePos;
1958  CLPixelType oValue, eValue;
1959  CLQuantum red, green, blue, opacity;
1960 
1961  //read from global
1962  oValue=im[c];
1963 
1964  if ((channel & SyncChannels) != 0)
1965  {
1966  if (getRedF4(white) != getRedF4(black))
1967  {
1968  ePos = ScaleQuantumToMap(getRed(oValue));
1969  eValue = equalize_map[ePos];
1970  red = getRed(eValue);
1971  ePos = ScaleQuantumToMap(getGreen(oValue));
1972  eValue = equalize_map[ePos];
1973  green = getRed(eValue);
1974  ePos = ScaleQuantumToMap(getBlue(oValue));
1975  eValue = equalize_map[ePos];
1976  blue = getRed(eValue);
1977  ePos = ScaleQuantumToMap(getOpacity(oValue));
1978  eValue = equalize_map[ePos];
1979  opacity = getRed(eValue);
1980 
1981  //write back
1982  im[c]=(CLPixelType)(blue, green, red, opacity);
1983  }
1984 
1985  }
1986 
1987  // for equalizing, we always need all channels?
1988  // otherwise something more
1989 
1990  }
1991  )
1992 
1993 /*
1994 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1995 % %
1996 % %
1997 % %
1998 % F u n c t i o n %
1999 % %
2000 % %
2001 % %
2002 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2003 */
2004 
2005  STRINGIFY(
2006 
2007  /*
2008  apply FunctionImageChannel(braightness-contrast)
2009  */
2010  CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
2011  const unsigned int number_parameters,
2012  __constant float *parameters)
2013  {
2014  float4 result = (float4) 0.0f;
2015  switch (function)
2016  {
2017  case PolynomialFunction:
2018  {
2019  for (unsigned int i=0; i < number_parameters; i++)
2020  result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
2021  result *= (float4)QuantumRange;
2022  break;
2023  }
2024  case SinusoidFunction:
2025  {
2026  float freq,phase,ampl,bias;
2027  freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2028  phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
2029  ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
2030  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2031  result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
2032  (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
2033  result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
2034  (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
2035  result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
2036  (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
2037  result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
2038  (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
2039  break;
2040  }
2041  case ArcsinFunction:
2042  {
2043  float width,range,center,bias;
2044  width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2045  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
2046  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
2047  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2048 
2049  result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
2050  result.x = range/MagickPI*asin(result.x)+bias;
2051  result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
2052  result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
2053 
2054  result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
2055  result.y = range/MagickPI*asin(result.y)+bias;
2056  result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
2057  result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
2058 
2059  result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
2060  result.z = range/MagickPI*asin(result.z)+bias;
2061  result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
2062  result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
2063 
2064 
2065  result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
2066  result.w = range/MagickPI*asin(result.w)+bias;
2067  result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
2068  result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
2069 
2070  result *= (float4)QuantumRange;
2071  break;
2072  }
2073  case ArctanFunction:
2074  {
2075  float slope,range,center,bias;
2076  slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
2077  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
2078  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
2079  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
2080  result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
2081  result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
2082  break;
2083  }
2084  case UndefinedFunction:
2085  break;
2086  }
2087  return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
2088  ClampToQuantum(result.z), ClampToQuantum(result.w));
2089  }
2090  )
2091 
2092  STRINGIFY(
2093  /*
2094  Improve brightness / contrast of the image
2095  channel : define which channel is improved
2096  function : the function called to enchance the brightness contrast
2097  number_parameters : numbers of parameters
2098  parameters : the parameter
2099  */
2100  __kernel void ComputeFunction(__global CLPixelType *im,
2101  const ChannelType channel, const MagickFunction function,
2102  const unsigned int number_parameters, __constant float *parameters)
2103  {
2104  const int x = get_global_id(0);
2105  const int y = get_global_id(1);
2106  const int columns = get_global_size(0);
2107  const int c = x + y * columns;
2108  im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
2109  }
2110  )
2111 
2112 /*
2113 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2114 % %
2115 % %
2116 % %
2117 % G r a y s c a l e %
2118 % %
2119 % %
2120 % %
2121 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2122 */
2123 
2124  STRINGIFY(
2125  __kernel void Grayscale(__global CLPixelType *im,
2126  const int method, const int colorspace)
2127  {
2128 
2129  const int x = get_global_id(0);
2130  const int y = get_global_id(1);
2131  const int columns = get_global_size(0);
2132  const int c = x + y * columns;
2133 
2134  CLPixelType pixel = im[c];
2135 
2136  float
2137  blue,
2138  green,
2139  intensity,
2140  red;
2141 
2142  red=(float)getRed(pixel);
2143  green=(float)getGreen(pixel);
2144  blue=(float)getBlue(pixel);
2145 
2146  intensity=0.0;
2147 
2148  CLPixelType filteredPixel;
2149 
2150  switch (method)
2151  {
2153  {
2154  intensity=(red+green+blue)/3.0;
2155  break;
2156  }
2158  {
2159  intensity=max(max(red,green),blue);
2160  break;
2161  }
2163  {
2164  intensity=(min(min(red,green),blue)+
2165  max(max(red,green),blue))/2.0;
2166  break;
2167  }
2169  {
2170  intensity=(float) (((float) red*red+green*green+
2171  blue*blue)/(3.0*QuantumRange));
2172  break;
2173  }
2175  {
2176  /*
2177  if (colorspace == RGBColorspace)
2178  {
2179  red=EncodePixelGamma(red);
2180  green=EncodePixelGamma(green);
2181  blue=EncodePixelGamma(blue);
2182  }
2183  */
2184  intensity=0.298839*red+0.586811*green+0.114350*blue;
2185  break;
2186  }
2188  {
2189  /*
2190  if (image->colorspace == sRGBColorspace)
2191  {
2192  red=DecodePixelGamma(red);
2193  green=DecodePixelGamma(green);
2194  blue=DecodePixelGamma(blue);
2195  }
2196  */
2197  intensity=0.298839*red+0.586811*green+0.114350*blue;
2198  break;
2199  }
2201  default:
2202  {
2203  /*
2204  if (image->colorspace == RGBColorspace)
2205  {
2206  red=EncodePixelGamma(red);
2207  green=EncodePixelGamma(green);
2208  blue=EncodePixelGamma(blue);
2209  }
2210  */
2211  intensity=0.212656*red+0.715158*green+0.072186*blue;
2212  break;
2213  }
2215  {
2216  /*
2217  if (image->colorspace == sRGBColorspace)
2218  {
2219  red=DecodePixelGamma(red);
2220  green=DecodePixelGamma(green);
2221  blue=DecodePixelGamma(blue);
2222  }
2223  */
2224  intensity=0.212656*red+0.715158*green+0.072186*blue;
2225  break;
2226  }
2228  {
2229  intensity=(float) (sqrt((float) red*red+green*green+
2230  blue*blue)/sqrt(3.0));
2231  break;
2232  }
2233 
2234  }
2235 
2236  setGray(&filteredPixel, ClampToQuantum(intensity));
2237 
2238  filteredPixel.w = pixel.w;
2239 
2240  im[c] = filteredPixel;
2241  }
2242  )
2243 
2244 /*
2245 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2246 % %
2247 % %
2248 % %
2249 % L o c a l C o n t r a s t %
2250 % %
2251 % %
2252 % %
2253 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2254 */
2255 
2256  STRINGIFY(
2257  inline int mirrorBottom(int value)
2258  {
2259  return (value < 0) ? - (value) : value;
2260  }
2261  inline int mirrorTop(int value, int width)
2262  {
2263  return (value >= width) ? (2 * width - value - 1) : value;
2264  }
2265 
2266  __kernel void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *tmpImage,
2267  const int radius,
2268  const int imageWidth,
2269  const int imageHeight)
2270  {
2271  const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
2272 
2273  int x = get_local_id(0);
2274  int y = get_global_id(1);
2275 
2276  global CLPixelType *src = srcImage + y * imageWidth;
2277 
2278  for (int i = x; i < imageWidth; i += get_local_size(0)) {
2279  float sum = 0.0f;
2280  float weight = 1.0f;
2281 
2282  int j = i - radius;
2283  while ((j + 7) < i) {
2284  for (int k = 0; k < 8; ++k) // Unroll 8x
2285  sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
2286  weight += 8.0f;
2287  j+=8;
2288  }
2289  while (j < i) {
2290  sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
2291  weight += 1.0f;
2292  ++j;
2293  }
2294 
2295  while ((j + 7) < radius + i) {
2296  for (int k = 0; k < 8; ++k) // Unroll 8x
2297  sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
2298  weight -= 8.0f;
2299  j+=8;
2300  }
2301  while (j < radius + i) {
2302  sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
2303  weight -= 1.0f;
2304  ++j;
2305  }
2306 
2307  tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
2308  }
2309  }
2310  )
2311 
2312  STRINGIFY(
/*
  LocalContrastBlurApplyColumn(): column pass of the local contrast filter.

  One work-item handles one pixel (x,y).  A triangle-weighted sum of the
  blurImage column around row y is divided by (radius+1)^2, the difference
  between the pixel's weighted channel sum and that blurred value is scaled
  by strength/100, and the resulting gain is applied to the x, y and z
  channels.  The w channel is copied through unchanged.

    srcImage     input pixels, imageWidth pixels per row
    dstImage     output pixels, same layout
    blurImage    one float per pixel, same layout (presumably the output of
                 the row pass; confirm against the host code)
    radius       half-width of the triangle window, in rows
    strength     effect strength, divided by 100 before use
    imageWidth   number of columns
    imageHeight  number of rows

  Row indices outside the image are remapped by mirrorBottom() and
  mirrorTop(), which are defined elsewhere in this file.
*/
__kernel void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global float *blurImage,
  const int radius,
  const float strength,
  const int imageWidth,
  const int imageHeight)
{
  const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);

  int x = get_global_id(0);
  int y = get_global_id(1);

  /* the global range may be padded beyond the image */
  if ((x >= imageWidth) || (y >= imageHeight))
    return;

  /* src[row * imageWidth] walks down column x of the blurred plane */
  global float *src = blurImage + x;

  float sum = 0.0f;
  float weight = 1.0f;

  /* rising ramp: rows y-radius .. y-1 with weights 1 .. radius */
  int j = y - radius;
  while ((j + 7) < y) {
    for (int k = 0; k < 8; ++k) // Unroll 8x
      sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
    weight += 8.0f;
    j+=8;
  }
  while (j < y) {
    sum += weight * src[mirrorBottom(j) * imageWidth];
    weight += 1.0f;
    ++j;
  }

  /* falling ramp: rows y .. y+radius-1, weight decreasing from radius+1 */
  while ((j + 7) < radius + y) {
    for (int k = 0; k < 8; ++k) // Unroll 8x
      sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
    weight -= 8.0f;
    j+=8;
  }
  while (j < radius + y) {
    sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
    weight -= 1.0f;
    ++j;
  }

  CLPixelType pixel = srcImage[x + y * imageWidth];
  float srcVal = dot(RGB, convert_float4(pixel));
  float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);

  /*
    The gain is a ratio against srcVal.  When srcVal is zero the ratio is
    inf or NaN, and multiplying a channel by it hands NaN to
    ClampToQuantum().  Such pixels are left unchanged.
  */
  mult = (srcVal != 0.0f) ? (srcVal + mult) / srcVal : 1.0f;

  pixel.x = ClampToQuantum(pixel.x * mult);
  pixel.y = ClampToQuantum(pixel.y * mult);
  pixel.z = ClampToQuantum(pixel.z * mult);

  dstImage[x + y * imageWidth] = pixel;
}
2368  )
2369 
2370 /*
2371 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2372 % %
2373 % %
2374 % %
2375 % M o d u l a t e %
2376 % %
2377 % %
2378 % %
2379 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2380 */
2381 
2382  STRINGIFY(
2383 
/*
  ConvertRGBToHSL(): convert one RGB triple to hue, saturation, lightness.

  Each channel is multiplied by QuantumScale before use.  The results are
  stored through the hue, saturation and lightness pointers; hue is stored
  as a fraction of a full turn.  When the largest and smallest scaled
  channels coincide, hue and saturation are both set to 0.
*/
inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
  float *hue, float *saturation, float *lightness)
{
  float
    chroma,
    largest,
    smallest,
    turn;

  largest=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
  smallest=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));
  chroma=largest-smallest;

  *lightness=(largest+smallest)/2.0;

  /* grey pixel: no hue, no saturation */
  if (chroma <= 0.0)
    {
      *hue=0.0;
      *saturation=0.0;
      return;
    }

  /* hue in sextants, chosen by whichever channel is the largest */
  if (largest == (QuantumScale*red))
    {
      turn=(QuantumScale*green-QuantumScale*blue)/chroma;
      if ((QuantumScale*green) < (QuantumScale*blue))
        turn+=6.0;
    }
  else if (largest == (QuantumScale*green))
    turn=2.0+(QuantumScale*blue-QuantumScale*red)/chroma;
  else
    turn=4.0+(QuantumScale*red-QuantumScale*green)/chroma;

  /* sextants -> fraction of a turn */
  turn*=60.0/360.0;
  *hue=turn;

  *saturation=(*lightness <= 0.5) ? chroma/(2.0*(*lightness)) :
    chroma/(2.0-2.0*(*lightness));
}
2426 
/*
  ConvertHSLToRGB(): convert hue, saturation, lightness back to RGB.

  hue is a fraction of a full turn; it is wrapped into one turn and split
  into six sextants.  Each result is multiplied by QuantumRange, passed
  through ClampToQuantum() and stored through red, green and blue.  A
  sextant index outside 0..5 yields zero for all three channels.
*/
inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
  float
    b,
    base,
    chroma,
    g,
    minor,
    r,
    sextant;

  if (lightness <= 0.5)
    chroma=2.0*lightness*saturation;
  else
    chroma=(2.0-2.0*lightness)*saturation;
  base=lightness-0.5*chroma;

  /* degrees, wrapped to one turn, then expressed in sextants */
  sextant=hue*360.0;
  sextant-=360.0*floor(sextant/360.0);
  sextant/=60.0;

  /* the intermediate channel value within the sextant */
  minor=chroma*(1.0-fabs(sextant-2.0*floor(sextant/2.0)-1.0));

  switch ((int) floor(sextant))
  {
    case 0: r=base+chroma; g=base+minor; b=base; break;
    case 1: r=base+minor; g=base+chroma; b=base; break;
    case 2: r=base; g=base+chroma; b=base+minor; break;
    case 3: r=base; g=base+minor; b=base+chroma; break;
    case 4: r=base+minor; g=base; b=base+chroma; break;
    case 5: r=base+chroma; g=base; b=base+minor; break;
    default: r=0.0; g=0.0; b=0.0; break;
  }

  *red=ClampToQuantum(QuantumRange*r);
  *green=ClampToQuantum(QuantumRange*g);
  *blue=ClampToQuantum(QuantumRange*b);
}
2506 
/*
  ModulateHSL(): adjust one pixel's hue, saturation and lightness in place.

  The pixel is converted with ConvertRGBToHSL(), the hue is shifted by
  0.5*(0.01*percent_hue-1.0) and wrapped back into [0,1), saturation and
  lightness are multiplied by 0.01 times their percentage, and the result
  is converted back with ConvertHSLToRGB() into red, green and blue.
*/
inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
{
  float
    h,
    l,
    s;

  ConvertRGBToHSL(*red,*green,*blue,&h,&s,&l);

  /* shift the hue, then wrap it back into one turn */
  h+=0.5*(0.01*percent_hue-1.0);
  while (h < 0.0)
    {
      h+=1.0;
    }
  while (h >= 1.0)
    {
      h-=1.0;
    }

  s*=0.01*percent_saturation;
  l*=0.01*percent_lightness;

  ConvertHSLToRGB(h,s,l,red,green,blue);
}
2528 
/*
  Modulate(): in-place modulate kernel, one work-item per pixel.

  The pixel index is get_global_id(0) + get_global_id(1) *
  get_global_size(0), so the row stride is the global size in dimension 0.
  Red, green and blue are run through ModulateHSL(); the w channel is
  copied through unchanged.  Every colorspace value currently takes the
  HSL path.
*/
__kernel void Modulate(__global CLPixelType *im,
  const float percent_brightness,
  const float percent_hue,
  const float percent_saturation,
  const int colorspace)
{
  const int column = get_global_id(0);
  const int row = get_global_id(1);
  const int stride = get_global_size(0);
  const int index = column + row * stride;

  CLPixelType source = im[index];

  CLQuantum r = getRed(source);
  CLQuantum g = getGreen(source);
  CLQuantum b = getBlue(source);

  switch (colorspace)
  {
    case HSLColorspace:
    default:
      ModulateHSL(percent_hue, percent_saturation, percent_brightness,
        &r, &g, &b);
      break;
  }

  CLPixelType result;

  setRed(&result, r);
  setGreen(&result, g);
  setBlue(&result, b);
  result.w = source.w;

  im[index] = result;
}
2572  )
2573 
2574 /*
2575 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2576 % %
2577 % %
2578 % %
2579 % M o t i o n B l u r %
2580 % %
2581 % %
2582 % %
2583 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2584 */
2585 
2586  STRINGIFY(
/*
  MotionBlur(): weighted sum of `width` samples taken along a fixed set of
  offsets, one work-item per output pixel.

    input, output            source and destination, imageWidth per row
    imageWidth, imageHeight  image size; work-items outside it return
    filter                   `width` tap weights
    width                    number of taps
    offset                   `width` (dx,dy) displacements, one per tap
    bias                     initial value of the four accumulators
    channel, matte           when the opacity channel is selected and
                             matte is non-zero, the x, y and z channels
                             are weighted by each sample's alpha and
                             renormalised

  Sample coordinates are clamped to the canvas with ClampToCanvas(), so
  only edge-style virtual pixels are supported.
*/
__kernel
void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
  const unsigned int imageWidth, const unsigned int imageHeight,
  const __global float *filter, const unsigned int width, const __global int2* offset,
  const float4 bias,
  const ChannelType channel, const unsigned int matte) {

  int2 currentPixel;
  currentPixel.x = get_global_id(0);
  currentPixel.y = get_global_id(1);

  if (currentPixel.x >= imageWidth
    || currentPixel.y >= imageHeight)
    return;

  /* every accumulator starts from the caller-supplied bias */
  float4 pixel = bias;

  if (((channel & OpacityChannel) == 0) || (matte == 0)) {

    /* plain weighted sum of all four channels */
    for (unsigned int i = 0; i < width; i++) {
      // only support EdgeVirtualPixelMethod through ClampToCanvas
      // TODO: implement other virtual pixel method
      int2 samplePixel = currentPixel + offset[i];
      samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
      samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
      CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];

      pixel.x += (filter[i] * (float)samplePixelValue.x);
      pixel.y += (filter[i] * (float)samplePixelValue.y);
      pixel.z += (filter[i] * (float)samplePixelValue.z);
      pixel.w += (filter[i] * (float)samplePixelValue.w);
    }
  }
  else {

    /* alpha-weighted colour, renormalised by the accumulated alpha */
    float gamma = 0.0f;
    for (unsigned int i = 0; i < width; i++) {
      // only support EdgeVirtualPixelMethod through ClampToCanvas
      // TODO: implement other virtual pixel method
      int2 samplePixel = currentPixel + offset[i];
      samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
      samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);

      CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];

      float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
      float k = filter[i];
      pixel.x = pixel.x + k * alpha * samplePixelValue.x;
      pixel.y = pixel.y + k * alpha * samplePixelValue.y;
      pixel.z = pixel.z + k * alpha * samplePixelValue.z;

      /*
        Opacity is filtered with the tap weight only.  Weighting it by
        alpha as well would change the opacity of a uniformly translucent
        image; RadialBlur and the resize kernels in this file likewise
        accumulate w without the alpha factor.
      */
      pixel.w += k * samplePixelValue.w;

      gamma+=k*alpha;
    }
    gamma = PerceptibleReciprocal(gamma);
    pixel.xyz = gamma*pixel.xyz;
  }

  CLPixelType outputPixel;
  outputPixel.x = ClampToQuantum(pixel.x);
  outputPixel.y = ClampToQuantum(pixel.y);
  outputPixel.z = ClampToQuantum(pixel.z);
  outputPixel.w = ClampToQuantum(pixel.w);
  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
}
2664  )
2665 
2666 /*
2667 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2668 % %
2669 % %
2670 % %
2671 % R a d i a l B l u r %
2672 % %
2673 % %
2674 % %
2675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2676 */
2677 
2678  STRINGIFY(
/*
  RadialBlur(): average of samples taken on a rotation about blurCenter,
  one work-item per output pixel.

    im, filtered_im       source and destination; the row stride is
                          get_global_size(0) and the row count is
                          get_global_size(1)
    bias                  initial value of the four accumulators
    channel, matte        when the opacity channel is selected and matte
                          is non-zero, colour is weighted by sample alpha
    blurCenter            centre of rotation in pixel coordinates
    cos_theta, sin_theta  tables of cossin_theta_size rotation
                          coefficients
    cossin_theta_size     number of table entries

  The table is walked with a stride of blur_radius/radius, so pixels close
  to the centre use fewer samples.
*/
__kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
  const float4 bias,
  const unsigned int channel, const unsigned int matte,
  const float2 blurCenter,
  __constant float *cos_theta, __constant float *sin_theta,
  const unsigned int cossin_theta_size)
{
  const int x = get_global_id(0);
  const int y = get_global_id(1);
  const int columns = get_global_size(0);
  const int rows = get_global_size(1);
  unsigned int step = 1;
  float center_x = (float) x - blurCenter.x;
  float center_y = (float) y - blurCenter.y;
  float radius = hypot(center_x, center_y);

  //float blur_radius = hypot((float) columns/2.0f, (float) rows/2.0f);
  float blur_radius = hypot(blurCenter.x, blurCenter.y);

  if (radius > MagickEpsilon)
  {
    step = (unsigned int) (blur_radius / radius);
    if (step >= cossin_theta_size)
      step = cossin_theta_size-1;
    /*
      Apply the lower bound last: with a one-entry table the upper clamp
      above yields 0, and a zero stride would make the sampling loops
      below spin forever.
    */
    if (step == 0)
      step = 1;
  }

  float4 result;
  result.x = (float)bias.x;
  result.y = (float)bias.y;
  result.z = (float)bias.z;
  result.w = (float)bias.w;
  float normalize = 0.0f;

  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
    /* plain average of all four channels */
    for (unsigned int i=0; i<cossin_theta_size; i+=step)
    {
      result += convert_float4(im[
        ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
        ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
      normalize += 1.0f;
    }
    normalize = PerceptibleReciprocal(normalize);
    result = result * normalize;
  }
  else {
    /* colour weighted by alpha, opacity averaged by sample count */
    float gamma = 0.0f;
    for (unsigned int i=0; i<cossin_theta_size; i+=step)
    {
      float4 p = convert_float4(im[
        ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
        ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);

      float alpha = (float)(QuantumScale*(QuantumRange-p.w));
      result.x += alpha * p.x;
      result.y += alpha * p.y;
      result.z += alpha * p.z;
      result.w += p.w;
      gamma+=alpha;
      normalize += 1.0f;
    }
    gamma = PerceptibleReciprocal(gamma);
    normalize = PerceptibleReciprocal(normalize);
    result.x = gamma*result.x;
    result.y = gamma*result.y;
    result.z = gamma*result.z;
    result.w = normalize*result.w;
  }
  filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
    ClampToQuantum(result.z), ClampToQuantum(result.w));
}
2751  )
2752 
2753 /*
2754 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2755 % %
2756 % %
2757 % %
2758 % R a n d o m %
2759 % %
2760 % %
2761 % %
2762 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2763 */
2764 
2765 STRINGIFY(
2766 
/*
  GetPseudoRandomValue(): advance a four-word xorshift state and return the
  new first word multiplied by normalizeRand.

    seed           in/out generator state; updated on return
    normalizeRand  scale applied to the 32-bit result

  A result word of all ones is rejected and the generator is stepped again.
*/
inline float GetPseudoRandomValue(uint4* seed, const float normalizeRand) {
  uint4 s = *seed;
  do {
    unsigned int alpha = (unsigned int)(s.y ^ (s.y << 11));
    s.y = s.z;
    s.z = s.w;
    s.w = s.x;
    s.x = (s.x ^ (s.x >> 19)) ^ (alpha ^ (alpha >> 8));
    /*
      Compare against a 32-bit constant.  In OpenCL C, ~0UL is a 64-bit
      value, and a uint widened to 64 bits can never equal it, so the
      rejection would never trigger.
    */
  } while (s.x == 0xFFFFFFFFu);
  *seed = s;
  return (normalizeRand*s.x);
}
2779 
/*
  RandomNumberGenerator(): fill randomNumbers with pseudo-random floats.

    seeds             four state words per work-item, read on entry and
                      written back on exit
    normalizeRand     scale applied to each 32-bit result
    randomNumbers     output buffer
    init              when non-zero, only the first state word is taken
                      from seeds and the other three are fixed constants
    numRandomNumbers  number of values to produce

  Each work-item produces ceil(numRandomNumbers / global size) values.
  Within a work-group the values are interleaved across work-items, so
  neighbouring work-items write neighbouring positions.
*/
__kernel void RandomNumberGenerator(__global uint* seeds, const float normalizeRand
  , __global float* randomNumbers, const uint init
  , const uint numRandomNumbers) {

  unsigned int id = get_global_id(0);
  unsigned int seed[4];

  if (init != 0) {
    seed[0] = seeds[id * 4];
    seed[1] = 0x50a7f451;
    seed[2] = 0x5365417e;
    seed[3] = 0xc3a4171a;
  }
  else {
    seed[0] = seeds[id * 4];
    seed[1] = seeds[id * 4 + 1];
    seed[2] = seeds[id * 4 + 2];
    seed[3] = seeds[id * 4 + 3];
  }

  unsigned int numRandomNumbersPerItem = (numRandomNumbers + get_global_size(0) - 1) / get_global_size(0);
  for (unsigned int i = 0; i < numRandomNumbersPerItem; i++) {
    do
    {
      unsigned int alpha = (unsigned int)(seed[1] ^ (seed[1] << 11));
      seed[1] = seed[2];
      seed[2] = seed[3];
      seed[3] = seed[0];
      seed[0] = (seed[0] ^ (seed[0] >> 19)) ^ (alpha ^ (alpha >> 8));
      /*
        Reject an all-ones word.  The constant must be 32 bits wide: ~0UL
        is 64-bit in OpenCL C and can never equal a 32-bit word.
      */
    } while (seed[0] == 0xFFFFFFFFu);
    unsigned int pos = (get_group_id(0)*get_local_size(0)*numRandomNumbersPerItem)
      + get_local_size(0) * i + get_local_id(0);

    /* the last work-items may run past the requested count */
    if (pos >= numRandomNumbers)
      break;
    randomNumbers[pos] = normalizeRand*seed[0];
  }

  /* save the seeds for the next time */
  seeds[id * 4] = seed[0];
  seeds[id * 4 + 1] = seed[1];
  seeds[id * 4 + 2] = seed[2];
  seeds[id * 4 + 3] = seed[3];
}
2824  )
2825 
2826 /*
2827 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2828 % %
2829 % %
2830 % %
2831 % R e s i z e %
2832 % %
2833 % %
2834 % %
2835 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2836 */
2837 
2838  STRINGIFY(
// Box weighting function, after Box in resize.c: the weight is 1 for every
// x, so the argument is accepted only to match the other filters.
float BoxResizeFilter(const float x)
{
  const float unitWeight = 1.0f;

  (void) x;
  return unitWeight;
}
2844  )
2845 
2846  STRINGIFY(
// Cubic B,C weighting function, after CubicBC in resize.c
float CubicBC(const float x,const __global float* resizeFilterCoefficients)
{
  /*
    Piecewise cubic driven by seven precomputed coefficients.

    Well-known B,C choices:
       Mitchell-Netravali  B = 1/3  C = 1/3  "Balanced" cubic spline filter
       Catmull-Rom         B = 0    C = 1/2  Interpolatory, exact on linears
       Spline              B = 1    C = 0    B-Spline Gaussian approximation
       Hermite             B = 0    C = 0    B-Spline interpolator

    See Mitchell and Netravali, "Reconstruction Filters in Computer
    Graphics", Computer Graphics, Volume 22, Number 4, August 1988
    http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
    Mitchell.pdf.

    The coefficients are derived from B and C as:
       P0 = (  6 -  2*B        )/6 = coeff[0]
       P1 = 0
       P2 = (-18 + 12*B +  6*C )/6 = coeff[1]
       P3 = ( 12 -  9*B -  6*C )/6 = coeff[2]
       Q0 = (       8*B + 24*C )/6 = coeff[3]
       Q1 = (     -12*B - 48*C )/6 = coeff[4]
       Q2 = (       6*B + 30*C )/6 = coeff[5]
       Q3 = (     - 1*B -  6*C )/6 = coeff[6]

    and the filter is:
       P0 + P1*x + P2*x^2 + P3*x^3      0 <= x < 1
       Q0 + Q1*x + Q2*x^2 + Q3*x^3      1 <= x < 2

    which is continuous in value and in slope.  Outside those intervals
    the weight is 0.
  */
  const __global float* k = resizeFilterCoefficients;
  float value = 0.0;

  if (x < 1.0)
    value = k[0]+x*(x*
      (k[1]+x*k[2]));
  else if (x < 2.0)
    value = k[3]+x*(k[4]+x*
      (k[5]+x*k[6]));
  return(value);
}
2887  )
2888 
2889  STRINGIFY(
/*
  Sinc(): sinpi(x) divided by MagickPI*x, with the value at x == 0
  defined as 1.
*/
float Sinc(const float x)
{
  if (x == 0.0f)
    return(1.0f);

  const float denominator=(float) (MagickPI*x);
  return sinpi(x)/denominator;
}
2899  )
2900 
2901  STRINGIFY(
/*
  Triangle(): 1st order (linear) B-Spline, bilinear interpolation, Tent 1D
  filter, or a Bartlett 2D Cone filter.  Also used as a Bartlett windowing
  function for Sinc().  Returns 1-x below 1 and 0 from 1 upwards.
*/
float Triangle(const float x)
{
  if (x < 1.0f)
    return (1.0f-x);
  return 0.0f;
}
2911  )
2912 
2913 
2914  STRINGIFY(
/*
  Hanning(): cosine window, 0.5 + 0.5*cos(pi*x).
*/
float Hanning(const float x)
{
  const float half=0.5f;
  const float c=cos(MagickPI*x);

  return(half+half*c);
}
2924  )
2925 
2926  STRINGIFY(
/*
  Hamming(): offset cosine window, 0.54 + 0.46*cos(pi*x).
*/
float Hamming(const float x)
{
  const float offset=0.54f;
  const float gain=0.46f;
  const float c=cos(MagickPI*x);

  return(offset+gain*c);
}
2936  )
2937 
2938  STRINGIFY(
/*
  Blackman(): 2nd order cosine window,
     0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)

  Evaluated in the form refactored by Chantal Racette and Nicolas Robidoux
  (one trig call, five flops): with c = cos(pi x) and
  cos(2pi x) = 2c^2 - 1 the window becomes 0.34 + c*(0.5 + 0.16*c).
*/
float Blackman(const float x)
{
  const float c=cos(MagickPI*x);

  return(0.34f+c*(0.5f+c*0.16f));
}
2951  )
2952 
2953 
2954 
2955 
2956  STRINGIFY(
/*
  applyResizeFilter(): evaluate the weighting function selected by
  filterType at x.

    x                   distance at which to evaluate the function
    filterType          which weighting function to use
    filterCoefficients  coefficient table forwarded to CubicBC(); the
                        other functions do not use it

  Returns 0 for a filter type with no device implementation.

  NOTE(review): each return statement needs its own case label.  The
  enumerator names below follow the pattern of SincWeightingFunction and
  BoxWeightingFunction; confirm them against the host-side declaration of
  ResizeWeightingFunctionType.
*/
inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
{
  switch (filterType)
  {
    /* Call Sinc even for SincFast to get better precision on GPU
       and to avoid thread divergence.  Sinc is pretty fast on GPU anyway...*/
    case SincWeightingFunction:
    case SincFastWeightingFunction:
      return Sinc(x);
    case CubicBCWeightingFunction:
      return CubicBC(x,filterCoefficients);
    case BoxWeightingFunction:
      return BoxResizeFilter(x);
    case TriangleWeightingFunction:
      return Triangle(x);
    case HanningWeightingFunction:
      return Hanning(x);
    case HammingWeightingFunction:
      return Hamming(x);
    case BlackmanWeightingFunction:
      return Blackman(x);

    default:
      return 0.0f;
  }
}
2983  )
2984 
2985 
2986  STRINGIFY(
/*
  getResizeFilterWeight(): weight of one filter tap at signed distance x.

  The distance is divided by resizeFilterBlur and made non-negative.  The
  result is the filter function at that distance multiplied by the window
  function evaluated at distance*resizeFilterScale.  The window factor is
  1 when resizeWindowSupport is below MagickEpsilon or when the window
  type is the box function.
*/
inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
  , const ResizeWeightingFunctionType resizeWindowType
  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
{
  const float distance = fabs(x/resizeFilterBlur);
  float window = 1.0f;

  /* a windowed filter: evaluate the window on the rescaled distance */
  if (!(resizeWindowSupport < MagickEpsilon)
    && (resizeWindowType != BoxWeightingFunction))
  {
    window = applyResizeFilter(distance*resizeFilterScale, resizeWindowType, resizeFilterCubicCoefficients);
  }

  return window * applyResizeFilter(distance, resizeFilterType, resizeFilterCubicCoefficients);
}
3006 
3007  )
3008 
3009  ;
3010  const char* accelerateKernels2 =
3011 
3012  STRINGIFY(
3013 
// number of work-items that cooperate on one output pixel: the work-group
// size divided (rounding down) by the number of pixels it has to produce
inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
  const unsigned int itemsPerPixel = numWorkItems/pixelPerWorkgroup;
  return itemsPerPixel;
}
3017 
// returns the index, within the work-group's pixels, of the pixel the
// given work-item contributes to, or -1 when the work-item is left over
// after the even split and takes no part in the computation
inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
  const unsigned int itemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
  const int candidate = itemID/itemsPerPixel;

  if (candidate < pixelPerWorkgroup)
    return candidate;
  return -1;
}
3026 
3027  )
3028 
3029  STRINGIFY(
/*
  ResizeHorizontalFilter(): horizontal resampling pass.

  Requires a work-group size of 256 x 1 x 1.  Each work-group produces up
  to pixelPerWorkgroup consecutive output columns of row get_global_id(1):

    1. The span of input pixels needed by those columns (at most
       numCachedPixels of them) is copied from row y of inputImage into
       the __local buffer inputImageCache.
    2. The output columns are processed in chunks of pixelChunkSize.  In a
       chunk the work-items are divided evenly among the output pixels;
       each work-item sums a slice of the filter taps for its pixel.
    3. The partial sums are added into the __local accumulators
       (outputPixelCache, densityCache, gammaCache), one slice index at a
       time with a barrier after each step.
    4. The first work-items of the group, one per output pixel, normalise
       the totals and store the result into filteredImage.

  When matte is non-zero the x, y and z channels are weighted by the
  sample alpha (GetPixelAlpha) and divided by the accumulated alpha; the
  w channel is filtered with the plain weight.

  inputRows and filteredRows are not read by this kernel.
*/
3030  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
3031  void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
3032  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
3033  , const int resizeFilterType, const int resizeWindowType
3034  , const __global float* resizeFilterCubicCoefficients
3035  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
3036  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
3037  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
3038 
3039 
3040  // calculate the range of resized image pixels computed by this workgroup
3041  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
3042  const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
3043  const unsigned int actualNumPixelToCompute = stopX - startX;
3044 
3045  // calculate the range of input image pixels to cache
      // scale is at least 1; support is the filter half-width in input
      // pixels; scale is then inverted for use as a distance multiplier
3046  float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
3047  const float support = max(scale*resizeFilterSupport,0.5f);
3048  scale = PerceptibleReciprocal(scale);
3049 
3050  const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
3051  const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
3052 
3053  // cache the input pixels into local memory
3054  const unsigned int y = get_global_id(1);
3055  event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
3056  wait_group_events(1,&e);
3057 
      // split this work-group's output pixels into chunks of pixelChunkSize
3058  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
3059  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
3060  {
3061 
3062  const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
3063  const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
3064  const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
3065 
3066  // determine which resized pixel computed by this workitem
      // numItems is the number of work-items sharing one output pixel
3067  const unsigned int itemID = get_local_id(0);
3068  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
3069 
3070  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
3071 
      // private partial sums of this work-item
3072  float4 filteredPixel = (float4)0.0f;
3073  float density = 0.0f;
3074  float gamma = 0.0f;
3075  // -1 means this workitem doesn't participate in the computation
3076  if (pixelIndex != -1) {
3077 
3078  // x coordinate of the resized pixel computed by this workitem
3079  const int x = chunkStartX + pixelIndex;
3080 
3081  // calculate how many steps required for this pixel
      // bisect is the output pixel centre mapped into input coordinates;
      // [start, stop) is the range of input columns under the filter
3082  const float bisect = (x+0.5)/xFactor+MagickEpsilon;
3083  const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
3084  const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
3085  const unsigned int n = stop - start;
3086 
3087  // calculate how many steps this workitem will contribute
      // ceil(n / numItems) taps per work-item
3088  unsigned int numStepsPerWorkItem = n / numItems;
3089  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
3090 
      // itemID%numItems is this work-item's slice number within its pixel
3091  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3092  if (startStep < n) {
3093  const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
3094 
      // index of the first tap relative to the start of the local cache
3095  unsigned int cacheIndex = start+startStep-cacheRangeStartX;
3096  if (matte == 0) {
3097 
      // no alpha handling: all four channels get the plain filter weight
3098  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3099  float4 cp = convert_float4(inputImageCache[cacheIndex]);
3100 
3101  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3102  , (ResizeWeightingFunctionType)resizeWindowType
3103  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3104 
3105  filteredPixel += ((float4)weight)*cp;
3106  density+=weight;
3107  }
3108 
3109 
3110  }
3111  else {
      // alpha handling: x, y, z are weighted by weight*alpha, w by weight
3112  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3113  CLPixelType p = inputImageCache[cacheIndex];
3114 
3115  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3116  , (ResizeWeightingFunctionType)resizeWindowType
3117  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3118 
3119  float alpha = weight * QuantumScale * GetPixelAlpha(p);
3120  float4 cp = convert_float4(p);
3121 
3122  filteredPixel.x += alpha * cp.x;
3123  filteredPixel.y += alpha * cp.y;
3124  filteredPixel.z += alpha * cp.z;
3125  filteredPixel.w += weight * cp.w;
3126 
3127  density+=weight;
3128  gamma+=alpha;
3129  }
3130  }
3131  }
3132  }
3133 
3134  // initialize the accumulators to zero
      // one accumulator slot per output pixel of the chunk
3135  if (itemID < actualNumPixelInThisChunk) {
3136  outputPixelCache[itemID] = (float4)0.0f;
3137  densityCache[itemID] = 0.0f;
3138  if (matte != 0)
3139  gammaCache[itemID] = 0.0f;
3140  }
3141  barrier(CLK_LOCAL_MEM_FENCE);
3142 
3143  // accumulate the filtered pixel value and the density
      // in step i only the work-items with slice number i add their partial
      // sums, so each slot has at most one writer between two barriers
3144  for (unsigned int i = 0; i < numItems; i++) {
3145  if (pixelIndex != -1) {
3146  if (itemID%numItems == i) {
3147  outputPixelCache[pixelIndex]+=filteredPixel;
3148  densityCache[pixelIndex]+=density;
3149  if (matte!=0) {
3150  gammaCache[pixelIndex]+=gamma;
3151  }
3152  }
3153  }
3154  barrier(CLK_LOCAL_MEM_FENCE);
3155  }
3156 
      // the first work-items, one per output pixel, normalise and store
3157  if (itemID < actualNumPixelInThisChunk) {
3158  if (matte==0) {
3159  float density = densityCache[itemID];
3160  float4 filteredPixel = outputPixelCache[itemID];
      // divide by the total weight unless it is exactly 0 or 1
3161  if (density!= 0.0f && density != 1.0)
3162  {
3163  density = PerceptibleReciprocal(density);
3164  filteredPixel *= (float4)density;
3165  }
3166  filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
3167  , ClampToQuantum(filteredPixel.y)
3168  , ClampToQuantum(filteredPixel.z)
3169  , ClampToQuantum(filteredPixel.w));
3170  }
3171  else {
3172  float density = densityCache[itemID];
3173  float gamma = gammaCache[itemID];
3174  float4 filteredPixel = outputPixelCache[itemID];
3175 
3176  if (density!= 0.0f && density != 1.0) {
3177  density = PerceptibleReciprocal(density);
3178  filteredPixel *= (float4)density;
3179  gamma *= density;
3180  }
      // undo the alpha weighting of the colour channels
3181  gamma = PerceptibleReciprocal(gamma);
3182 
3183  CLPixelType fp;
3184  fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
3185  , ClampToQuantum(gamma*filteredPixel.y)
3186  , ClampToQuantum(gamma*filteredPixel.z)
3187  , ClampToQuantum(filteredPixel.w));
3188 
3189  filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
3190 
3191  }
3192  }
3193 
3194  } // end of chunking loop
3196  )
3197 
3198 
3199 
3200  STRINGIFY(
/*
  ResizeHorizontalFilterSinc(): horizontal resampling pass with both the
  filter and the window function fixed to Sinc.

  Takes the same parameters as ResizeHorizontalFilter() and forwards them
  unchanged, except that resizeFilterType and resizeWindowType are ignored
  and SincWeightingFunction is passed in both positions.  Requires a
  work-group size of 256 x 1 x 1.
*/
__kernel __attribute__((reqd_work_group_size(256, 1, 1)))
void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
  , const int resizeFilterType, const int resizeWindowType
  , const __global float* resizeFilterCubicCoefficients
  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {

  /*
    The callee takes 22 arguments; the filter type and window type belong
    between filteredRows and the coefficient table.
  */
  ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
    ,xFactor, filteredImage, filteredColumns, filteredRows
    ,SincWeightingFunction, SincWeightingFunction
    ,resizeFilterCubicCoefficients
    ,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
    ,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
    ,outputPixelCache, densityCache, gammaCache);

}
3219  )
3220 
3221 
3222  STRINGIFY(
3223  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
3224  void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
3225  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
3226  , const int resizeFilterType, const int resizeWindowType
3227  , const __global float* resizeFilterCubicCoefficients
3228  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
3229  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
3230  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
3231 
3232 
3233  // calculate the range of resized image pixels computed by this workgroup
3234  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
3235  const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
3236  const unsigned int actualNumPixelToCompute = stopY - startY;
3237 
3238  // calculate the range of input image pixels to cache
3239  float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
3240  const float support = max(scale*resizeFilterSupport,0.5f);
3241  scale = PerceptibleReciprocal(scale);
3242 
3243  const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
3244  const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
3245 
3246  // cache the input pixels into local memory
3247  const unsigned int x = get_global_id(0);
3248  event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
3249  wait_group_events(1,&e);
3250 
3251  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
3252  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
3253  {
3254 
3255  const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
3256  const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
3257  const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
3258 
3259  // determine which resized pixel computed by this workitem
3260  const unsigned int itemID = get_local_id(1);
3261  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
3262 
3263  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
3264 
3265  float4 filteredPixel = (float4)0.0f;
3266  float density = 0.0f;
3267  float gamma = 0.0f;
3268  // -1 means this workitem doesn't participate in the computation
3269  if (pixelIndex != -1) {
3270 
3271  // x coordinated of the resized pixel computed by this workitem
3272  const int y = chunkStartY + pixelIndex;
3273 
3274  // calculate how many steps required for this pixel
3275  const float bisect = (y+0.5)/yFactor+MagickEpsilon;
3276  const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
3277  const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
3278  const unsigned int n = stop - start;
3279 
3280  // calculate how many steps this workitem will contribute
3281  unsigned int numStepsPerWorkItem = n / numItems;
3282  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
3283 
3284  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3285  if (startStep < n) {
3286  const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
3287 
3288  unsigned int cacheIndex = start+startStep-cacheRangeStartY;
3289  if (matte == 0) {
3290 
3291  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3292  float4 cp = convert_float4(inputImageCache[cacheIndex]);
3293 
3294  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3295  , (ResizeWeightingFunctionType)resizeWindowType
3296  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3297 
3298  filteredPixel += ((float4)weight)*cp;
3299  density+=weight;
3300  }
3301 
3302 
3303  }
3304  else {
3305  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3306  CLPixelType p = inputImageCache[cacheIndex];
3307 
3308  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3309  , (ResizeWeightingFunctionType)resizeWindowType
3310  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3311 
3312  float alpha = weight * QuantumScale * GetPixelAlpha(p);
3313  float4 cp = convert_float4(p);
3314 
3315  filteredPixel.x += alpha * cp.x;
3316  filteredPixel.y += alpha * cp.y;
3317  filteredPixel.z += alpha * cp.z;
3318  filteredPixel.w += weight * cp.w;
3319 
3320  density+=weight;
3321  gamma+=alpha;
3322  }
3323  }
3324  }
3325  }
3326 
3327  // initialize the accumulators to zero
3328  if (itemID < actualNumPixelInThisChunk) {
3329  outputPixelCache[itemID] = (float4)0.0f;
3330  densityCache[itemID] = 0.0f;
3331  if (matte != 0)
3332  gammaCache[itemID] = 0.0f;
3333  }
3334  barrier(CLK_LOCAL_MEM_FENCE);
3335 
3336  // accumulatte the filtered pixel value and the density
3337  for (unsigned int i = 0; i < numItems; i++) {
3338  if (pixelIndex != -1) {
3339  if (itemID%numItems == i) {
3340  outputPixelCache[pixelIndex]+=filteredPixel;
3341  densityCache[pixelIndex]+=density;
3342  if (matte!=0) {
3343  gammaCache[pixelIndex]+=gamma;
3344  }
3345  }
3346  }
3347  barrier(CLK_LOCAL_MEM_FENCE);
3348  }
3349 
3350  if (itemID < actualNumPixelInThisChunk) {
3351  if (matte==0) {
3352  float density = densityCache[itemID];
3353  float4 filteredPixel = outputPixelCache[itemID];
3354  if (density!= 0.0f && density != 1.0)
3355  {
3356  density = PerceptibleReciprocal(density);
3357  filteredPixel *= (float4)density;
3358  }
3359  filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
3360  , ClampToQuantum(filteredPixel.y)
3361  , ClampToQuantum(filteredPixel.z)
3362  , ClampToQuantum(filteredPixel.w));
3363  }
3364  else {
3365  float density = densityCache[itemID];
3366  float gamma = gammaCache[itemID];
3367  float4 filteredPixel = outputPixelCache[itemID];
3368 
3369  if (density!= 0.0f && density != 1.0) {
3370  density = PerceptibleReciprocal(density);
3371  filteredPixel *= (float4)density;
3372  gamma *= density;
3373  }
3374  gamma = PerceptibleReciprocal(gamma);
3375 
3376  CLPixelType fp;
3377  fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
3378  , ClampToQuantum(gamma*filteredPixel.y)
3379  , ClampToQuantum(gamma*filteredPixel.z)
3380  , ClampToQuantum(filteredPixel.w));
3381 
3382  filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
3383 
3384  }
3385  }
3386 
3387  } // end of chunking loop
3388  }
3389  )
3390 
3391 
3392 
3393  STRINGIFY(
      /*
        ResizeVerticalFilterSinc: kernel entry point that forwards every one of
        its arguments, in declaration order, to ResizeVerticalFilter.  The
        reqd_work_group_size attribute fixes the work-group shape to 1x256x1.
        NOTE(review): presumably the variant selected for Sinc filters, as the
        name suggests -- confirm against the host-side kernel selection.
      */
3394  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
3395  void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
3396  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
3397  , const int resizeFilterType, const int resizeWindowType
3398  , const __global float* resizeFilterCubicCoefficients
3399  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
3400  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
3401  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
3402  ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
3403  ,yFactor,filteredImage,filteredColumns,filteredRows
      // the filter and window types must be forwarded as well: the vertical
      // filter casts both to ResizeWeightingFunctionType for every tap weight
3404  ,resizeFilterType,resizeWindowType
3405  ,resizeFilterCubicCoefficients
3406  ,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
3407  ,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
3408  ,outputPixelCache,densityCache,gammaCache);
3409  }
3410  )
3411 
3412 /*
3413 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3414 % %
3415 % %
3416 % %
3417 % U n s h a r p M a s k %
3418 % %
3419 % %
3420 % %
3421 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3422 */
3423 
3424  STRINGIFY(
3425  __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
3426  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3427  const unsigned int imageColumns, const unsigned int imageRows,
3428  __local float4* cachedData, __local float* cachedFilter,
3429  const ChannelType channel, const __global float *filter, const unsigned int width,
3430  const float gain, const float threshold)
3431  {
      // Column pass of the unsharp mask.  Each work-item produces one output
      // pixel at column get_group_id(0), row get_global_id(1): it convolves a
      // vertical window of `width` samples of blurRowData with `filter`, then
      // sharpens inputImage against that blurred value using gain/threshold.
      //
      // cachedData must hold get_local_size(1)+2*radius float4 entries and
      // cachedFilter must hold `width` floats.  `channel` is not referenced
      // anywhere in this kernel.
      // NOTE(review): blurRowData is presumably the output of a preceding
      // row-blur pass -- confirm against the host code.
3432  const unsigned int radius = (width-1)/2;
3433 
3434  // cache the pixel shared by the workgroup
3435  const int groupX = get_group_id(0);
3436  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3437  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
3438 
      // window entirely inside the image: one strided bulk copy of this column
      // (stride imageColumns) into local memory
3439  if (groupStartY >= 0
3440  && groupStopY < imageRows) {
3441  event_t e = async_work_group_strided_copy(cachedData
3442  ,blurRowData+groupStartY*imageColumns+groupX
3443  ,groupStopY-groupStartY,imageColumns,0);
3444  wait_group_events(1,&e);
3445  }
3446  else {
      // window touches the top or bottom edge: the work-items copy the column
      // cooperatively, mapping each row index through ClampToCanvas (a helper
      // that is not visible in this section of the file)
3447  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3448  cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
3449  }
      // the branch condition depends only on group ids and sizes, so every
      // work-item of the group reaches this barrier together
3450  barrier(CLK_LOCAL_MEM_FENCE);
3451  }
3452  // cache the filter as well
3453  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3454  wait_group_events(1,&e);
3455 
3456  // only do the work if this is not a patched item
3457  //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
3458  const int cy = get_global_id(1);
3459 
3460  if (cy < imageRows) {
3461  float4 blurredPixel = (float4) 0.0f;
3462 
3463  int i = 0;
3464 
      // UFACTOR is the unroll factor of the main tap loop; it defaults to 8
      // unless it has already been defined
3465  \n #ifndef UFACTOR \n
3466  \n #define UFACTOR 8 \n
3467  \n #endif \n
3468 
      // main loop: consume UFACTOR taps per iteration while more than UFACTOR
      // taps remain; the loop after it handles whatever is left
3469  for ( ; i+UFACTOR < width; )
3470  {
3471  \n #pragma unroll UFACTOR \n
3472  for (int j=0; j < UFACTOR; j++, i++)
3473  {
3474  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3475  }
3476  }
3477 
3478  for ( ; i < width; i++)
3479  {
3480  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3481  }
3482 
      // ClampToQuantum is applied per channel and the result is floored
3483  blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
3484  ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
3485 
3486  float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3487  float4 outputPixel = inputImagePixel - blurredPixel;
3488 
3489  float quantumThreshold = QuantumRange*threshold;
3490 
      // per channel: where |2*difference| is below the threshold the input
      // value is kept unchanged, otherwise it becomes input + difference*gain
3491  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3492  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
3493 
3494  //write back
3495  filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
3496  ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
3497 
3498  }
3499  }
3500 
3501  __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
3502  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3503  const unsigned int imageColumns, const unsigned int imageRows,
3504  __local float4* cachedData, __local float* cachedFilter,
3505  const ChannelType channel, const __global float *filter, const unsigned int width,
3506  const float gain, const float threshold,
3507  const unsigned int offsetRows, const unsigned int section)
3508  {
      // Sectioned variant of the unsharp-mask column pass.  It performs the
      // same vertical convolution and gain/threshold sharpening as
      // UnsharpMaskBlurColumn, but first advances blurRowData by
      // radius*section rows and, before the final read and write, advances
      // inputImage and filtered_im by offsetRows rows.
      //
      // cachedData must hold get_local_size(1)+2*radius float4 entries and
      // cachedFilter must hold `width` floats.  `channel` is not referenced
      // anywhere in this kernel.
      // NOTE(review): this looks like processing the image in horizontal
      // bands with a halo of `radius` rows, with imageRows being the band
      // height -- confirm against the host code.
3509  const unsigned int radius = (width-1)/2;
3510 
3511  // cache the pixel shared by the workgroup
3512  const int groupX = get_group_id(0);
3513  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3514  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
3515 
3516  // offset the input data
3517  blurRowData += imageColumns * radius * section;
3518 
      // window entirely inside [0, imageRows): one strided bulk copy of this
      // column (stride imageColumns) into local memory
3519  if (groupStartY >= 0
3520  && groupStopY < imageRows) {
3521  event_t e = async_work_group_strided_copy(cachedData
3522  ,blurRowData+groupStartY*imageColumns+groupX
3523  ,groupStopY-groupStartY,imageColumns,0);
3524  wait_group_events(1,&e);
3525  }
3526  else {
      // window crosses an edge: cooperative element-wise copy, with each row
      // index mapped through ClampToCanvasWithHalo (a helper that is not
      // visible in this section of the file)
3527  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3528  int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
3529  cachedData[i] = *(blurRowData + pos);
3530  }
      // the branch condition depends only on group ids and sizes, so every
      // work-item of the group reaches this barrier together
3531  barrier(CLK_LOCAL_MEM_FENCE);
3532  }
3533  // cache the filter as well
3534  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3535  wait_group_events(1,&e);
3536 
3537  // only do the work if this is not a patched item
3538  //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
3539  const int cy = get_global_id(1);
3540 
3541  if (cy < imageRows) {
3542  float4 blurredPixel = (float4) 0.0f;
3543 
3544  int i = 0;
3545 
      // UFACTOR is the unroll factor of the main tap loop; it defaults to 8
      // unless it has already been defined
3546  \n #ifndef UFACTOR \n
3547  \n #define UFACTOR 8 \n
3548  \n #endif \n
3549 
      // main loop: consume UFACTOR taps per iteration while more than UFACTOR
      // taps remain; the loop after it handles whatever is left
3550  for ( ; i+UFACTOR < width; )
3551  {
3552  \n #pragma unroll UFACTOR \n
3553  for (int j=0; j < UFACTOR; j++, i++)
3554  {
3555  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3556  }
3557  }
3558 
3559  for ( ; i < width; i++)
3560  {
3561  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3562  }
3563 
      // ClampToQuantum is applied per channel and the result is floored
3564  blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
3565  ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
3566 
3567  // offset the output data
3568  inputImage += imageColumns * offsetRows;
3569  filtered_im += imageColumns * offsetRows;
3570 
3571  float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3572  float4 outputPixel = inputImagePixel - blurredPixel;
3573 
3574  float quantumThreshold = QuantumRange*threshold;
3575 
      // per channel: where |2*difference| is below the threshold the input
      // value is kept unchanged, otherwise it becomes input + difference*gain
3576  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3577  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
3578 
3579  //write back
3580  filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
3581  ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
3582 
3583  }
3584 
3585  }
3586  )
3587 
3588 
3589 
3590  STRINGIFY(
      /*
        UnsharpMask: single-kernel separable blur with optional sharpening.

        Pass 1 (before the barrier): every work-item filters horizontally, with
        mirror padding at the image borders, the rows y-radius, y-radius+
        get_local_size(1), ... below endRow, and stores the results in the
        local buffer `pixels`.  Together the work-group fills
        get_local_size(1)+2*radius rows of get_local_size(0) float4 entries, so
        `pixels` must be at least that large.

        Pass 2 (after the barrier): every work-item filters its column of
        `pixels` vertically with the same `filter`.

        If justBlur is 0 the blurred value is turned into a sharpened one using
        gain and threshold; otherwise the blurred value is written as is.  Only
        work-items with x < imageColumns and y < imageRows write a result.
      */
3591  __kernel void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
3592  __constant float *filter,
3593  const unsigned int width,
3594  const unsigned int imageColumns, const unsigned int imageRows,
3595  __local float4 *pixels,
3596  const float gain, const float threshold, const unsigned int justBlur)
3597  {
3598  const int x = get_global_id(0);
3599  const int y = get_global_id(1);
3600 
3601  const unsigned int radius = (width - 1) / 2;
3602 
3603  int row = y - radius;
3604  int baseRow = get_group_id(1) * get_local_size(1) - radius;
3605  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
3606 
3607  while (row < endRow) {
3608  int srcy = (row < 0) ? -row : row; // mirror pad
3609  srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
3610 
3611  float4 value = 0.0f;
3612 
3613  int ix = x - radius;
3614  int i = 0;
3615 
3616  while (i + 7 < width) {
3617  for (int j = 0; j < 8; ++j) { // unrolled
3618  int srcx = ix + j;
3619  srcx = (srcx < 0) ? -srcx : srcx;
3620  srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3621  value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
3622  }
3623  ix += 8;
3624  i += 8;
3625  }
3626 
3627  while (i < width) {
3628  int srcx = (ix < 0) ? -ix : ix; // mirror pad
3629  srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3630  value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
3631  ++i;
3632  ++ix;
3633  }
3634  pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
3635  row += get_local_size(1);
3636  }
3637 
3638 
      // all horizontally filtered rows must be in local memory before any
      // work-item starts the vertical pass
3639  barrier(CLK_LOCAL_MEM_FENCE);
3640 
3641 
3642  const int px = get_local_id(0);
3643  const int py = get_local_id(1);
3644  const int prp = get_local_size(0);
3645  float4 value = (float4)(0.0f);
3646 
3647  int i = 0;
      // each tap k of the unrolled group pairs filter[i + k] with row
      // py + i + k, exactly as the remainder loop below does one tap at a time
3648  while (i + 7 < width) { // unrolled
3649  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3650  value += (float4)(filter[i + 1]) * pixels[px + (py + i + 1) * prp];
3651  value += (float4)(filter[i + 2]) * pixels[px + (py + i + 2) * prp];
3652  value += (float4)(filter[i + 3]) * pixels[px + (py + i + 3) * prp];
3653  value += (float4)(filter[i + 4]) * pixels[px + (py + i + 4) * prp];
3654  value += (float4)(filter[i + 5]) * pixels[px + (py + i + 5) * prp];
3655  value += (float4)(filter[i + 6]) * pixels[px + (py + i + 6) * prp];
3656  value += (float4)(filter[i + 7]) * pixels[px + (py + i + 7) * prp];
3657  i += 8;
3658  }
3659  while (i < width) {
3660  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3661  ++i;
3662  }
3663 
      // work-items outside the image never write a result, so they must not
      // read im[] at an index past the end of the image either
3664  if ((justBlur == 0) && (x < imageColumns) && (y < imageRows)) { // apply sharpening
3665  float4 srcPixel = convert_float4(im[x + y * imageColumns]);
3666  float4 diff = srcPixel - value;
3667 
3668  float quantumThreshold = QuantumRange*threshold;
3669 
      // per channel: keep the source value where |2*diff| is below the
      // threshold, otherwise use source + diff*gain
3670  int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3671  value = select(srcPixel + diff * gain, srcPixel, mask);
3672  }
3673 
3674  if ((x < imageColumns) && (y < imageRows))
3675  filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
3676  }
3677  )
3678 
3679  ;
3680 
3681 #endif // MAGICKCORE_OPENCL_SUPPORT
3682 
3683 #if defined(__cplusplus) || defined(c_plusplus)
3684 }
3685 #endif
3686 
3687 #endif // _MAGICKCORE_ACCELERATE_PRIVATE_H
Definition: composite.h:91
Definition: composite.h:94
Definition: composite.h:65
Definition: colorspace.h:44
Definition: resize-private.h:31
Definition: colorspace.h:36
#define SigmaPoisson
Definition: resize-private.h:37
Definition: pixel.h:75
Definition: statistic.h:116
Definition: resize-private.h:33
Definition: magick-type.h:198
Definition: composite.h:75
Definition: pixel.h:72
Definition: colorspace.h:40
static void MagickPixelCompositeBlend(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:138
Definition: composite.h:31
Definition: composite.h:93
Definition: colorspace.h:45
Definition: colorspace.h:33
Definition: composite.h:80
#define SigmaRandom
Definition: composite.h:33
Definition: resize-private.h:40
Definition: composite.h:90
Definition: resize-private.h:29
static MagickRealType ColorDodge(const MagickRealType Sca, const MagickRealType Sa, const MagickRealType Dca, const MagickRealType Da)
Definition: composite.c:293
Definition: fx.h:34
PixelIntensityMethod
Definition: pixel.h:67
Definition: magick-type.h:187
Definition: composite.h:95
Definition: colorspace.h:59
Definition: magick-type.h:193
Definition: composite.h:59
Definition: composite.h:89
Definition: magick-type.h:182
Definition: composite.h:27
Definition: colorspace.h:41
Definition: colorspace.h:37
static MagickRealType RoundToUnity(const MagickRealType value)
Definition: composite-private.h:33
Definition: composite.h:35
Definition: composite.h:87
#define MagickPI
Definition: image-private.h:36
Definition: colorspace.h:58
Definition: colorspace.h:50
static MagickRealType Hanning(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:287
Definition: colorspace.h:47
Definition: fx.h:29
float MagickRealType
Definition: magick-type.h:76
Definition: statistic.h:115
Definition: colorspace.h:31
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
Definition: composite.h:53
Definition: colorspace.h:35
Definition: resize-private.h:38
Definition: pixel.h:77
#define MagickEpsilon
Definition: magick-type.h:139
#define SigmaLaplacian
MagickExport void ConvertRGBToHSL(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1127
Definition: magick-type.h:188
Definition: colorspace.h:48
Definition: statistic.h:117
Definition: magick-type.h:200
NoiseType
Definition: fx.h:27
Definition: colorspace.h:52
Definition: composite.h:47
static MagickRealType Hamming(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:301
Definition: resize-private.h:41
Definition: composite.h:73
Definition: composite.h:29
Definition: composite.h:72
Definition: composite.h:42
Definition: colorspace.h:43
#define SigmaUniform
Definition: composite.h:97
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:3550
Definition: colorspace.h:34
Definition: colorspace.h:57
Definition: resize-private.h:30
static double PerceptibleReciprocal(const double x)
Definition: pixel-accessor.h:124
Definition: composite.h:54
#define GetPixelAlpha(pixel)
Definition: pixel-accessor.h:36
Definition: composite.h:38
Definition: composite.h:68
Definition: composite.h:96
Definition: magick-type.h:184
Definition: composite.h:71
Definition: resize-private.h:32
Definition: composite.h:55
Definition: composite.h:56
Definition: fx.h:31
#define SigmaGaussian
Definition: composite.h:69
Definition: pixel.h:71
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:941
Definition: colorspace.h:38
Definition: pixel.h:70
Definition: composite.h:86
Definition: resize-private.h:36
Definition: colorspace.h:30
#define SigmaMultiplicativeGaussian
Definition: composite.h:49
Definition: composite.h:44
#define TauGaussian
MagickExport void ConvertRGBToHSB(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *brightness)
Definition: gem.c:994
Definition: magick-type.h:186
static void Contrast(const int sign, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:913
Definition: magick-type.h:201
Definition: composite.h:46
Definition: statistic.h:113
Definition: composite.h:28
Definition: magick-type.h:181
Definition: magick-type.h:190
Definition: colorspace.h:54
Definition: magick-type.h:189
Definition: resize-private.h:39
Definition: composite.h:78
Definition: resize-private.h:34
#define QuantumScale
Definition: magick-type.h:142
Definition: colorspace.h:55
Definition: fx.h:33
MagickExport double GetPseudoRandomValue(RandomInfo *random_info)
Definition: random.c:612
Definition: composite.h:62
Definition: colorspace.h:39
#define MaxMap
Definition: magick-type.h:70
Definition: magick-type.h:197
Definition: composite.h:98
Definition: composite.h:39
static void CompositeColorDodge(const MagickPixelPacket *p, const MagickPixelPacket *q, MagickPixelPacket *composite)
Definition: composite.c:330
MagickExport void ConvertHSBToRGB(const double hue, const double saturation, const double brightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:284
Definition: composite.h:45
ChannelType
Definition: magick-type.h:177
Definition: composite.h:70
Definition: colorspace.h:46
Definition: resize-private.h:28
Definition: composite.h:81
Definition: composite.h:41
Definition: composite.h:52
Definition: pixel.h:69
Definition: colorspace.h:49
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:460
Definition: composite.h:77
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:87
Definition: colorspace.h:53
Definition: composite.h:61
Definition: magick-type.h:183
static void MagickPixelCompositePlus(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:111
Definition: composite.h:76
Definition: magick-type.h:179
Definition: colorspace.h:28
Definition: resize-private.h:42
Definition: composite.h:50
Definition: composite.h:36
Definition: composite.h:43
MagickExport MagickRealType GetPixelIntensity(const Image *image, const PixelPacket *magick_restrict pixel)
Definition: pixel.c:2285
static MagickRealType Sinc(const MagickRealType, const ResizeFilter *)
Definition: composite.h:37
Definition: composite.h:60
Definition: statistic.h:114
ResizeWeightingFunctionType
Definition: resize-private.h:25
static MagickRealType Blackman(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:148
Definition: colorspace.h:56
ColorspaceType
Definition: colorspace.h:25
Definition: composite.h:32
Definition: colorspace.h:29
Definition: composite.h:88
Definition: colorspace.h:42
#define SigmaImpulse
Definition: composite.h:48
Definition: composite.h:64
Definition: magick-type.h:185
Definition: colorspace.h:51
CompositeOperator
Definition: composite.h:25
Definition: composite.h:79
Definition: magick-type.h:192
Definition: colorspace.h:32
Definition: pixel.h:78
Definition: composite.h:66
Definition: composite.h:30
Definition: colorspace.h:60
Definition: magick-type.h:180
Definition: composite.h:63
Definition: composite.h:58
Definition: composite.h:92
Definition: magick-type.h:199
Definition: composite.h:34
static MagickRealType CubicBC(const MagickRealType x, const ResizeFilter *resize_filter)
Definition: resize.c:210
Definition: resize-private.h:27
Definition: composite.h:74
Definition: colorspace.h:27
MagickFunction
Definition: statistic.h:111
Definition: fx.h:30
Definition: composite.h:40
Definition: composite.h:67
Definition: resize-private.h:35
#define QuantumRange
Definition: magick-type.h:94
static MagickRealType Triangle(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:514
Definition: fx.h:35
Definition: pixel.h:73
Definition: composite.h:51
Definition: fx.h:32
Definition: magick-type.h:191
Definition: fx.h:36
Definition: composite.h:57