MagickCore  6.8.9
accelerate-private.h
Go to the documentation of this file.
1 /*
2  Copyright 1999-2014 ImageMagick Studio LLC, a non-profit organization
3  dedicated to making software imaging solutions freely available.
4 
5  You may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.imagemagick.org/script/license.php
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 
16  MagickCore private methods for accelerated functions.
17 */
18 
19 #ifndef _MAGICKCORE_ACCELERATE_PRIVATE_H
20 #define _MAGICKCORE_ACCELERATE_PRIVATE_H
21 
22 #if defined(__cplusplus) || defined(c_plusplus)
23 extern "C" {
24 #endif
25 
26 
27 #if defined(MAGICKCORE_OPENCL_SUPPORT)
28 
29 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
30 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
31 #define OPENCL_ELSE() "\n #""else " " \n"
32 #define OPENCL_ENDIF() "\n #""endif " " \n"
33 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
34 #define STRINGIFY(...) #__VA_ARGS__ "\n"
35 
/*
  Four-component pixel whose member order follows the build-time pixel
  layout: red/green/blue/opacity when MAGICK_PIXEL_RGBA is defined,
  blue/green/red/opacity when MAGICK_PIXEL_BGRA is defined.

  NOTE(review): the member type line was missing from this listing (the
  member lists had no type at all); MagickRealType is restored here --
  confirm against the upstream header.
*/
typedef struct _FloatPixelPacket
{
#ifdef MAGICK_PIXEL_RGBA
  MagickRealType
    red,
    green,
    blue,
    opacity;
#endif
#ifdef MAGICK_PIXEL_BGRA
  MagickRealType
    blue,
    green,
    red,
    opacity;
#endif
} FloatPixelPacket;
53 
54 const char* accelerateKernels =
55  STRINGIFY(
56  typedef enum
57  {
59  RedChannel = 0x0001,
60  GrayChannel = 0x0001,
61  CyanChannel = 0x0001,
62  GreenChannel = 0x0002,
63  MagentaChannel = 0x0002,
64  BlueChannel = 0x0004,
65  YellowChannel = 0x0004,
66  AlphaChannel = 0x0008,
67  OpacityChannel = 0x0008,
68  MatteChannel = 0x0008, /* deprecated */
69  BlackChannel = 0x0020,
70  IndexChannel = 0x0020,
71  CompositeChannels = 0x002F,
72  AllChannels = 0x7ffffff,
73  /*
74  Special purpose channel types.
75  */
76  TrueAlphaChannel = 0x0040, /* extract actual alpha channel from opacity */
77  RGBChannels = 0x0080, /* set alpha from grayscale mask in RGB */
78  GrayChannels = 0x0080,
79  SyncChannels = 0x0100, /* channels should be modified equally */
81  } ChannelType;
82  )
83 
84  OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
85 
86  STRINGIFY(
87  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
88  {
89  return((CLQuantum) value);
90  }
91  )
92 
93  OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
94 
95  STRINGIFY(
96  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
97  {
98  return((CLQuantum) (257.0f*value));
99  }
100  )
101 
102  OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
103 
104  STRINGIFY(
105  inline CLQuantum ScaleCharToQuantum(const unsigned char value)
106  {
107  return((CLQuantum) (16843009.0*value));
108  }
109  )
110 
111  OPENCL_ENDIF()
112 
113 
114  STRINGIFY(
115  inline int ClampToCanvas(const int offset,const int range)
116  {
117  return clamp(offset, (int)0, range-1);
118  }
119  )
120 
121  STRINGIFY(
122  inline int ClampToCanvasWithHalo(const int offset,const int range, const int edge, const int section)
123  {
124  return clamp(offset, section?(int)(0-edge):(int)0, section?(range-1):(range-1+edge));
125  }
126  )
127 
128  STRINGIFY(
129  inline CLQuantum ClampToQuantum(const float value)
130  {
131  return (CLQuantum) (clamp(value, 0.0f, (float) QuantumRange) + 0.5f);
132  }
133  )
134 
135  STRINGIFY(
136  inline uint ScaleQuantumToMap(CLQuantum value)
137  {
138  if (value >= (CLQuantum) MaxMap)
139  return ((uint)MaxMap);
140  else
141  return ((uint)value);
142  }
143  )
144 
145  STRINGIFY(
146  inline float PerceptibleReciprocal(const float x)
147  {
148  float sign = x < (float) 0.0 ? (float) -1.0 : (float) 1.0;
149  return((sign*x) >= MagickEpsilon ? (float) 1.0/x : sign*((float) 1.0/MagickEpsilon));
150  }
151  )
152 
153  OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
154 
155  STRINGIFY(
156  typedef enum
157  {
169  )
170 
171  STRINGIFY(
172  typedef enum
173  {
175  RGBColorspace, /* Linear RGB colorspace */
176  GRAYColorspace, /* greyscale (linear) image (faked 1 channel) */
186  CMYKColorspace, /* negated linear RGB with black separated */
187  sRGBColorspace, /* Default: non-linear sRGB colorspace */
196  CMYColorspace, /* negated linear RGB colorspace */
199  LCHColorspace, /* alias for LCHuv */
201  LCHabColorspace, /* Cylindrical (Polar) Lab */
202  LCHuvColorspace, /* Cylindrical (Polar) Luv */
205  HSVColorspace, /* alias for HSB */
208  } ColorspaceType;
209  )
210 
211  STRINGIFY(
212  inline float RoundToUnity(const float value)
213  {
214  return clamp(value,0.0f,1.0f);
215  }
216  )
217 
218  STRINGIFY(
219 
220  inline CLQuantum getBlue(CLPixelType p) { return p.x; }
221  inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
222  inline float getBlueF4(float4 p) { return p.x; }
223  inline void setBlueF4(float4* p, float value) { (*p).x = value; }
224 
225  inline CLQuantum getGreen(CLPixelType p) { return p.y; }
226  inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
227  inline float getGreenF4(float4 p) { return p.y; }
228  inline void setGreenF4(float4* p, float value) { (*p).y = value; }
229 
230  inline CLQuantum getRed(CLPixelType p) { return p.z; }
231  inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
232  inline float getRedF4(float4 p) { return p.z; }
233  inline void setRedF4(float4* p, float value) { (*p).z = value; }
234 
235  inline CLQuantum getOpacity(CLPixelType p) { return p.w; }
236  inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
237  inline float getOpacityF4(float4 p) { return p.w; }
238  inline void setOpacityF4(float4* p, float value) { (*p).w = value; }
239 
240  inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
241 
242  inline float GetPixelIntensity(const int method, const int colorspace, CLPixelType p)
243  {
244  float red = getRed(p);
245  float green = getGreen(p);
246  float blue = getBlue(p);
247 
248  float intensity;
249 
250  if (colorspace == GRAYColorspace)
251  return red;
252 
253  switch (method)
254  {
256  {
257  intensity=(red+green+blue)/3.0;
258  break;
259  }
261  {
262  intensity=max(max(red,green),blue);
263  break;
264  }
266  {
267  intensity=(min(min(red,green),blue)+
268  max(max(red,green),blue))/2.0;
269  break;
270  }
272  {
273  intensity=(float) (((float) red*red+green*green+blue*blue)/
274  (3.0*QuantumRange));
275  break;
276  }
278  {
279  /*
280  if (image->colorspace == RGBColorspace)
281  {
282  red=EncodePixelGamma(red);
283  green=EncodePixelGamma(green);
284  blue=EncodePixelGamma(blue);
285  }
286  */
287  intensity=0.298839*red+0.586811*green+0.114350*blue;
288  break;
289  }
291  {
292  /*
293  if (image->colorspace == sRGBColorspace)
294  {
295  red=DecodePixelGamma(red);
296  green=DecodePixelGamma(green);
297  blue=DecodePixelGamma(blue);
298  }
299  */
300  intensity=0.298839*red+0.586811*green+0.114350*blue;
301  break;
302  }
304  default:
305  {
306  /*
307  if (image->colorspace == RGBColorspace)
308  {
309  red=EncodePixelGamma(red);
310  green=EncodePixelGamma(green);
311  blue=EncodePixelGamma(blue);
312  }
313  */
314  intensity=0.212656*red+0.715158*green+0.072186*blue;
315  break;
316  }
318  {
319  /*
320  if (image->colorspace == sRGBColorspace)
321  {
322  red=DecodePixelGamma(red);
323  green=DecodePixelGamma(green);
324  blue=DecodePixelGamma(blue);
325  }
326  */
327  intensity=0.212656*red+0.715158*green+0.072186*blue;
328  break;
329  }
331  {
332  intensity=(float) (sqrt((float) red*red+green*green+blue*blue)/
333  sqrt(3.0));
334  break;
335  }
336  }
337 
338  return intensity;
339 
340  }
341  )
342 
343  STRINGIFY(
344  __kernel
345  void ConvolveOptimized(const __global CLPixelType *input, __global CLPixelType *output,
346  const unsigned int imageWidth, const unsigned int imageHeight,
347  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
348  const uint matte, const ChannelType channel, __local CLPixelType *pixelLocalCache, __local float* filterCache) {
349 
350  int2 blockID;
351  blockID.x = get_group_id(0);
352  blockID.y = get_group_id(1);
353 
354  // image area processed by this workgroup
355  int2 imageAreaOrg;
356  imageAreaOrg.x = blockID.x * get_local_size(0);
357  imageAreaOrg.y = blockID.y * get_local_size(1);
358 
359  int2 midFilterDimen;
360  midFilterDimen.x = (filterWidth-1)/2;
361  midFilterDimen.y = (filterHeight-1)/2;
362 
363  int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
364 
365  // dimension of the local cache
366  int2 cachedAreaDimen;
367  cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
368  cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
369 
370  // cache the pixels accessed by this workgroup in local memory
371  int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
372  int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
373  int groupSize = get_local_size(0) * get_local_size(1);
374  for (int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
375 
376  int2 cachedAreaIndex;
377  cachedAreaIndex.x = i % cachedAreaDimen.x;
378  cachedAreaIndex.y = i / cachedAreaDimen.x;
379 
380  int2 imagePixelIndex;
381  imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
382 
383  // only support EdgeVirtualPixelMethod through ClampToCanvas
384  // TODO: implement other virtual pixel method
385  imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
386  imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
387 
388  pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
389  }
390 
391  // cache the filter
392  for (int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
393  filterCache[i] = filter[i];
394  }
395  barrier(CLK_LOCAL_MEM_FENCE);
396 
397 
398  int2 imageIndex;
399  imageIndex.x = imageAreaOrg.x + get_local_id(0);
400  imageIndex.y = imageAreaOrg.y + get_local_id(1);
401 
402  // if out-of-range, stops here and quit
403  if (imageIndex.x >= imageWidth
404  || imageIndex.y >= imageHeight) {
405  return;
406  }
407 
408  int filterIndex = 0;
409  float4 sum = (float4)0.0f;
410  float gamma = 0.0f;
411  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
412  int cacheIndexY = get_local_id(1);
413  for (int j = 0; j < filterHeight; j++) {
414  int cacheIndexX = get_local_id(0);
415  for (int i = 0; i < filterWidth; i++) {
416  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
417  float f = filterCache[filterIndex];
418 
419  sum.x += f * p.x;
420  sum.y += f * p.y;
421  sum.z += f * p.z;
422  sum.w += f * p.w;
423 
424  gamma += f;
425  filterIndex++;
426  cacheIndexX++;
427  }
428  cacheIndexY++;
429  }
430  }
431  else {
432  int cacheIndexY = get_local_id(1);
433  for (int j = 0; j < filterHeight; j++) {
434  int cacheIndexX = get_local_id(0);
435  for (int i = 0; i < filterWidth; i++) {
436 
437  CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
438  float alpha = QuantumScale*(QuantumRange-p.w);
439  float f = filterCache[filterIndex];
440  float g = alpha * f;
441 
442  sum.x += g*p.x;
443  sum.y += g*p.y;
444  sum.z += g*p.z;
445  sum.w += f*p.w;
446 
447  gamma += g;
448  filterIndex++;
449  cacheIndexX++;
450  }
451  cacheIndexY++;
452  }
453  gamma = PerceptibleReciprocal(gamma);
454  sum.xyz = gamma*sum.xyz;
455  }
456  CLPixelType outputPixel;
457  outputPixel.x = ClampToQuantum(sum.x);
458  outputPixel.y = ClampToQuantum(sum.y);
459  outputPixel.z = ClampToQuantum(sum.z);
460  outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
461 
462  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
463  }
464  )
465 
466  STRINGIFY(
467  __kernel
468  void Convolve(const __global CLPixelType *input, __global CLPixelType *output,
469  const uint imageWidth, const uint imageHeight,
470  __constant float *filter, const unsigned int filterWidth, const unsigned int filterHeight,
471  const uint matte, const ChannelType channel) {
472 
473  int2 imageIndex;
474  imageIndex.x = get_global_id(0);
475  imageIndex.y = get_global_id(1);
476 
477  /*
478  unsigned int imageWidth = get_global_size(0);
479  unsigned int imageHeight = get_global_size(1);
480  */
481  if (imageIndex.x >= imageWidth
482  || imageIndex.y >= imageHeight)
483  return;
484 
485  int2 midFilterDimen;
486  midFilterDimen.x = (filterWidth-1)/2;
487  midFilterDimen.y = (filterHeight-1)/2;
488 
489  int filterIndex = 0;
490  float4 sum = (float4)0.0f;
491  float gamma = 0.0f;
492  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
493  for (int j = 0; j < filterHeight; j++) {
494  int2 inputPixelIndex;
495  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
496  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
497  for (int i = 0; i < filterWidth; i++) {
498  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
499  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
500 
501  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
502  float f = filter[filterIndex];
503 
504  sum.x += f * p.x;
505  sum.y += f * p.y;
506  sum.z += f * p.z;
507  sum.w += f * p.w;
508 
509  gamma += f;
510 
511  filterIndex++;
512  }
513  }
514  }
515  else {
516 
517  for (int j = 0; j < filterHeight; j++) {
518  int2 inputPixelIndex;
519  inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
520  inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
521  for (int i = 0; i < filterWidth; i++) {
522  inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
523  inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
524 
525  CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
526  float alpha = QuantumScale*(QuantumRange-p.w);
527  float f = filter[filterIndex];
528  float g = alpha * f;
529 
530  sum.x += g*p.x;
531  sum.y += g*p.y;
532  sum.z += g*p.z;
533  sum.w += f*p.w;
534 
535  gamma += g;
536 
537 
538  filterIndex++;
539  }
540  }
541  gamma = PerceptibleReciprocal(gamma);
542  sum.xyz = gamma*sum.xyz;
543  }
544 
545  CLPixelType outputPixel;
546  outputPixel.x = ClampToQuantum(sum.x);
547  outputPixel.y = ClampToQuantum(sum.y);
548  outputPixel.z = ClampToQuantum(sum.z);
549  outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
550 
551  output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
552  }
553  )
554 
555  STRINGIFY(
556  typedef enum
557  {
563  } MagickFunction;
564  )
565 
566  STRINGIFY(
567 
568  /*
569  apply FunctionImageChannel(braightness-contrast)
570  */
571  CLPixelType ApplyFunction(CLPixelType pixel,const MagickFunction function,
572  const unsigned int number_parameters,
573  __constant float *parameters)
574  {
575  float4 result = (float4) 0.0f;
576  switch (function)
577  {
578  case PolynomialFunction:
579  {
580  for (unsigned int i=0; i < number_parameters; i++)
581  result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
582  result *= (float4)QuantumRange;
583  break;
584  }
585  case SinusoidFunction:
586  {
587  float freq,phase,ampl,bias;
588  freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
589  phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
590  ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
591  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
592  result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
593  (freq*QuantumScale*(float)pixel.x + phase/360.0f)) + bias);
594  result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
595  (freq*QuantumScale*(float)pixel.y + phase/360.0f)) + bias);
596  result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
597  (freq*QuantumScale*(float)pixel.z + phase/360.0f)) + bias);
598  result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
599  (freq*QuantumScale*(float)pixel.w + phase/360.0f)) + bias);
600  break;
601  }
602  case ArcsinFunction:
603  {
604  float width,range,center,bias;
605  width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
606  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
607  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
608  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
609 
610  result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
611  result.x = range/MagickPI*asin(result.x)+bias;
612  result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
613  result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
614 
615  result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
616  result.y = range/MagickPI*asin(result.y)+bias;
617  result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
618  result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
619 
620  result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
621  result.z = range/MagickPI*asin(result.z)+bias;
622  result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
623  result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
624 
625 
626  result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
627  result.w = range/MagickPI*asin(result.w)+bias;
628  result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
629  result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
630 
631  result *= (float4)QuantumRange;
632  break;
633  }
634  case ArctanFunction:
635  {
636  float slope,range,center,bias;
637  slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
638  center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
639  range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
640  bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
641  result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
642  result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
643  break;
644  }
645  case UndefinedFunction:
646  break;
647  }
648  return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
649  ClampToQuantum(result.z), ClampToQuantum(result.w));
650  }
651  )
652 
653  STRINGIFY(
654  /*
655  Improve brightness / contrast of the image
656  channel : define which channel is improved
657  function : the function called to enchance the brightness contrast
658  number_parameters : numbers of parameters
659  parameters : the parameter
660  */
661  __kernel void FunctionImage(__global CLPixelType *im,
662  const ChannelType channel, const MagickFunction function,
663  const unsigned int number_parameters, __constant float *parameters)
664  {
665  const int x = get_global_id(0);
666  const int y = get_global_id(1);
667  const int columns = get_global_size(0);
668  const int c = x + y * columns;
669  im[c] = ApplyFunction(im[c], function, number_parameters, parameters);
670  }
671  )
672 
673  STRINGIFY(
674  /*
675  */
676  __kernel void Stretch(__global CLPixelType * restrict im,
677  const ChannelType channel,
678  __global CLPixelType * restrict stretch_map,
679  const float4 white, const float4 black)
680  {
681  const int x = get_global_id(0);
682  const int y = get_global_id(1);
683  const int columns = get_global_size(0);
684  const int c = x + y * columns;
685 
686  uint ePos;
687  CLPixelType oValue, eValue;
688  CLQuantum red, green, blue, opacity;
689 
690  //read from global
691  oValue=im[c];
692 
693  if ((channel & RedChannel) != 0)
694  {
695  if (getRedF4(white) != getRedF4(black))
696  {
697  ePos = ScaleQuantumToMap(getRed(oValue));
698  eValue = stretch_map[ePos];
699  red = getRed(eValue);
700  }
701  }
702 
703  if ((channel & GreenChannel) != 0)
704  {
705  if (getGreenF4(white) != getGreenF4(black))
706  {
707  ePos = ScaleQuantumToMap(getGreen(oValue));
708  eValue = stretch_map[ePos];
709  green = getGreen(eValue);
710  }
711  }
712 
713  if ((channel & BlueChannel) != 0)
714  {
715  if (getBlueF4(white) != getBlueF4(black))
716  {
717  ePos = ScaleQuantumToMap(getBlue(oValue));
718  eValue = stretch_map[ePos];
719  blue = getBlue(eValue);
720  }
721  }
722 
723  if ((channel & OpacityChannel) != 0)
724  {
725  if (getOpacityF4(white) != getOpacityF4(black))
726  {
727  ePos = ScaleQuantumToMap(getOpacity(oValue));
728  eValue = stretch_map[ePos];
729  opacity = getOpacity(eValue);
730  }
731  }
732 
733  //write back
734  im[c]=(CLPixelType)(blue, green, red, opacity);
735 
736  }
737  )
738 
739 
740  STRINGIFY(
741  /*
742  */
743  __kernel void Equalize(__global CLPixelType * restrict im,
744  const ChannelType channel,
745  __global CLPixelType * restrict equalize_map,
746  const float4 white, const float4 black)
747  {
748  const int x = get_global_id(0);
749  const int y = get_global_id(1);
750  const int columns = get_global_size(0);
751  const int c = x + y * columns;
752 
753  uint ePos;
754  CLPixelType oValue, eValue;
755  CLQuantum red, green, blue, opacity;
756 
757  //read from global
758  oValue=im[c];
759 
760  if ((channel & SyncChannels) != 0)
761  {
762  if (getRedF4(white) != getRedF4(black))
763  {
764  ePos = ScaleQuantumToMap(getRed(oValue));
765  eValue = equalize_map[ePos];
766  red = getRed(eValue);
767  ePos = ScaleQuantumToMap(getGreen(oValue));
768  eValue = equalize_map[ePos];
769  green = getRed(eValue);
770  ePos = ScaleQuantumToMap(getBlue(oValue));
771  eValue = equalize_map[ePos];
772  blue = getRed(eValue);
773  ePos = ScaleQuantumToMap(getOpacity(oValue));
774  eValue = equalize_map[ePos];
775  opacity = getRed(eValue);
776 
777  //write back
778  im[c]=(CLPixelType)(blue, green, red, opacity);
779  }
780 
781  }
782 
783  // for equalizing, we always need all channels?
784  // otherwise something more
785 
786  }
787  )
788 
789  STRINGIFY(
790  /*
791  */
792  __kernel void Histogram(__global CLPixelType * restrict im,
793  const ChannelType channel,
794  const int method,
795  const int colorspace,
796  __global uint4 * restrict histogram)
797  {
798  const int x = get_global_id(0);
799  const int y = get_global_id(1);
800  const int columns = get_global_size(0);
801  const int c = x + y * columns;
802  if ((channel & SyncChannels) != 0)
803  {
804  float intensity = GetPixelIntensity(method, colorspace,im[c]);
805  uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
806  atomic_inc((__global uint *)(&(histogram[pos]))+2); //red position
807  }
808  else
809  {
810  // for equalizing, we always need all channels?
811  // otherwise something more
812  }
813  }
814  )
815 
816  STRINGIFY(
817  /*
818  Reduce image noise and reduce detail levels by row
819  im: input pixels filtered_in filtered_im: output pixels
820  filter : convolve kernel width: convolve kernel size
821  channel : define which channel is blured
822  is_RGBA_BGRA : define the input is RGBA or BGRA
823  */
824  __kernel void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
825  const ChannelType channel, __constant float *filter,
826  const unsigned int width,
827  const unsigned int imageColumns, const unsigned int imageRows,
828  __local CLPixelType *temp)
829  {
830  const int x = get_global_id(0);
831  const int y = get_global_id(1);
832 
833  const int columns = imageColumns;
834 
835  const unsigned int radius = (width-1)/2;
836  const int wsize = get_local_size(0);
837  const unsigned int loadSize = wsize+width;
838 
839  //load chunk only for now
840  //event_t e = async_work_group_copy(temp+radius, im+x+y*columns, wsize, 0);
841  //wait_group_events(1,&e);
842 
843  //parallel load and clamp
844  /*
845  int count = 0;
846  for (int i=0; i < loadSize; i=i+wsize)
847  {
848  int currentX = x + wsize*(count++);
849 
850  int localId = get_local_id(0);
851 
852  if ((localId+i) > loadSize)
853  break;
854 
855  temp[localId+i] = im[y*columns+ClampToCanvas(currentX-radius, columns)];
856 
857  if (y==0 && get_group_id(0) == 0)
858  {
859  printf("(%d %d) temp %d load %d currentX %d\n", x, y, localId+i, ClampToCanvas(currentX-radius, columns), currentX);
860  }
861  }
862  */
863 
864  //group coordinate
865  const int groupX=get_local_size(0)*get_group_id(0);
866  const int groupY=get_local_size(1)*get_group_id(1);
867 
868  //parallel load and clamp
869  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
870  {
871  //int cx = ClampToCanvas(groupX+i, columns);
872  temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
873 
874  /*if (0 && y==0 && get_group_id(1) == 0)
875  {
876  printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
877  }*/
878  }
879 
880  // barrier
881  barrier(CLK_LOCAL_MEM_FENCE);
882 
883  // only do the work if this is not a patched item
884  if (get_global_id(0) < columns)
885  {
886  // compute
887  float4 result = (float4) 0;
888 
889  int i = 0;
890 
891  \n #ifndef UFACTOR \n
892  \n #define UFACTOR 8 \n
893  \n #endif \n
894 
895  for ( ; i+UFACTOR < width; )
896  {
897  \n #pragma unroll UFACTOR\n
898  for (int j=0; j < UFACTOR; j++, i++)
899  {
900  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
901  }
902  }
903 
904  for ( ; i < width; i++)
905  {
906  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
907  }
908 
909  result.x = ClampToQuantum(result.x);
910  result.y = ClampToQuantum(result.y);
911  result.z = ClampToQuantum(result.z);
912  result.w = ClampToQuantum(result.w);
913 
914  // write back to global
915  filtered_im[y*columns+x] = result;
916  }
917  }
918  )
919 
920  STRINGIFY(
921  /*
922  Reduce image noise and reduce detail levels by row
923  im: input pixels filtered_in filtered_im: output pixels
924  filter : convolve kernel width: convolve kernel size
925  channel : define which channel is blured
926  is_RGBA_BGRA : define the input is RGBA or BGRA
927  */
928  __kernel void BlurRowSection(__global CLPixelType *im, __global float4 *filtered_im,
929  const ChannelType channel, __constant float *filter,
930  const unsigned int width,
931  const unsigned int imageColumns, const unsigned int imageRows,
932  __local CLPixelType *temp,
933  const unsigned int offsetRows, const unsigned int section)
934  {
935  const int x = get_global_id(0);
936  const int y = get_global_id(1);
937 
938  const int columns = imageColumns;
939 
940  const unsigned int radius = (width-1)/2;
941  const int wsize = get_local_size(0);
942  const unsigned int loadSize = wsize+width;
943 
944  //group coordinate
945  const int groupX=get_local_size(0)*get_group_id(0);
946  const int groupY=get_local_size(1)*get_group_id(1);
947 
948  //offset the input data, assuming section is 0, 1
949  im += imageColumns * (offsetRows - radius * section);
950 
951  //parallel load and clamp
952  for (int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
953  {
954  //int cx = ClampToCanvas(groupX+i, columns);
955  temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
956 
957  /*if (0 && y==0 && get_group_id(1) == 0)
958  {
959  printf("(%d %d) temp %d load %d groupX %d\n", x, y, i, ClampToCanvas(groupX+i, columns), groupX);
960  }*/
961  }
962 
963  // barrier
964  barrier(CLK_LOCAL_MEM_FENCE);
965 
966  // only do the work if this is not a patched item
967  if (get_global_id(0) < columns)
968  {
969  // compute
970  float4 result = (float4) 0;
971 
972  int i = 0;
973 
974  \n #ifndef UFACTOR \n
975  \n #define UFACTOR 8 \n
976  \n #endif \n
977 
978  for ( ; i+UFACTOR < width; )
979  {
980  \n #pragma unroll UFACTOR\n
981  for (int j=0; j < UFACTOR; j++, i++)
982  {
983  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
984  }
985  }
986 
987  for ( ; i < width; i++)
988  {
989  result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
990  }
991 
992  result.x = ClampToQuantum(result.x);
993  result.y = ClampToQuantum(result.y);
994  result.z = ClampToQuantum(result.z);
995  result.w = ClampToQuantum(result.w);
996 
997  // write back to global
998  filtered_im[y*columns+x] = result;
999  }
1000 
1001  }
1002  )
1003 
1004  STRINGIFY(
1005  /*
1006  Reduce image noise and reduce detail levels by line
1007  im: input pixels filtered_in filtered_im: output pixels
1008  filter : convolve kernel width: convolve kernel size
1009  channel : define which channel is blured\
1010  is_RGBA_BGRA : define the input is RGBA or BGRA
1011  */
1012  __kernel void BlurColumn(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1013  const ChannelType channel, __constant float *filter,
1014  const unsigned int width,
1015  const unsigned int imageColumns, const unsigned int imageRows,
1016  __local float4 *temp)
1017  {
1018  const int x = get_global_id(0);
1019  const int y = get_global_id(1);
1020 
1021  //const int columns = get_global_size(0);
1022  //const int rows = get_global_size(1);
1023  const int columns = imageColumns;
1024  const int rows = imageRows;
1025 
1026  unsigned int radius = (width-1)/2;
1027  const int wsize = get_local_size(1);
1028  const unsigned int loadSize = wsize+width;
1029 
1030  //group coordinate
1031  const int groupX=get_local_size(0)*get_group_id(0);
1032  const int groupY=get_local_size(1)*get_group_id(1);
1033  //notice that get_local_size(0) is 1, so
1034  //groupX=get_group_id(0);
1035 
1036  //parallel load and clamp
1037  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1038  {
1039  temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
1040  }
1041 
1042  // barrier
1043  barrier(CLK_LOCAL_MEM_FENCE);
1044 
1045  // only do the work if this is not a patched item
1046  if (get_global_id(1) < rows)
1047  {
1048  // compute
1049  float4 result = (float4) 0;
1050 
1051  int i = 0;
1052 
1053  \n #ifndef UFACTOR \n
1054  \n #define UFACTOR 8 \n
1055  \n #endif \n
1056 
1057  for ( ; i+UFACTOR < width; )
1058  {
1059  \n #pragma unroll UFACTOR \n
1060  for (int j=0; j < UFACTOR; j++, i++)
1061  {
1062  result+=filter[i]*temp[i+get_local_id(1)];
1063  }
1064  }
1065 
1066  for ( ; i < width; i++)
1067  {
1068  result+=filter[i]*temp[i+get_local_id(1)];
1069  }
1070 
1071  result.x = ClampToQuantum(result.x);
1072  result.y = ClampToQuantum(result.y);
1073  result.z = ClampToQuantum(result.z);
1074  result.w = ClampToQuantum(result.w);
1075 
1076  // write back to global
1077  filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1078  }
1079 
1080  }
1081  )
1082 
1083 
1084  STRINGIFY(
1085  /*
1086  Reduce image noise and reduce detail levels by line
1087  im: input pixels filtered_in filtered_im: output pixels
1088  filter : convolve kernel width: convolve kernel size
1089  channel : define which channel is blured\
1090  is_RGBA_BGRA : define the input is RGBA or BGRA
1091  */
1092  __kernel void BlurColumnSection(const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1093  const ChannelType channel, __constant float *filter,
1094  const unsigned int width,
1095  const unsigned int imageColumns, const unsigned int imageRows,
1096  __local float4 *temp,
1097  const unsigned int offsetRows, const unsigned int section)
1098  {
1099  const int x = get_global_id(0);
1100  const int y = get_global_id(1);
1101 
1102  //const int columns = get_global_size(0);
1103  //const int rows = get_global_size(1);
1104  const int columns = imageColumns;
1105  const int rows = imageRows;
1106 
1107  unsigned int radius = (width-1)/2;
1108  const int wsize = get_local_size(1);
1109  const unsigned int loadSize = wsize+width;
1110 
1111  //group coordinate
1112  const int groupX=get_local_size(0)*get_group_id(0);
1113  const int groupY=get_local_size(1)*get_group_id(1);
1114  //notice that get_local_size(0) is 1, so
1115  //groupX=get_group_id(0);
1116 
1117  // offset the input data
1118  blurRowData += imageColumns * radius * section;
1119 
1120  //parallel load and clamp
1121  for (int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
1122  {
1123  int pos = ClampToCanvasWithHalo(i+groupY-radius, rows, radius, section) * columns + groupX;
1124  temp[i] = *(blurRowData+pos);
1125  }
1126 
1127  // barrier
1128  barrier(CLK_LOCAL_MEM_FENCE);
1129 
1130  // only do the work if this is not a patched item
1131  if (get_global_id(1) < rows)
1132  {
1133  // compute
1134  float4 result = (float4) 0;
1135 
1136  int i = 0;
1137 
1138  \n #ifndef UFACTOR \n
1139  \n #define UFACTOR 8 \n
1140  \n #endif \n
1141 
1142  for ( ; i+UFACTOR < width; )
1143  {
1144  \n #pragma unroll UFACTOR \n
1145  for (int j=0; j < UFACTOR; j++, i++)
1146  {
1147  result+=filter[i]*temp[i+get_local_id(1)];
1148  }
1149  }
1150  for ( ; i < width; i++)
1151  {
1152  result+=filter[i]*temp[i+get_local_id(1)];
1153  }
1154 
1155  result.x = ClampToQuantum(result.x);
1156  result.y = ClampToQuantum(result.y);
1157  result.z = ClampToQuantum(result.z);
1158  result.w = ClampToQuantum(result.w);
1159 
1160  // offset the output data
1161  filtered_im += imageColumns * offsetRows;
1162 
1163  // write back to global
1164  filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
1165  }
1166 
1167  }
1168  )
1169 
1170 
1171  STRINGIFY(
1172  __kernel void UnsharpMaskBlurColumn(const __global CLPixelType* inputImage,
1173  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1174  const unsigned int imageColumns, const unsigned int imageRows,
1175  __local float4* cachedData, __local float* cachedFilter,
1176  const ChannelType channel, const __global float *filter, const unsigned int width,
1177  const float gain, const float threshold)
1178  {
1179  const unsigned int radius = (width-1)/2;
1180 
1181  // cache the pixel shared by the workgroup
1182  const int groupX = get_group_id(0);
1183  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
1184  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
1185 
1186  if (groupStartY >= 0
1187  && groupStopY < imageRows) {
1188  event_t e = async_work_group_strided_copy(cachedData
1189  ,blurRowData+groupStartY*imageColumns+groupX
1190  ,groupStopY-groupStartY,imageColumns,0);
1191  wait_group_events(1,&e);
1192  }
1193  else {
1194  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
1195  cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
1196  }
1197  barrier(CLK_LOCAL_MEM_FENCE);
1198  }
1199  // cache the filter as well
1200  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
1201  wait_group_events(1,&e);
1202 
1203  // only do the work if this is not a patched item
1204  //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
1205  const int cy = get_global_id(1);
1206 
1207  if (cy < imageRows) {
1208  float4 blurredPixel = (float4) 0.0f;
1209 
1210  int i = 0;
1211 
1212  \n #ifndef UFACTOR \n
1213  \n #define UFACTOR 8 \n
1214  \n #endif \n
1215 
1216  for ( ; i+UFACTOR < width; )
1217  {
1218  \n #pragma unroll UFACTOR \n
1219  for (int j=0; j < UFACTOR; j++, i++)
1220  {
1221  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1222  }
1223  }
1224 
1225  for ( ; i < width; i++)
1226  {
1227  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1228  }
1229 
1230  blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
1231  ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
1232 
1233  float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
1234  float4 outputPixel = inputImagePixel - blurredPixel;
1235 
1236  float quantumThreshold = QuantumRange*threshold;
1237 
1238  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1239  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1240 
1241  //write back
1242  filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1243  ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
1244 
1245  }
1246  }
1247 
1248  __kernel void UnsharpMaskBlurColumnSection(const __global CLPixelType* inputImage,
1249  const __global float4 *blurRowData, __global CLPixelType *filtered_im,
1250  const unsigned int imageColumns, const unsigned int imageRows,
1251  __local float4* cachedData, __local float* cachedFilter,
1252  const ChannelType channel, const __global float *filter, const unsigned int width,
1253  const float gain, const float threshold,
1254  const unsigned int offsetRows, const unsigned int section)
1255  {
1256  const unsigned int radius = (width-1)/2;
1257 
1258  // cache the pixel shared by the workgroup
1259  const int groupX = get_group_id(0);
1260  const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
1261  const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
1262 
1263  // offset the input data
1264  blurRowData += imageColumns * radius * section;
1265 
1266  if (groupStartY >= 0
1267  && groupStopY < imageRows) {
1268  event_t e = async_work_group_strided_copy(cachedData
1269  ,blurRowData+groupStartY*imageColumns+groupX
1270  ,groupStopY-groupStartY,imageColumns,0);
1271  wait_group_events(1,&e);
1272  }
1273  else {
1274  for (int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
1275  int pos = ClampToCanvasWithHalo(groupStartY+i,imageRows, radius, section)*imageColumns+ groupX;
1276  cachedData[i] = *(blurRowData + pos);
1277  }
1278  barrier(CLK_LOCAL_MEM_FENCE);
1279  }
1280  // cache the filter as well
1281  event_t e = async_work_group_copy(cachedFilter,filter,width,0);
1282  wait_group_events(1,&e);
1283 
1284  // only do the work if this is not a patched item
1285  //const int cy = get_group_id(1)*get_local_size(1)+get_local_id(1);
1286  const int cy = get_global_id(1);
1287 
1288  if (cy < imageRows) {
1289  float4 blurredPixel = (float4) 0.0f;
1290 
1291  int i = 0;
1292 
1293  \n #ifndef UFACTOR \n
1294  \n #define UFACTOR 8 \n
1295  \n #endif \n
1296 
1297  for ( ; i+UFACTOR < width; )
1298  {
1299  \n #pragma unroll UFACTOR \n
1300  for (int j=0; j < UFACTOR; j++, i++)
1301  {
1302  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1303  }
1304  }
1305 
1306  for ( ; i < width; i++)
1307  {
1308  blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
1309  }
1310 
1311  blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
1312  ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
1313 
1314  // offset the output data
1315  inputImage += imageColumns * offsetRows;
1316  filtered_im += imageColumns * offsetRows;
1317 
1318  float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
1319  float4 outputPixel = inputImagePixel - blurredPixel;
1320 
1321  float quantumThreshold = QuantumRange*threshold;
1322 
1323  int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
1324  outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
1325 
1326  //write back
1327  filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
1328  ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
1329 
1330  }
1331 
1332  }
1333  )
1334 
1335 
1336 
1337  STRINGIFY(
1338  __kernel void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
1339  __constant float *filter,
1340  const unsigned int width,
1341  const unsigned int imageColumns, const unsigned int imageRows,
1342  __local float4 *pixels,
1343  const float gain, const float threshold, const unsigned int justBlur)
1344  {
1345  const int x = get_global_id(0);
1346  const int y = get_global_id(1);
1347 
1348  const unsigned int radius = (width - 1) / 2;
1349 
1350  int row = y - radius;
1351  int baseRow = get_group_id(1) * get_local_size(1) - radius;
1352  int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
1353 
1354  while (row < endRow) {
1355  int srcy = (row < 0) ? -row : row; // mirror pad
1356  srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
1357 
1358  float4 value = 0.0f;
1359 
1360  int ix = x - radius;
1361  int i = 0;
1362 
1363  while (i + 7 < width) {
1364  for (int j = 0; j < 8; ++j) { // unrolled
1365  int srcx = ix + j;
1366  srcx = (srcx < 0) ? -srcx : srcx;
1367  srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
1368  value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
1369  }
1370  ix += 8;
1371  i += 8;
1372  }
1373 
1374  while (i < width) {
1375  int srcx = (ix < 0) ? -ix : ix; // mirror pad
1376  srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
1377  value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
1378  ++i;
1379  ++ix;
1380  }
1381  pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
1382  row += get_local_size(1);
1383  }
1384 
1385 
1386  barrier(CLK_LOCAL_MEM_FENCE);
1387 
1388 
1389  const int px = get_local_id(0);
1390  const int py = get_local_id(1);
1391  const int prp = get_local_size(0);
1392  float4 value = (float4)(0.0f);
1393 
1394  int i = 0;
1395  while (i + 7 < width) { // unrolled
1396  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
1397  value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
1398  value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
1399  value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
1400  value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
1401  value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
1402  value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
1403  value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
1404  i += 8;
1405  }
1406  while (i < width) {
1407  value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
1408  ++i;
1409  }
1410 
1411  if (justBlur == 0) { // apply sharpening
1412  float4 srcPixel = convert_float4(im[x + y * imageColumns]);
1413  float4 diff = srcPixel - value;
1414 
1415  float quantumThreshold = QuantumRange*threshold;
1416 
1417  int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
1418  value = select(srcPixel + diff * gain, srcPixel, mask);
1419  }
1420 
1421  if ((x < imageColumns) && (y < imageRows))
1422  filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
1423  }
1424  )
1425 
1426 
1427  STRINGIFY(
1428 
1429  __kernel void HullPass1(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1430  , const unsigned int imageWidth, const unsigned int imageHeight
1431  , const int2 offset, const int polarity, const int matte) {
1432 
1433  int x = get_global_id(0);
1434  int y = get_global_id(1);
1435 
1436  CLPixelType v = inputImage[y*imageWidth+x];
1437 
1438  int2 neighbor;
1439  neighbor.y = y + offset.y;
1440  neighbor.x = x + offset.x;
1441 
1442  int2 clampedNeighbor;
1443  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1444  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1445 
1446  CLPixelType r = (clampedNeighbor.x == neighbor.x
1447  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1448  :(CLPixelType)0;
1449 
1450  int sv[4];
1451  sv[0] = (int)v.x;
1452  sv[1] = (int)v.y;
1453  sv[2] = (int)v.z;
1454  sv[3] = (int)v.w;
1455 
1456  int sr[4];
1457  sr[0] = (int)r.x;
1458  sr[1] = (int)r.y;
1459  sr[2] = (int)r.z;
1460  sr[3] = (int)r.w;
1461 
1462  if (polarity > 0) {
1463  \n #pragma unroll 4\n
1464  for (unsigned int i = 0; i < 4; i++) {
1465  sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1466  }
1467  }
1468  else {
1469  \n #pragma unroll 4\n
1470  for (unsigned int i = 0; i < 4; i++) {
1471  sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1472  }
1473 
1474  }
1475 
1476  v.x = (CLQuantum)sv[0];
1477  v.y = (CLQuantum)sv[1];
1478  v.z = (CLQuantum)sv[2];
1479 
1480  if (matte!=0)
1481  v.w = (CLQuantum)sv[3];
1482 
1483  outputImage[y*imageWidth+x] = v;
1484 
1485  }
1486 
1487 
1488  )
1489 
1490 
1491 
1492  STRINGIFY(
1493 
1494  __kernel void HullPass2(const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1495  , const unsigned int imageWidth, const unsigned int imageHeight
1496  , const int2 offset, const int polarity, const int matte) {
1497 
1498  int x = get_global_id(0);
1499  int y = get_global_id(1);
1500 
1501  CLPixelType v = inputImage[y*imageWidth+x];
1502 
1503  int2 neighbor, clampedNeighbor;
1504 
1505  neighbor.y = y + offset.y;
1506  neighbor.x = x + offset.x;
1507  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1508  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1509 
1510  CLPixelType r = (clampedNeighbor.x == neighbor.x
1511  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1512  :(CLPixelType)0;
1513 
1514 
1515  neighbor.y = y - offset.y;
1516  neighbor.x = x - offset.x;
1517  clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1518  clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1519 
1520  CLPixelType s = (clampedNeighbor.x == neighbor.x
1521  && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1522  :(CLPixelType)0;
1523 
1524 
1525  int sv[4];
1526  sv[0] = (int)v.x;
1527  sv[1] = (int)v.y;
1528  sv[2] = (int)v.z;
1529  sv[3] = (int)v.w;
1530 
1531  int sr[4];
1532  sr[0] = (int)r.x;
1533  sr[1] = (int)r.y;
1534  sr[2] = (int)r.z;
1535  sr[3] = (int)r.w;
1536 
1537  int ss[4];
1538  ss[0] = (int)s.x;
1539  ss[1] = (int)s.y;
1540  ss[2] = (int)s.z;
1541  ss[3] = (int)s.w;
1542 
1543  if (polarity > 0) {
1544  \n #pragma unroll 4\n
1545  for (unsigned int i = 0; i < 4; i++) {
1546  //sv[i] = (ss[i] >= (sv[i]+ScaleCharToQuantum(2)) && sr[i] > sv[i] ) ? (sv[i]+ScaleCharToQuantum(1)):sv[i];
1547  //
1548  //sv[i] =(!( (int)(ss[i] >= (sv[i]+ScaleCharToQuantum(2))) && (int) (sr[i] > sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1549  //sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) || (int) ( sr[i] <= sv[i] ) )) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1550  sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1551  }
1552  }
1553  else {
1554  \n #pragma unroll 4\n
1555  for (unsigned int i = 0; i < 4; i++) {
1556  //sv[i] = (ss[i] <= (sv[i]-ScaleCharToQuantum(2)) && sr[i] < sv[i] ) ? (sv[i]-ScaleCharToQuantum(1)):sv[i];
1557  //
1558  //sv[i] = ( (int)(ss[i] <= (sv[i]-ScaleCharToQuantum(2)) ) + (int)( sr[i] < sv[i] ) ==0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1559  sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1560  }
1561  }
1562 
1563  v.x = (CLQuantum)sv[0];
1564  v.y = (CLQuantum)sv[1];
1565  v.z = (CLQuantum)sv[2];
1566 
1567  if (matte!=0)
1568  v.w = (CLQuantum)sv[3];
1569 
1570  outputImage[y*imageWidth+x] = v;
1571 
1572  }
1573 
1574 
1575  )
1576 
1577 
1578  STRINGIFY(
1579  __kernel void RadialBlur(const __global CLPixelType *im, __global CLPixelType *filtered_im,
1580  const float4 bias,
1581  const unsigned int channel, const unsigned int matte,
1582  const float2 blurCenter,
1583  __constant float *cos_theta, __constant float *sin_theta,
1584  const unsigned int cossin_theta_size)
1585  {
1586  const int x = get_global_id(0);
1587  const int y = get_global_id(1);
1588  const int columns = get_global_size(0);
1589  const int rows = get_global_size(1);
1590  unsigned int step = 1;
1591  float center_x = (float) x - blurCenter.x;
1592  float center_y = (float) y - blurCenter.y;
1593  float radius = hypot(center_x, center_y);
1594 
1595  //float blur_radius = hypot((float) columns/2.0f, (float) rows/2.0f);
1596  float blur_radius = hypot(blurCenter.x, blurCenter.y);
1597 
1598  if (radius > MagickEpsilon)
1599  {
1600  step = (unsigned int) (blur_radius / radius);
1601  if (step == 0)
1602  step = 1;
1603  if (step >= cossin_theta_size)
1604  step = cossin_theta_size-1;
1605  }
1606 
1607  float4 result;
1608  result.x = (float)bias.x;
1609  result.y = (float)bias.y;
1610  result.z = (float)bias.z;
1611  result.w = (float)bias.w;
1612  float normalize = 0.0f;
1613 
1614  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1615  for (unsigned int i=0; i<cossin_theta_size; i+=step)
1616  {
1617  result += convert_float4(im[
1618  ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1619  ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1620  normalize += 1.0f;
1621  }
1622  normalize = PerceptibleReciprocal(normalize);
1623  result = result * normalize;
1624  }
1625  else {
1626  float gamma = 0.0f;
1627  for (unsigned int i=0; i<cossin_theta_size; i+=step)
1628  {
1629  float4 p = convert_float4(im[
1630  ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
1631  ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
1632 
1633  float alpha = (float)(QuantumScale*(QuantumRange-p.w));
1634  result.x += alpha * p.x;
1635  result.y += alpha * p.y;
1636  result.z += alpha * p.z;
1637  result.w += p.w;
1638  gamma+=alpha;
1639  normalize += 1.0f;
1640  }
1641  gamma = PerceptibleReciprocal(gamma);
1642  normalize = PerceptibleReciprocal(normalize);
1643  result.x = gamma*result.x;
1644  result.y = gamma*result.y;
1645  result.z = gamma*result.z;
1646  result.w = normalize*result.w;
1647  }
1648  filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1649  ClampToQuantum(result.z), ClampToQuantum(result.w));
1650  }
1651  )
1652 
1653  STRINGIFY(
1654 
1655  inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1656  float3 HueSaturationBrightness;
1657  HueSaturationBrightness.x = 0.0f; // Hue
1658  HueSaturationBrightness.y = 0.0f; // Saturation
1659  HueSaturationBrightness.z = 0.0f; // Brightness
1660 
1661  float r=(float) getRed(pixel);
1662  float g=(float) getGreen(pixel);
1663  float b=(float) getBlue(pixel);
1664 
1665  float tmin=min(min(r,g),b);
1666  float tmax=max(max(r,g),b);
1667 
1668  if (tmax!=0.0f) {
1669  float delta=tmax-tmin;
1670  HueSaturationBrightness.y=delta/tmax;
1671  HueSaturationBrightness.z=QuantumScale*tmax;
1672 
1673  if (delta != 0.0f) {
1674  HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1675  HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1676  HueSaturationBrightness.x/=6.0f;
1677  HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1678  }
1679  }
1680  return HueSaturationBrightness;
1681  }
1682 
1683  inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1684 
1685  float hue = HueSaturationBrightness.x;
1686  float brightness = HueSaturationBrightness.z;
1687  float saturation = HueSaturationBrightness.y;
1688 
1689  CLPixelType rgb;
1690 
1691  if (saturation == 0.0f) {
1692  setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1693  setGreen(&rgb,getRed(rgb));
1694  setBlue(&rgb,getRed(rgb));
1695  }
1696  else {
1697 
1698  float h=6.0f*(hue-floor(hue));
1699  float f=h-floor(h);
1700  float p=brightness*(1.0f-saturation);
1701  float q=brightness*(1.0f-saturation*f);
1702  float t=brightness*(1.0f-(saturation*(1.0f-f)));
1703 
1704  float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1705  float clamped_t = ClampToQuantum(QuantumRange*t);
1706  float clamped_p = ClampToQuantum(QuantumRange*p);
1707  float clamped_q = ClampToQuantum(QuantumRange*q);
1708  int ih = (int)h;
1709  setRed(&rgb, (ih == 1)?clamped_q:
1710  (ih == 2 || ih == 3)?clamped_p:
1711  (ih == 4)?clamped_t:
1712  clampedBrightness);
1713 
1714  setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1715  (ih == 3)?clamped_q:
1716  (ih == 4 || ih == 5)?clamped_p:
1717  clamped_t);
1718 
1719  setBlue(&rgb, (ih == 2)?clamped_t:
1720  (ih == 3 || ih == 4)?clampedBrightness:
1721  (ih == 5)?clamped_q:
1722  clamped_p);
1723  }
1724  return rgb;
1725  }
1726 
1727  __kernel void Contrast(__global CLPixelType *im, const unsigned int sharpen)
1728  {
1729 
1730  const int sign = sharpen!=0?1:-1;
1731  const int x = get_global_id(0);
1732  const int y = get_global_id(1);
1733  const int columns = get_global_size(0);
1734  const int c = x + y * columns;
1735 
1736  CLPixelType pixel = im[c];
1737  float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1738  float brightness = HueSaturationBrightness.z;
1739  brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1740  brightness = clamp(brightness,0.0f,1.0f);
1741  HueSaturationBrightness.z = brightness;
1742 
1743  CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1744  filteredPixel.w = pixel.w;
1745  im[c] = filteredPixel;
1746  }
1747 
1748 
1749  )
1750 
1751  STRINGIFY(
1752 
1753  inline void ConvertRGBToHSL(const CLQuantum red,const CLQuantum green, const CLQuantum blue,
1754  float *hue, float *saturation, float *lightness)
1755  {
1756  float
1757  c,
1758  tmax,
1759  tmin;
1760 
1761  /*
1762  Convert RGB to HSL colorspace.
1763  */
1764  tmax=max(QuantumScale*red,max(QuantumScale*green, QuantumScale*blue));
1765  tmin=min(QuantumScale*red,min(QuantumScale*green, QuantumScale*blue));
1766 
1767  c=tmax-tmin;
1768 
1769  *lightness=(tmax+tmin)/2.0;
1770  if (c <= 0.0)
1771  {
1772  *hue=0.0;
1773  *saturation=0.0;
1774  return;
1775  }
1776 
1777  if (tmax == (QuantumScale*red))
1778  {
1779  *hue=(QuantumScale*green-QuantumScale*blue)/c;
1780  if ((QuantumScale*green) < (QuantumScale*blue))
1781  *hue+=6.0;
1782  }
1783  else
1784  if (tmax == (QuantumScale*green))
1785  *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
1786  else
1787  *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
1788 
1789  *hue*=60.0/360.0;
1790  if (*lightness <= 0.5)
1791  *saturation=c/(2.0*(*lightness));
1792  else
1793  *saturation=c/(2.0-2.0*(*lightness));
1794  }
1795 
1796  inline void ConvertHSLToRGB(const float hue,const float saturation, const float lightness,
1797  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1798  {
1799  float
1800  b,
1801  c,
1802  g,
1803  h,
1804  tmin,
1805  r,
1806  x;
1807 
1808  /*
1809  Convert HSL to RGB colorspace.
1810  */
1811  h=hue*360.0;
1812  if (lightness <= 0.5)
1813  c=2.0*lightness*saturation;
1814  else
1815  c=(2.0-2.0*lightness)*saturation;
1816  tmin=lightness-0.5*c;
1817  h-=360.0*floor(h/360.0);
1818  h/=60.0;
1819  x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
1820  switch ((int) floor(h))
1821  {
1822  case 0:
1823  {
1824  r=tmin+c;
1825  g=tmin+x;
1826  b=tmin;
1827  break;
1828  }
1829  case 1:
1830  {
1831  r=tmin+x;
1832  g=tmin+c;
1833  b=tmin;
1834  break;
1835  }
1836  case 2:
1837  {
1838  r=tmin;
1839  g=tmin+c;
1840  b=tmin+x;
1841  break;
1842  }
1843  case 3:
1844  {
1845  r=tmin;
1846  g=tmin+x;
1847  b=tmin+c;
1848  break;
1849  }
1850  case 4:
1851  {
1852  r=tmin+x;
1853  g=tmin;
1854  b=tmin+c;
1855  break;
1856  }
1857  case 5:
1858  {
1859  r=tmin+c;
1860  g=tmin;
1861  b=tmin+x;
1862  break;
1863  }
1864  default:
1865  {
1866  r=0.0;
1867  g=0.0;
1868  b=0.0;
1869  }
1870  }
1871  *red=ClampToQuantum(QuantumRange*r);
1872  *green=ClampToQuantum(QuantumRange*g);
1873  *blue=ClampToQuantum(QuantumRange*b);
1874  }
1875 
1876  inline void ModulateHSL(const float percent_hue, const float percent_saturation,const float percent_lightness,
1877  CLQuantum *red,CLQuantum *green,CLQuantum *blue)
1878  {
1879  float
1880  hue,
1881  lightness,
1882  saturation;
1883 
1884  /*
1885  Increase or decrease color lightness, saturation, or hue.
1886  */
1887  ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
1888  hue+=0.5*(0.01*percent_hue-1.0);
1889  while (hue < 0.0)
1890  hue+=1.0;
1891  while (hue >= 1.0)
1892  hue-=1.0;
1893  saturation*=0.01*percent_saturation;
1894  lightness*=0.01*percent_lightness;
1895  ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
1896  }
1897 
1898  __kernel void Modulate(__global CLPixelType *im,
1899  const float percent_brightness,
1900  const float percent_hue,
1901  const float percent_saturation,
1902  const int colorspace)
1903  {
1904 
1905  const int x = get_global_id(0);
1906  const int y = get_global_id(1);
1907  const int columns = get_global_size(0);
1908  const int c = x + y * columns;
1909 
1910  CLPixelType pixel = im[c];
1911 
1912  CLQuantum
1913  blue,
1914  green,
1915  red;
1916 
1917  red=getRed(pixel);
1918  green=getGreen(pixel);
1919  blue=getBlue(pixel);
1920 
1921  switch (colorspace)
1922  {
1923  case HSLColorspace:
1924  default:
1925  {
1926  ModulateHSL(percent_hue, percent_saturation, percent_brightness,
1927  &red, &green, &blue);
1928  }
1929 
1930  }
1931 
1932  CLPixelType filteredPixel;
1933 
1934  setRed(&filteredPixel, red);
1935  setGreen(&filteredPixel, green);
1936  setBlue(&filteredPixel, blue);
1937  filteredPixel.w = pixel.w;
1938 
1939  im[c] = filteredPixel;
1940  }
1941  )
1942 
1943  STRINGIFY(
1944  __kernel void Grayscale(__global CLPixelType *im,
1945  const int method, const int colorspace)
1946  {
1947 
1948  const int x = get_global_id(0);
1949  const int y = get_global_id(1);
1950  const int columns = get_global_size(0);
1951  const int c = x + y * columns;
1952 
1953  CLPixelType pixel = im[c];
1954 
1955  float
1956  blue,
1957  green,
1958  intensity,
1959  red;
1960 
1961  red=(float)getRed(pixel);
1962  green=(float)getGreen(pixel);
1963  blue=(float)getBlue(pixel);
1964 
1965  intensity=0.0;
1966 
1967  CLPixelType filteredPixel;
1968 
1969  switch (method)
1970  {
1972  {
1973  intensity=(red+green+blue)/3.0;
1974  break;
1975  }
1977  {
1978  intensity=max(max(red,green),blue);
1979  break;
1980  }
1982  {
1983  intensity=(min(min(red,green),blue)+
1984  max(max(red,green),blue))/2.0;
1985  break;
1986  }
1988  {
1989  intensity=(float) (((float) red*red+green*green+
1990  blue*blue)/(3.0*QuantumRange));
1991  break;
1992  }
1994  {
1995  /*
1996  if (colorspace == RGBColorspace)
1997  {
1998  red=EncodePixelGamma(red);
1999  green=EncodePixelGamma(green);
2000  blue=EncodePixelGamma(blue);
2001  }
2002  */
2003  intensity=0.298839*red+0.586811*green+0.114350*blue;
2004  break;
2005  }
2007  {
2008  /*
2009  if (image->colorspace == sRGBColorspace)
2010  {
2011  red=DecodePixelGamma(red);
2012  green=DecodePixelGamma(green);
2013  blue=DecodePixelGamma(blue);
2014  }
2015  */
2016  intensity=0.298839*red+0.586811*green+0.114350*blue;
2017  break;
2018  }
2020  default:
2021  {
2022  /*
2023  if (image->colorspace == RGBColorspace)
2024  {
2025  red=EncodePixelGamma(red);
2026  green=EncodePixelGamma(green);
2027  blue=EncodePixelGamma(blue);
2028  }
2029  */
2030  intensity=0.212656*red+0.715158*green+0.072186*blue;
2031  break;
2032  }
2034  {
2035  /*
2036  if (image->colorspace == sRGBColorspace)
2037  {
2038  red=DecodePixelGamma(red);
2039  green=DecodePixelGamma(green);
2040  blue=DecodePixelGamma(blue);
2041  }
2042  */
2043  intensity=0.212656*red+0.715158*green+0.072186*blue;
2044  break;
2045  }
2047  {
2048  intensity=(float) (sqrt((float) red*red+green*green+
2049  blue*blue)/sqrt(3.0));
2050  break;
2051  }
2052 
2053  }
2054 
2055  setGray(&filteredPixel, ClampToQuantum(intensity));
2056 
2057  filteredPixel.w = pixel.w;
2058 
2059  im[c] = filteredPixel;
2060  }
2061  )
2062 
2063  STRINGIFY(
/*
  Box filter (based on Box from resize.c): the weight is 1 for every x.
*/
float BoxResizeFilter(const float x)
{
  const float weight = 1.0f;
  return weight;
}
2069  )
2070 
2071  STRINGIFY(
2072  // Based on CubicBC from resize.c
2073  float CubicBC(const float x,const __global float* resizeFilterCoefficients)
2074  {
2075  /*
2076  Cubic Filters using B,C determined values:
2077  Mitchell-Netravali B = 1/3 C = 1/3 "Balanced" cubic spline filter
2078  Catmull-Rom B = 0 C = 1/2 Interpolatory and exact on linears
2079  Spline B = 1 C = 0 B-Spline Gaussian approximation
2080  Hermite B = 0 C = 0 B-Spline interpolator
2081 
2082  See paper by Mitchell and Netravali, Reconstruction Filters in Computer
2083  Graphics Computer Graphics, Volume 22, Number 4, August 1988
2084  http://www.cs.utexas.edu/users/fussell/courses/cs384g/lectures/mitchell/
2085  Mitchell.pdf.
2086 
2087  Coefficents are determined from B,C values:
2088  P0 = ( 6 - 2*B )/6 = coeff[0]
2089  P1 = 0
2090  P2 = (-18 +12*B + 6*C )/6 = coeff[1]
2091  P3 = ( 12 - 9*B - 6*C )/6 = coeff[2]
2092  Q0 = ( 8*B +24*C )/6 = coeff[3]
2093  Q1 = ( -12*B -48*C )/6 = coeff[4]
2094  Q2 = ( 6*B +30*C )/6 = coeff[5]
2095  Q3 = ( - 1*B - 6*C )/6 = coeff[6]
2096 
2097  which are used to define the filter:
2098 
2099  P0 + P1*x + P2*x^2 + P3*x^3 0 <= x < 1
2100  Q0 + Q1*x + Q2*x^2 + Q3*x^3 1 <= x < 2
2101 
2102  which ensures function is continuous in value and derivative (slope).
2103  */
2104  if (x < 1.0)
2105  return(resizeFilterCoefficients[0]+x*(x*
2106  (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2107  if (x < 2.0)
2108  return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2109  (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
2110  return(0.0);
2111  }
2112  )
2113 
2114  STRINGIFY(
2115  float Sinc(const float x)
2116  {
2117  if (x != 0.0f)
2118  {
2119  const float alpha=(float) (MagickPI*x);
2120  return sinpi(x)/alpha;
2121  }
2122  return(1.0f);
2123  }
2124  )
2125 
2126  STRINGIFY(
/*
  1st order (linear) B-Spline, bilinear interpolation, Tent 1D filter, or
  a Bartlett 2D Cone filter.  Also used as a Bartlett windowing function
  for Sinc().  Returns 1-x below 1 and 0 from 1 upwards.
*/
float Triangle(const float x)
{
  if (x < 1.0f)
    return (1.0f-x);
  return 0.0f;
}
2136  )
2137 
2138 
2139  STRINGIFY(
2140  float Hanning(const float x)
2141  {
2142  /*
2143  Cosine window function:
2144  0.5+0.5*cos(pi*x).
2145  */
2146  const float cosine=cos((MagickPI*x));
2147  return(0.5f+0.5f*cosine);
2148  }
2149  )
2150 
2151  STRINGIFY(
2152  float Hamming(const float x)
2153  {
2154  /*
2155  Offset cosine window function:
2156  .54 + .46 cos(pi x).
2157  */
2158  const float cosine=cos((MagickPI*x));
2159  return(0.54f+0.46f*cosine);
2160  }
2161  )
2162 
2163  STRINGIFY(
2164  float Blackman(const float x)
2165  {
2166  /*
2167  Blackman: 2nd order cosine windowing function:
2168  0.42 + 0.5 cos(pi x) + 0.08 cos(2pi x)
2169 
2170  Refactored by Chantal Racette and Nicolas Robidoux to one trig call and
2171  five flops.
2172  */
2173  const float cosine=cos((MagickPI*x));
2174  return(0.34f+cosine*(0.5f+cosine*0.16f));
2175  }
2176  )
2177 
2178 
2179  STRINGIFY(
2180  typedef enum {
2198  )
2199 
2200  STRINGIFY(
2201  inline float applyResizeFilter(const float x, const ResizeWeightingFunctionType filterType, const __global float* filterCoefficients)
2202  {
2203  switch (filterType)
2204  {
2205  /* Call Sinc even for SincFast to get better precision on GPU
2206  and to avoid thread divergence. Sinc is pretty fast on GPU anyway...*/
2207  case SincWeightingFunction:
2209  return Sinc(x);
2211  return CubicBC(x,filterCoefficients);
2212  case BoxWeightingFunction:
2213  return BoxResizeFilter(x);
2215  return Triangle(x);
2217  return Hanning(x);
2219  return Hamming(x);
2221  return Blackman(x);
2222 
2223  default:
2224  return 0.0f;
2225  }
2226  }
2227  )
2228 
2229 
2230  STRINGIFY(
/*
  Computes the resize weight for offset x as window(x) * filter(x).

  resizeFilterCubicCoefficients - passed through to applyResizeFilter().
  resizeFilterType              - weighting function used for the filter term.
  resizeWindowType              - weighting function used for the window term.
  resizeFilterScale             - factor applied to the argument of the window
                                  function.
  resizeWindowSupport           - when below MagickEpsilon the window term is 1.
  resizeFilterBlur              - x is divided by this before evaluation.
  x                             - signed offset; only |x/resizeFilterBlur| is
                                  used.

  Returns scale * applyResizeFilter(|x/blur|, resizeFilterType, ...), where
  scale is the window term described above.
*/
2231  inline float getResizeFilterWeight(const __global float* resizeFilterCubicCoefficients, const ResizeWeightingFunctionType resizeFilterType
2232  , const ResizeWeightingFunctionType resizeWindowType
2233  , const float resizeFilterScale, const float resizeWindowSupport, const float resizeFilterBlur, const float x)
2234  {
2235  float scale;
2236  float xBlur = fabs(x/resizeFilterBlur);
/* A box window, or a (near) zero window support, contributes a constant 1. */
2237  if (resizeWindowSupport < MagickEpsilon
2238  || resizeWindowType == BoxWeightingFunction)
2239  {
2240  scale = 1.0f;
2241  }
2242  else
2243  {
/* `scale` first holds the window scaling factor, then is overwritten with
   the window function's value at the scaled position. */
2244  scale = resizeFilterScale;
2245  scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2246  }
2247  float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2248  return weight;
2249  }
2250 
2251  )
2252 
2253  ;
2254  const char* accelerateKernels2 =
2255 
2256  STRINGIFY(
2257 
/* Returns numWorkItems / pixelPerWorkgroup using unsigned integer division
   (any remainder is discarded). No check is made for pixelPerWorkgroup == 0. */
2258  inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2259  return (numWorkItems/pixelPerWorkgroup);
2260  }
2261 
2262  // Returns the index of the pixel the given work-item should compute.
2263  // Returns -1 if this work-item does not need to take part in any computation.
/* Work-items are assigned in consecutive runs of getNumWorkItemsPerPixel()
   items per pixel; items whose run index is >= pixelPerWorkgroup (the
   leftovers of the integer division) get -1. */
2264  inline int pixelToCompute(const unsigned itemID, const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems) {
2265  const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2266  int pixelIndex = itemID/numWorkItemsPerPixel;
2267  pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
2268  return pixelIndex;
2269  }
2270 
2271  )
2272 
2273  STRINGIFY(
/*
  ResizeHorizontalFilter: resamples image rows along x.

  Each work-group produces output columns [startX, stopX) of row
  get_global_id(1). The span of input pixels it reads is first copied into
  inputImageCache; the output pixels are then produced in chunks of
  pixelChunkSize. Within a chunk the work-items of the group are divided
  evenly between the output pixels (see pixelToCompute), each accumulating a
  partial weighted sum that is afterwards combined through outputPixelCache,
  densityCache and gammaCache.

  matte == 0 : all four components are weighted by the filter weight.
  matte != 0 : x/y/z are additionally weighted by QuantumScale*GetPixelAlpha(p)
               and divided back out through gamma at the end.

  NOTE(review): nothing here checks that cacheIndex stays below
  numCachedPixels; the host is presumably expected to size the cache for the
  filter support - confirm against the caller.
*/
2274  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2275  void ResizeHorizontalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2276  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2277  , const int resizeFilterType, const int resizeWindowType
2278  , const __global float* resizeFilterCubicCoefficients
2279  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2280  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2281  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2282 
2283 
2284  // calculate the range of resized image pixels computed by this workgroup
2285  const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2286  const unsigned int stopX = min(startX + pixelPerWorkgroup,filteredColumns);
2287  const unsigned int actualNumPixelToCompute = stopX - startX;
2288 
2289  // calculate the range of input image pixels to cache
2290  float scale = max(1.0f/xFactor+MagickEpsilon ,1.0f);
2291  const float support = max(scale*resizeFilterSupport,0.5f);
2292  scale = PerceptibleReciprocal(scale);
2293 
2294  const int cacheRangeStartX = max((int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(int)(0));
2295  const int cacheRangeEndX = min((int)(cacheRangeStartX + numCachedPixels), (int)inputColumns);
2296 
2297  // cache the input pixels into local memory
2298  const unsigned int y = get_global_id(1);
2299  event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
2300  wait_group_events(1,&e);
2301 
// totalNumChunks depends only on the group id and kernel arguments, so every
// work-item of the group runs the same number of iterations (and therefore
// reaches the barriers inside the loop the same number of times).
2302  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2303  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2304  {
2305 
2306  const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2307  const unsigned int chunkStopX = min(chunkStartX + pixelChunkSize, stopX);
2308  const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
2309 
2310  // determine which resized pixel is computed by this workitem
2311  const unsigned int itemID = get_local_id(0);
2312  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2313 
2314  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2315 
2316  float4 filteredPixel = (float4)0.0f;
2317  float density = 0.0f;
2318  float gamma = 0.0f;
2319  // -1 means this workitem doesn't participate in the computation
2320  if (pixelIndex != -1) {
2321 
2322  // x coordinate of the resized pixel computed by this workitem
2323  const int x = chunkStartX + pixelIndex;
2324 
2325  // calculate how many steps are required for this pixel
2326  const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2327  const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
2328  const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputColumns);
2329  const unsigned int n = stop - start;
2330 
2331  // calculate how many steps this workitem will contribute
// (n / numItems, rounded up)
2332  unsigned int numStepsPerWorkItem = n / numItems;
2333  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2334 
2335  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2336  if (startStep < n) {
2337  const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
2338 
// the cache holds input pixels starting at cacheRangeStartX, so rebase the index
2339  unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2340  if (matte == 0) {
2341 
2342  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2343  float4 cp = convert_float4(inputImageCache[cacheIndex]);
2344 
2345  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2346  , (ResizeWeightingFunctionType)resizeWindowType
2347  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2348 
2349  filteredPixel += ((float4)weight)*cp;
2350  density+=weight;
2351  }
2352 
2353 
2354  }
2355  else {
2356  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2357  CLPixelType p = inputImageCache[cacheIndex];
2358 
2359  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2360  , (ResizeWeightingFunctionType)resizeWindowType
2361  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2362 
2363  float alpha = weight * QuantumScale * GetPixelAlpha(p);
2364  float4 cp = convert_float4(p);
2365 
2366  filteredPixel.x += alpha * cp.x;
2367  filteredPixel.y += alpha * cp.y;
2368  filteredPixel.z += alpha * cp.z;
2369  filteredPixel.w += weight * cp.w;
2370 
2371  density+=weight;
2372  gamma+=alpha;
2373  }
2374  }
2375  }
2376  }
2377 
2378  // initialize the accumulators to zero
2379  if (itemID < actualNumPixelInThisChunk) {
2380  outputPixelCache[itemID] = (float4)0.0f;
2381  densityCache[itemID] = 0.0f;
2382  if (matte != 0)
2383  gammaCache[itemID] = 0.0f;
2384  }
2385  barrier(CLK_LOCAL_MEM_FENCE);
2386 
2387  // accumulate the filtered pixel value and the density
// Work-items sharing a pixelIndex have distinct itemID%numItems values, so
// stepping i with a barrier per step lets them add into the same slot one at
// a time.
2388  for (unsigned int i = 0; i < numItems; i++) {
2389  if (pixelIndex != -1) {
2390  if (itemID%numItems == i) {
2391  outputPixelCache[pixelIndex]+=filteredPixel;
2392  densityCache[pixelIndex]+=density;
2393  if (matte!=0) {
2394  gammaCache[pixelIndex]+=gamma;
2395  }
2396  }
2397  }
2398  barrier(CLK_LOCAL_MEM_FENCE);
2399  }
2400 
// the first actualNumPixelInThisChunk work-items each normalise and store one output pixel
2401  if (itemID < actualNumPixelInThisChunk) {
2402  if (matte==0) {
2403  float density = densityCache[itemID];
2404  float4 filteredPixel = outputPixelCache[itemID];
2405  if (density!= 0.0f && density != 1.0)
2406  {
2407  density = PerceptibleReciprocal(density);
2408  filteredPixel *= (float4)density;
2409  }
2410  filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2411  , ClampToQuantum(filteredPixel.y)
2412  , ClampToQuantum(filteredPixel.z)
2413  , ClampToQuantum(filteredPixel.w));
2414  }
2415  else {
2416  float density = densityCache[itemID];
2417  float gamma = gammaCache[itemID];
2418  float4 filteredPixel = outputPixelCache[itemID];
2419 
2420  if (density!= 0.0f && density != 1.0) {
2421  density = PerceptibleReciprocal(density);
2422  filteredPixel *= (float4)density;
2423  gamma *= density;
2424  }
2425  gamma = PerceptibleReciprocal(gamma);
2426 
2427  CLPixelType fp;
2428  fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2429  , ClampToQuantum(gamma*filteredPixel.y)
2430  , ClampToQuantum(gamma*filteredPixel.z)
2431  , ClampToQuantum(filteredPixel.w));
2432 
2433  filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
2434 
2435  }
2436  }
2437 
2438  } // end of chunking loop
2439  }
2440  )
2441 
2442 
2443 
2444  STRINGIFY(
/*
  Kernel entry point with the same parameter list as ResizeHorizontalFilter;
  its body is a single call that forwards to ResizeHorizontalFilter.

  NOTE(review): the embedded line numbers skip 2456 in the middle of the
  argument list, right where the resizeFilterType / resizeWindowType arguments
  would go. The line appears to have been dropped by the listing - confirm
  against the real header which values are passed there.
*/
2445  __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2446  void ResizeHorizontalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2447  , const float xFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2448  , const int resizeFilterType, const int resizeWindowType
2449  , const __global float* resizeFilterCubicCoefficients
2450  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2451  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2452  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2453 
2454  ResizeHorizontalFilter(inputImage,inputColumns,inputRows,matte
2455  ,xFactor, filteredImage, filteredColumns, filteredRows
2457  ,resizeFilterCubicCoefficients
2458  ,resizeFilterScale, resizeFilterSupport, resizeFilterWindowSupport, resizeFilterBlur
2459  ,inputImageCache, numCachedPixels, pixelPerWorkgroup, pixelChunkSize
2460  ,outputPixelCache, densityCache, gammaCache);
2461 
2462  }
2463  )
2464 
2465 
2466  STRINGIFY(
/*
  ResizeVerticalFilter: resamples image columns along y.

  Mirror image of ResizeHorizontalFilter with the roles of x and y swapped:
  each work-group produces output rows [startY, stopY) of column
  get_global_id(0). The input pixels of that column are gathered into
  inputImageCache with a strided copy (stride inputColumns), then the output
  pixels are produced in chunks of pixelChunkSize, with the work-items of the
  group divided evenly between the pixels of a chunk and their partial sums
  combined through outputPixelCache, densityCache and gammaCache.

  matte == 0 : all four components are weighted by the filter weight.
  matte != 0 : x/y/z are additionally weighted by QuantumScale*GetPixelAlpha(p)
               and divided back out through gamma at the end.

  NOTE(review): nothing here checks that cacheIndex stays below
  numCachedPixels; the host is presumably expected to size the cache for the
  filter support - confirm against the caller.
*/
2467  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2468  void ResizeVerticalFilter(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2469  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2470  , const int resizeFilterType, const int resizeWindowType
2471  , const __global float* resizeFilterCubicCoefficients
2472  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2473  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2474  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2475 
2476 
2477  // calculate the range of resized image pixels computed by this workgroup
2478  const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2479  const unsigned int stopY = min(startY + pixelPerWorkgroup,filteredRows);
2480  const unsigned int actualNumPixelToCompute = stopY - startY;
2481 
2482  // calculate the range of input image pixels to cache
2483  float scale = max(1.0f/yFactor+MagickEpsilon ,1.0f);
2484  const float support = max(scale*resizeFilterSupport,0.5f);
2485  scale = PerceptibleReciprocal(scale);
2486 
2487  const int cacheRangeStartY = max((int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(int)(0));
2488  const int cacheRangeEndY = min((int)(cacheRangeStartY + numCachedPixels), (int)inputRows);
2489 
2490  // cache the input pixels into local memory
2491  const unsigned int x = get_global_id(0);
2492  event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2493  wait_group_events(1,&e);
2494 
// totalNumChunks depends only on the group id and kernel arguments, so every
// work-item of the group runs the same number of iterations (and therefore
// reaches the barriers inside the loop the same number of times).
2495  unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2496  for (unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2497  {
2498 
2499  const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2500  const unsigned int chunkStopY = min(chunkStartY + pixelChunkSize, stopY);
2501  const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
2502 
2503  // determine which resized pixel is computed by this workitem
2504  const unsigned int itemID = get_local_id(1);
2505  const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2506 
2507  const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2508 
2509  float4 filteredPixel = (float4)0.0f;
2510  float density = 0.0f;
2511  float gamma = 0.0f;
2512  // -1 means this workitem doesn't participate in the computation
2513  if (pixelIndex != -1) {
2514 
2515  // y coordinate of the resized pixel computed by this workitem
2516  const int y = chunkStartY + pixelIndex;
2517 
2518  // calculate how many steps are required for this pixel
2519  const float bisect = (y+0.5)/yFactor+MagickEpsilon;
2520  const unsigned int start = (unsigned int)max(bisect-support+0.5f,0.0f);
2521  const unsigned int stop = (unsigned int)min(bisect+support+0.5f,(float)inputRows);
2522  const unsigned int n = stop - start;
2523 
2524  // calculate how many steps this workitem will contribute
// (n / numItems, rounded up)
2525  unsigned int numStepsPerWorkItem = n / numItems;
2526  numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2527 
2528  const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2529  if (startStep < n) {
2530  const unsigned int stopStep = min(startStep+numStepsPerWorkItem, n);
2531 
// the cache holds input pixels starting at cacheRangeStartY, so rebase the index
2532  unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2533  if (matte == 0) {
2534 
2535  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2536  float4 cp = convert_float4(inputImageCache[cacheIndex]);
2537 
2538  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2539  , (ResizeWeightingFunctionType)resizeWindowType
2540  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2541 
2542  filteredPixel += ((float4)weight)*cp;
2543  density+=weight;
2544  }
2545 
2546 
2547  }
2548  else {
2549  for (unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2550  CLPixelType p = inputImageCache[cacheIndex];
2551 
2552  float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2553  , (ResizeWeightingFunctionType)resizeWindowType
2554  , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2555 
2556  float alpha = weight * QuantumScale * GetPixelAlpha(p);
2557  float4 cp = convert_float4(p);
2558 
2559  filteredPixel.x += alpha * cp.x;
2560  filteredPixel.y += alpha * cp.y;
2561  filteredPixel.z += alpha * cp.z;
2562  filteredPixel.w += weight * cp.w;
2563 
2564  density+=weight;
2565  gamma+=alpha;
2566  }
2567  }
2568  }
2569  }
2570 
2571  // initialize the accumulators to zero
2572  if (itemID < actualNumPixelInThisChunk) {
2573  outputPixelCache[itemID] = (float4)0.0f;
2574  densityCache[itemID] = 0.0f;
2575  if (matte != 0)
2576  gammaCache[itemID] = 0.0f;
2577  }
2578  barrier(CLK_LOCAL_MEM_FENCE);
2579 
2580  // accumulate the filtered pixel value and the density
// Work-items sharing a pixelIndex have distinct itemID%numItems values, so
// stepping i with a barrier per step lets them add into the same slot one at
// a time.
2581  for (unsigned int i = 0; i < numItems; i++) {
2582  if (pixelIndex != -1) {
2583  if (itemID%numItems == i) {
2584  outputPixelCache[pixelIndex]+=filteredPixel;
2585  densityCache[pixelIndex]+=density;
2586  if (matte!=0) {
2587  gammaCache[pixelIndex]+=gamma;
2588  }
2589  }
2590  }
2591  barrier(CLK_LOCAL_MEM_FENCE);
2592  }
2593 
// the first actualNumPixelInThisChunk work-items each normalise and store one output pixel
2594  if (itemID < actualNumPixelInThisChunk) {
2595  if (matte==0) {
2596  float density = densityCache[itemID];
2597  float4 filteredPixel = outputPixelCache[itemID];
2598  if (density!= 0.0f && density != 1.0)
2599  {
2600  density = PerceptibleReciprocal(density);
2601  filteredPixel *= (float4)density;
2602  }
2603  filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2604  , ClampToQuantum(filteredPixel.y)
2605  , ClampToQuantum(filteredPixel.z)
2606  , ClampToQuantum(filteredPixel.w));
2607  }
2608  else {
2609  float density = densityCache[itemID];
2610  float gamma = gammaCache[itemID];
2611  float4 filteredPixel = outputPixelCache[itemID];
2612 
2613  if (density!= 0.0f && density != 1.0) {
2614  density = PerceptibleReciprocal(density);
2615  filteredPixel *= (float4)density;
2616  gamma *= density;
2617  }
2618  gamma = PerceptibleReciprocal(gamma);
2619 
2620  CLPixelType fp;
2621  fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2622  , ClampToQuantum(gamma*filteredPixel.y)
2623  , ClampToQuantum(gamma*filteredPixel.z)
2624  , ClampToQuantum(filteredPixel.w));
2625 
2626  filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
2627 
2628  }
2629  }
2630 
2631  } // end of chunking loop
2632  }
2633  )
2634 
2635 
2636 
2637  STRINGIFY(
/*
  Kernel entry point with the same parameter list as ResizeVerticalFilter;
  its body is a single call that forwards to ResizeVerticalFilter.

  NOTE(review): the embedded line numbers skip 2648 in the middle of the
  argument list, right where the resizeFilterType / resizeWindowType arguments
  would go. The line appears to have been dropped by the listing - confirm
  against the real header which values are passed there.
*/
2638  __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2639  void ResizeVerticalFilterSinc(const __global CLPixelType* inputImage, const unsigned int inputColumns, const unsigned int inputRows, const unsigned int matte
2640  , const float yFactor, __global CLPixelType* filteredImage, const unsigned int filteredColumns, const unsigned int filteredRows
2641  , const int resizeFilterType, const int resizeWindowType
2642  , const __global float* resizeFilterCubicCoefficients
2643  , const float resizeFilterScale, const float resizeFilterSupport, const float resizeFilterWindowSupport, const float resizeFilterBlur
2644  , __local CLPixelType* inputImageCache, const int numCachedPixels, const unsigned int pixelPerWorkgroup, const unsigned int pixelChunkSize
2645  , __local float4* outputPixelCache, __local float* densityCache, __local float* gammaCache) {
2646  ResizeVerticalFilter(inputImage,inputColumns,inputRows,matte
2647  ,yFactor,filteredImage,filteredColumns,filteredRows
2649  ,resizeFilterCubicCoefficients
2650  ,resizeFilterScale,resizeFilterSupport,resizeFilterWindowSupport,resizeFilterBlur
2651  ,inputImageCache,numCachedPixels,pixelPerWorkgroup,pixelChunkSize
2652  ,outputPixelCache,densityCache,gammaCache);
2653  }
2654  )
2655 
2656 
/*
  Noise scaling constants. Each OPENCL_DEFINE emits a "#define NAME (expr)"
  line into the kernel source string, so `attenuate` is not evaluated here: it
  resolves to whatever `attenuate` is in scope where the macro is used (the
  `attenuate` parameter of mwcGenerateDifferentialNoise).
*/
2657 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
2658 OPENCL_DEFINE(SigmaGaussian,(attenuate*0.015625f))
2659 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
2660 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
2661 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
2662 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
2663 OPENCL_DEFINE(SigmaRandom, (attenuate))
2664 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
2665 
2666 STRINGIFY(
2667 
2668 /*
2669 Part of MWC64X by David Thomas, dt10@imperial.ac.uk
2670 This is provided under BSD, full license is with the main package.
2671 See http://www.doc.ic.ac.uk/~dt10/research
2672 */
2673 
2674 // Pre: a<M, b<M
2675 // Post: r=(a+b) mod M
// The sum is formed in 64-bit unsigned arithmetic and reduced by a single
// subtraction of M when it is >= M or when the second test fires.
2676 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
2677 {
2678  ulong v=a+b;
2679  //if( (v>=M) || (v<a) )
// (v<a) is the wrap-around test for the 64-bit addition; the active line makes
// the same comparison on float conversions of v and a instead.
2680  if( (v>=M) || (convert_float(v) < convert_float(a)) ) // workaround for what appears to be an optimizer bug.
2681  v=v-M;
2682  return v;
2683 }
2684 
2685 // Pre: a<M,b<M
2686 // Post: r=(a*b) mod M
2687 // This could be done more efficiently, but it is portable, and should
2688 // be easy to understand. It can be replaced with any of the better
2689 // modular multiplication algorithms (for example if you know you have
2690 // double precision available or something).
// Method: walk the bits of a from least significant upwards; for every set bit
// add the current b into r, and double b (mod M) after each bit.
2691 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
2692 {
2693  ulong r=0;
2694  while(a!=0){
2695  if(a&1)
2696  r=MWC_AddMod64(r,b,M);
2697  b=MWC_AddMod64(b,b,M);
2698  a=a>>1;
2699  }
2700  return r;
2701 }
2702 
2703 
2704 // Pre: a<M, e>=0
2705 // Post: r=(a^e) mod M
2706 // This takes at most ~64^2 modular additions, so probably about 2^15 or so instructions on
2707 // most architectures
// Method: walk the bits of e from least significant upwards; for every set bit
// multiply acc by the current sqr, and square sqr (mod M) after each bit.
2708 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
2709 {
2710  ulong sqr=a, acc=1;
2711  while(e!=0){
2712  if(e&1)
2713  acc=MWC_MulMod64(acc,sqr,M);
2714  sqr=MWC_MulMod64(sqr,sqr,M);
2715  e=e>>1;
2716  }
2717  return acc;
2718 }
2719 
// Returns the generator state `distance` steps after `curr`.
// curr is packed as (x, c); it is combined into the single value x*A + c,
// multiplied by A^distance mod M, and unpacked again as (value/A, value%A).
2720 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
2721 {
2722  ulong m=MWC_PowMod64(A, distance, M);
2723  ulong x=curr.x*(ulong)A+curr.y;
2724  x=MWC_MulMod64(x, m, M);
2725  return (uint2)((uint)(x/A), (uint)(x%A));
2726 }
2727 
// Builds the starting state for the calling work-item's stream.
// The stream position is
//   dist = streamBase + (get_global_id(0)*vecSize + vecOffset)*streamGap
// and the returned (x, c) pair is MWC_BASEID * A^dist mod M, unpacked as
// (value/A, value%A). No caller-supplied seed enters this computation.
2728 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
2729 {
2730  // This is an arbitrary constant for starting LCG jumping from. I didn't
2731  // want to start from 1, as then you end up with the two or three first values
2732  // being a bit poor in ones - once you've decided that, one constant is as
2733  // good as any another. There is no deep mathematical reason for it, I just
2734  // generated a random number.
2735  enum{ MWC_BASEID = 4077358422479273989UL };
2736 
2737  ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
2738  ulong m=MWC_PowMod64(A, dist, M);
2739 
2740  ulong x=MWC_MulMod64(MWC_BASEID, m, M);
2741  return (uint2)((uint)(x/A), (uint)(x%A));
2742 }
2743 
// Generator state: x is the current value word, c the carry word
// (see MWC64X_Step for how the two are advanced).
2745 typedef struct{ uint x; uint c; } mwc64x_state_t;
2746 
// MWC64X_A is the multiplier used by MWC64X_Step; MWC64X_M is the modulus
// handed to the *_Mod64 helpers and equals MWC64X_A * 2^32 - 1.
2747 enum{ MWC64X_A = 4294883355U };
2748 enum{ MWC64X_M = 18446383549859758079UL };
2749 
// Advances the state by one step: the new x is the low 32 bits of
// MWC64X_A*x + c, the new c is the high 32 bits of MWC64X_A*x plus the carry
// out of that low-word addition.
2750 void MWC64X_Step(mwc64x_state_t *s)
2751 {
2752  uint X=s->x, C=s->c;
2753 
2754  uint Xn=MWC64X_A*X+C;
2755  uint carry=(uint)(Xn<C); // The (Xn<C) will be zero or one for scalar
2756  uint Cn=mad_hi(MWC64X_A,X,carry);
2757 
2758  s->x=Xn;
2759  s->c=Cn;
2760 }
2761 
// Replaces *s with the state `distance` steps ahead, computed by
// MWC_SkipImpl_Mod64 from the current (x, c).
2762 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
2763 {
2764  uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), MWC64X_A, MWC64X_M, distance);
2765  s->x=tmp.x;
2766  s->c=tmp.y;
2767 }
2768 
// Initialises *s for the calling work-item from MWC_SeedImpl_Mod64 with
// vecSize 1 and vecOffset 0, i.e. at stream position
// baseOffset + get_global_id(0)*perStreamOffset.
// The previous contents of *s are not read; both fields are overwritten.
2769 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
2770 {
2771  uint2 tmp=MWC_SeedImpl_Mod64(MWC64X_A, MWC64X_M, 1, 0, baseOffset, perStreamOffset);
2772  s->x=tmp.x;
2773  s->c=tmp.y;
2774 }
2775 
// Returns x XOR c of the current state, then advances the state by one step.
2777 uint MWC64X_NextUint(mwc64x_state_t *s)
2778 {
2779  uint res=s->x ^ s->c;
2780  MWC64X_Step(s);
2781  return res;
2782 }
2783 
2784 //
2785 // End of MWC64X excerpt
2786 //
2787 
2788 
/*
  Noise model selector used by mwcGenerateDifferentialNoise.

  NOTE(review): the embedded line numbers skip 2791, 2794 and 2796, and
  mwcGenerateDifferentialNoise switches on LaplacianNoise, which is not listed
  here. Some enumerators appear to have been dropped by the listing - confirm
  the full list (and therefore the numeric values) against the real header.
*/
2789  typedef enum
2790  {
2792  UniformNoise,
2793  GaussianNoise,
2795  ImpulseNoise,
2797  PoissonNoise,
2798  RandomNoise
2799  } NoiseType;
2800 
2801 
/* Draws the next 32-bit value from rng (advancing it by one step) and returns
   it as a float divided by (float)0xffffffff. */
2802  float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
2803  return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff); // normalized to 1.0
2804  }
2805 
2806 
/*
  Returns a noisy replacement value for one channel value.

  r          - generator state; one value is always drawn up front (alpha),
               and some noise types draw more (one more for GaussianNoise and
               for the unlabeled block below, one per loop iteration for
               PoissonNoise).
  pixel      - current channel value.
  noise_type - noise model; unmatched values take the UniformNoise path because
               `default` shares its body.
  attenuate  - referenced through the Sigma... / TauGaussian macros, which
               expand to expressions in `attenuate`.

  The result is a float and is not clamped here; GenerateNoiseImage wraps the
  call in ClampToQuantum.

  NOTE(review): the block following the LaplacianNoise case has no visible
  `case` label (embedded line 2871 is missing from the listing). From the
  SigmaMultiplicativeGaussian constant it uses it is presumably the
  multiplicative Gaussian case - confirm against the real header.
*/
2807  float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type, float attenuate) {
2808 
2809  float
2810  alpha,
2811  beta,
2812  noise,
2813  sigma;
2814 
2815  noise = 0.0f;
2816  alpha=mwcReadPseudoRandomValue(r);
2817  switch(noise_type) {
2818  case UniformNoise:
2819  default:
2820  {
2821  noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
2822  break;
2823  }
2824  case GaussianNoise:
2825  {
2826  float
2827  gamma,
2828  tau;
2829 
/* alpha == 0 would make log(alpha) -infinity; use 1 instead (log(1) == 0). */
2830  if (alpha == 0.0f)
2831  alpha=1.0f;
2832  beta=mwcReadPseudoRandomValue(r);
/* sigma and tau are the cosine and sine parts of one polar transform of the
   pair (alpha, beta). */
2833  gamma=sqrt(-2.0f*log(alpha));
2834  sigma=gamma*cospi((2.0f*beta));
2835  tau=gamma*sinpi((2.0f*beta));
2836  noise=(float)(pixel+sqrt((float) pixel)*SigmaGaussian*sigma+
2837  QuantumRange*TauGaussian*tau);
2838  break;
2839  }
2840 
2841 
2842  case ImpulseNoise:
2843  {
/* lowest SigmaImpulse/2 of draws -> 0, highest SigmaImpulse/2 -> QuantumRange,
   everything in between keeps the pixel unchanged. */
2844  if (alpha < (SigmaImpulse/2.0f))
2845  noise=0.0f;
2846  else
2847  if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
2848  noise=(float)QuantumRange;
2849  else
2850  noise=(float)pixel;
2851  break;
2852  }
2853  case LaplacianNoise:
2854  {
/* Lower half of the draws moves the value down, upper half moves it up; draws
   within MagickEpsilon of either end bypass log() and shift by a full
   QuantumRange instead. */
2855  if (alpha <= 0.5f)
2856  {
2857  if (alpha <= MagickEpsilon)
2858  noise=(float) (pixel-QuantumRange);
2859  else
2860  noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
2861  0.5f);
2862  break;
2863  }
2864  beta=1.0f-alpha;
2865  if (beta <= (0.5f*MagickEpsilon))
2866  noise=(float) (pixel+QuantumRange);
2867  else
2868  noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
2869  break;
2870  }
2872  {
2873  sigma=1.0f;
2874  if (alpha > MagickEpsilon)
2875  sigma=sqrt(-2.0f*log(alpha));
2876  beta=mwcReadPseudoRandomValue(r);
2877  noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
2878  cospi((float) (2.0f*beta))/2.0f);
2879  break;
2880  }
2881  case PoissonNoise:
2882  {
2883  float
2884  poisson;
2885  unsigned int i;
/* Count how many uniform draws must be multiplied together before the product
   falls to exp(-SigmaPoisson*QuantumScale*pixel) or below. */
2886  poisson=exp(-SigmaPoisson*QuantumScale*pixel);
2887  for (i=0; alpha > poisson; i++)
2888  {
2889  beta=mwcReadPseudoRandomValue(r);
2890  alpha*=beta;
2891  }
2892  noise=(float) (QuantumRange*i/SigmaPoisson);
2893  break;
2894  }
2895  case RandomNoise:
2896  {
2897  noise=(float) (QuantumRange*SigmaRandom*alpha);
2898  break;
2899  }
2900 
2901  };
2902  return noise;
2903  }
2904 
2905 
2906 
2907 
2908 
/*
  GenerateNoiseImage: writes a noisy copy of inputImage to filteredImage.

  Each work-item handles up to pixelsPerWorkItem pixels, starting at
    get_local_size(0)*get_group_id(0)*pixelsPerWorkItem + get_local_id(0)
  and stepping by get_local_size(0); positions >= inputPixelCount are skipped.
  For every channel selected in `channel` the value is replaced by
  ClampToQuantum(mwcGenerateDifferentialNoise(...)); unselected channels are
  copied unchanged.

  NOTE(review): rng.x / rng.c are set from seed0 / seed1, but
  MWC64X_SeedStreams then overwrites both fields with a state derived only from
  the work-item id, `offset` and `span`. As written the seeds therefore do not
  appear to influence the generated noise - confirm whether that is intended.
*/
2909  __kernel
2910  void GenerateNoiseImage(const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
2911  ,const unsigned int inputPixelCount, const unsigned int pixelsPerWorkItem
2912  ,const ChannelType channel
2913  ,const NoiseType noise_type, const float attenuate
2914  ,const unsigned int seed0, const unsigned int seed1
2915  ,const unsigned int numRandomNumbersPerPixel) {
2916 
2917  mwc64x_state_t rng;
2918  rng.x = seed0;
2919  rng.c = seed1;
2920 
2921  uint span = pixelsPerWorkItem * numRandomNumbersPerPixel; // length of RNG substream each workitem will use
2922  uint offset = span * get_local_size(0) * get_group_id(0); // offset of this workgroup's RNG substream (in master stream);
2923 
2924  MWC64X_SeedStreams(&rng, offset, span); // Seed the RNG streams
2925 
2926  uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0); // pixel to process
2927 
2928  uint count = pixelsPerWorkItem;
2929 
2930  while (count > 0) {
2931  if (pos < inputPixelCount) {
2932  CLPixelType p = inputImage[pos];
2933 
2934  if ((channel&RedChannel)!=0) {
2935  setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
2936  }
2937 
2938  if ((channel&GreenChannel)!=0) {
2939  setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
2940  }
2941 
2942  if ((channel&BlueChannel)!=0) {
2943  setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
2944  }
2945 
2946  if ((channel & OpacityChannel) != 0) {
2947  setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
2948  }
2949 
2950  filteredImage[pos] = p;
2951  //filteredImage[pos] = (CLPixelType)(MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, MWC64X_NextUint(&rng) % 256, 255);
2952  }
2953  pos += get_local_size(0);
2954  --count;
2955  }
2956  }
2957  )
2958 
2959 
2960  STRINGIFY(
/*
  MotionBlur: one work-item per output pixel (get_global_id(0),
  get_global_id(1)); work-items outside imageWidth x imageHeight return
  immediately.

  The output is a weighted sum over `width` taps: tap i samples the input at
  currentPixel + offset[i] (each coordinate passed through ClampToCanvas) and
  is weighted by filter[i]. The accumulator starts from `bias`.

  Two paths:
    - OpacityChannel not selected in `channel`, or matte == 0: every component
      is accumulated with the plain filter weight.
    - otherwise: alpha = QuantumScale*(QuantumRange - w) is folded into the
      weight of every component, and x/y/z are finally multiplied by
      PerceptibleReciprocal of the accumulated k*alpha.
*/
2961  __kernel
2962  void MotionBlur(const __global CLPixelType *input, __global CLPixelType *output,
2963  const unsigned int imageWidth, const unsigned int imageHeight,
2964  const __global float *filter, const unsigned int width, const __global int2* offset,
2965  const float4 bias,
2966  const ChannelType channel, const unsigned int matte) {
2967 
2968  int2 currentPixel;
2969  currentPixel.x = get_global_id(0);
2970  currentPixel.y = get_global_id(1);
2971 
2972  if (currentPixel.x >= imageWidth
2973  || currentPixel.y >= imageHeight)
2974  return;
2975 
2976  float4 pixel;
2977  pixel.x = (float)bias.x;
2978  pixel.y = (float)bias.y;
2979  pixel.z = (float)bias.z;
2980  pixel.w = (float)bias.w;
2981 
2982  if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2983 
2984  for (int i = 0; i < width; i++) {
2985  // only support EdgeVirtualPixelMethod through ClampToCanvas
2986  // TODO: implement other virtual pixel method
2987  int2 samplePixel = currentPixel + offset[i];
2988  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2989  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2990  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2991 
2992  pixel.x += (filter[i] * (float)samplePixelValue.x);
2993  pixel.y += (filter[i] * (float)samplePixelValue.y);
2994  pixel.z += (filter[i] * (float)samplePixelValue.z);
2995  pixel.w += (filter[i] * (float)samplePixelValue.w);
2996  }
2997 
2998  CLPixelType outputPixel;
2999  outputPixel.x = ClampToQuantum(pixel.x);
3000  outputPixel.y = ClampToQuantum(pixel.y);
3001  outputPixel.z = ClampToQuantum(pixel.z);
3002  outputPixel.w = ClampToQuantum(pixel.w);
3003  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
3004  }
3005  else {
3006 
3007  float gamma = 0.0f;
3008  for (int i = 0; i < width; i++) {
3009  // only support EdgeVirtualPixelMethod through ClampToCanvas
3010  // TODO: implement other virtual pixel method
3011  int2 samplePixel = currentPixel + offset[i];
3012  samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
3013  samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
3014 
3015  CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
3016 
3017  float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
3018  float k = filter[i];
3019  pixel.x = pixel.x + k * alpha * samplePixelValue.x;
3020  pixel.y = pixel.y + k * alpha * samplePixelValue.y;
3021  pixel.z = pixel.z + k * alpha * samplePixelValue.z;
3022 
// NOTE(review): w is weighted by k*alpha here, not by k alone as in the
// branch above - confirm that this is intended.
3023  pixel.w += k * alpha * samplePixelValue.w;
3024 
3025  gamma+=k*alpha;
3026  }
3027  gamma = PerceptibleReciprocal(gamma);
3028  pixel.xyz = gamma*pixel.xyz;
3029 
3030  CLPixelType outputPixel;
3031  outputPixel.x = ClampToQuantum(pixel.x);
3032  outputPixel.y = ClampToQuantum(pixel.y);
3033  outputPixel.z = ClampToQuantum(pixel.z);
3034  outputPixel.w = ClampToQuantum(pixel.w);
3035  output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
3036  }
3037  }
3038  )
3039 
3040  STRINGIFY(
3041  typedef enum
3042  {
3044  NoCompositeOp,
3075  InCompositeOp,
3098  /* These are new operators, added after the above was last sorted.
3099  * The list should be re-sorted only when a new library version is
3100  * created.
3101  */
3116  )
3117 
3118  STRINGIFY(
/*
  Colour-dodge blend of one channel.

  Sca, Dca - source / destination channel values as passed by the caller
             (CompositeColorDodge passes them already multiplied by Sa / Da).
  Sa, Da   - source / destination alpha.

  Only the first two return statements are live; everything after them is
  commented-out history of alternative formulations.
*/
3119  inline float ColorDodge(const float Sca,
3120  const float Sa,const float Dca,const float Da)
3121  {
3122  /*
3123  Oct 2004 SVG specification.
3124  */
3125  if ((Sca*Da+Dca*Sa) >= Sa*Da)
3126  return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3127  return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
3128 
3129 
3130  /*
3131  New specification, March 2009 SVG specification. This specification was
3132  also wrong for non-overlap cases.
3133  */
3134  /*
3135  if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
3136  return(Sca*(1.0-Da));
3137  if (fabs(Sca-Sa) < MagickEpsilon)
3138  return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3139  return(Sa*MagickMin(Da,Dca*Sa/(Sa-Sca)));
3140  */
3141 
3142  /*
3143  Working from first principles using the original formula:
3144 
3145  f(Sc,Dc) = Dc/(1-Sc)
3146 
3147  This works correctly! Looks like the 2004 model was right but just
3148  required an extra condition for correct handling.
3149  */
3150 
3151  /*
3152  if ((fabs(Sca-Sa) < MagickEpsilon) && (fabs(Dca) < MagickEpsilon))
3153  return(Sca*(1.0-Da)+Dca*(1.0-Sa));
3154  if (fabs(Sca-Sa) < MagickEpsilon)
3155  return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
3156  return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
3157  */
3158  }
3159 
/*
  Colour-dodge composite of source pixel *p over destination pixel *q, written
  to *composite.

  Sa and Da are derived from the pixels' opacity as 1 - QuantumScale*opacity.
  The result opacity is QuantumRange*(1 - gamma) with
  gamma = RoundToUnity(Sa + Da - Sa*Da). Red, green and blue are ColorDodge()
  of the alpha-multiplied, QuantumScale-normalised channels, multiplied by
  QuantumRange/gamma (with gamma replaced by MagickEpsilon when
  |gamma| < MagickEpsilon to avoid dividing by ~0).
*/
3160  inline void CompositeColorDodge(const float4 *p,
3161  const float4 *q,float4 *composite) {
3162 
3163  float
3164  Da,
3165  gamma,
3166  Sa;
3167 
3168  Sa=1.0f-QuantumScale*getOpacityF4(*p); /* simplify and speed up equations */
3169  Da=1.0f-QuantumScale*getOpacityF4(*q);
3170  gamma=RoundToUnity(Sa+Da-Sa*Da); /* over blend, as per SVG doc */
3171  setOpacityF4(composite, QuantumRange*(1.0-gamma));
3172  gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
3173  setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
3174  getRedF4(*q)*Da,Da));
3175  setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
3176  getGreenF4(*q)*Da,Da));
3177  setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
3178  getBlueF4(*q)*Da,Da));
3179  }
3180  )
3181 
3182  STRINGIFY(
3183  inline void MagickPixelCompositePlus(const float4 *p,
3184  const float alpha,const float4 *q,
3185  const float beta,float4 *composite)
3186  {
3187  float
3188  gamma;
3189 
3190  float
3191  Da,
3192  Sa;
3193  /*
3194  Add two pixels with the given opacities.
3195  */
3196  Sa=1.0-QuantumScale*alpha;
3197  Da=1.0-QuantumScale*beta;
3198  gamma=RoundToUnity(Sa+Da); /* 'Plus' blending -- not 'Over' blending */
3199  setOpacityF4(composite,(float) QuantumRange*(1.0-gamma));
3200  gamma=PerceptibleReciprocal(gamma);
3201  setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
3202  setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
3203  setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
3204  }
3205  )
3206 
3207  STRINGIFY(
3208  inline void MagickPixelCompositeBlend(const float4 *p,
3209  const float alpha,const float4 *q,
3210  const float beta,float4 *composite)
3211  {
3212  MagickPixelCompositePlus(p,(float) (QuantumRange-alpha*
3213  (QuantumRange-getOpacityF4(*p))),q,(float) (QuantumRange-beta*
3214  (QuantumRange-getOpacityF4(*q))),composite);
3215  }
3216  )
3217 
3218  STRINGIFY(
3219  __kernel
3220  void Composite(__global CLPixelType *image,
3221  const unsigned int imageWidth,
3222  const unsigned int imageHeight,
3223  const __global CLPixelType *compositeImage,
3224  const unsigned int compositeWidth,
3225  const unsigned int compositeHeight,
3226  const unsigned int compose,
3227  const ChannelType channel,
3228  const unsigned int matte,
3229  const float destination_dissolve,
3230  const float source_dissolve) {
3231 
3232  uint2 index;
3233  index.x = get_global_id(0);
3234  index.y = get_global_id(1);
3235 
3236 
3237  if (index.x >= imageWidth
3238  || index.y >= imageHeight) {
3239  return;
3240  }
3241  const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
3242  float4 destination;
3243  setRedF4(&destination,getRed(inputPixel));
3244  setGreenF4(&destination,getGreen(inputPixel));
3245  setBlueF4(&destination,getBlue(inputPixel));
3246 
3247 
3248  const CLPixelType compositePixel
3249  = compositeImage[index.y*imageWidth+index.x];
3250  float4 source;
3251  setRedF4(&source,getRed(compositePixel));
3252  setGreenF4(&source,getGreen(compositePixel));
3253  setBlueF4(&source,getBlue(compositePixel));
3254 
3255  if (matte != 0) {
3256  setOpacityF4(&destination,getOpacity(inputPixel));
3257  setOpacityF4(&source,getOpacity(compositePixel));
3258  }
3259  else {
3260  setOpacityF4(&destination,0.0f);
3261  setOpacityF4(&source,0.0f);
3262  }
3263 
3264  float4 composite=destination;
3265 
3266  CompositeOperator op = (CompositeOperator)compose;
3267  switch (op) {
3268  case ColorDodgeCompositeOp:
3269  CompositeColorDodge(&source,&destination,&composite);
3270  break;
3271  case BlendCompositeOp:
3272  MagickPixelCompositeBlend(&source,source_dissolve,&destination,
3273  destination_dissolve,&composite);
3274  break;
3275  default:
3276  // unsupported operators
3277  break;
3278  };
3279 
3280  CLPixelType outputPixel;
3281  setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
3282  setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
3283  setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
3284  setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
3285  image[index.y*imageWidth+index.x] = outputPixel;
3286  }
3287  )
3288 
3289  ;
3290 
3291 #endif // MAGICKCORE_OPENCL_SUPPORT
3292 
3293 #if defined(__cplusplus) || defined(c_plusplus)
3294 }
3295 #endif
3296 
3297 #endif // _MAGICKCORE_ACCELERATE_PRIVATE_H
Definition: composite.h:91
Definition: composite.h:94
Definition: composite.h:65
Definition: colorspace.h:44
Definition: resize-private.h:31
Definition: colorspace.h:36
#define SigmaPoisson
Definition: resize-private.h:37
Definition: pixel.h:75
Definition: statistic.h:115
Definition: resize-private.h:33
Definition: magick-type.h:201
Definition: composite.h:75
Definition: pixel.h:72
Definition: colorspace.h:40
static void MagickPixelCompositeBlend(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:137
Definition: composite.h:31
Definition: composite.h:93
MagickExport MagickBooleanType FunctionImage(Image *image, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:1042
Definition: colorspace.h:45
Definition: colorspace.h:33
Definition: composite.h:80
#define SigmaRandom
Definition: composite.h:33
Definition: resize-private.h:40
Definition: composite.h:90
Definition: resize-private.h:29
static MagickRealType ColorDodge(const MagickRealType Sca, const MagickRealType Sa, const MagickRealType Dca, const MagickRealType Da)
Definition: composite.c:306
Definition: fx.h:34
PixelIntensityMethod
Definition: pixel.h:67
Definition: magick-type.h:190
Definition: composite.h:95
Definition: colorspace.h:59
Definition: magick-type.h:196
Definition: composite.h:59
Definition: composite.h:89
Definition: magick-type.h:185
Definition: composite.h:27
Definition: colorspace.h:41
Definition: colorspace.h:37
static MagickRealType RoundToUnity(const MagickRealType value)
Definition: composite-private.h:33
Definition: composite.h:35
Definition: composite.h:87
#define MagickPI
Definition: image-private.h:26
Definition: colorspace.h:58
Definition: colorspace.h:50
static MagickRealType Hanning(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:287
Definition: colorspace.h:47
Definition: fx.h:29
float MagickRealType
Definition: magick-type.h:79
Definition: statistic.h:114
Definition: colorspace.h:31
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:28
Definition: composite.h:53
Definition: colorspace.h:35
Definition: resize-private.h:38
Definition: pixel.h:77
#define MagickEpsilon
Definition: magick-type.h:142
#define SigmaLaplacian
MagickExport void ConvertRGBToHSL(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1142
Definition: magick-type.h:191
Definition: colorspace.h:48
Definition: statistic.h:116
Definition: magick-type.h:203
NoiseType
Definition: fx.h:27
Definition: colorspace.h:52
Definition: composite.h:47
static MagickRealType Hamming(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:301
Definition: resize-private.h:41
Definition: composite.h:73
Definition: composite.h:29
Definition: composite.h:72
Definition: composite.h:42
Definition: colorspace.h:43
#define SigmaUniform
Definition: composite.h:97
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:3552
Definition: colorspace.h:34
Definition: colorspace.h:57
Definition: resize-private.h:30
Definition: composite.h:54
#define GetPixelAlpha(pixel)
Definition: pixel-accessor.h:36
Definition: composite.h:38
Definition: composite.h:68
Definition: composite.h:96
Definition: magick-type.h:187
Definition: composite.h:71
Definition: resize-private.h:32
Definition: composite.h:55
Definition: composite.h:56
Definition: fx.h:31
#define SigmaGaussian
Definition: composite.h:69
Definition: pixel.h:71
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:960
Definition: colorspace.h:38
Definition: pixel.h:70
Definition: composite.h:86
Definition: resize-private.h:36
Definition: colorspace.h:30
#define SigmaMultiplicativeGaussian
Definition: composite.h:49
Definition: composite.h:44
#define TauGaussian
MagickExport void ConvertRGBToHSB(const Quantum red, const Quantum green, const Quantum blue, double *hue, double *saturation, double *brightness)
Definition: gem.c:1009
Definition: magick-type.h:189
static void Contrast(const int sign, Quantum *red, Quantum *green, Quantum *blue)
Definition: enhance.c:913
Definition: magick-type.h:204
Definition: composite.h:46
Definition: statistic.h:112
Definition: composite.h:28
Definition: magick-type.h:184
Definition: magick-type.h:193
Definition: colorspace.h:54
Definition: magick-type.h:192
Definition: resize-private.h:39
Definition: composite.h:78
Definition: resize-private.h:34
#define QuantumScale
Definition: magick-type.h:145
Definition: colorspace.h:55
Definition: fx.h:33
Definition: composite.h:62
MagickExport MagickRealType GetPixelIntensity(const Image *image, const PixelPacket *restrict pixel)
Definition: pixel.c:2106
Definition: colorspace.h:39
#define MaxMap
Definition: magick-type.h:73
Definition: magick-type.h:200
Definition: composite.h:98
Definition: composite.h:39
static void CompositeColorDodge(const MagickPixelPacket *p, const MagickPixelPacket *q, MagickPixelPacket *composite)
Definition: composite.c:343
MagickExport void ConvertHSBToRGB(const double hue, const double saturation, const double brightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:284
Definition: composite.h:45
ChannelType
Definition: magick-type.h:180
Definition: composite.h:70
Definition: colorspace.h:46
Definition: resize-private.h:28
Definition: composite.h:81
Definition: composite.h:41
Definition: composite.h:52
Definition: pixel.h:69
Definition: colorspace.h:49
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, Quantum *red, Quantum *green, Quantum *blue)
Definition: gem.c:460
Definition: composite.h:77
static Quantum ClampToQuantum(const MagickRealType value)
Definition: quantum.h:87
Definition: colorspace.h:53
Definition: composite.h:61
Definition: magick-type.h:186
static void MagickPixelCompositePlus(const MagickPixelPacket *p, const MagickRealType alpha, const MagickPixelPacket *q, const MagickRealType beta, MagickPixelPacket *composite)
Definition: composite-private.h:108
Definition: composite.h:76
Definition: magick-type.h:182
Definition: colorspace.h:28
Definition: resize-private.h:42
Definition: composite.h:50
Definition: composite.h:36
Definition: composite.h:43
Definition: composite.h:37
static MagickRealType Sinc(const MagickRealType, const ResizeFilter *)
Definition: composite.h:60
Definition: statistic.h:113
ResizeWeightingFunctionType
Definition: resize-private.h:25
static MagickRealType Blackman(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:148
Definition: colorspace.h:56
ColorspaceType
Definition: colorspace.h:25
Definition: composite.h:32
Definition: colorspace.h:29
Definition: composite.h:88
Definition: colorspace.h:42
#define SigmaImpulse
Definition: composite.h:48
Definition: composite.h:64
Definition: magick-type.h:188
Definition: colorspace.h:51
CompositeOperator
Definition: composite.h:25
Definition: composite.h:79
Definition: magick-type.h:195
Definition: colorspace.h:32
Definition: pixel.h:78
Definition: composite.h:66
Definition: composite.h:30
Definition: colorspace.h:60
Definition: magick-type.h:183
Definition: composite.h:63
Definition: composite.h:58
Definition: composite.h:92
Definition: magick-type.h:202
Definition: composite.h:34
static double PerceptibleReciprocal(const double x)
Definition: pixel-private.h:78
static MagickRealType CubicBC(const MagickRealType x, const ResizeFilter *resize_filter)
Definition: resize.c:210
Definition: resize-private.h:27
Definition: composite.h:74
Definition: colorspace.h:27
MagickFunction
Definition: statistic.h:110
Definition: fx.h:30
Definition: composite.h:40
Definition: composite.h:67
Definition: resize-private.h:35
#define QuantumRange
Definition: magick-type.h:97
static MagickRealType Triangle(const MagickRealType x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:514
Definition: fx.h:35
Definition: pixel.h:73
Definition: composite.h:51
Definition: fx.h:32
Definition: magick-type.h:194
Definition: fx.h:36
Definition: composite.h:57