/* ctx-0.1.5 */
/*
 * Copyright (c) 2012, 2015, 2019, 2020, 2021, 2022, 2023, 2024, 2025
 * Øyvind Kolps <pippin@gimp.org> with contributors.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * ctx is a 2D vector graphics protocol, and interactive application
 * development environment for microcontrollers, framebuffers and
 * terminals on unix systems.
 * 
 * To use ctx add ctx.h to the project path and do the following:
 *
 * #define CTX_IMPLEMENTATION
 * #include "ctx.h"
 *
 * Ctx contains a minimal default fallback font with only ascii, so
 * you probably want to also include a font, and perhaps enable
 * SDL2 optional backends, a more complete example:
 *
 * #include <cairo.h>
 * #include <SDL.h>
 * 
 * #define CTX_IMPLEMENTATION
 * #include "ctx.h"
 *
 * The behavior of ctx can be tweaked, and features can be configured, enabled
 * or disabled with other #defines, see further down in the start of this file
 * for details.
 */

#ifndef CTX_H
#define CTX_H

#ifdef __cplusplus
extern "C" {
#endif

#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdio.h>

/*** h2: context management */

/* Ctx: the drawing context type that the API declared here operates on. */
typedef struct _Ctx            Ctx;

/**
 * ctx_new:
 * @width: width in device units
 * @height: height in device units
 * @backend: backend to use
 *
 *   valid values are:
 *     NULL/"auto", "drawlist", "sdl", "term", "ctx"; the strings are
 *     the same as are valid for the CTX_BACKEND environment variable.
 *
 * Create a new drawing context, this context has no pixels but
 * accumulates commands and can be played back on other ctx
 * render contexts, this is a ctx context using the drawlist backend.
 *
 * NOTE(review): the paragraph above reads as if it describes the "drawlist"
 * backend specifically; confirm whether it holds for every value of @backend.
 *
 * Returns the newly created context; release it with ctx_destroy().
 */
Ctx *ctx_new (int width, int height, const char *backend);


/**
 * ctx_new_drawlist:
 * @width: width of the context (presumably in device units, as for ctx_new() - TODO confirm)
 * @height: height of the context (presumably in device units, as for ctx_new() - TODO confirm)
 *
 * Create a new drawing context that can record drawing commands,
 * this is also the basis for creating more complex contexts with
 * swapped out backend.
 */
Ctx * ctx_new_drawlist (int width, int height);

/** CtxEntry:
 *
 * Type of a single entry in a drawlist (binary ctx protocol); the API
 * hands these out by pointer, see ctx_get_drawlist().
 */
typedef struct _CtxEntry   CtxEntry;
/** CtxCommand:
 *
 * Type of a command in binary ctx protocol, handled by pointer.
 *
 * NOTE(review): how CtxCommand relates to CtxEntry is not visible in this
 * part of the header.
 */
typedef struct _CtxCommand CtxCommand;

/* CtxPixelFormat: the pixel formats supported as render targets; depending
 * on compile-time configuration not all formats are usable.
 *
 * The numeric values are deliberate and spelled out explicitly:
 *  - 1..4 match the bpp of the common gray and rgb cases, up to
 *    4bpp = RGBA8;
 *  - RGB332 (8), GRAY2 (16) and GRAY4 (32) are placed to match flags.
 */
typedef enum _CtxPixelFormat
{
  CTX_FORMAT_NONE                 = 0,
  CTX_FORMAT_GRAY8                = 1,
  CTX_FORMAT_GRAYA8               = 2,
  CTX_FORMAT_RGB8                 = 3,
  CTX_FORMAT_RGBA8                = 4,
  CTX_FORMAT_BGRA8                = 5,
  CTX_FORMAT_RGB565               = 6,
  CTX_FORMAT_RGB565_BYTESWAPPED   = 7,
  CTX_FORMAT_RGB332               = 8,  /* matching flags */
  CTX_FORMAT_RGBAF                = 9,
  CTX_FORMAT_GRAYF                = 10,
  CTX_FORMAT_GRAYAF               = 11,
  CTX_FORMAT_GRAY1                = 12,
  CTX_FORMAT_CMYK8                = 13,
  CTX_FORMAT_CMYKAF               = 14,
  CTX_FORMAT_CMYKA8               = 15,
  CTX_FORMAT_GRAY2                = 16, /* matching flags */
  CTX_FORMAT_YUV420               = 17,
  CTX_FORMAT_BGR8                 = 18,
  CTX_FORMAT_RGBA8_SEPARATE_ALPHA = 19,
  CTX_FORMAT_GRAY4                = 32, /* to match flags */
  CTX_FORMAT_BGRA8Z               = 33
} CtxPixelFormat;

/**
 * ctx_new_for_framebuffer:
 * @data: the framebuffer memory to render into
 * @width: width of the framebuffer (presumably in pixels - TODO confirm)
 * @height: height of the framebuffer (presumably in pixels - TODO confirm)
 * @stride: stride of the framebuffer (presumably bytes per row - TODO confirm)
 * @pixel_format: the CtxPixelFormat of @data
 *
 * Create a new drawing context for a framebuffer, rendering happens
 * immediately.
 */
Ctx *ctx_new_for_framebuffer (void *data,
                              int   width,
                              int   height,
                              int   stride,
                              CtxPixelFormat pixel_format);



/**
 * ctx_get_drawlist:
 * @ctx: a ctx context.
 * @count: return location for length of drawlist
 *
 * The returned pointer is only valid as long as no further drawing has been
 * done.
 *
 * Returns a read-only pointer to the first entry of the context's drawlist.
 */
const CtxEntry *ctx_get_drawlist (Ctx *ctx, int *count);


/**
 * ctx_new_for_drawlist:
 * @width: width of the new context
 * @height: height of the new context
 * @data: the pre-existing raw drawlist
 * @length: length of @data
 *          (NOTE(review): unit - bytes or entries - is not stated here; confirm)
 *
 * Create a new drawing context for a pre-existing raw drawlist.
 */
Ctx *ctx_new_for_drawlist   (int    width,
                             int    height,
                             void  *data,
                             size_t length);

/**
 * ctx_set_drawlist:
 * @ctx: a ctx context
 * @data: the binary drawlist to install
 * @length: length of the drawlist
 *
 * Replaces the drawlist of a ctx context with a new one. The size of
 * @data is expected to be @length * 9.
 *
 * NOTE(review): the meaning of the int return value is not documented here.
 */
int  ctx_set_drawlist       (Ctx *ctx, void *data, int length);

/**
 * ctx_append_drawlist:
 * @ctx: a ctx context
 * @data: the binary drawlist to append
 * @length: length of the drawlist
 *
 * Appends the commands in a binary drawlist. The size of @data is expected
 * to be @length * 9.
 *
 * NOTE(review): the meaning of the int return value is not documented here.
 */
int  ctx_append_drawlist    (Ctx *ctx, void *data, int length);

/**
 * ctx_drawlist_clear:
 * @ctx: a ctx context
 *
 * Clears the drawlist associated with the context.
 */
void  ctx_drawlist_clear (Ctx *ctx);

/**
 * ctx_drawlist_force_count:
 * @ctx: a ctx context
 * @count: new count to set, must be lower than the current count.
 *
 * Shortens the internal drawlist to @count, dropping the last
 * items.
 */
void ctx_drawlist_force_count (Ctx *ctx, int count);


/**
 * ctx_destroy:
 * @ctx: a ctx context
 *
 * Destroys @ctx; the context must not be used afterwards.
 */
void ctx_destroy (Ctx *ctx);



/*** h2: drawing api */


/*** h3: frame start/end */

/* Most backends, apart from PDF, expect each frame that is to be shown
 * to be bracketed by ctx_start_frame() and ctx_end_frame() calls.
 *
 * The combination of the calls blocks if rendering is congested.
 */

/**
 * ctx_start_frame:
 * @ctx: a context
 *
 * Prepare for rendering a new frame, clears internal drawlist and initializes
 * the state.
 *
 * Returns time in seconds since previous start_frame.
 */
float ctx_start_frame    (Ctx *ctx);

/**
 * ctx_end_frame:
 * @ctx: a context
 *
 * We're done rendering a frame. This does nothing on a context created for a
 * framebuffer, where drawing commands are immediate.
 */
void ctx_end_frame      (Ctx *ctx);


/**
 * ctx_new_page:
 * @ctx: a context
 *
 * Create a new page.
 */
void ctx_new_page         (Ctx *ctx);

/**
 * ctx_view_box:
 * @ctx: a context
 * @x0: x coordinate of the view box origin
 * @y0: y coordinate of the view box origin
 * @w: width of the view box
 * @h: height of the view box
 *
 * Specify the view box for the current page, should immediately follow
 * new_page if present, the PDF backend in particular makes use of this.
 */
void ctx_view_box         (Ctx *ctx,
                           float x0, float y0,
                           float w, float h);


/*** h3: path construction/manipulation */


/**
 * ctx_x:
 * @ctx: a context
 *
 * Returns the current path append x-coordinate.
 */
float ctx_x                    (Ctx *ctx);

/**
 * ctx_y:
 * @ctx: a context
 *
 * Returns the current path append y-coordinate.
 */
float ctx_y                    (Ctx *ctx);

/**
 * ctx_current_point:
 * @ctx: a context
 * @x: a pointer to store x coordinate in, or NULL
 * @y: a pointer to store y coordinate in, or NULL
 *
 * Stores the same values as returned by ctx_x() and ctx_y() in @x and @y.
 */
void  ctx_current_point        (Ctx *ctx, float *x, float *y);

/**
 * ctx_reset_path:
 * @ctx: a context
 *
 * Clears the current path if any. Fill and stroke commands without a
 * preceding preserve do an implicit reset_path.
 */
void ctx_reset_path     (Ctx *ctx);

/* ctx_begin_path: old name for ctx_reset_path(), kept as an alias. */
#define ctx_begin_path(ctx) ctx_reset_path(ctx) // compatibility with old API

/**
 * ctx_move_to:
 * @ctx: a context
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Move the tip of the virtual pen to x,y, starting a new sub-path.
 */
void  ctx_move_to         (Ctx *ctx, float x, float y);

/**
 * ctx_line_to:
 * @ctx: a context
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Add a straight line segment to the current path, moving the
 * tip of the virtual pen to x,y.
 */
void  ctx_line_to         (Ctx *ctx, float x, float y);

/**
 * ctx_curve_to:
 * @ctx: a context
 * @cx0: first control point x coordinate
 * @cy0: first control point y coordinate
 * @cx1: second control point x coordinate
 * @cy1: second control point y coordinate
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Adds a cubic curve segment to x, y to the current path.
 */
void  ctx_curve_to        (Ctx *ctx,
                           float cx0, float cy0,
                           float cx1, float cy1,
                           float x, float y);
/**
 * ctx_quad_to:
 * @ctx: a context
 * @cx: control point x coordinate
 * @cy: control point y coordinate
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Adds a quad curve segment to x, y to the current path.
 */
void  ctx_quad_to         (Ctx  *ctx,
                           float cx, float cy,
                           float x,  float y);
/**
 * ctx_arc:
 * @ctx: a context
 * @x: x coordinate of the circle center
 * @y: y coordinate of the circle center
 * @radius: radius of the circle
 * @angle1: start angle in radians
 * @angle2: end angle in radians
 * @direction: direction in which the arc is traced
 *             (NOTE(review): which value means which direction is not documented here)
 *
 * Add an arc segment for a circle centered at x,y with radius from angle1 to angle2 in radians,
 * XXX : look into specification of direction on other APIs
 */
void  ctx_arc             (Ctx  *ctx,
                           float x, float y,
                           float radius,
                           float angle1, float angle2,
                           int   direction);
/**
 * ctx_arc_to:
 * @ctx: a context
 * @x1: x coordinate of the first point
 * @y1: y coordinate of the first point
 * @x2: x coordinate of the second point
 * @y2: y coordinate of the second point
 * @radius: arc radius
 *
 * NOTE(review): undocumented in the original; presumably analogous to
 * arcTo() of HTML5 canvas - confirm against the implementation.
 */
void  ctx_arc_to          (Ctx *ctx,
                           float x1, float y1,
                           float x2, float y2,
                           float radius);
/**
 * ctx_rel_arc_to:
 * @ctx: a context
 * @x1: x coordinate of the first point
 * @y1: y coordinate of the first point
 * @x2: x coordinate of the second point
 * @y2: y coordinate of the second point
 * @radius: arc radius
 *
 * NOTE(review): undocumented in the original; presumably ctx_arc_to() with
 * coordinates relative to ctx_x(), ctx_y(), like the other rel_ calls -
 * confirm against the implementation.
 */
void  ctx_rel_arc_to      (Ctx *ctx,
                           float x1, float y1,
                           float x2, float y2,
                           float radius);


/**
 * ctx_rectangle:
 * @ctx: a context
 * @x0: upper left x coordinate
 * @y0: upper left y coordinate
 * @w: width in user space coordinates
 * @h: height in user space coordinates
 *
 * Add rectangle to the current path.
 */
void  ctx_rectangle       (Ctx *ctx,
                           float x0, float y0,
                           float w, float h);
/**
 * ctx_round_rectangle:
 * @ctx: a context
 * @x0: upper left x coordinate
 * @y0: upper left y coordinate
 * @w: width in user space coordinates
 * @h: height in user space coordinates
 * @radius: rounding radius, if width or height are too small radius is clamped accordingly.
 *
 * Add a rectangle with rounded corners to the current path.
 */
void  ctx_round_rectangle (Ctx *ctx,
                           float x0, float y0,
                           float w, float h,
                           float radius);
/**
 * ctx_rel_line_to:
 * @ctx: a context
 * @x: target x coordinate, relative to ctx_x()
 * @y: target y coordinate, relative to ctx_y()
 *
 * Adds a straight segment to ctx_x()+x, ctx_y()+y to the current path.
 */
void  ctx_rel_line_to     (Ctx *ctx,
                           float x, float y);
/**
 * ctx_rel_move_to:
 * @ctx: a context
 * @x: target x coordinate, relative to ctx_x()
 * @y: target y coordinate, relative to ctx_y()
 *
 * Stop current sub path and move path append point to ctx_x()+x, ctx_y()+y
 */
void  ctx_rel_move_to     (Ctx *ctx,
                           float x, float y);
/**
 * ctx_rel_curve_to:
 * @ctx: a context
 * @cx0: first control point x coordinate
 * @cy0: first control point y coordinate
 * @cx1: second control point x coordinate
 * @cy1: second control point y coordinate
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Adds a cubic curve segment to ctx_x()+x, ctx_y()+y to the current path.
 */
void  ctx_rel_curve_to    (Ctx *ctx,
                           float cx0, float cy0,
                           float cx1, float cy1,
                           float x, float y);
/**
 * ctx_rel_quad_to:
 * @ctx: a context
 * @cx: control point x coordinate
 * @cy: control point y coordinate
 * @x: target x coordinate
 * @y: target y coordinate
 *
 * Adds a quad curve segment to ctx_x()+x, ctx_y()+y to the current path.
 */
void  ctx_rel_quad_to     (Ctx *ctx,
                           float cx, float cy,
                           float x, float y);
/**
 * ctx_close_path:
 * @ctx: a context
 *
 * Closes the currently open sub-path of the current path.
 */
void  ctx_close_path      (Ctx *ctx);

/**
 * ctx_in_fill:
 * @ctx: a ctx context
 * @x: x coordinate
 * @y: y coordinate
 *
 * Returns 1 if x, y are inside a fill of the current path with the current
 * fill-rule.
 */
int  ctx_in_fill    (Ctx *ctx, float x, float y);

/**
 * ctx_in_stroke:
 * @ctx: a ctx context
 * @x: x coordinate
 * @y: y coordinate
 *
 * Returns 1 if x, y are inside a stroke of the current path with the current
 * stroking parameters.
 */
int  ctx_in_stroke  (Ctx *ctx, float x, float y);

/* CtxDrawlist: drawlist type, handled by pointer in the declarations below. */
typedef struct _CtxDrawlist CtxDrawlist;

/**
 * ctx_current_path:
 * @ctx: a context
 *
 * Returns a CtxDrawlist, to be freed with ctx_free
 * (presumably holding the current path of @ctx - TODO confirm).
 */
CtxDrawlist * ctx_current_path (Ctx *ctx);

/**
 * ctx_path_extents:
 * @ctx: a context
 * @ex1: return location for the first x extent
 * @ey1: return location for the first y extent
 * @ex2: return location for the second x extent
 * @ey2: return location for the second y extent
 *
 * NOTE(review): undocumented in the original; presumably stores the bounding
 * box of the current path as (ex1,ey1)-(ex2,ey2) - confirm, including
 * whether NULL pointers are accepted.
 */
void
ctx_path_extents (Ctx *ctx, float *ex1, float *ey1, float *ex2, float *ey2);

/*** h3: context management  */

/* Attributes like transform, clipping state, fill and stroke sources, font size,
 * stroking, texture interpolation and dashing are stored in stackable contexts.
 *
 * This allows building up a hierarchy of transforms, as well as bringing the
 * drawing context back to a known state.
 */

/**
 * ctx_save:
 * @ctx: a context
 *
 * Stores the transform, clipping state, fill and stroke sources, font size,
 * stroking and dashing options, so that they can be brought back with
 * ctx_restore().
 */
void ctx_save           (Ctx *ctx);

/**
 * ctx_restore:
 * @ctx: a context
 *
 * Restores the state previously saved with ctx_save(). Calls to
 * ctx_save()/ctx_restore() should be balanced.
 */
void ctx_restore        (Ctx *ctx);

/**
 * ctx_start_group:
 * @ctx: a context
 *
 * Start a compositing group; the group is ended with ctx_end_group().
 */
void ctx_start_group    (Ctx *ctx);

/**
 * ctx_end_group:
 * @ctx: a context
 *
 * End a compositing group. The global alpha, compositing mode and blend mode
 * set before this call are used to apply the group.
 */
void ctx_end_group      (Ctx *ctx);


/**
 * ctx_image_smoothing:
 * @ctx: a context
 * @enabled: 1 for enabled and 0 for disabled
 *
 * Set or unset bilinear / box filtering for textures. Turning it off uses the
 * faster nearest neighbor for all cases.
 */
void ctx_image_smoothing  (Ctx *ctx, int enabled);
/**
 * ctx_get_image_smoothing:
 * @ctx: a context
 *
 * Returns the current setting for image_smoothing, as set with
 * ctx_image_smoothing().
 */
int   ctx_get_image_smoothing  (Ctx *ctx);


/*** h3: drawing commands */


/**
 * ctx_fill:
 * @ctx: a context
 *
 * Fills the current path, and resets it (unless ctx_preserve() has been called).
 */
void ctx_fill             (Ctx *ctx);

/**
 * ctx_paint:
 * @ctx: a context
 *
 * Fills the whole canvas with color; is affected by clipping.
 */
void ctx_paint            (Ctx *ctx);

/**
 * ctx_clip:
 * @ctx: a context
 *
 * Use the current path as a clipping mask; subsequent draw calls are limited
 * by the path. The only way to increase the visible area is to first call
 * ctx_save() and then later ctx_restore() to undo the clip.
 */
void ctx_clip           (Ctx *ctx);

/**
 * ctx_preserve:
 * @ctx: a context
 *
 * Make the following fill or stroke not reset the current path.
 */
void ctx_preserve         (Ctx *ctx);

/**
 * ctx_stroke:
 * @ctx: a context
 *
 * Stroke the current path with the current line_width, dashing, cap and join
 * options.
 */
void ctx_stroke           (Ctx *ctx);


/*** h4: stroking options */
/**
 * ctx_miter_limit:
 * @ctx: a context.
 * @limit: new miter limit in user coordinates.
 *
 * Specify the miter limit used when stroking.
 */
void ctx_miter_limit      (Ctx *ctx, float limit);

/**
 * ctx_get_miter_limit:
 * @ctx: a context.
 *
 * Returns the current miter limit, as set with ctx_miter_limit().
 */
float ctx_get_miter_limit (Ctx *ctx);


/* Special negative line width constants (presumably meant to be passed as
 * the width to ctx_line_width() - TODO confirm). The negative literals are
 * parenthesized so that they expand as a single operand in any expression.
 */
#define CTX_LINE_WIDTH_HAIRLINE (-1000.0)
#define CTX_LINE_WIDTH_ALIASED  (-1.0)
#define CTX_LINE_WIDTH_FAST     (-1.0)  /* aliased 1px wide line */

/**
 * ctx_line_width:
 * @ctx: a context.
 * @width: new stroking width in user space coordinates.
 *
 * Set the line width used when stroking.
 */
void ctx_line_width       (Ctx *ctx, float width);

/**
 * ctx_get_line_width:
 * @ctx: a context.
 *
 * Returns the current stroking line-width, as set with ctx_line_width().
 */
float ctx_get_line_width  (Ctx *ctx);

/**
 * ctx_line_dash_offset:
 * @ctx: a context.
 * @offset: number of user-space units to wait before starting dash pattern.
 *
 * Specify phase offset for line dash pattern.
 */
void ctx_line_dash_offset (Ctx *ctx, float offset);

/**
 * ctx_get_line_dash_offset:
 * @ctx: a context.
 *
 * Returns the current line dash offset, as set with ctx_line_dash_offset().
 */
float ctx_get_line_dash_offset (Ctx *ctx);

/**
 * ctx_line_dash:
 * @ctx: a context.
 * @dashes: pointer to an array of floats
 * @count: number of items in the @dashes array.
 *
 * Specify the line dash pattern used when stroking.
 */
void  ctx_line_dash       (Ctx *ctx, const float *dashes, int count);


/*** h3: transforms */

/**
 * ctx_identity:
 * @ctx: a context.
 *
 * Restore context to identity transform. NOTE: a bug currently makes this
 * call break multi-threaded rendering when used, since the rendering threads
 * are expecting an initial transform on top of the base identity.
 */
void ctx_identity       (Ctx *ctx);

/**
 * ctx_scale:
 * @ctx: a context.
 * @x: x scale factor
 * @y: y scale factor
 *
 * Scales the user to device transform.
 */
void  ctx_scale         (Ctx *ctx, float x, float y);

/**
 * ctx_translate:
 * @ctx: a context.
 * @x: x translation
 * @y: y translation
 *
 * Adds translation to the user to device transform.
 */
void  ctx_translate     (Ctx *ctx, float x, float y);

/**
 * ctx_rotate:
 * @ctx: a context.
 * @a: angle to rotate in radians.
 *
 * Adds rotation to the user to device space transform.
 */
void ctx_rotate         (Ctx *ctx, float a);

/**
 * ctx_apply_transform:
 * @ctx: a context.
 * @a..i: the nine matrix components
 *        (NOTE(review): row/column order of a..i is not documented here).
 *
 * Adds a 3x3 matrix on top of the existing user to device space transform.
 */
void ctx_apply_transform (Ctx *ctx, float a, float b, float c,
                                    float d, float e, float f,
                                    float g, float h, float i);

/* CtxMatrix: a 3x3 matrix of floats, the matrix type taken by
 * ctx_apply_matrix().
 */
typedef struct _CtxMatrix CtxMatrix;

struct _CtxMatrix
{
  float m[3][3]; /* the nine matrix components */
};

/**
 * ctx_apply_matrix:
 * @ctx: a context.
 * @matrix: a 3x3 matrix.
 *
 * Adds a 3x3 matrix on top of the existing user to device space transform.
 */
void ctx_apply_matrix           (Ctx *ctx, CtxMatrix *matrix);
/**
 * ctx_set_transform:
 * @ctx: a context.
 * @a..i: the nine matrix components.
 *
 * Set the user to device transform.
 *
 * XXX: Redundant with identity+apply?
 */
void ctx_set_transform    (Ctx *ctx, float a, float b, float c,
                                     float d, float e, float f,
                                     float g, float h, float i);


/*** h3: filling options */

/* CtxFillRule: the fill rules that can be set with ctx_fill_rule(). */
typedef enum
{
  CTX_FILL_RULE_WINDING  = 0,
  CTX_FILL_RULE_EVEN_ODD = 1
} CtxFillRule;

/**
 * ctx_fill_rule:
 * @ctx: a ctx context.
 * @fill_rule: new fill rule to set, CTX_FILL_RULE_WINDING or CTX_FILL_RULE_EVEN_ODD.
 *
 * Sets the current fill rule.
 */
void        ctx_fill_rule      (Ctx *ctx, CtxFillRule        fill_rule);


/**
 * ctx_get_fill_rule:
 * @ctx: a context.
 *
 * Returns the current fill rule.
 */
CtxFillRule ctx_get_fill_rule  (Ctx *ctx);

/* CtxCompositingMode: the compositing modes that can be set with
 * ctx_compositing_mode().
 *
 * The values are consecutive integers starting at 0, spelled out
 * explicitly. CTX_COMPOSITE_LAST names the highest valid value.
 */
typedef enum
{
  CTX_COMPOSITE_SOURCE_OVER      = 0,
  CTX_COMPOSITE_COPY             = 1,
  CTX_COMPOSITE_SOURCE_IN        = 2,
  CTX_COMPOSITE_SOURCE_OUT       = 3,
  CTX_COMPOSITE_SOURCE_ATOP      = 4,
  CTX_COMPOSITE_CLEAR            = 5,

  CTX_COMPOSITE_DESTINATION_OVER = 6,
  CTX_COMPOSITE_DESTINATION      = 7,
  CTX_COMPOSITE_DESTINATION_IN   = 8,
  CTX_COMPOSITE_DESTINATION_OUT  = 9,
  CTX_COMPOSITE_DESTINATION_ATOP = 10,
  CTX_COMPOSITE_XOR              = 11
} CtxCompositingMode;
#define CTX_COMPOSITE_LAST CTX_COMPOSITE_XOR

/**
 * ctx_compositing_mode:
 * @ctx: a ctx context.
 * @mode: new compositing mode to set.
 *
 * Sets the current compositing mode.
 */
void               ctx_compositing_mode     (Ctx *ctx, CtxCompositingMode mode);
/**
 * ctx_get_compositing_mode:
 * @ctx: a ctx context.
 *
 * Returns the current compositing mode.
 */
CtxCompositingMode ctx_get_compositing_mode (Ctx *ctx);

/* CtxBlend: the blend modes that can be set with ctx_blend_mode().
 *
 * The values are consecutive integers starting at 0, spelled out
 * explicitly. CTX_BLEND_LAST names the highest valid value.
 */
typedef enum
{
  CTX_BLEND_NORMAL      = 0,
  CTX_BLEND_MULTIPLY    = 1,
  CTX_BLEND_SCREEN      = 2,
  CTX_BLEND_OVERLAY     = 3,
  CTX_BLEND_DARKEN      = 4,
  CTX_BLEND_LIGHTEN     = 5,
  CTX_BLEND_COLOR_DODGE = 6,
  CTX_BLEND_COLOR_BURN  = 7,
  CTX_BLEND_HARD_LIGHT  = 8,
  CTX_BLEND_SOFT_LIGHT  = 9,
  CTX_BLEND_DIFFERENCE  = 10,
  CTX_BLEND_EXCLUSION   = 11,
  CTX_BLEND_HUE         = 12,
  CTX_BLEND_SATURATION  = 13,
  CTX_BLEND_COLOR       = 14,
  CTX_BLEND_LUMINOSITY  = 15,
  CTX_BLEND_DIVIDE      = 16,
  CTX_BLEND_ADDITION    = 17,
  CTX_BLEND_SUBTRACT    = 18
} CtxBlend;
#define CTX_BLEND_LAST CTX_BLEND_SUBTRACT

/**
 * ctx_blend_mode:
 * @ctx: a ctx context.
 * @mode: new blend mode to set.
 *
 * Sets the current blending mode.
 */
void     ctx_blend_mode     (Ctx *ctx, CtxBlend mode);

/**
 * ctx_get_blend_mode:
 * @ctx: a ctx context.
 *
 * Returns the blending mode of the current graphics context.
 */
CtxBlend ctx_get_blend_mode (Ctx *ctx);

/*** h3: paint/stroke sources */

/**
 * ctx_set_pixel_u8:
 * @ctx: a ctx context
 * @x: x coordinate
 * @y: y coordinate
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set a single pixel to the nearest possible match of the specified r,g,b,a
 * value. Fast for a few individual pixels, slow for doing textures.
 */
void
ctx_set_pixel_u8 (Ctx *ctx, uint16_t x, uint16_t y,
                  uint8_t r, uint8_t g, uint8_t b, uint8_t a);

/**
 * ctx_global_alpha:
 * @ctx: a ctx context
 * @global_alpha: a value in the range 0.0f-1.0f
 *
 * Set a global alpha value that the colors, textures and gradients are
 * modulated by.
 */
void  ctx_global_alpha (Ctx *ctx, float global_alpha);
/**
 * ctx_get_global_alpha:
 * @ctx: a ctx context
 *
 * Returns the current global_alpha value, as set with ctx_global_alpha().
 */
float ctx_get_global_alpha     (Ctx *ctx);


/**
 * ctx_stroke_source:
 * @ctx: a ctx context
 *
 * The next source definition applies to stroking rather than filling. When a
 * stroke source is not explicitly set, the fill source is inherited.
 */
void ctx_stroke_source (Ctx *ctx); // next source definition is for stroking

/**
 * ctx_rgba_stroke:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current stroking color to the color specified by parameters.
 */
void ctx_rgba_stroke   (Ctx *ctx, float r, float g, float b, float a);

/**
 * ctx_rgb_stroke:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 *
 * Set the current stroking color to the color specified by parameters.
 */
void ctx_rgb_stroke    (Ctx *ctx, float r, float g, float b);

/**
 * ctx_rgba8_stroke:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current stroking color to the color specified by the 8 bit
 * parameters.
 */
void ctx_rgba8_stroke  (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

/**
 * ctx_gray_stroke:
 * @ctx: a ctx context
 * @gray: value
 *
 * Set the current stroking color to a grayscale value, valid value range
 * 0.0f-1.0f
 */
void ctx_gray_stroke   (Ctx *ctx, float gray);

/**
 * ctx_drgba_stroke:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current stroking color to the color specified by parameters directly
 * in device-space without any color transformation.
 */
void ctx_drgba_stroke  (Ctx *ctx, float r, float g, float b, float a);

/**
 * ctx_cmyka_stroke:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 * @a: alpha component
 *
 * Set the current stroking color to the color specified by parameters.
 */
void ctx_cmyka_stroke  (Ctx *ctx, float c, float m, float y, float k, float a);
/**
 * ctx_cmyk_stroke:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 *
 * Set the current stroking color to the color specified by parameters.
 */
void ctx_cmyk_stroke   (Ctx *ctx, float c, float m, float y, float k);
/**
 * ctx_dcmyka_stroke:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 * @a: alpha component
 *
 * Set the current stroking color to the color specified by parameters directly
 * in device-space without any color transformation.
 */
void ctx_dcmyka_stroke (Ctx *ctx, float c, float m, float y, float k, float a);

/**
 * ctx_dcmyk_stroke:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 *
 * Set the current stroking color to the color specified by parameters directly
 * in device-space without any color transformation.
 */
void ctx_dcmyk_stroke  (Ctx *ctx, float c, float m, float y, float k);

/**
 * ctx_rgba:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current fill and text color to the color specified by parameters.
 */

void ctx_rgba  (Ctx *ctx, float r, float g, float b, float a);

/**
 * ctx_rgb:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 *
 * Set the current fill and text color to the color specified by parameters.
 */
void ctx_rgb    (Ctx *ctx, float r, float g, float b);

/**
 * ctx_rgba8:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current fill and text color to the color specified by the 8 bit
 * parameters.
 */
void ctx_rgba8  (Ctx *ctx, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

/**
 * ctx_gray:
 * @ctx: a ctx context
 * @gray:  value
 *
 * Set the current fill and text color to the grayscale color specified by parameters.
 */
void ctx_gray   (Ctx *ctx, float gray);

/**
 * ctx_drgba:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Set the current fill and text color to the color specified by parameters in
 * device space, without any color transforms.
 */
void ctx_drgba  (Ctx *ctx, float r, float g, float b, float a);

/**
 * ctx_cmyka:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 * @a: alpha component
 *
 * Set the current fill and text color to the color specified by parameters.
 */
void ctx_cmyka  (Ctx *ctx, float c, float m, float y, float k, float a);

/**
 * ctx_cmyk:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 *
 * Set the current fill and text color to the color specified by parameters.
 */
void ctx_cmyk   (Ctx *ctx, float c, float m, float y, float k);

/**
 * ctx_dcmyka:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 * @a: alpha component
 *
 * Set the current fill and text color to the color specified by parameters in
 * device space, without any color transforms.
 */
void ctx_dcmyka (Ctx *ctx, float c, float m, float y, float k, float a);

/**
 * ctx_dcmyk:
 * @ctx: a ctx context
 * @c: cyan component
 * @m: magenta component
 * @y: yellow component
 * @k: black component
 *
 * Set the current fill and text color to the color specified by parameters in
 * device space, without any color transforms.
 */
void ctx_dcmyk  (Ctx *ctx, float c, float m, float y, float k);

/* There are also getters for colors. By first setting a color in one format
 * and then getting it in another, color conversions can be done.
 *
 * Each getter writes the components to the array passed in
 * (NOTE(review): required array sizes are not documented here; presumably
 * one float per component named by the getter, e.g. 4 for rgba, 2 for graya
 * and 5 for cmyka - confirm).
 */
void ctx_get_rgba   (Ctx *ctx, float *rgba);
void ctx_get_graya  (Ctx *ctx, float *ya);
void ctx_get_drgba  (Ctx *ctx, float *drgba);
void ctx_get_cmyka  (Ctx *ctx, float *cmyka);
void ctx_get_dcmyka (Ctx *ctx, float *dcmyka);



/**
 * ctx_linear_gradient:
 * @ctx: a ctx context
 * @x0: x coordinate of the gradient start
 * @y0: y coordinate of the gradient start
 * @x1: x coordinate of the gradient end
 * @y1: y coordinate of the gradient end
 *
 * Change the source to a linear gradient from x0,y0 to x1,y1. An empty
 * gradient is interpreted as grayscale from black to white; add stops with
 * ctx_gradient_add_stop to specify a custom gradient.
 */
void ctx_linear_gradient (Ctx *ctx, float x0, float y0, float x1, float y1);

/**
 * ctx_radial_gradient:
 * @ctx: a ctx context
 * @x0: x coordinate of the first circle's center
 * @y0: y coordinate of the first circle's center
 * @r0: radius of the first circle
 * @x1: x coordinate of the outer circle's center
 * @y1: y coordinate of the outer circle's center
 * @r1: radius of the outer circle
 *
 * Change the source to a radial gradient from a circle x0,y0 with radius r0
 * to an outer circle x1,y1 with radius r1. (NOTE: currently ctx is only
 * using the second circle's origin, both radii are in use.)
 */
void ctx_radial_gradient (Ctx *ctx, float x0, float y0, float r0,
                          float x1, float y1, float r1);

/**
 * ctx_conic_gradient:
 * @ctx: a ctx context
 * @cx: x coordinate of the gradient center
 * @cy: y coordinate of the gradient center
 * @start_angle: angle at which the gradient starts
 * @cycles: NOTE(review): undocumented; presumably the number of times the
 *          gradient repeats around the center - confirm
 *
 * Change the source to a conic gradient centered at cx,cy with gradient
 * starting at angle start_angle.
 */
void ctx_conic_gradient (Ctx *ctx, float cx, float cy, float start_angle, float cycles);


/**
 * ctx_gradient_add_stop_rgba:
 * @ctx: a ctx context
 * @pos: position of the stop in the gradient
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Add an RGBA gradient stop to the current gradient at position pos.
 *
 * ctx_gradient_add_stop is kept as a compatibility alias for this function.
 */
void ctx_gradient_add_stop_rgba (Ctx *ctx, float pos, float r, float g, float b, float a);
#define ctx_gradient_add_stop ctx_gradient_add_stop_rgba // compat

/**
 * ctx_gradient_add_stop_u8:
 * @ctx: a ctx context
 * @pos: position of the stop in the gradient
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Add an RGBA gradient stop, with 8 bit components, to the current gradient
 * at position pos.
 */
void ctx_gradient_add_stop_u8 (Ctx *ctx, float pos, uint8_t r, uint8_t g, uint8_t b, uint8_t a);

/**
 * ctx_define_texture:
 * @ctx: a ctx context
 * @eid: identifier for the texture
 * @width: width of the texture
 * @height: height of the texture
 * @stride: stride of @data
 * @format: pixel format of @data (presumably a CtxPixelFormat value - TODO confirm)
 * @data: the pixel data
 * @ret_eid: return location for the eid
 *
 * NOTE(review): undocumented in the original. Presumably defines a texture
 * from @data that can then be referred to by eid, e.g. with ctx_texture();
 * confirm whether @eid may be NULL, how large @ret_eid must be and whether
 * @data is copied.
 */
void ctx_define_texture (Ctx        *ctx,
                         const char *eid,
                         int         width,
                         int         height,
                         int         stride,
                         int         format,
                         void       *data,
                         char       *ret_eid);

/** ctx_drop_eid:
 * @ctx: a ctx context
 * @eid: identifier of the texture to drop
 *
 * Drops the texture identified by @eid, freeing resources.
 */
void ctx_drop_eid (Ctx *ctx, const char *eid);

/**
 * ctx_source_transform:
 * @ctx: a ctx context
 * @a..i: the nine matrix components
 *
 * NOTE(review): undocumented in the original; presumably sets a 3x3
 * transform that applies to the current source (color/gradient/texture)
 * - confirm against the implementation.
 */
void
ctx_source_transform (Ctx *ctx, float a, float b,  float c,
                      float d, float e, float f, 
                      float g, float h, float i); 

/**
 * ctx_source_transform_matrix:
 * @ctx: a ctx context
 * @matrix: a 3x3 matrix
 *
 * NOTE(review): undocumented in the original; presumably the same as
 * ctx_source_transform() with the components taken from @matrix - confirm.
 */
void
ctx_source_transform_matrix (Ctx *ctx, CtxMatrix *matrix);


/*** h3: shadow */

/**
 * ctx_shadow_rgba:
 * @ctx: a ctx context
 * @r: red component
 * @g: green component
 * @b: blue component
 * @a: alpha component
 *
 * Sets the color of the shadow-blur, use a < 1.0 for softer blur.
 */
void ctx_shadow_rgba      (Ctx *ctx, float r, float g, float b, float a);

/**
 * ctx_shadow_blur:
 * @ctx: a ctx context
 * @stddev_x_2: the blur radius, i.e. two times the standard deviation
 *
 * Set the shadow_blur radius, which in HTML5 canvas is double the standard
 * deviation of an expected gaussian blur.
 */
void ctx_shadow_blur      (Ctx *ctx, float stddev_x_2);

/**
 * ctx_shadow_offset_x:
 * @ctx: a ctx context
 * @x: horizontal offset
 *
 * Specify the horizontal offset of the generated shadow blur.
 */
void ctx_shadow_offset_x  (Ctx *ctx, float x);

/**
 * ctx_shadow_offset_y:
 * @ctx: a ctx context
 * @y: vertical offset
 *
 * Specify the vertical offset of the generated shadow blur.
 */
void ctx_shadow_offset_y  (Ctx *ctx, float y);


/**
 * ctx_width:
 * @ctx: a ctx context
 *
 * Returns the width of the ctx canvas in pixels.
 */
int   ctx_width                (Ctx *ctx);

/**
 * ctx_height:
 * @ctx: a ctx context
 *
 * Returns the height of the ctx canvas in pixels.
 */
int   ctx_height               (Ctx *ctx);


/**
 * ctx_get_transform:
 * @ctx: a context
 * @a: location to store the first coefficient, b..i follow in order
 *
 * Returns the currently set transform matrix coefficients in a..i.
 *
 * NOTE(review): whether any of the nine pointers may be NULL is not visible
 * from this declaration - TODO confirm before passing NULL.
 */
void  ctx_get_transform        (Ctx *ctx, float *a, float *b,
                                float *c, float *d,
                                float *e, float *f,
                                float *g, float *h,
                                float *i);

/**
 * ctx_clip_extents:
 * @ctx: a context
 * @x0: location to store the left edge
 * @y0: location to store the top edge
 * @x1: location to store the right edge
 * @y1: location to store the bottom edge
 *
 * Returns the upper-left (x0,y0) and lower-right (x1,y1) coordinates of the
 * currently set clip bounding box, useful for getting culling bounds.
 */
void
ctx_clip_extents (Ctx *ctx, float *x0, float *y0,
                            float *x1, float *y1);

/**
 * ctx_get_image_data:
 * @ctx: a context
 * @sx: x coordinate of the region to read
 * @sy: y coordinate of the region to read
 * @sw: width of the region to read
 * @sh: height of the region to read
 * @format: pixel format to deliver the data in
 * @dst_stride: stride of dst_data
 * @dst_data: buffer receiving the pixel values
 *
 * Get a copy of pixel values; depending on the backend this might trigger a
 * temporary rendering, or provide the data directly from an immediate buffer.
 *
 * NOTE(review): sx, sy, sw and sh are presumably in device pixels and
 * dst_stride presumably in bytes per row - TODO confirm.
 */
void
ctx_get_image_data (Ctx *ctx, int sx, int sy, int sw, int sh,
                    CtxPixelFormat format, int dst_stride,
                    uint8_t *dst_data);

/**
 * ctx_put_image_data:
 * @ctx: a context
 * @w: width of the pixel data
 * @h: height of the pixel data
 * @stride: stride of the pixel data
 * @format: pixel format of the pixel data
 * @data: the pixel data
 * @ox: x coordinate to draw at
 * @oy: y coordinate to draw at
 * @dirtyX: x of the dirty rectangle
 * @dirtyY: y of the dirty rectangle
 * @dirtyWidth: width of the dirty rectangle
 * @dirtyHeight: height of the dirty rectangle
 *
 * Draws a texture at the given coordinates.
 *
 * NOTE(review): the dirty* parameters presumably mirror the dirty rectangle
 * of HTML5 canvas putImageData, restricting which part of data is drawn, and
 * format is presumably a CtxPixelFormat passed as int - TODO confirm.
 */
void
ctx_put_image_data (Ctx *ctx, int w, int h, int stride, int format,
                    uint8_t *data,
                    int ox, int oy,
                    int dirtyX, int dirtyY,
                    int dirtyWidth, int dirtyHeight);


/**
 * ctx_texture_load:
 * @ctx: a context
 * @path: path of the image file to load
 * @width: location to store the pixel width, or NULL
 * @height: location to store the pixel height, or NULL
 * @eid: location to store the used eid, or NULL
 *
 * Loads an image file from disk into a texture, returning pixel width, height
 * and eid. The eid is based on the path, not the contents - avoiding a sha1
 * checksum of the contents. Any of width, height and eid can be NULL if we do
 * not care about their values.
 *
 * NOTE(review): the buffer size required for eid is not visible from this
 * declaration - TODO confirm before passing a small buffer.
 */
void ctx_texture_load (Ctx        *ctx,
                       const char *path,
                       int        *width,
                       int        *height,
                       char       *eid);

/**
 * ctx_texture:
 * @ctx: a context
 * @eid: identifier of the texture
 * @x: x coordinate
 * @y: y coordinate
 *
 * Sets the paint source to be a texture by eid.
 *
 * NOTE(review): x and y presumably position the origin of the texture in user
 * coordinates - TODO confirm.
 */
void ctx_texture              (Ctx *ctx, const char *eid, float x, float y);

/**
 * ctx_draw_texture:
 * @ctx: a context
 * @eid: identifier of the texture
 * @x: x coordinate
 * @y: y coordinate
 * @w: width to draw at
 * @h: height to draw at
 *
 * Draw a texture at the given coordinates with the specified width/height.
 */
void ctx_draw_texture         (Ctx *ctx, const char *eid, float x, float y, float w, float h);

/* Like ctx_draw_texture, with four additional source parameters.
 * NOTE(review): sx, sy, swidth and sheight presumably select the sub-rectangle
 * of the texture to draw - TODO confirm.
 */
void ctx_draw_texture_clipped (Ctx *ctx, const char *eid, float x, float y, float w, float h, float sx, float sy, float swidth, float sheight);

/**
 * ctx_draw_image:
 * @ctx: a context
 * @path: path of the image file
 * @x: x coordinate
 * @y: y coordinate
 * @w: width to draw at
 * @h: height to draw at
 *
 * Load and possibly cache an image from a png/svg/jpg suffixed file, and draw
 * it at x, y with the given width and height.
 */
void ctx_draw_image           (Ctx *ctx, const char *path, float x, float y, float w, float h);

/* Like ctx_draw_image, with four additional source parameters.
 * NOTE(review): sx, sy, swidth and sheight presumably select the sub-rectangle
 * of the image to draw - TODO confirm.
 */
void ctx_draw_image_clipped   (Ctx *ctx, const char *path, float x, float y, float w, float h, float sx, float sy, float swidth, float sheight);

/**
 * ctx_set_texture_source:
 * @ctx: a context
 * @texture_source: the context to use as texture source
 *
 * Used by the render threads of the fb and sdl backends.
 */
void ctx_set_texture_source (Ctx *ctx, Ctx *texture_source);

/**
 * ctx_set_texture_cache:
 * @ctx: a context
 * @texture_cache: the context to use as texture cache
 *
 * Used when sharing the cache state of eids between clients.
 */
void ctx_set_texture_cache (Ctx *ctx, Ctx *texture_cache);


/**
 * ctx_pixel_format_bits_per_pixel:
 * @format: a pixel format
 *
 * Returns bits per pixel for a pixel format.
 */
int ctx_pixel_format_bits_per_pixel (CtxPixelFormat format); // bits per pixel
/**
 * ctx_pixel_format_get_stride:
 * @format: a pixel format
 * @width: width of a scanline in pixels
 *
 * Computes the stride of a scanline in bytes, rounding up to the nearest byte
 * for 1/2/4 bit formats.
 */
int ctx_pixel_format_get_stride     (CtxPixelFormat format, int width);

/**
 * ctx_hasher_new:
 * @width: width of the hashed area
 * @height: height of the hashed area
 * @cols: number of columns
 * @rows: number of rows
 * @drawlist: a drawlist
 *
 * Create a new hashing context, for use with replays from another master
 * context.
 *
 * NOTE(review): cols and rows presumably divide the width x height area into
 * a grid of cells that are hashed individually (see ctx_hasher_get_hash); the
 * role of drawlist is not visible from this declaration - TODO confirm.
 */
Ctx *ctx_hasher_new          (int width, int height, int cols, int rows, CtxDrawlist *drawlist);

/**
 * ctx_hasher_get_hash:
 * @ctx: a context created with ctx_hasher_new
 * @col: column
 * @row: row
 *
 * Returns the hash for the given column and row.
 *
 * NOTE(review): presumably col and row index the cols x rows grid passed to
 * ctx_hasher_new - TODO confirm.
 */
uint32_t ctx_hasher_get_hash (Ctx *ctx, int col, int row);



/*  In progress for implementing rubber spacing or space bands, in one pass layouting.
 *
 *  Every ctx_deferred_* call takes a name in addition to the arguments of
 *  what looks like its non-deferred counterpart.
 *  NOTE(review): presumably the named values are filled in later through
 *  ctx_resolve called with the same name - TODO confirm.
 */
void ctx_deferred_scale       (Ctx *ctx, const char *name, float x, float y);

/**
 * ctx_deferred_translate:
 * @ctx: a context
 * @name: name of the deferred item
 * @x: x value
 * @y: y value
 */
void ctx_deferred_translate   (Ctx *ctx, const char *name, float x, float y);
/**
 * ctx_deferred_move_to:
 * @ctx: a context
 * @name: name of the deferred item
 * @x: x value
 * @y: y value
 */
void ctx_deferred_move_to     (Ctx *ctx, const char *name, float x, float y);
/**
 * ctx_deferred_rel_line_to:
 * @ctx: a context
 * @name: name of the deferred item
 * @x: x value
 * @y: y value
 */
void ctx_deferred_rel_line_to (Ctx *ctx, const char *name, float x, float y);
/**
 * ctx_deferred_rel_move_to:
 * @ctx: a context
 * @name: name of the deferred item
 * @x: x value
 * @y: y value
 */
void ctx_deferred_rel_move_to (Ctx *ctx, const char *name, float x, float y);
/**
 * ctx_deferred_rectangle:
 * @ctx: a context
 * @name: name of the deferred item
 * @x: x value
 * @y: y value
 * @width: width value
 * @height: height value
 */
void ctx_deferred_rectangle   (Ctx *ctx, const char *name, float x, float y,
                                                           float width, float height);

/* As ctx_deferred_rectangle, with an additional radius argument. */
void ctx_deferred_round_rectangle   (Ctx *ctx, const char *name, float x, float y,
                                                           float width, float height,
                                                           float radius);


/**
 * ctx_resolve:
 * @ctx: a context
 * @name: name of the deferred item(s)
 * @set_dim: callback receiving pointers to x, y, width and height
 * @userdata: passed to set_dim as its userdata argument
 *
 * NOTE(review): this declaration carried no description. Presumably set_dim
 * is invoked for the ctx_deferred_* items registered under name, and may
 * adjust the values through the x, y, width and height pointers; the meaning
 * of count is not visible from here - TODO confirm.
 */
void ctx_resolve              (Ctx *ctx, const char *name,
                               void (*set_dim) (Ctx *ctx,
                                                void *userdata,
                                                const char *name,
                                                int         count,
                                                float *x,
                                                float *y,
                                                float *width,
                                                float *height),
                               void *userdata);


/* Configuration flags for a ctx renderer. Each flag is a single bit, so
 * flags can be combined. Not all flags are applicable to all renderers,
 * the cb backend has the widest support currently.
 */
typedef enum CtxFlags {
  /* CTX_FLAG_DEFAULTS = 0 -- most of these flags apply to the cb-backend */

  /* use GRAY8, implies LOWFI */
  CTX_FLAG_GRAY8          = 1 << 0,
  /* use a hashcache to determine which parts to redraw, implied by LOWFI */
  CTX_FLAG_HASH_CACHE     = 1 << 1,
  /* lower res preview for performance during animations */
  CTX_FLAG_LOWFI          = 1 << 2,
  /* re-render with subpixel precision */
  CTX_FLAG_SUBPIXEL       = 1 << 3,
  CTX_FLAG_DAMAGE_CONTROL = 1 << 4,
  /* possibly show fps in titlebar or shown in overlay */
  CTX_FLAG_SHOW_FPS       = 1 << 5,
  /* keep existing fb-data instead of doing an initial clear */
  CTX_FLAG_KEEP_DATA      = 1 << 6,
  /* do rendering in separate thread */
  CTX_FLAG_RENDER_THREAD  = 1 << 7,

  /* draw software cursor */
  CTX_FLAG_POINTER        = 1 << 8,

  /* the following two apply to parser config */
  CTX_FLAG_HANDLE_ESCAPES = 1 << 9,
  CTX_FLAG_FORWARD_EVENTS = 1 << 10,

  /* the following two apply to the ctx-backend */
  CTX_FLAG_SYNC           = 1 << 11,
  CTX_FLAG_COMPRESS       = 1 << 12,

  /* only valid with a fb pointer passed in: swap/render the whole frame
   * when the drawlist is full, this is slower than hash cache unless
   * geometry is simpler, can not be combined with CTX_FLAG_HASH_CACHE
   */
  CTX_FLAG_FULL_FB        = 1 << 13
} CtxFlags;

/* Configuration passed to ctx_new_cb. Every callback pointer is followed by
 * its own *_user_data member, in addition to the general user_data.
 *
 * NOTE(review): presumably callbacks left NULL are skipped and the
 * per-callback *_user_data is what the matching callback receives as its
 * user_data argument - TODO confirm against the implementation.
 */
typedef struct CtxCbConfig {
   CtxPixelFormat format;
   int            buffer_size;
   void          *buffer;    // scratch buffer should be in sram if possible
   int            flags;     // NOTE(review): presumably a combination of CtxFlags - TODO confirm

   int            chunk_size; // number of entries in drawlist before flush,
                              // full flush on end-frame

   void          *fb;        // if provided is a backing-fb for rendering
                             // buffer comes on top as a scratch area;
   void          *user_data; // provided to the callback functions
                             //
   void (*set_pixels)     (Ctx *ctx, void *user_data, 
                           int x, int y, int w, int h, void *buf);
   void  *set_pixels_user_data;

   // runs after all subregion updates in renderer thread
   // if CTX_FLAG_RENDER_THREAD then this is run in renderer thread.
   int (*update_fb)       (Ctx *ctx, void *user_data, int x, int y, int w, int h);
   void  *update_fb_user_data;

   // run as an idle call in render thread, between chunks
   int (*intra)       (Ctx *ctx, void *user_data);
   void  *intra_user_data;

   int  (*renderer_init)  (Ctx *ctx, void *user_data); // return non 0 on failure to init
   void  *renderer_init_user_data;
   void (*renderer_idle)  (Ctx *ctx, void *user_data);
   void  *renderer_idle_user_data;

   void (*renderer_stop)  (Ctx *ctx, void *user_data);
   void  *renderer_stop_user_data;

   void (*consume_events) (Ctx *ctx, void *user_data); // runs in the main (not renderer thread)
   void  *consume_events_user_data;

   void (*set_fullscreen)  (Ctx *ctx, void *user_data, int fullscreen);
   void *set_fullscreen_user_data;

   int  (*get_fullscreen)  (Ctx *ctx, void *user_data);
   void *get_fullscreen_user_data;

   void (*windowtitle)     (Ctx *ctx, void *user_data, const char *utf8);
   void *windowtitle_user_data;

   void (*set_clipboard)  (Ctx *ctx, void *user_data, const char *text);
   void *set_clipboard_user_data;

   char *(*get_clipboard) (Ctx *ctx, void *user_data);
   void *get_clipboard_user_data;

   // NOTE(review): presumably reserved so members can be added without
   // changing the struct size - TODO confirm
   void *padding[10];
} CtxCbConfig;

/**
 * ctx_new_cb:
 * @width: width of the context
 * @height: height of the context
 * @config: configuration, see CtxCbConfig
 *
 * Create a new context configured by a CtxCbConfig.
 *
 * NOTE(review): whether config is copied or must outlive the returned
 * context is not visible from this declaration - TODO confirm.
 */
Ctx *ctx_new_cb (int width, int height, CtxCbConfig *config);

/**
 * ctx_new_cb_old:
 * @width: width of the context
 * @height: height of the context
 * @format: pixel format
 * @set_pixels: callback, same signature as CtxCbConfig.set_pixels
 * @set_pixels_user_data: user data for set_pixels
 * @update_fb: callback, note that unlike CtxCbConfig.update_fb this one takes
 *             no x, y, w, h arguments
 * @update_fb_user_data: user data for update_fb
 * @memory_budget: memory budget
 * @scratch_fb: scratch buffer
 * @flags: flags
 *
 * NOTE(review): going by the name this is the older, argument-list based
 * form of ctx_new_cb; memory_budget is presumably in bytes and flags
 * presumably a combination of CtxFlags - TODO confirm.
 */
Ctx *ctx_new_cb_old (int width, int height, CtxPixelFormat format,
                 void (*set_pixels) (Ctx *ctx, void *user_data, 
                                     int x, int y, int w, int h, void *buf),
                 void *set_pixels_user_data,
                 int (*update_fb) (Ctx *ctx, void *user_data),
                 void *update_fb_user_data,
                 int   memory_budget,
                 void *scratch_fb,
                 int   flags);

/**
 * ctx_cb_set_flags:
 * @ctx: a context
 * @flags: the new flags
 *
 * NOTE(review): flags is presumably a combination of CtxFlags, and ctx
 * presumably has to use the cb backend - TODO confirm.
 */
void ctx_cb_set_flags (Ctx *ctx, int flags);
/**
 * ctx_cb_get_flags:
 * @ctx: a context
 *
 * Returns the flags, see ctx_cb_set_flags.
 */
int  ctx_cb_get_flags (Ctx *ctx);

/**
 * ctx_cb_set_memory_budget:
 * @ctx: a context
 * @memory_budget: the new memory budget
 *
 * NOTE(review): memory_budget is presumably in bytes - TODO confirm.
 */
void ctx_cb_set_memory_budget (Ctx *ctx, int memory_budget);

/***h3: serialization/formatting API */

/* Flags for the ctx_render_string / ctx_render_stream / ctx_render_fd
 * serializers, single bits that can be combined.
 */
typedef enum CtxFormatterFlag
{
  CTX_FORMATTER_FLAG_NONE     = 0,
  CTX_FORMATTER_FLAG_LONGFORM = 1 << 0,
  CTX_FORMATTER_FLAG_FLUSH    = 1 << 1
} CtxFormatterFlag;

/**
 * ctx_render_string:
 * @ctx: a ctx context containing a drawing
 * @flags: formatter flags; with CTX_FORMATTER_FLAG_LONGFORM the human
 *         readable encoding is used, without it the compact one
 * @retlen: optional location to store length of returned string.
 *
 * returns an allocated string containing serialization of the drawing in ctx,
 * free with ctx_free.
 */
char *ctx_render_string (Ctx *ctx, CtxFormatterFlag flags, int *retlen);

/**
 * ctx_render_stream:
 * @ctx: a ctx context containing a drawing
 * @stream: stream to serialize to
 * @flags: formatter flags; with CTX_FORMATTER_FLAG_LONGFORM the human
 *         readable encoding is used, without it the compact one
 *
 * Render a ctx context to a stream.
 */
void ctx_render_stream  (Ctx *ctx, FILE *stream, CtxFormatterFlag flags);

/**
 * ctx_render_fd:
 * @ctx: a ctx context containing a drawing
 * @fd: an open file descriptor to write to
 * @flag: formatter flags; with CTX_FORMATTER_FLAG_LONGFORM the human
 *        readable encoding is used, without it the compact one
 *
 * Render a ctx context to an open file.
 */
void ctx_render_fd      (Ctx *ctx, int fd, CtxFormatterFlag flag);

/**
 * ctx_render_ctx:
 * @ctx: a source context containing a drawing
 * @d_ctx: destination context.
 *
 * Render one context onto another: the drawing in ctx is rendered onto d_ctx.
 */
void ctx_render_ctx     (Ctx *ctx, Ctx *d_ctx);

/**
 * ctx_render_ctx_textures:
 * @ctx: a source context containing a drawing
 * @d_ctx: destination context.
 *
 * Render one context onto another without doing any drawing, but still using
 * all the textures that the drawing uses.
 */
void ctx_render_ctx_textures (Ctx *ctx, Ctx *d_ctx);

/* Line join styles, as taken by ctx_line_join and returned by
 * ctx_get_line_join.
 */
typedef enum {
  CTX_JOIN_BEVEL = 0,
  CTX_JOIN_ROUND = 1,
  CTX_JOIN_MITER = 2
} CtxLineJoin;
/* Set, respectively get, the line join style of the context. */
void ctx_line_join  (Ctx *ctx, CtxLineJoin        join);
CtxLineJoin          ctx_get_line_join        (Ctx *ctx);

/* Line cap styles, as taken by ctx_line_cap and returned by
 * ctx_get_line_cap.
 */
typedef enum {
  CTX_CAP_NONE   = 0,
  CTX_CAP_ROUND  = 1,
  CTX_CAP_SQUARE = 2
} CtxLineCap;
/* Set, respectively get, the line cap style of the context. */
void       ctx_line_cap     (Ctx *ctx, CtxLineCap         cap);
CtxLineCap ctx_get_line_cap (Ctx *ctx);

/* Extend modes, as taken by ctx_extend and returned by ctx_get_extend.
 * CTX_EXTEND_LAST names the highest valued member and has to follow along
 * if members are added.
 */
typedef enum {
  CTX_EXTEND_NONE    = 0,
  CTX_EXTEND_REPEAT  = 1,
  CTX_EXTEND_REFLECT = 2,
  CTX_EXTEND_PAD     = 3
} CtxExtend;
#define CTX_EXTEND_LAST CTX_EXTEND_PAD
/* Set, respectively get, the extend mode of the context.
 * NOTE(review): presumably controls how gradient/texture sources behave
 * outside their defined area - TODO confirm.
 */
void      ctx_extend     (Ctx *ctx, CtxExtend extend);
CtxExtend ctx_get_extend (Ctx *ctx);

/* Add a gradient stop at position pos, with the color given as a string.
 * NOTE(review): the accepted color syntax is not visible from this
 * declaration, presumably css-like - TODO confirm.
 */
void ctx_gradient_add_stop_string (Ctx *ctx, float pos, const char *color);


/*** h3: text */

/**
 * ctx_font_size:
 * @ctx: a context
 * @x: the new font size
 *
 * Set the font size, the value can be read back with ctx_get_font_size.
 */
void  ctx_font_size       (Ctx *ctx, float x);

/**
 * ctx_get_font_size:
 * @ctx: a context
 *
 * Returns the current font_size.
 */
float ctx_get_font_size        (Ctx *ctx);

/**
 * ctx_font_family:
 * @ctx: a context
 * @font_family: name of the font family
 *
 * NOTE(review): presumably selects the font by family name; how this differs
 * from ctx_font is not visible from the declarations - TODO confirm.
 */
void  ctx_font_family     (Ctx *ctx, const char *font_family);

/**
 * ctx_font:
 * @ctx: a context
 * @font: font description
 *
 * Set the font, the value can be read back with ctx_get_font.
 *
 * NOTE(review): the accepted syntax of font is not visible from this
 * declaration - TODO confirm.
 */
void  ctx_font            (Ctx *ctx, const char *font);

/**
 * ctx_get_font:
 * @ctx: a context
 *
 * Returns the currently set font.
 */
const char *ctx_get_font       (Ctx *ctx);

/* Query the vertical metrics of the font through the three out parameters.
 * NOTE(review): presumably reports on the currently set font; the meaning of
 * the int return value and whether NULL is accepted for the out parameters is
 * not visible from this declaration - TODO confirm.
 */
int
ctx_font_extents (Ctx   *ctx,
                  float *ascent,
                  float *descent,
                  float *line_gap);


/**
 * ctx_wrap_left:
 * @ctx: a context
 * @x: the left edge
 *
 * Specify left edge of text column for word-wrapping, default value is 0.0f for
 * none. ctx_get_wrap_left is the matching getter.
 */
void ctx_wrap_left   (Ctx *ctx, float x);
float ctx_get_wrap_left        (Ctx *ctx);

/**
 * ctx_wrap_right:
 * @ctx: a context
 * @x: the right edge
 *
 * Specify right edge of text column for word-wrapping, default value is 0.0f for
 * none. ctx_get_wrap_right is the matching getter.
 */
void ctx_wrap_right  (Ctx *ctx, float x);
float ctx_get_wrap_right       (Ctx *ctx);

/**
 * ctx_line_height:
 * @ctx: a context
 * @y: the line height
 *
 * Specify the line height used for text. ctx_get_line_height is the matching
 * getter.
 *
 * NOTE(review): the description previously found here was a verbatim copy of
 * the one for ctx_wrap_right. The unit of y (presumably a multiple of the
 * font size) and its default are not visible from this declaration - TODO
 * confirm.
 */
void ctx_line_height (Ctx *ctx, float y);
float ctx_get_line_height      (Ctx *ctx);



/* Text alignment, as taken by ctx_text_align. START and END did not exist
 * in mrg, LEFT and RIGHT did.
 */
typedef enum {
  CTX_TEXT_ALIGN_START   = 0,
  CTX_TEXT_ALIGN_END     = 1,
  CTX_TEXT_ALIGN_JUSTIFY = 2, /* not handled in ctx */
  CTX_TEXT_ALIGN_CENTER  = 3,
  CTX_TEXT_ALIGN_LEFT    = 4,
  CTX_TEXT_ALIGN_RIGHT   = 5
} CtxTextAlign;
/* Set, respectively get, the text alignment of the context. */
void         ctx_text_align     (Ctx *ctx, CtxTextAlign align);
CtxTextAlign ctx_get_text_align (Ctx *ctx);

/* Text baselines, as taken by ctx_text_baseline. */
typedef enum {
  CTX_TEXT_BASELINE_ALPHABETIC  = 0,
  CTX_TEXT_BASELINE_TOP         = 1,
  CTX_TEXT_BASELINE_HANGING     = 2,
  CTX_TEXT_BASELINE_MIDDLE      = 3,
  CTX_TEXT_BASELINE_IDEOGRAPHIC = 4,
  CTX_TEXT_BASELINE_BOTTOM      = 5
} CtxTextBaseline;
/* Set, respectively get, the text baseline of the context. */
void            ctx_text_baseline     (Ctx *ctx, CtxTextBaseline    baseline);
CtxTextBaseline ctx_get_text_baseline (Ctx *ctx);


/* Text directions, as taken by ctx_text_direction. */
typedef enum {
  CTX_TEXT_DIRECTION_INHERIT = 0,
  CTX_TEXT_DIRECTION_LTR     = 1,
  CTX_TEXT_DIRECTION_RTL     = 2
} CtxTextDirection;
/* Set, respectively get, the text direction of the context. */
void               ctx_text_direction       (Ctx *ctx, CtxTextDirection   direction);
CtxTextDirection   ctx_get_text_direction   (Ctx *ctx);

/**
 * ctx_text:
 * @ctx: a context
 * @utf8: the string to draw, UTF8 encoded
 *
 * Draw the UTF8 string at current position, wrapping if ctx_wrap_left and ctx_wrap_right have been set.
 */
void  ctx_text          (Ctx        *ctx,
                         const char *utf8);

/**
 * ctx_text_width:
 * @ctx: a context
 * @utf8: the string to measure, UTF8 encoded
 *
 * Returns the total horizontal advance the string would have had if it had
 * been rendered.
 */
float ctx_text_width    (Ctx        *ctx,
                         const char *utf8);


/**
 * ctx_get_font_name:
 * @ctx: a context
 * @no: font number, starting at 0
 *
 * Get the name a font is registered under; substrings are often sufficient
 * for specifying a font. Iterating up from 0 until NULL is retrieved is the
 * current way to enumerate registered fonts.
 */
const char *ctx_get_font_name (Ctx *ctx, int no);

/* Register a font under name, either from a file at path or from length
 * bytes at data.
 * NOTE(review): the meaning of the int return value, the supported font
 * formats and whether data has to stay valid after the call are not visible
 * from these declarations - TODO confirm.
 */
int   ctx_load_font_file     (Ctx *ctx, const char *name, const char *path);
int   ctx_load_font          (Ctx *ctx, const char *name, const char *data, unsigned int length);

/* Returns the width of a glyph.
 * NOTE(review): presumably the horizontal advance of glyph_id in the current
 * font at the current font size - TODO confirm.
 */
float ctx_glyph_width   (Ctx *ctx, int glyph_id);

/*
 * Low level glyph drawing: one positioned glyph, as consumed by ctx_glyphs.
 * Unless you are integrating harfbuzz you probably want to use ctx_text
 * instead.
 */
typedef struct _CtxGlyph
{
  uint32_t index; /* glyph index in font */
  float    x;
  float    y;
} CtxGlyph;

/**
 * ctx_glyph_allocate:
 * @n_glyphs: number of glyphs to make room for
 *
 * Allocate an array of CtxGlyph.
 *
 * NOTE(review): presumably to be released with ctx_glyph_free - TODO confirm.
 */
CtxGlyph *ctx_glyph_allocate     (int n_glyphs);

/**
 * ctx_glyph_free:
 * @ctx: a context
 * @glyphs: the glyphs to free
 *
 * Free an array of CtxGlyph. Note that unlike ctx_glyph_allocate this takes
 * a context argument.
 */
void      ctx_glyph_free         (Ctx *ctx, CtxGlyph *glyphs);

/**
 * ctx_glyph_id:
 * @ctx: a context
 * @glyphid: glyph id
 * @stroke: whether to stroke
 *
 * NOTE(review): presumably draws the glyph with the given id at the current
 * position, stroked if stroke is non 0 and filled otherwise; the meaning of
 * the return value is not visible from this declaration - TODO confirm.
 */
int       ctx_glyph_id           (Ctx *ctx, uint32_t glyphid, int stroke);

/* As ctx_glyph_id, but taking a unicode code point instead of a glyph id. */
int       ctx_glyph_unichar      (Ctx *ctx, uint32_t unichar, int stroke);

/* Draw n_glyphs positioned glyphs from the glyphs array.
 * NOTE(review): presumably filled, with ctx_glyphs_stroke being the stroked
 * counterpart - TODO confirm.
 */
void      ctx_glyphs             (Ctx      *ctx,
                                  CtxGlyph *glyphs,
                                  int       n_glyphs);

/* Look up a glyph from a unicode code point.
 * NOTE(review): presumably returns the glyph id in the current font; the
 * value returned for a missing glyph is not visible here - TODO confirm.
 */
int       ctx_glyph_lookup       (Ctx *ctx, uint32_t unichar);

/**
 * ctx_glyphs_stroke:
 * @ctx: a context
 * @glyphs: array of positioned glyphs
 * @n_glyphs: number of entries in glyphs
 *
 * NOTE(review): presumably the stroking counterpart of ctx_glyphs - TODO
 * confirm.
 */
void  ctx_glyphs_stroke  (Ctx        *ctx,
                          CtxGlyph   *glyphs,
                          int         n_glyphs);


/**
 * ctx_dirty_rect:
 * @ctx: a context
 * @x: location to store the left edge
 * @y: location to store the top edge
 * @width: location to store the width
 * @height: location to store the height
 *
 * Query the dirtied bounding box of drawing commands thus far.
 */
void  ctx_dirty_rect (Ctx *ctx, int *x, int *y, int *width, int *height);


#ifdef CTX_X86_64
/* Only declared when CTX_X86_64 is defined.
 * NOTE(review): presumably reports the x86-64 feature level detected at
 * runtime - TODO confirm what the returned numbers stand for.
 */
int ctx_x86_64_level (void);
#endif


/* Modifier and button state, single bits that can be combined; carried in
 * the state member of CtxEvent.
 */
typedef enum _CtxModifierState
{
  CTX_MODIFIER_STATE_SHIFT   = 1 << 0,
  CTX_MODIFIER_STATE_CONTROL = 1 << 1,
  CTX_MODIFIER_STATE_ALT     = 1 << 2,
  CTX_MODIFIER_STATE_BUTTON1 = 1 << 3,
  CTX_MODIFIER_STATE_BUTTON2 = 1 << 4,
  CTX_MODIFIER_STATE_BUTTON3 = 1 << 5,
  CTX_MODIFIER_STATE_DRAG    = 1 << 6  /* pointer button is down (0 or any) */
} CtxModifierState;

/* Direction of a scroll, as passed to ctx_scrolled and carried in the
 * scroll_direction member of CtxEvent.
 */
typedef enum _CtxScrollDirection
{
  CTX_SCROLL_DIRECTION_UP    = 0,
  CTX_SCROLL_DIRECTION_DOWN  = 1,
  CTX_SCROLL_DIRECTION_LEFT  = 2,
  CTX_SCROLL_DIRECTION_RIGHT = 3
} CtxScrollDirection;

typedef struct _CtxEvent CtxEvent; // forward declaration, struct _CtxEvent is defined further down

/**
 * ctx_handle_events:
 * @ctx: a context
 *
 * Calls the backend's consume events, consuming events until none are
 * pending; this also deals with client events.
 */
void ctx_handle_events (Ctx *ctx);

/**
 * ctx_need_redraw:
 * @ctx: a context
 *
 * Returns non 0 if the ctx context needs a redraw, as queued by
 * ctx_queue_draw since the last end_frame.
 */
int ctx_need_redraw (Ctx *ctx);

/**
 * ctx_queue_draw:
 * @ctx: a context
 *
 * Mark the current ctx output as dirty and in need of recomputation; this is
 * what ctx_need_redraw reports.
 */
void ctx_queue_draw (Ctx *ctx);

/**
 * ctx_exit:
 * @ctx: a context
 *
 * Set a flag on the context indicating that execution is finished; the flag
 * can be queried with ctx_has_exited and cleared with ctx_reset_has_exited.
 */
void ctx_exit (Ctx *ctx);
/**
 * ctx_has_exited:
 * @ctx: a context
 *
 * Returns 1 if ctx_exit() has been called.
 */
int  ctx_has_exited (Ctx *ctx);

/**
 * ctx_reset_has_exited:
 * @ctx: a context
 *
 * Reset the has_exited flag of the context, as set by ctx_exit.
 */
void ctx_reset_has_exited (Ctx *ctx);

/**
 * ctx_ticks:
 *
 * Returns number of microseconds since startup.
 *
 * NOTE(review): the result is an unsigned long, so where that type is 32 bit
 * wide the value wraps after roughly 71 minutes.
 */
unsigned long ctx_ticks (void);

/**
 * ctx_ms:
 * @ctx: a context
 *
 * Returns number of milliseconds since startup, as a 32 bit value.
 */
uint32_t ctx_ms         (Ctx *ctx);

/**
 * ctx_strhash:
 * @string: a string
 *
 * Returns an integer that is a hash/compaction of string; the result is
 * usable as key for ctx_set_float, ctx_get_float and ctx_is_set.
 */
uint32_t    ctx_strhash    (const char *string);

/**
 * ctx_str_decode:
 * @number: an integer, previously encoded with ctx_strhash
 *
 * Returns a constant string decoding the number, if it is reversible -
 * this is mostly possible with strings <5 or 6 chars.
 *
 * NOTE(review): what is returned for a number that can not be decoded is not
 * visible from this declaration - TODO confirm.
 */
const char *ctx_str_decode (uint32_t number);

/**
 * ctx_set_float:
 * @ctx: a context
 * @hash: a key - as created with ctx_strhash
 * @value: a value
 *
 * Set a key/value mapping maintained in the graphics context to value; the
 * values are maintained by ctx_save.
 */
void        ctx_set_float  (Ctx *ctx, uint32_t hash, float value);

/**
 * ctx_get_float:
 * @ctx: a context
 * @hash: a key - as created with ctx_strhash
 *
 * Get the value of a key/value mapping maintained in the graphics context,
 * as previously set by ctx_set_float; returns 0.0f if not set. Use
 * ctx_is_set to tell an unset key from a value of 0.0f.
 */
float       ctx_get_float  (Ctx *ctx, uint32_t hash);

/**
 * ctx_is_set:
 * @ctx: a context
 * @hash: a key - as created with ctx_strhash
 *
 * Check if a value has been set at all (for when 0.0f needs to be explicitly
 * detected).
 */
int         ctx_is_set     (Ctx *ctx, uint32_t hash);

/**
 * ctx_set_clipboard:
 * @ctx: a context
 * @text: new clipboard text.
 *
 * Set clipboard text; this is the copy action in copy+paste.
 */
void ctx_set_clipboard (Ctx *ctx, const char *text);

/**
 * ctx_get_clipboard:
 * @ctx: a context
 *
 * Returns the clipboard contents or NULL; a non NULL result should be freed
 * with ctx_free.
 *
 * NOTE(review): the original sentence was garbled; NULL presumably means
 * that no clipboard contents are available - TODO confirm.
 */
char *ctx_get_clipboard (Ctx *ctx);

/**
 * ctx_windowtitle:
 * @ctx: a context
 * @text: the new title
 *
 * Set window title.
 */
void ctx_windowtitle (Ctx *ctx, const char *text);

/* NOTE(review): going by the leading underscore this is presumably internal
 * and not intended to be called by applications - TODO confirm.
 */
void _ctx_events_init     (Ctx *ctx);
/* A rectangle with integer position and size. */
typedef struct _CtxIntRectangle
{
  int x;
  int y;
  int width;
  int height;
} CtxIntRectangle;
/* A rectangle with floating point position and size. */
typedef struct _CtxFloatRectangle
{
  float x;
  float y;
  float width;
  float height;
} CtxFloatRectangle;


/* Signature of event callbacks, as registered with the ctx_listen* and
 * ctx_add_key_binding* functions. For key bindings data2 is the action
 * string.
 * NOTE(review): for ctx_listen* data and data2 are presumably the data1 and
 * data2 given at registration - TODO confirm.
 */
typedef void (*CtxCb) (CtxEvent *event,
                       void     *data,
                       void     *data2);

/* Event types. The individual types are single bits, so that they can be
 * combined into masks like the ones at the end of the enum.
 */
typedef enum _CtxEventType
{
  CTX_PRESS        = 1 << 0,
  CTX_MOTION       = 1 << 1,
  CTX_RELEASE      = 1 << 2,
  CTX_ENTER        = 1 << 3,
  CTX_LEAVE        = 1 << 4,
  CTX_TAP          = 1 << 5,
  CTX_TAP_AND_HOLD = 1 << 6,

  /* NYI: SWIPE, ZOOM ROT_ZOOM, */

  CTX_DRAG_PRESS   = 1 << 7,
  CTX_DRAG_MOTION  = 1 << 8,
  CTX_DRAG_RELEASE = 1 << 9,
  CTX_KEY_PRESS    = 1 << 10,
  CTX_KEY_DOWN     = 1 << 11,
  CTX_KEY_UP       = 1 << 12,
  CTX_SCROLL       = 1 << 13,
  CTX_MESSAGE      = 1 << 14,
  CTX_DROP         = 1 << 15,

  CTX_SET_CURSOR   = 1 << 16, /* used internally */

  /* client should store state - preparing for restart
   * NOTE(review): this remark no longer has a member to belong to,
   * presumably left behind by a removed event type.
   */

  /* masks; note that CTX_ANY covers neither CTX_SCROLL nor CTX_SET_CURSOR */
  CTX_POINTER  = CTX_PRESS | CTX_MOTION | CTX_RELEASE | CTX_DROP,
  CTX_TAPS     = CTX_TAP | CTX_TAP_AND_HOLD,
  CTX_CROSSING = CTX_ENTER | CTX_LEAVE,
  CTX_DRAG     = CTX_DRAG_PRESS | CTX_DRAG_MOTION | CTX_DRAG_RELEASE,
  CTX_KEY      = CTX_KEY_DOWN | CTX_KEY_UP | CTX_KEY_PRESS,
  CTX_MISC     = CTX_MESSAGE,
  CTX_ANY      = CTX_POINTER | CTX_DRAG | CTX_CROSSING
               | CTX_KEY | CTX_MISC | CTX_TAPS
} CtxEventType;

#define CTX_CLICK   CTX_PRESS   // currently just an alias for CTX_PRESS - SHOULD HAVE MORE LOGIC
typedef struct _CtxClient CtxClient; // opaque here, only used through pointers

/* An event as delivered to CtxCb callbacks and returned by ctx_get_event.
 * Which members are meaningful depends on type, see the per member notes.
 */
struct _CtxEvent {
  CtxEventType  type;
  uint32_t time;
  Ctx     *ctx;
  int stop_propagate; /* when set - propagation is stopped */

  CtxModifierState state;

  int     device_no; /* 0 = left mouse button / virtual focus */
                     /* 1 = middle mouse button */
                     /* 2 = right mouse button */
                     /* 4 = first multi-touch .. (NYI) */

  float   device_x; /* untransformed (device) coordinates  */
  float   device_y;

  /* coordinates; and deltas for motion/drag events in user-coordinates: */
  float   x;
  float   y;
  float   start_x; /* start-coordinates (press) event for drag, */
  float   start_y; /*    untransformed coordinates */
  float   prev_x;  /* previous events coordinates */
  float   prev_y;
  float   delta_x; /* x - prev_x, redundant - but often useful */
  float   delta_y; /* y - prev_y, redundant - ..  */


  unsigned int unicode; /* only valid for key-events, re-use as keycode? */
  const char *string;   /* as key can be "up" "down" "space" "backspace" "a" "b" "ø" etc .. */
                        /* this is also where the message is delivered for
                         * MESSAGE events
                         *
                         * and where the data for drop events is delivered
                         *
                         */
                         /* XXX lifetime of this string should be longer
                         * than the events, preferably interned. XXX
                         * maybe add a flag for this?
                         */
  int owns_string; /* if 1 call free.. */
  CtxScrollDirection scroll_direction;

  // would be nice to add the bounding box of the hit-area causing
  // the event, making for instance scissored enter/leave repaint easier.
};

// layer-event "layer"  motion x y device_no 


/**
 * ctx_add_timeout_full:
 * @ctx: a context
 * @ms: milliseconds to elapse before calling 
 * @idle_cb: callback function to call
 * @idle_data: data for callback
 * @destroy_notify: function to call to destroy something when timeout is over
 * @destroy_data: data passed to destroy_notify
 *
 * Add a timeout callback, which is a function taking a ctx context and the
 * provided idle_data, returning 1 if the callback should continue being
 * registered and called again after a new delay and 0 if it should be removed.
 *
 * Returns an integer handle that can be used with ctx_remove_idle.
 */
int   ctx_add_timeout_full   (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                              void (*destroy_notify)(void *destroy_data), void *destroy_data);
/**
 * ctx_add_timeout:
 * @ctx: a context
 * @ms: milliseconds to elapse before calling 
 * @idle_cb: callback function to call
 * @idle_data: data for callback
 *
 * Add a timeout callback, which is a function taking a ctx context and the
 * provided idle_data, returning 1 if the callback should continue being
 * registered and called again after a new delay and 0 if it should be removed.
 * This is ctx_add_timeout_full without the destroy notification.
 *
 * Returns an integer handle that can be used with ctx_remove_idle.
 */
int   ctx_add_timeout        (Ctx *ctx, int ms, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data);

/**
 * ctx_add_idle_full:
 * @ctx: a context
 * @idle_cb: callback function to call
 * @idle_data: data for callback
 * @destroy_notify: function to call to destroy something when the idle is over
 * @destroy_data: data passed to destroy_notify
 *
 * Add an idle callback, which is a function taking a ctx context and the
 * provided idle_data, returning 1 if the callback should continue being
 * registered and 0 if it should be removed.
 *
 * Returns an integer handle that can be used with ctx_remove_idle.
 */
int   ctx_add_idle_full      (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data,
                              void (*destroy_notify)(void *destroy_data), void *destroy_data);
/**
 * ctx_add_idle:
 * @ctx: a context
 * @idle_cb: callback function to call
 * @idle_data: data for callback
 *
 * Add an idle callback, which is a function taking a ctx context and the
 * provided idle_data, returning 1 if the callback should continue being
 * registered and 0 if it should be removed. This is ctx_add_idle_full
 * without the destroy notification.
 *
 * Returns an integer handle that can be used with ctx_remove_idle.
 */
int   ctx_add_idle           (Ctx *ctx, int (*idle_cb)(Ctx *ctx, void *idle_data), void *idle_data);

/**
 * ctx_remove_idle:
 * @ctx: a context
 * @handle: a handle referencing a timeout or idle, as returned by the
 *          ctx_add_idle* and ctx_add_timeout* functions.
 *
 * Remove a previously registered idle callback / timeout.
 */
void  ctx_remove_idle        (Ctx *ctx, int handle);


/* Signature of the destroy notification taken by ctx_add_key_binding_full. */
typedef void (*CtxDestroyNotify) (void *data);

/**
 * ctx_add_key_binding_full:
 * @ctx: a context
 * @key: a string describing the keybinding like "a" "space" "shift-f3"
 * @action: a string action to take, is passed to cb as data2
 * @label: title to use in ui
 * @cb: callback function to call
 * @cb_data: data for callback
 * @destroy_notify: function to call to destroy something when the binding is
 *                  over
 * @destroy_data: data passed to destroy_notify
 *
 * Register a key binding, with a finalizer.
 */
void ctx_add_key_binding_full (Ctx *ctx,
                               const char *key,
                               const char *action,
                               const char *label,
                               CtxCb       cb,
                               void       *cb_data,
                               CtxDestroyNotify destroy_notify,
                               void       *destroy_data);

/**
 * ctx_add_key_binding:
 * @ctx: a context
 * @key: a string describing the keybinding like "a" "space" "shift-f3"
 * @action: a string action to take, is passed to cb as data2
 * @label: title to use in ui
 * @cb: callback function to call
 * @cb_data: data for callback
 *
 * Register a key binding without a finalizer; ctx_add_key_binding_full is
 * the variant taking a destroy notification.
 */
void ctx_add_key_binding (Ctx *ctx,
                          const char *key,
                          const char *action,
                          const char *label,
                          CtxCb cb,
                          void  *cb_data);





/* NOTE(review): undocumented; presumably registers the current path as a hit
 * region identified by id - TODO confirm.
 */
void ctx_add_hit_region (Ctx *ctx, const char *id);

/**
 * ctx_listen_full:
 * @ctx: a context
 * @x: bounding box x coordinate
 * @y: bounding box y coordinate
 * @width: bounding box width
 * @height: bounding box height
 * @types: the event types to listen for, a mask of CtxEventType values
 * @cb: callback function to call
 * @data1: data for callback
 * @data2: second data for callback
 * @finalize: function taking the two listen data pointers and finalize_data
 * @finalize_data: data passed to finalize
 *
 * Register a pointer event handler with a finalizer, for an explicitly given
 * bounding box.
 *
 * NOTE(review): finalize is presumably called when the listener is removed,
 * with data1 and data2 as its first two arguments - TODO confirm.
 */
void ctx_listen_full (Ctx     *ctx,
                      float    x,
                      float    y,
                      float    width,
                      float    height,
                      CtxEventType  types,
                      CtxCb    cb,
                      void    *data1,
                      void    *data2,
                      void   (*finalize)(void *listen_data, void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data);
/* Stop further propagation of event.
 * NOTE(review): presumably sets the stop_propagate member of the event -
 * TODO confirm.
 */
void  ctx_event_stop_propagate (CtxEvent *event);


/**
 * ctx_listen:
 * @ctx: a context
 * @types: the event types to listen for, a mask of CtxEventType values
 * @cb: callback function to call
 * @data1: data for callback
 * @data2: second data for callback
 *
 * Register a pointer event handler, using the extent of the current path
 * as bounding box. ctx_listen_with_finalize is the variant taking a
 * finalizer.
 */
void  ctx_listen               (Ctx          *ctx,
                                CtxEventType  types,
                                CtxCb         cb,
                                void*         data1,
                                void*         data2);
/**
 * ctx_listen_with_finalize:
 * @ctx: a context
 * @types: the event types to listen for, a mask of CtxEventType values
 * @cb: callback function to call
 * @data1: data for callback
 * @data2: second data for callback
 * @finalize: function taking the two listen data pointers and finalize_data
 * @finalize_data: data passed to finalize
 *
 * Register a pointer event handler with a finalizer, using the extent of the
 * current path as bounding box.
 *
 * NOTE(review): finalize is presumably called when the listener is removed,
 * with data1 and data2 as its first two arguments - TODO confirm.
 */
void  ctx_listen_with_finalize (Ctx          *ctx,
                                CtxEventType  types,
                                CtxCb         cb,
                                void*         data1,
                                void*         data2,
                      void   (*finalize)(void *listen_data, void *listen_data2,
                                         void *finalize_data),
                      void    *finalize_data);

/* NOTE(review): undocumented. ctx_get_event presumably returns the next
 * pending event or NULL when there is none; ctx_get_event_fds presumably
 * fills fd with the file descriptors to wait on for events and stores their
 * number in count. Ownership of the returned event and the required size of
 * fd are not visible from these declarations - TODO confirm.
 */
CtxEvent *ctx_get_event (Ctx *ctx);
void      ctx_get_event_fds (Ctx *ctx, int *fd, int *count);


/* Pointer state queries and control of event delivery.
 * NOTE(review): undocumented, the following is inferred from the names -
 * TODO confirm: ctx_pointer_is_down takes a device number as used by
 * ctx_pointer_press; ctx_freeze and ctx_thaw suspend and resume event
 * handling, with ctx_events_frozen reporting the state; and
 * ctx_events_clear_items drops the registered listeners.
 */
int   ctx_pointer_is_down (Ctx *ctx, int no);
float ctx_pointer_x (Ctx *ctx);
float ctx_pointer_y (Ctx *ctx);
void  ctx_freeze (Ctx *ctx);
void  ctx_thaw   (Ctx *ctx);
int   ctx_events_frozen (Ctx *ctx);
void  ctx_events_clear_items (Ctx *ctx);
/* The following functions drive the event delivery, registered callbacks
 * are called in response to these being called.
 *
 * NOTE(review): ctx_incoming_message takes its time as long, while all the
 * other functions here take it as uint32_t; the meaning of the int return
 * values is not visible from these declarations - TODO confirm.
 */

int ctx_key_down  (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);
int ctx_key_up    (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);
int ctx_key_press (Ctx *ctx, unsigned int keyval,
                   const char *string, uint32_t time);
int ctx_scrolled  (Ctx *ctx, float x, float y, CtxScrollDirection scroll_direction, uint32_t time);
void ctx_incoming_message (Ctx *ctx, const char *message, long time);
int ctx_pointer_motion    (Ctx *ctx, float x, float y, int device_no, uint32_t time);
int ctx_pointer_release   (Ctx *ctx, float x, float y, int device_no, uint32_t time);
int ctx_pointer_press     (Ctx *ctx, float x, float y, int device_no, uint32_t time);
int ctx_pointer_drop      (Ctx *ctx, float x, float y, int device_no, uint32_t time,
                           char *string);





/* The kinds of backend a context can have, as reported by
 * ctx_backend_type.
 */
typedef enum CtxBackendType
{
  CTX_BACKEND_NONE       = 0,
  CTX_BACKEND_CTX        = 1,
  CTX_BACKEND_RASTERIZER = 2,
  CTX_BACKEND_HASHER     = 3,
  CTX_BACKEND_TERM       = 4,
  CTX_BACKEND_DRAWLIST   = 5,
  CTX_BACKEND_PDF        = 6,
  CTX_BACKEND_CB         = 7
} CtxBackendType;

/* Returns which kind of backend ctx is using. */
CtxBackendType ctx_backend_type (Ctx *ctx);

/* CtxBuffer is opaque here. ctx_buffer_new_for_data creates a buffer for
 * already existing pixel data.
 * NOTE(review): presumably data is not copied, stride is in bytes, and
 * freefunc - if given - is called with data and user_data when the buffer is
 * destroyed - TODO confirm.
 */
typedef struct _CtxBuffer CtxBuffer;
CtxBuffer *ctx_buffer_new_for_data (void *data, int width, int height,
                                    int stride,
                                    CtxPixelFormat pixel_format,
                                    void (*freefunc) (void *pixels, void *user_data),
                                    void *user_data);

/* Operations on a client addressed by its integer id.
 *
 * NOTE(review): ctx_client_resize and ctx_client_maximize are declared a
 * second time further down in this header, in the block of client window
 * management prototypes.
 */
int   ctx_client_resize        (Ctx *ctx, int id, int width, int height);
void  ctx_client_set_font_size (Ctx *ctx, int id, float font_size);
float ctx_client_get_font_size (Ctx *ctx, int id);
void  ctx_client_maximize      (Ctx *ctx, int id);
void ctx_client_focus          (Ctx *ctx, int id);

/* Opaque terminal emulator handle and the functions operating on it.
 * Only the signatures are visible here; semantics are defined by the
 * implementation.
 */
typedef struct _VT VT;
void vt_feed_keystring    (VT *vt, CtxEvent *event, const char *str);
void vt_paste             (VT *vt, const char *str);
/* NOTE(review): returns a non-const char *; whether the caller must free it
 * is not visible here - confirm. */
char *vt_get_selection    (VT *vt);
long vt_rev               (VT *vt);
void vt_set_line_spacing (VT *vt, float line_spacing);
void vt_set_baseline     (VT *vt, float baseline);
int  vt_has_blink         (VT *vt);
int ctx_vt_had_alt_screen (VT *vt);
int  vt_get_cursor_x      (VT *vt);
int  vt_get_cursor_y      (VT *vt);
/* Draws the terminal into ctx at x0,y0; has_focus is passed through to the
 * drawing code. */
void vt_draw (VT *vt, Ctx *ctx, double x0, double y0, int has_focus);

/* Sets palette entry color_no to the given 8-bit red/green/blue values.
 * Takes no VT argument, so the palette is presumably shared by all
 * terminals - TODO confirm. */
void vt_set_palette(int color_no, uint8_t red, uint8_t green, uint8_t blue);

typedef struct _CtxList CtxList;
/* Returns the list of clients associated with ctx. */
CtxList *ctx_clients (Ctx *ctx);

/* Set/query fullscreen state; val is presumably a boolean - TODO confirm. */
void ctx_set_fullscreen (Ctx *ctx, int val);
int ctx_get_fullscreen (Ctx *ctx);


/* Bit flags describing a client; each value occupies a single bit so the
 * flags can be combined with bitwise or.
 */
typedef enum CtxClientFlags {
  CTX_CLIENT_UI_RESIZABLE = 0x001,
  CTX_CLIENT_CAN_LAUNCH   = 0x002,
  CTX_CLIENT_MAXIMIZED    = 0x004,
  CTX_CLIENT_ICONIFIED    = 0x008,
  CTX_CLIENT_SHADED       = 0x010,
  CTX_CLIENT_TITLEBAR     = 0x020,

  /* used for having a second set to draw - useful for splitting scrolled
   * and HUD items, with HUD being LAYER2 */
  CTX_CLIENT_LAYER2       = 0x040,

  /* NOTE(review): the original comments on the next two entries read
   * "do not automatically" and "do not automatically remove after process
   * quits"; the sentence looks split across both entries and presumably
   * describes KEEP_ALIVE - confirm. */
  CTX_CLIENT_KEEP_ALIVE   = 0x080,
  CTX_CLIENT_FINISHED     = 0x100,

  CTX_CLIENT_PRELOAD      = 0x200,
  CTX_CLIENT_LIVE         = 0x400
} CtxClientFlags;
/* Callback type receiving a client and the user_data it was created with;
 * passed to the ctx_client_new* constructors below. */
typedef void (*CtxClientFinalize)(CtxClient *client, void *user_data);

/* Accessors for properties of a CtxClient. */
int ctx_client_id (CtxClient *client);
int ctx_client_flags (CtxClient *client);
VT *ctx_client_vt (CtxClient *client);
void ctx_client_add_event (CtxClient *client, CtxEvent *event);
const char *ctx_client_title (CtxClient *client);
/* Looks up a client by label string. */
CtxClient *ctx_client_find (Ctx *ctx, const char *label);

void *ctx_client_userdata (CtxClient *client);

void ctx_client_quit (CtxClient *client);
CtxClient *vt_get_client (VT *vt);
/* Creates a client from a command line string, with initial geometry,
 * font size, flags, user_data and a finalize callback. */
CtxClient *ctx_client_new (Ctx *ctx,
                           const char *commandline,
                           int x, int y, int width, int height,
                           float font_size,
                           CtxClientFlags flags,
                           void *user_data,
                           CtxClientFinalize client_finalize);

/* Same as ctx_client_new but takes an argument vector instead of a single
 * command line string. */
CtxClient *ctx_client_new_argv (Ctx *ctx, char **argv, int x, int y, int width, int height, float font_size, CtxClientFlags flags, void *user_data,
                CtxClientFinalize client_finalize);
int ctx_clients_need_redraw (Ctx *ctx);

/* Creates a client driven by start_routine, which receives a Ctx and the
 * user_data; judging by the name it runs in its own thread - TODO confirm. */
CtxClient *ctx_client_new_thread (Ctx *ctx, void (*start_routine)(Ctx *ctx, void *user_data),
                                  int x, int y, int width, int height, float font_size, CtxClientFlags flags, void *user_data, CtxClientFinalize finalize);

/* Globals defined by the implementation. */
extern float ctx_shape_cache_rate;
extern int _ctx_max_threads;

/* Returns a copy of event; presumably the caller owns the copy - TODO
 * confirm. */
CtxEvent *ctx_event_copy (CtxEvent *event);

/* Client operations addressed by id.
 * NOTE(review): ctx_client_move and ctx_client_shade_toggle are declared a
 * second time further down in this header. */
void  ctx_client_move           (Ctx *ctx, int id, int x, int y);
void  ctx_client_shade_toggle   (Ctx *ctx, int id);
float ctx_client_min_y_pos      (Ctx *ctx);
float ctx_client_max_y_pos      (Ctx *ctx);
void  ctx_client_paste          (Ctx *ctx, int id, const char *str);
char  *ctx_client_get_selection (Ctx *ctx, int id);

/* Revision counter of a client: increment and read. */
void  ctx_client_rev_inc      (CtxClient *client);
long  ctx_client_rev          (CtxClient *client);

int   ctx_clients_active      (Ctx *ctx);

/* Looks up a client by its integer id. */
CtxClient *ctx_client_by_id (Ctx *ctx, int id);

/* Draws clients; layer2 presumably selects the CTX_CLIENT_LAYER2 set -
 * TODO confirm. */
int ctx_clients_draw            (Ctx *ctx, int layer2);

void ctx_client_feed_keystring  (CtxClient *client, CtxEvent *event, const char *str);
// need not be public?
void ctx_client_register_events (CtxClient *client, Ctx *ctx, double x0, double y0);

void ctx_client_remove          (Ctx *ctx, CtxClient *client);

/* Window-management style operations on a client addressed by id: geometry
 * queries, stacking order, iconify/maximize/shade state, opacity and title.
 *
 * NOTE(review): ctx_client_maximize, ctx_client_shade_toggle,
 * ctx_client_move and ctx_client_resize are already declared earlier in this
 * header (identical signatures, so the repetition is harmless). Both
 * ctx_client_maximized_toggle and ctx_client_toggle_maximized are declared;
 * whether both are implemented is not visible here.
 */
int  ctx_client_height           (Ctx *ctx, int id);
int  ctx_client_width            (Ctx *ctx, int id);
int  ctx_client_x                (Ctx *ctx, int id);
int  ctx_client_y                (Ctx *ctx, int id);
void ctx_client_raise_top        (Ctx *ctx, int id);
void ctx_client_raise_almost_top (Ctx *ctx, int id);
void ctx_client_lower_bottom     (Ctx *ctx, int id);
void ctx_client_lower_almost_bottom (Ctx *ctx, int id);
void ctx_client_iconify          (Ctx *ctx, int id);
int  ctx_client_is_iconified     (Ctx *ctx, int id);
void ctx_client_uniconify        (Ctx *ctx, int id);
void ctx_client_maximize         (Ctx *ctx, int id);
int  ctx_client_is_maximized     (Ctx *ctx, int id);
void ctx_client_unmaximize       (Ctx *ctx, int id);
void ctx_client_maximized_toggle (Ctx *ctx, int id);
void ctx_client_shade            (Ctx *ctx, int id);
int  ctx_client_is_shaded        (Ctx *ctx, int id);
void ctx_client_unshade          (Ctx *ctx, int id);
void ctx_client_toggle_maximized (Ctx *ctx, int id);
void ctx_client_shade_toggle     (Ctx *ctx, int id);
void ctx_client_move             (Ctx *ctx, int id, int x, int y);
int  ctx_client_resize           (Ctx *ctx, int id, int width, int height);
void ctx_client_set_opacity      (Ctx *ctx, int id, float opacity);
float ctx_client_get_opacity     (Ctx *ctx, int id);
void ctx_client_set_title        (Ctx *ctx, int id, const char *title);
const char *ctx_client_get_title (Ctx *ctx, int id);




/* Color model identifiers.
 *
 * The numbering is grouped: 1..8 are the base models, 101..108 repeat them
 * at base + 100 with an A suffix (presumably "with alpha" - confirm), and
 * the 200-series holds the *_A variants.
 */
typedef enum
{
  /* base models */
  CTX_GRAY = 1,
  CTX_RGB = 3,
  CTX_DRGB = 4,
  CTX_CMYK = 5,
  CTX_DCMYK = 6,
  CTX_LAB = 7,
  CTX_LCH = 8,

  /* base + 100 */
  CTX_GRAYA = 101,
  CTX_RGBA = 103,
  CTX_DRGBA = 104,
  CTX_CMYKA = 105,
  CTX_DCMYKA = 106,
  CTX_LABA = 107,
  CTX_LCHA = 108,

  /* base + 200 */
  CTX_GRAYA_A = 201,
  CTX_RGBA_A = 203,
  CTX_RGBA_A_DEVICE = 204,
  CTX_CMYKA_A = 205,
  CTX_DCMYKA_A = 206,
  /* open question carried over from the original: RGB device and RGB ? */
} CtxColorModel;


/* Color space slots, used as the space_slot argument of ctx_colorspace().
 * CTX_COLOR_SPACE_LAST names the highest valid slot.
 */
typedef enum _CtxColorSpace
{
  CTX_COLOR_SPACE_DEVICE_RGB  = 0,
  CTX_COLOR_SPACE_DEVICE_CMYK = 1,
  CTX_COLOR_SPACE_USER_RGB    = 2,
  CTX_COLOR_SPACE_USER_CMYK   = 3,
  CTX_COLOR_SPACE_TEXTURE     = 4
} CtxColorSpace;

#define CTX_COLOR_SPACE_LAST CTX_COLOR_SPACE_TEXTURE

/* Sets the color space for a slot; the space is either a string such as
 * "sRGB" or "rec2020", or an ICC profile.
 *
 * The device_rgb and device_cmyk slots are mostly meant to be handled outside
 * drawing code, while user_rgb and user_cmyk are the ones to use when
 * drawing. With no user_cmyk set, user_cmyk == device_cmyk.
 *
 * The profiles that have been set follow the graphics state.
 *
 * ctx         - the context
 * space_slot  - which slot to set
 * data        - name string or ICC profile bytes
 * data_length - length of data in bytes
 */
void ctx_colorspace (Ctx                 *ctx,
                     CtxColorSpace        space_slot,
                     const unsigned char *data,
                     int                  data_length);


/* Mouse cursor shapes accepted by ctx_set_cursor() and
 * ctx_listen_set_cursor(). Values are sequential starting at zero.
 */
typedef enum _CtxCursor
{
  CTX_CURSOR_UNSET      = 0,
  CTX_CURSOR_NONE       = 1,
  CTX_CURSOR_ARROW      = 2,
  CTX_CURSOR_IBEAM      = 3,
  CTX_CURSOR_WAIT       = 4,
  CTX_CURSOR_HAND       = 5,
  CTX_CURSOR_CROSSHAIR  = 6,
  CTX_CURSOR_RESIZE_ALL = 7,
  CTX_CURSOR_RESIZE_N   = 8,
  CTX_CURSOR_RESIZE_S   = 9,
  CTX_CURSOR_RESIZE_E   = 10,
  CTX_CURSOR_RESIZE_NE  = 11,
  CTX_CURSOR_RESIZE_SE  = 12,
  CTX_CURSOR_RESIZE_W   = 13,
  CTX_CURSOR_RESIZE_NW  = 14,
  CTX_CURSOR_RESIZE_SW  = 15,
  CTX_CURSOR_MOVE       = 16
} CtxCursor;

/* To be used immediately after a ctx_listen or ctx_listen_full call; causes
 * the cursor to change when hovering the listen area.
 */
void ctx_listen_set_cursor (Ctx      *ctx,
                            CtxCursor cursor);


/* Lower level cursor setting and querying that is independent of ctx event
 * handling.
 */
void      ctx_set_cursor           (Ctx *ctx, CtxCursor cursor);
CtxCursor ctx_get_cursor           (Ctx *ctx);


/* Draw the ctx logo at x,y; dim is presumably the size of the logo - TODO
 * confirm. */
void ctx_logo (Ctx *ctx, float x, float y, float dim);

/*** h2: parsing */

/**
 * ctx_parse:
 * @ctx: the context the commands are interpreted in
 * @string: text in ctx syntax
 *
 * Parse ctx-syntax, interpreting the commands in ctx.
 *
 */
void ctx_parse            (Ctx *ctx, const char *string);

/**
 * ctx_parse_animation:
 * @ctx: the context the commands are interpreted in
 * @string: ctx protocol data with scene and key-framing syntax
 * @elapsed_time: in/out, elapsed time for the scene to render
 * @scene_no: in/out, scene number
 *
 * Parses a string containing ctx protocol data, including an overlaid
 * scene and key-framing syntax.
 *
 * Pass in the scene_no you expect to render in the pointer for scene_no;
 * the actually rendered scene is returned there.
 *
 * elapsed_time is the elapsed time for the scene to render; if we are beyond
 * the specified scene, elapsed_time is adjusted to reflect the elapsed time
 * in the actually rendered scene.
 */
void
ctx_parse_animation (Ctx *ctx, const char *string,
                     float *elapsed_time, 
                     int   *scene_no);


/* Configuration of a parser, with callbacks for customization
 * of behavior. Passed to ctx_parser_new().
 *
 * Each callback has its own *_user_data member in addition to the general
 * user_data member; which of the two is handed to a callback as its
 * user_data argument is decided by the implementation - TODO confirm.
 *
 * NOTE(review): set_prop identifies the property with a uint32_t key while
 * get_prop uses a string key.
 */
typedef struct CtxParserConfig {
  int      width;       // <- maybe should be float?
  int      height;
  float    cell_width;
  float    cell_height;
  int      cursor_x;
  int      cursor_y;
  int      flags;
  void    *user_data;

  /* property setter: key plus data/len of the value */
  int   (*set_prop)       (Ctx *ctx, void *user_data, uint32_t key, const char *data, int len);
  void   *set_prop_user_data;

  /* property getter: value is returned through the data/len out-parameters */
  int   (*get_prop)       (Ctx *ctx, void *user_data, const char *key, char **data, int *len);
  void   *get_prop_user_data;

  void  (*start_frame)    (Ctx *ctx, void *user_data);
  void   *start_frame_user_data;

  void  (*end_frame)      (Ctx *ctx, void *user_data);
  void   *end_frame_user_data;

  /* receives response data of len bytes */
  void  (*response) (Ctx *ctx, void *user_data, char *response, int len);
  void   *response_user_data;

} CtxParserConfig;

/* Opaque parser handle. */
typedef struct _CtxParser CtxParser;
/* Creates a parser for ctx, configured by config. Whether config is copied
 * or must outlive the parser is not visible here - TODO confirm. */
  CtxParser *ctx_parser_new (
  Ctx       *ctx,
  CtxParserConfig *config);

/* Destroys a parser created with ctx_parser_new. */
void ctx_parser_destroy (CtxParser *parser);

/* NOTE(review): presumably reports whether the parser is in its neutral
 * (between commands) state - confirm. */
int ctx_parser_neutral (CtxParser *parser);

/* Updates the dimensions of the parser; the parameters correspond to the
 * members of the same name in CtxParserConfig. */
void
ctx_parser_set_size (CtxParser *parser,
                     int        width,
                     int        height,
                     float      cell_width,
                     float      cell_height);

/* Feeds count bytes from data to the parser. */
void ctx_parser_feed_bytes (CtxParser *parser, const char *data, int count);


/* Table of callbacks and state describing a backend. A backend is attached
 * to a context with ctx_set_backend and retrieved with ctx_get_backend; both
 * use void * rather than CtxBackend *.
 */
typedef struct _CtxBackend CtxBackend;
struct _CtxBackend
{
  Ctx                      *ctx;

  /* handles a single command */
  void  (*process)         (Ctx *ctx, const CtxCommand *entry);

  /* for interactive/event-handling backends */
  void  (*start_frame)     (Ctx *ctx);
  void  (*end_frame)       (Ctx *ctx);
  void  (*consume_events)  (Ctx *ctx);
  void  (*get_event_fds)   (Ctx *ctx, int *fd, int *count);

  void  (*set_windowtitle) (Ctx *ctx, const char *text);

  char *(*get_clipboard)   (Ctx *ctx);
  void  (*set_clipboard)   (Ctx *ctx, const char *text);
  void  (*destroy)         (void *backend); /* the free pointers are abused as the differentiator
                                               between different backends   */
  void  (*reset_caches)    (Ctx *ctx);
  CtxFlags                  flags;
  CtxBackendType            type;
  void                     *user_data; // not used by ctx core
};
void ctx_set_backend  (Ctx *ctx, void *backend);
void *ctx_get_backend (Ctx *ctx);

/* Iterator over the commands of a drawlist. */
typedef struct _CtxIterator CtxIterator;

/* Returns the next command of the iteration; presumably NULL when the
 * drawlist is exhausted - TODO confirm. */
CtxCommand *ctx_iterator_next (CtxIterator *iterator);
/* Initializes iterator to walk drawlist starting at start_pos. */
void
ctx_iterator_init (CtxIterator  *iterator,
                   CtxDrawlist  *drawlist,  // replace with Ctx*  ? XXX XXX ? for one less type in public API
                   int           start_pos, 
                   int           flags);    // need exposing for font bits
/* Returns the current position of the iterator. */
int ctx_iterator_pos (CtxIterator *iterator);


// utility calls not tied directly to Ctx

/* Reads the file at path; the data and its length are returned through the
 * contents/length out-parameters. Who frees *contents and the meaning of the
 * return value are not visible here - TODO confirm. */
int
ctx_get_contents (const char     *path,
                   unsigned char **contents,
                   long           *length);
/* Variant of ctx_get_contents taking an additional max_len, presumably an
 * upper bound on the number of bytes read - TODO confirm. */
int
ctx_get_contents2 (const char     *path,
                   unsigned char **contents,
                   long           *length,
                   long            max_len);

typedef struct _CtxSHA1 CtxSHA1;

/* Encodes bin_length bytes from bin as base64 text into ascii. The caller
 * supplies the output buffer; its required size is not visible here - TODO
 * confirm. */
void
ctx_bin2base64 (const void *bin,
                size_t      bin_length,
                char       *ascii);
/* Decodes base64 text from ascii into bin; length is presumably set to the
 * number of decoded bytes - TODO confirm. */
int
ctx_base642bin (const char    *ascii,
                int           *length,
                unsigned char *bin);


/* CtxMatrix helpers.
 *
 * The apply functions take a const matrix and modify the coordinate pair
 * pointed to by x and y; the remaining functions modify the matrix passed as
 * first argument. The order in which ctx_matrix_multiply combines t and s is
 * not visible here - TODO confirm.
 */
void ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y);
void ctx_matrix_apply_transform_distance (const CtxMatrix *m, float *x, float *y);
void ctx_matrix_invert          (CtxMatrix *m);
void ctx_matrix_identity        (CtxMatrix *matrix);
void ctx_matrix_scale           (CtxMatrix *matrix, float x, float y);
void ctx_matrix_rotate          (CtxMatrix *matrix, float angle);
void ctx_matrix_translate       (CtxMatrix *matrix, float x, float y);
void ctx_matrix_multiply        (CtxMatrix       *result,
                                 const CtxMatrix *t,
                                 const CtxMatrix *s);

/* Guess the media type when the start of the file is already available,
 * which disambiguates some of our important supported formats. Preference is
 * given to magic, then extension, then text/plain vs binary.
 *
 * path    - path of the file
 * content - the start of the file
 * len     - number of bytes available in content
 */
const char *ctx_guess_media_type (const char *path, const char *content, int len);

/* Get the media type, with preference towards using the extension of path
 * and not reading the data at all.
 */
const char *ctx_path_get_media_type (const char *path);

/* Coarse classification of a media type string, as returned by
 * ctx_media_type_class(). Values are sequential starting at zero.
 */
typedef enum {
  CTX_MEDIA_TYPE_NONE        = 0,
  CTX_MEDIA_TYPE_TEXT        = 1,
  CTX_MEDIA_TYPE_HTML        = 2,
  CTX_MEDIA_TYPE_IMAGE       = 3,
  CTX_MEDIA_TYPE_VIDEO       = 4,
  CTX_MEDIA_TYPE_AUDIO       = 5,
  CTX_MEDIA_TYPE_INODE       = 6,
  CTX_MEDIA_TYPE_APPLICATION = 7
} CtxMediaTypeClass;

/* Queries on a media type string. */
int               ctx_media_type_is_text (const char *media_type);
CtxMediaTypeClass ctx_media_type_class   (const char *media_type);


/* Cell dimensions of the terminal backend. */
float ctx_term_get_cell_width (Ctx *ctx);
float ctx_term_get_cell_height (Ctx *ctx);

/* Creates a context for PDF output to path with the given page size. */
Ctx * ctx_new_pdf (const char *path, float width, float height);
/* Renders ctx as PDF to path. */
void ctx_render_pdf (Ctx *ctx, const char *path);




//#if CTX_GSTATE_PROTECT
/* Sets the current gstate stack depth (number of unpaired ctx_save calls) as
 * a limit that can not be restored beyond. For now it can not be used
 * recursively.
 */
void ctx_gstate_protect   (Ctx *ctx);

/* Removes the limit set by ctx_gstate_protect.
 *
 * NOTE(review): the original comment continued "if insufficient ctx_restore
 * calls have been made," and trailed off; what happens in that case must be
 * checked in the implementation.
 */
void ctx_gstate_unprotect (Ctx *ctx);
//#endif

/* Set and get the logical clock used for the texture eviction policy. */
void ctx_set_textureclock (Ctx *ctx, int frame);
int  ctx_textureclock (Ctx *ctx);

/* Reinitialize an existing rasterizer with new dimensions/pixel_format.
 *
 * ctx          - context with a rasterizer backend
 * fb           - target pixel memory
 * x0, y0       - origin; exact interpretation not visible here - TODO confirm
 * width/height - new dimensions
 * stride       - presumably bytes per row of fb - TODO confirm
 * pixel_format - new pixel format
 */
void
ctx_rasterizer_reinit (Ctx  *ctx,
                       void *fb,
                       int   x0,
                       int   y0,
                       int   width,
                       int   height,
                       int   stride,
                       CtxPixelFormat pixel_format);

// causes text commands to directly operate instead of expanding
// in backend
void ctx_set_frontend_text (Ctx *ctx, int frontend_text);

/* This interface is used when CTX_PTY=0 and CTX_VT=1; it permits
 * interacting with a single terminal, for instance on microcontrollers.
 *
 * ctx_vt_write takes a single byte and ctx_vt_read returns an int; the
 * direction of each call relative to the terminal and the value returned
 * when no data is pending are not visible here - TODO confirm.
 */
int ctx_vt_available (Ctx *ctx);
void ctx_vt_write    (Ctx *ctx, uint8_t byte);
int ctx_vt_has_data  (Ctx *ctx);
int ctx_vt_read      (Ctx *ctx);

/* Cursor position of the terminal belonging to client. */
int ctx_vt_cursor_x (CtxClient *client);
int ctx_vt_cursor_y (CtxClient *client);

/* Antialiasing quality levels.
 *
 * Only valid for the rasterizer backend; the setting is not kept in the
 * graphics state.
 */
typedef enum _CtxAntialias
{
  CTX_ANTIALIAS_DEFAULT = 0,
  CTX_ANTIALIAS_NONE    = 1, /* non-antialiased */
  CTX_ANTIALIAS_FAST    = 2, /* vertical aa 3 for complex scanlines */
  CTX_ANTIALIAS_GOOD    = 3, /* vertical aa 5 for complex scanlines */
  CTX_ANTIALIAS_FULL    = 4  /* vertical aa 15 for complex scanlines */
} CtxAntialias;
/* Set and get the antialiasing level of ctx. */
void         ctx_set_antialias (Ctx *ctx, CtxAntialias antialias);
CtxAntialias ctx_get_antialias (Ctx *ctx);

/* Get and set the stroke position.
 * NOTE(review): the setter lacks the set_ prefix used by most other setters
 * in this header. */
float ctx_get_stroke_pos (Ctx *ctx);
void ctx_stroke_pos (Ctx *ctx, float x);

// used by fontgen
void _ctx_set_transformation (Ctx *ctx, int transformation);


/* Writes a w x h image with num_chans channels from data as PNG to dst_path.
 * The expected sample type of data is not visible here - TODO confirm. */
void ctx_write_png (const char *dst_path, int w, int h, int num_chans, void *data);

//////////////////////////////////////////////////////////////////
/* One drawlist entry: a command byte followed by 8 bytes of argument data
 * which can be viewed through any of the union members below. The struct is
 * packed to 1-byte alignment so it occupies 9 bytes.
 */
#pragma pack(push,1)
struct
  _CtxEntry
{
  uint8_t code;
  union
  {
    float    f[2];
    uint8_t  u8[8];
    int8_t   s8[8];
    uint16_t u16[4];
    int16_t  s16[4];
    uint32_t u32[2];
    int32_t  s32[2];
    uint64_t u64[1]; // unused
  } data; // 9 bytes long, we're favoring compactness and correctness
  // over performance. By sacrificing float precision, zeroing
  // first 8bit of f[0] would permit 8 bytes long and better
  // alignment and cacheline behavior.
};
#pragma pack(pop)

/* Command codes, as stored in the one-byte `code` member of CtxEntry and
 * CtxCommand. Most codes are printable ASCII characters; codes from 128 and
 * up are the dedicated setter commands described further down. The trailing
 * comment on each code lists its arguments.
 */
typedef enum
{
  CTX_CONT             = '\0', // - contains args from preceding entry
  CTX_NOP              = ' ', //
  CTX_DATA             = '(', // size size-in-entries - u32
  CTX_DATA_REV         = ')', // reverse traversal data marker
  CTX_SET_RGBA_U8      = '*', // r g b a - u8
                   //     ,    UNUSED/RESERVED
  CTX_SET_PIXEL        = '-', // 8bit "fast-path" r g b a x y - u8 for rgba, and u16 for x,y
  // set pixel might want a shorter ascii form with hex-color? or keep it an embedded
  // only option?
                   //     ^    used for unit
                   //     &    UNUSED
                   //     +    UNUSED
                   //     !    UNUSED
                   //     "    start/end string
                   //     #    comment in parser
                   //     $    UNUSED
                   //     %    percent of viewport width or height
                   //     '    start/end string
                   //     .    decimal separator
                   //     /    UNUSED
                   //     ;    UNUSED
                   //     <    UNUSED
                   //     =    UNUSED/RESERVED
                   //     >    UNUSED
                   //     ?    UNUSED
                   //     \    UNUSED
                   //     ^    PARSER - vh unit
                       // ~    UNUSED/textenc
 
  /* optimizations that reduce the number of entries used,
   * not visible outside the drawlist compression, thus
   * using entries that cannot be used directly as commands
   * since they would be interpreted as numbers - if values>127
   * then the embedded font data is harder to escape.
   */
  CTX_REL_LINE_TO_X4            = '0', // x1 y1 x2 y2 x3 y3 x4 y4   -- s8
  CTX_REL_LINE_TO_REL_CURVE_TO  = '1', // x1 y1 cx1 cy1 cx2 cy2 x y -- s8
  CTX_REL_CURVE_TO_REL_LINE_TO  = '2', // cx1 cy1 cx2 cy2 x y x1 y1 -- s8
  CTX_REL_CURVE_TO_REL_MOVE_TO  = '3', // cx1 cy1 cx2 cy2 x y x1 y1 -- s8
  CTX_REL_LINE_TO_X2            = '4', // x1 y1 x2 y2 -- s16
  CTX_MOVE_TO_REL_LINE_TO       = '5', // x1 y1 x2 y2 -- s16
  CTX_REL_LINE_TO_REL_MOVE_TO   = '6', // x1 y1 x2 y2 -- s16
  CTX_FILL_MOVE_TO              = '7', // x y
  CTX_REL_QUAD_TO_REL_QUAD_TO   = '8', // cx1 x1 cy1 y1 cx1 x2 cy1 y1 -- s8
  CTX_REL_QUAD_TO_S16           = '9', // cx1 cy1 x y                 - s16
  CTX_END_FRAME        = 'X',

  CTX_DEFINE_FONT      = 15,

  CTX_DEFINE_GLYPH     = '@', // unichar width - u32
  CTX_ARC_TO           = 'A', // x1 y1 x2 y2 radius
  CTX_ARC              = 'B', // x y radius angle1 angle2 direction
  CTX_CURVE_TO         = 'C', // cx1 cy1 cx2 cy2 x y
  CTX_PAINT            = 'D', // 
                       // 'E' // scientific notation
  CTX_FILL             = 'F', //
  CTX_RESTORE          = 'G', //
  CTX_HOR_LINE_TO      = 'H', // x
  CTX_DEFINE_TEXTURE   = 'I', // "eid" width height format "data"
  CTX_ROTATE           = 'J', // radians
  CTX_COLOR            = 'K', // model, c1 c2 c3 ca - variable arg count
  CTX_LINE_TO          = 'L', // x y
  CTX_MOVE_TO          = 'M', // x y
  CTX_RESET_PATH       = 'N', //
  CTX_SCALE            = 'O', // xscale yscale
  CTX_NEW_PAGE         = 'P', // - NYI optional page-size
  CTX_QUAD_TO          = 'Q', // cx cy x y
  CTX_VIEW_BOX         = 'R', // x y width height
  CTX_SMOOTH_TO        = 'S', // cx cy x y
  CTX_SMOOTHQ_TO       = 'T', // x y
  CTX_CONIC_GRADIENT   = 'U', // cx cy start_angle cycles
  CTX_VER_LINE_TO      = 'V', // y
  CTX_APPLY_TRANSFORM  = 'W', // a b c d e f g h i j - for set_transform combine with identity
  CTX_TRANSLATE        = 'Y', // x y  

  CTX_CLOSE_PATH2      = 'Z', //
                              
  CTX_START_FRAME      = ':', // 
  CTX_KERNING_PAIR     = '[', // glA glB kerning, glA and glB in u16 kerning in s32
  CTX_COLOR_SPACE      = ']', // IccSlot  data  data_len,
                         //    data can be a string with a name,
                         //    icc data or perhaps our own serialization
                         //    of profile data
  CTX_STROKE_SOURCE    = '_', // next source definition applies to strokes
  CTX_SOURCE_TRANSFORM = '`',
  CTX_REL_ARC_TO       = 'a', // x1 y1 x2 y2 radius
  CTX_CLIP             = 'b',
  CTX_REL_CURVE_TO     = 'c', // cx1 cy1 cx2 cy2 x y
  CTX_LINE_DASH        = 'd', // dashlen0 [dashlen1 ...]
                     //  'e'  -- scientific notation for SVG numbers
  CTX_LINEAR_GRADIENT  = 'f', // x1 y1 x2 y2
  CTX_SAVE             = 'g',
  CTX_REL_HOR_LINE_TO  = 'h', // x
  CTX_TEXTURE          = 'i',
  CTX_PRESERVE         = 'j', // XXX - fix!
  CTX_SET_KEY          = 'k', // - used together with another char to identify
                              //   a key to set
  CTX_REL_LINE_TO      = 'l', // x y
  CTX_REL_MOVE_TO      = 'm', // x y
  CTX_FONT             = 'n', // as used by text parser XXX: move to keyvals?
  CTX_RADIAL_GRADIENT  = 'o', // x1 y1 radius1 x2 y2 radius2
  CTX_GRADIENT_STOP    = 'p', // argument count depends on current color model
  CTX_REL_QUAD_TO      = 'q', // cx cy x y
  CTX_RECTANGLE        = 'r', // x y width height
  CTX_REL_SMOOTH_TO    = 's', // cx cy x y
  CTX_REL_SMOOTHQ_TO   = 't', // x y
  CTX_STROKE           = 'u', // string - utf8 string
                              // NOTE(review): the argument description above
                              // looks copied from a text command - confirm
  CTX_REL_VER_LINE_TO  = 'v', // y
  CTX_GLYPH            = 'w', // unichar fontsize
  CTX_TEXT             = 'x', // string | kern - utf8 data to shape or horizontal kerning amount
  CTX_IDENTITY         = 'y', // XXX remove? - or reset to baseline.. which is what identity expects
  CTX_CLOSE_PATH       = 'z', //
  CTX_START_GROUP      = '{',
  CTX_END_GROUP        = '}',
  CTX_ROUND_RECTANGLE  = '|', // x y width height radius

  /* though expressed as two chars in serialization we have
   * dedicated byte commands for the setters to keep the dispatch
   * simpler. There is no need for these to be human readable thus we go >128
   * they also should not be emitted when outputting, even compact mode ctx.
   *
   * rasterizer:    &^+
   * font:          @[
   *
   * unused:        !&<=>?: =/\`,
   * reserved:      '"&   #. %^@
   */

  CTX_FILL_RULE        = 128, // kr rule - u8, default = CTX_FILL_RULE_EVEN_ODD
  CTX_BLEND_MODE       = 129, // kB mode - u32 , default=0

  CTX_MITER_LIMIT      = 130, // km limit - float, default = 0.0

  CTX_LINE_JOIN        = 131, // kj join - u8 , default=0
  CTX_LINE_CAP         = 132, // kc cap - u8, default = 0
  CTX_LINE_WIDTH       = 133, // kw width, default = 2.0
  CTX_GLOBAL_ALPHA     = 134, // ka alpha - default=1.0
  CTX_COMPOSITING_MODE = 135, // kc mode - u32 , default=0
                              // NOTE(review): "kc" is also listed for
                              // CTX_LINE_CAP - confirm which key is right

  CTX_FONT_SIZE        = 136, // kf size - float, default=?
  CTX_TEXT_ALIGN       = 137, // kt align - u8, default = CTX_TEXT_ALIGN_START
  CTX_TEXT_BASELINE    = 138, // kb baseline - u8, default = CTX_TEXT_ALIGN_ALPHABETIC
  CTX_TEXT_DIRECTION   = 139, // kd

  CTX_SHADOW_BLUR      = 140, // ks
  CTX_SHADOW_COLOR     = 141, // kC
  CTX_SHADOW_OFFSET_X  = 142, // kx
  CTX_SHADOW_OFFSET_Y  = 143, // ky
  CTX_IMAGE_SMOOTHING  = 144, // kS
  CTX_LINE_DASH_OFFSET = 145, // kD lineDashOffset


  CTX_EXTEND           = 146, // ke u32 extend mode, default=0
  CTX_WRAP_LEFT        = 147, // kL
  CTX_WRAP_RIGHT       = 148, // kR
  CTX_LINE_HEIGHT      = 149, // kH
                              
  CTX_STROKE_POS       = 150, // kp
  CTX_FEATHER          = 151, // kp
                              // NOTE(review): "kp" is also listed for
                              // CTX_STROKE_POS - confirm which key is right
                             
  /* NOTE(review): CTX_STROKE_RECT and CTX_FILL_RECT below have values above
   * CTX_LAST_COMMAND. */
#define CTX_LAST_COMMAND  CTX_FEATHER

  CTX_STROKE_RECT      = 200, // strokeRect - only exist in long form
  CTX_FILL_RECT        = 201, // fillRect   - only exist in long form


  CTX_FROM_PREV        = 26, // references previous frame
  CTX_FROM_THIS        = 16, //
} CtxCode;


/* Typed view of a command that spans one or more consecutive 9-byte
 * CtxEntry records.
 *
 * The struct is packed to 1-byte alignment. Every member of the anonymous
 * union starts with the command's code byte; within a member each group of
 * 8 bytes of arguments is followed by a one-byte pad*, cont or code_*
 * member, which sits at a multiple of 9 bytes - the position of the code
 * byte of the following CtxEntry. Members ending in an 8-byte array marked
 * "and continues" describe variable-length data that extends past the end
 * of the struct.
 *
 * Field order and sizes define the in-memory format; do not reorder.
 */
#pragma pack(push,1)
struct
  _CtxCommand
{
  union
  {
    uint8_t  code;
    CtxEntry entry;
    struct
    {
      uint8_t code;
      float scalex;
      float scaley;
    } scale;
    struct
    {
      uint8_t code;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t cont;
      uint8_t data[8]; /* ... and continues */
    } data;
    struct
    {
      uint8_t code;
      uint32_t stringlen;
      uint32_t blocklen;
    } data_rev;
    struct
    {
      uint8_t code;
      uint32_t next_active_mask; // the tilehasher active flags for next
                                 // drawing command
      float pad2;
      uint8_t code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t code_cont;
      uint8_t utf8[8]; /* .. and continues */
    } text;
    struct
    {
      uint8_t  code;
      uint32_t key_hash;
      float    pad;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } set;
    struct
    {
      uint8_t  code;
      uint32_t pad0;
      float    pad1;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } get;
    struct {
      uint8_t  code;
      uint32_t count; /* better than byte_len in code, but needs to then be set   */
      float    pad1;
      uint8_t  code_data;
      uint32_t byte_len;
      uint32_t blocklen;
      uint8_t  code_cont;
      float    data[2]; /* .. and - possibly continues */
    } line_dash;
    struct {
      uint8_t  code;
      uint32_t space_slot;
      float    pad1;
      uint8_t  code_data;
      uint32_t data_len;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  data[8]; /* .. and continues */
    } colorspace;
    struct
    {
      uint8_t  code;
      float    x;
      float    y;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      char     eid[8]; /* .. and continues */
    } texture;
    struct
    {
      uint8_t  code;
      uint32_t width;
      uint32_t height;
      uint8_t  code_cont0;
      uint16_t format;
      uint16_t pad0;
      uint32_t pad1;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont1;
      char     eid[8]; /* .. and continues */
      // followed by - in variable offset code_Data, data_len, datablock_len, cont, pixeldata
    } define_texture;
    struct
    {
      uint8_t  code;
      float    pad;
      float    pad2;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } text_stroke;
    struct
    {
      uint8_t  code;
      float    pad;
      float    pad2;
      uint8_t  code_data;
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } set_font;


    struct  // NYI - should be able to do old-style numerals, regular ligs, discretionary ligs
    {       //       the shaper shall look after current glyph, for matching utf8 and flags
            //       matching of 0 length utf8 maps to stylistic replacement.
      uint8_t  code;
      uint32_t glyph;
      uint32_t replacement;
      uint8_t  code_data;  /// < store which open-type flags activate it here
      uint32_t stringlen;
      uint32_t blocklen;
      uint8_t  code_cont;
      uint8_t  utf8[8]; /* .. and continues */
    } ligature;


    struct
    {
      uint8_t code;
      float model;
      float r;
      uint8_t pad1;
      float g;
      float b;
      uint8_t pad2;
      float a;
    } rgba;
    struct
    {
      uint8_t code;
      float model;
      float c;
      uint8_t pad1;
      float m;
      float y;
      uint8_t pad2;
      float k;
      float a;
    } cmyka;
    struct
    {
      uint8_t code;
      float model;
      float g;
      uint8_t pad1;
      float a;
    } graya;

    struct
    {
      uint8_t code;
      float model;
      float c0;
      uint8_t pad1;
      float c1;
      float c2;
      uint8_t pad2;
      float c3;
      float c4;
      uint8_t pad3;
      float c5;
      float c6;
      uint8_t pad4;
      float c7;
      float c8;
      uint8_t pad5;
      float c9;
      float c10;
    } set_color;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } rel_move_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } rel_line_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } line_to;
    struct
    {
      uint8_t code;
      float cx1;
      float cy1;
      uint8_t pad0;
      float cx2;
      float cy2;
      uint8_t pad1;
      float x;
      float y;
    } rel_curve_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
    } move_to;
    struct
    {
      uint8_t code;
      float cx1;
      float cy1;
      uint8_t pad0;
      float cx2;
      float cy2;
      uint8_t pad1;
      float x;
      float y;
    } curve_to;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float r1;
      float x2;
      uint8_t pad1;
      float y2;
      float r2;
    } radial_gradient;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float x2;
      float y2;
    } linear_gradient;
    struct
    {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float start_angle;
      float cycles;
    } conic_gradient;
    struct
    {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float width;
      float height;
      uint8_t pad1;
      float radius;
    } rectangle;
    struct {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float width;
      float height;
    } view_box;

    struct
    {
      uint8_t code;
      uint16_t glyph_before;
      uint16_t glyph_after;
       int32_t amount;
    } kern;


    struct
    {
      uint8_t code;
      uint32_t glyph;
      uint32_t advance; // * 256
    } define_glyph;

    struct
    {
      uint8_t code;
      uint8_t rgba[4];
      uint16_t x;
      uint16_t y;
    } set_pixel;
    struct
    {
      uint8_t code;
      float cx;
      float cy;
      uint8_t pad0;
      float x;
      float y;
    } quad_to;
    struct
    {
      uint8_t code;
      float cx;
      float cy;
      uint8_t pad0;
      float x;
      float y;
    } rel_quad_to;
    struct
    {
      uint8_t code;
      float x;
      float y;
      uint8_t pad0;
      float radius;
      float angle1;
      uint8_t pad1;
      float angle2;
      float direction;
    }
    arc;
    struct
    {
      uint8_t code;
      float x1;
      float y1;
      uint8_t pad0;
      float x2;
      float y2;
      uint8_t pad1;
      float radius;
    }
    arc_to;
    /* some format specific generic accessors:  */
    struct
    {
      uint8_t code;
      float   x0;
      float   y0;
      uint8_t pad0;
      float   x1;
      float   y1;
      uint8_t pad1;
      float   x2;
      float   y2;
      uint8_t pad2;
      float   x3;
      float   y3;
      uint8_t pad3;
      float   x4;
      float   y4;
    } c;
    struct
    {
      uint8_t code;
      float   a0;
      float   a1;
      uint8_t pad0;
      float   a2;
      float   a3;
      uint8_t pad1;
      float   a4;
      float   a5;
      uint8_t pad2;
      float   a6;
      float   a7;
      uint8_t pad3;
      float   a8;
      float   a9;
    } f;
    struct
    {
      uint8_t  code;
      uint32_t a0;
      uint32_t a1;
      uint8_t  pad0;
      uint32_t a2;
      uint32_t a3;
      uint8_t  pad1;
      uint32_t a4;
      uint32_t a5;
      uint8_t  pad2;
      uint32_t a6;
      uint32_t a7;
      uint8_t  pad3;
      uint32_t a8;
      uint32_t a9;
    } u32;
    struct
    {
      uint8_t  code;
      uint64_t a0;
      uint8_t  pad0;
      uint64_t a1;
      uint8_t  pad1;
      uint64_t a2;
      uint8_t  pad2;
      uint64_t a3;
      uint8_t  pad3;
      uint64_t a4;
    } u64;
    struct
    {
      uint8_t code;
      int32_t a0;
      int32_t a1;
      uint8_t pad0;
      int32_t a2;
      int32_t a3;
      uint8_t pad1;
      int32_t a4;
      int32_t a5;
      uint8_t pad2;
      int32_t a6;
      int32_t a7;
      uint8_t pad3;
      int32_t a8;
      int32_t a9;
    } s32;
    struct
    {
      uint8_t code;
      int16_t a0;
      int16_t a1;
      int16_t a2;
      int16_t a3;
      uint8_t pad0;
      int16_t a4;
      int16_t a5;
      int16_t a6;
      int16_t a7;
      uint8_t pad1;
      int16_t a8;
      int16_t a9;
      int16_t a10;
      int16_t a11;
      uint8_t pad2;
      int16_t a12;
      int16_t a13;
      int16_t a14;
      int16_t a15;
      uint8_t pad3;
      int16_t a16;
      int16_t a17;
      int16_t a18;
      int16_t a19;
    } s16;
    struct
    {
      uint8_t code;
      uint16_t a0;
      uint16_t a1;
      uint16_t a2;
      uint16_t a3;
      uint8_t pad0;
      uint16_t a4;
      uint16_t a5;
      uint16_t a6;
      uint16_t a7;
      uint8_t pad1;
      uint16_t a8;
      uint16_t a9;
      uint16_t a10;
      uint16_t a11;
      uint8_t pad2;
      uint16_t a12;
      uint16_t a13;
      uint16_t a14;
      uint16_t a15;
      uint8_t pad3;
      uint16_t a16;
      uint16_t a17;
      uint16_t a18;
      uint16_t a19;
    } u16;
    struct
    {
      uint8_t code;
      uint8_t a0;
      uint8_t a1;
      uint8_t a2;
      uint8_t a3;
      uint8_t a4;
      uint8_t a5;
      uint8_t a6;
      uint8_t a7;
      uint8_t pad0;
      uint8_t a8;
      uint8_t a9;
      uint8_t a10;
      uint8_t a11;
      uint8_t a12;
      uint8_t a13;
      uint8_t a14;
      uint8_t a15;
      uint8_t pad1;
      uint8_t a16;
      uint8_t a17;
      uint8_t a18;
      uint8_t a19;
      uint8_t a20;
      uint8_t a21;
      uint8_t a22;
      uint8_t a23;
    } u8;
    struct
    {
      uint8_t code;
      int8_t a0;
      int8_t a1;
      int8_t a2;
      int8_t a3;
      int8_t a4;
      int8_t a5;
      int8_t a6;
      int8_t a7;
      uint8_t pad0;
      int8_t a8;
      int8_t a9;
      int8_t a10;
      int8_t a11;
      int8_t a12;
      int8_t a13;
      int8_t a14;
      int8_t a15;
      uint8_t pad1;
      int8_t a16;
      int8_t a17;
      int8_t a18;
      int8_t a19;
      int8_t a20;
      int8_t a21;
      int8_t a22;
      int8_t a23;
    } s8;
  };
  CtxEntry next_entry; // also pads size of CtxCommand slightly.
};
#pragma pack(pop)
/* Address of the byte payload starting at entry[2].data.u8[0], as a char*.
 * Like the accessors below it expects a variable named `entry` in scope.
 */
#define ctx_arg_string()  ((char*)&entry[2].data.u8[0])

/* access macros for nth argument of a given type when packed into
 * an CtxEntry pointer in current code context
 *
 * Every macro indexes a variable called `entry`, which must be in scope at
 * the point of use. The argument number is split into an entry index (high
 * bits) and a slot inside that entry's data union (low bits):
 *   float/u32/s32 : 2 slots per entry  -> entry[no>>1], slot no&1
 *   u16/s16       : 4 slots per entry  -> entry[no>>2], slot no&3
 *   u8/s8         : 8 slots per entry  -> entry[no>>3], slot no&7
 *   u64           : 1 slot per entry   -> entry[no],    slot 0
 * `no` is evaluated twice by most of these macros, so it should not have
 * side effects.
 */
#define ctx_arg_float(no) entry[(no)>>1].data.f[(no)&1]
#define ctx_arg_u64(no)   entry[(no)].data.u64[0]
#define ctx_arg_u32(no)   entry[(no)>>1].data.u32[(no)&1]
#define ctx_arg_s32(no)   entry[(no)>>1].data.s32[(no)&1]
#define ctx_arg_u16(no)   entry[(no)>>2].data.u16[(no)&3]
#define ctx_arg_s16(no)   entry[(no)>>2].data.s16[(no)&3]
#define ctx_arg_u8(no)    entry[(no)>>3].data.u8[(no)&7]
#define ctx_arg_s8(no)    entry[(no)>>3].data.s8[(no)&7]
/* identical re-definition of ctx_arg_string() from a few lines up; a
 * token-identical macro redefinition is permitted, so this is redundant
 * but harmless */
#define ctx_arg_string()  ((char*)&entry[2].data.u8[0])
////////////////////////////////////////////////////////////////////

/*
 * Removes any backend specific (SDL) clipboard integration and uses the
 * internal fallback when internal_clipboard is set to 1. Currently,
 * switching to the internal clipboard is irreversible.
 *
 * @ctx: the context to change
 * @internal_clipboard: 1 to switch to the internal clipboard
 */
void ctx_internal_clipboard (Ctx *ctx, int internal_clipboard);

/* NOTE(review): going by the name this blocks until the renderer for @ctx
 * has finished pending work - confirm against the implementation. */
void ctx_wait_for_renderer (Ctx *ctx);


/* Sub-pixel layout selector.
 *
 * NOTE(review): the enumerator names suggest horizontal (H) or vertical (V)
 * stripes in RGB or BGR order - confirm against the code consuming them.
 * The numeric values are spelled out so that they stay stable.
 */
typedef enum CtxSubPixel {
  CTX_SUBPIXEL_NONE = 0, /* no sub-pixel layout selected */
  CTX_SUBPIXEL_HRGB = 1,
  CTX_SUBPIXEL_HBGR = 2,
  CTX_SUBPIXEL_VRGB = 3,
  CTX_SUBPIXEL_VBGR = 4
} CtxSubPixel;

/* UTF8 utility functions:
 *
 * Only the declarations are visible here; the notes below are read off the
 * signatures and should be confirmed against the implementation.
 */
/* NOTE(review): presumably the number of UTF-8 code points in @s */
int ctx_utf8_strlen (const char *s);
/* NOTE(review): presumably the byte length of the sequence that starts
 * with @first_byte */
int ctx_utf8_len (const unsigned char first_byte);
/* NOTE(review): presumably decodes the first code point found at @input */
uint32_t ctx_utf8_to_unichar (const char *input);
/* NOTE(review): presumably encodes @ch into @dest and returns the number of
 * bytes written; the required size of @dest is not visible here */
int      ctx_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
/* NOTE(review): presumably returns a pointer @utf8_length code points
 * into @s */
const char *ctx_utf8_skip (const char *s, int utf8_length);

/* NOTE(review): presumably non-zero while @ctx has input focus - confirm */
int ctx_has_focus (Ctx *ctx);

/* Accessors for the library version; NOTE(review): expected to mirror the
 * CTX_VERSION_MAJOR/MINOR/MICRO defines - confirm in the implementation. */
int       ctx_get_major_version (void);
int       ctx_get_minor_version (void);
int       ctx_get_micro_version (void);

/* currently unused */
void      ctx_set_render_threads   (Ctx *ctx, int n_threads);
int       ctx_get_render_threads   (Ctx *ctx);

/* ctx_assert(a): lightweight, non-aborting assertion.
 *
 * With CTX_ASSERT set to 1 a failing condition prints the enclosing function
 * name and the line number to stderr and execution continues. With any other
 * value (including CTX_ASSERT not being defined at this point, which #if
 * treats as 0) the macro expands to nothing and `a` is not evaluated.
 *
 * The enabled variant is wrapped in do { } while (0) so that it behaves as a
 * single statement; a bare `if` would break `if (x) ctx_assert (y); else ..`
 * and could capture a following `else`.
 */
#if CTX_ASSERT==1
#define ctx_assert(a)  do { if(!(a)){fprintf(stderr,"%s:%i assertion failed\n", __FUNCTION__, __LINE__);  } } while (0)
#else
#define ctx_assert(a)
#endif

#ifdef __cplusplus
}
#endif
#endif

/* Library version, as a string and as separate numeric components. All four
 * are skipped together when CTX_VERSION_STRING has already been defined. */
#if !defined(CTX_VERSION_STRING)
#define CTX_VERSION_MAJOR 0
#define CTX_VERSION_MINOR 1
#define CTX_VERSION_MICRO 5
#define CTX_VERSION_STRING "0.1.5"
#endif
#ifndef __CTX_H__
#define __CTX_H__
/* Definitions that determine which features are included and how they are
 * configured. For particular platforms - microcontrollers in particular -
 * ctx might need tuning for different quality/performance/resource
 * constraints.
 *
 * The way to configure ctx is to set these defines before including it, both
 * where it is included as a plain header and in the file where
 * CTX_IMPLEMENTATION is set to pull in the implementation, to select the
 * feature set and runtime settings.
 *
 */


/* babl support is switched on automatically when babl has been included
 * before ctx (detected through its _BABL_H guard), unless CTX_BABL was set
 * explicitly by the includer. */
#if !defined(CTX_BABL)
  #if defined(_BABL_H)
    #define CTX_BABL 1
  #else
    #define CTX_BABL 0
  #endif
#endif

/* CTX_ENABLE_CM defaults to whatever CTX_BABL evaluates to. */
#if !defined(CTX_ENABLE_CM)
  #if CTX_BABL
    #define CTX_ENABLE_CM           1
  #else
    #define CTX_ENABLE_CM           0
  #endif
#endif

/* SDL support is switched on automatically when SDL has been included
 * before ctx (detected through its SDL_h_ guard), unless CTX_SDL was set
 * explicitly by the includer. */
#if !defined(CTX_SDL)
  #if defined(SDL_h_)
    #define CTX_SDL 1
  #else
    #define CTX_SDL 0
  #endif
#endif

/* building with SDL forces CTX_THREADS to 1, overriding any earlier value */
#if CTX_SDL != 0
  #undef CTX_THREADS
  #define CTX_THREADS 1
#endif

/* Framebuffer (fb) and KMS support are opt-in: both default to off.
 * TODO: perhaps detect them from defines of their headers, like SDL/babl. */
#if !defined(CTX_FB)
#define CTX_FB 0
#endif

#if !defined(CTX_KMS)
#define CTX_KMS 0
#endif


/* whether the font rendering happens in the backend or the front-end of the
 * API; the option is set to 0 by the tool that converts ttf fonts to the ctx
 * internal representation - both should be possible so that this tool can be
 * made into a TTF/OTF font import at runtime (perhaps even with live
 * subsetting).
 *
 * improving this feature and making it runtime selectable could also
 * be part of encoding all text as beziers upon pdf export
 */

/* NOTE(review): no description accompanies this limit; by its name it bounds
 * the number of scanlines - confirm where it is consumed. */
#if !defined(CTX_MAX_SCANLINES)
#define CTX_MAX_SCANLINES 2048
#endif


/* subpixel-aa coordinate subdivision used in BITPACKing of the drawlist;
 * powers of 2 are faster.
 *
 * Alternative values and the maximum framebuffer width they allow:
 *    10 -> 3250,  16 -> 2047,  24 -> 1350,  32 -> 1023
 */
#if !defined(CTX_SUBDIV)
#define CTX_SUBDIV   8  //  max framebuffer width 4095
#endif


// 8    12 68 40 24
// 16   12 68 40 24
/* Scale-factor applied to font outlines prior to bit quantization by
 * CTX_SUBDIV.
 *
 * Changing this also changes the font file format - the value should be
 * baked into the ctxf files, making them less dependent on the ctx used to
 * generate them. Not overridable: it is defined unconditionally.
 */
#define CTX_BAKE_FONT_SIZE    160

/* Pack some linetos/curvetos/movetos into denser drawlist instructions,
 * permitting more vectors to be stored in the same space; experimental
 * feature with added overhead.
 */
#if !defined(CTX_BITPACK)
#define CTX_BITPACK           1
#endif

/* CTX_PARSER_FIXED_TEMP selects how the parser's scratch buffer is managed:
 *   1 - CTX_PARSER_MAXLEN is the fixed max string length and no allocations
 *       happen beyond creating the parser,
 *   0 - the scratch buffer for parsing is a separate dynamically growing
 *       buffer, that maxes out at CTX_PARSER_MAXLEN.
 */
#ifndef CTX_PARSER_FIXED_TEMP
#define CTX_PARSER_FIXED_TEMP 0
#endif

/* Maximum texture/string size supported by the parser.
 *
 * The products are parenthesized so the macro behaves as a single value in
 * any expression; without the parentheses `x % CTX_PARSER_MAXLEN` or
 * `x / CTX_PARSER_MAXLEN` would bind to the first factor only.
 */
#ifndef CTX_PARSER_MAXLEN
#if CTX_PARSER_FIXED_TEMP
#define CTX_PARSER_MAXLEN  (1024*128)        // 128kb
#else
#define CTX_PARSER_MAXLEN  (1024*1024*16)    // 16mb
#endif
#endif

/* Rasterizer tuning switches; each can be overridden by defining it before
 * this point. NOTE(review): the first two carry no description here. */
#if !defined(CTX_RASTERIZER_ALLOW_DIRECT)
#define CTX_RASTERIZER_ALLOW_DIRECT 1
#endif

#if !defined(CTX_RASTERIZER_BEZIER_FIXED_POINT)
#define CTX_RASTERIZER_BEZIER_FIXED_POINT 1
#endif

/* Matters most for tiny rectangles, where it shaves overhead; for larger
 * rectangles a ~15-20% performance win can be seen. */
#if !defined(CTX_FAST_FILL_RECT)
#define CTX_FAST_FILL_RECT 1
#endif

#if !defined(CTX_FAST_STROKE_RECT)
#define CTX_FAST_STROKE_RECT 1
#endif


/* Rendering feature switches; every one keeps a value supplied by the
 * includer and otherwise falls back to the default below. */
#if !defined(CTX_COMPOSITING_GROUPS)
#define CTX_COMPOSITING_GROUPS   1
#endif

/* maximum nesting level of compositing groups */
#if !defined(CTX_GROUP_MAX)
#define CTX_GROUP_MAX             8
#endif

#if !defined(CTX_ENABLE_CLIP)
#define CTX_ENABLE_CLIP           1
#endif

/* Use a 1bit clip buffer, saving RAM on microcontrollers; other rendering
 * will still be antialiased. */
#if !defined(CTX_1BIT_CLIP)
#define CTX_1BIT_CLIP             0
#endif


#if !defined(CTX_ENABLE_SHADOW_BLUR)
#define CTX_ENABLE_SHADOW_BLUR    0
#endif

/* Fudge geometry slightly with a smoother blend between edges, busting some
 * SDF artifacts apparent in acute angles. */
#if !defined(CTX_RASTERIZER_BLUR_FUDGE)
#define CTX_RASTERIZER_BLUR_FUDGE 0
#endif

#if !defined(CTX_GRADIENTS)
#define CTX_GRADIENTS             1
#endif

#if !defined(CTX_ALIGNED_STRUCTS)
#define CTX_ALIGNED_STRUCTS       1
#endif

#if !defined(CTX_GRADIENT_CACHE)
#define CTX_GRADIENT_CACHE        1
#endif


#if !defined(CTX_FONT_SHAPE_CACHE)
#define CTX_FONT_SHAPE_CACHE      0
#endif

/* Size of the per-context static temp helper buffer used for temporary
 * shapings, so that these do not need to incur an allocation. */
#if !defined(CTX_SHAPE_GLYPHS)
#define CTX_SHAPE_GLYPHS     32
#endif

#if !defined(CTX_FONTS_FROM_FILE)
#define CTX_FONTS_FROM_FILE  0
#endif

/* CTX_GET_CONTENTS defaults to on exactly when fonts are loaded from file */
#if !defined(CTX_GET_CONTENTS)
  #if CTX_FONTS_FROM_FILE
    #define CTX_GET_CONTENTS    1
  #else
    #define CTX_GET_CONTENTS    0
  #endif
#endif

#if !defined(CTX_FORMATTER)
#define CTX_FORMATTER       1
#endif

#if !defined(CTX_PARSER)
#define CTX_PARSER          1
#endif

#if !defined(CTX_CURRENT_PATH)
#define CTX_CURRENT_PATH    1
#endif

#if !defined(CTX_VT)
#define CTX_VT              0
#endif

/* With CTX_MATH enabled, which it is by default, ctx uses its own
 * implementations of math functions (sqrt, sin, cos, atan2f etc.) instead of
 * relying on math.h. The possible inlining gives a slight speed gain, and on
 * embedded platforms it guarantees that no double precision math is done.
 */
#if !defined(CTX_MATH)
#define CTX_MATH            1
#endif

/* Logging hook; compiled out by default - the arguments are discarded. */
#define ctx_log(fmt, ...)
//#define ctx_log(str, a...) fprintf(stderr, str, ##a)

/* the initial journal size - for both rasterizer
 * edgelist and drawlist.
 */
#ifndef CTX_MIN_JOURNAL_SIZE
#define CTX_MIN_JOURNAL_SIZE      512
#endif

/* The maximum size we permit the drawlist to grow to,
 * the memory used is this number * 9, where 9 is sizeof(CtxEntry)
 *
 * The product is parenthesized so that the macro is safe next to operators
 * of equal or higher precedence (`x % CTX_MAX_JOURNAL_SIZE`, `x / ...`).
 */
#ifndef CTX_MAX_JOURNAL_SIZE
//#define CTX_MAX_JOURNAL_SIZE   CTX_MIN_JOURNAL_SIZE
#define CTX_MAX_JOURNAL_SIZE (1024*1024*8)
#endif

#ifndef CTX_DRAWLIST_STATIC
#define CTX_DRAWLIST_STATIC  0
#endif

/* Initial size of the edge list. Parenthesized so that the expansion acts
 * as one value in any expression; CTX_MAX_EDGE_LIST_SIZE below expands to
 * this macro by default and benefits from the same protection.
 */
#ifndef CTX_MIN_EDGE_LIST_SIZE
#define CTX_MIN_EDGE_LIST_SIZE   (1024*4)
#endif


// 3 5 or 15 - this is the AA used for worst-case scanlines; with crossings or edge start|ends
#ifndef CTX_RASTERIZER_AA
#define CTX_RASTERIZER_AA  5  // vertical-AA of CTX_ANTIALIAS_DEFAULT
#endif

/* The maximum complexity of a single path
 */
#ifndef CTX_MAX_EDGE_LIST_SIZE
#define CTX_MAX_EDGE_LIST_SIZE  CTX_MIN_EDGE_LIST_SIZE
#endif

/* Number of entries in keydb; entries are "copy-on-change" between states. */
#if !defined(CTX_MAX_KEYDB)
#define CTX_MAX_KEYDB 64
#endif

/* Without 32 bit segments, clipping problems might occur when drawing far
 * outside the viewport or with large translate amounts. On micro controllers
 * you most often will want this set to 0. */
#if !defined(CTX_32BIT_SEGMENTS)
#define CTX_32BIT_SEGMENTS 1
#endif

/* whether we dither or not for gradients */
#if !defined(CTX_DITHER)
#define CTX_DITHER 0
#endif

/* With 0 only source-over, clear and copy will work. The API is still
 * present, but the backend is limited - for use when measuring size and
 * possibly in severely constrained ROMs. */
#if !defined(CTX_BLENDING_AND_COMPOSITING)
#define CTX_BLENDING_AND_COMPOSITING 1
#endif

/* this forces the inlining of some performance critical paths. */
#if !defined(CTX_FORCE_INLINES)
#define CTX_FORCE_INLINES               1
#endif

/* Create a one-off inlined inner loop for the normal blend mode (for
 * floating point and grayscale; for RGBA8 manual loops override). Disabling
 * this should speed up compiles, at a penalty for the given formats. */
#if !defined(CTX_INLINED_NORMAL)
#define CTX_INLINED_NORMAL      0
#endif

/* do not use manual RGBA8 code but rely on ctx inline templating */
#if !defined(CTX_INLINED_NORMAL_RGBA8)
#define CTX_INLINED_NORMAL_RGBA8  0
#endif

/* Forced on: any value supplied by the includer is discarded before the
 * define, so this switch is effectively not configurable.
 * Marginal improvement for some modes, maybe get rid of this? */
#undef CTX_RASTERIZER_SWITCH_DISPATCH
#define CTX_RASTERIZER_SWITCH_DISPATCH  1

#if !defined(CTX_U8_TO_FLOAT_LUT)
#define CTX_U8_TO_FLOAT_LUT  0
#endif

#if !defined(CTX_INLINED_GRADIENTS)
#define CTX_INLINED_GRADIENTS   1
#endif

#if !defined(CTX_BRAILLE_TEXT)
#define CTX_BRAILLE_TEXT        0
#endif

/* Build code paths for grayscale rasterization; this makes clipping
 * faster. */
#if !defined(CTX_NATIVE_GRAYA8)
#define CTX_NATIVE_GRAYA8       0
#endif

/* enable CMYK rasterization targets */
#if !defined(CTX_ENABLE_CMYK)
#define CTX_ENABLE_CMYK         1
#endif

/* enable color management, slightly increases CtxColor struct size, should
 * be disabled for microcontrollers.
 */


/* Event handling and input related limits. */
#if !defined(CTX_EVENTS)
#define CTX_EVENTS              1
#endif

#if !defined(CTX_MAX_DEVICES)
#define CTX_MAX_DEVICES 16
#endif

#if !defined(CTX_MAX_KEYBINDINGS)
#define CTX_MAX_KEYBINDINGS 256
#endif


#if !defined(CTX_PROTOCOL_U8_COLOR)
#define CTX_PROTOCOL_U8_COLOR 0
#endif

/* terminal events default to on exactly when events are enabled */
#if !defined(CTX_TERMINAL_EVENTS)
  #if CTX_EVENTS
    #define CTX_TERMINAL_EVENTS 1
  #else
    #define CTX_TERMINAL_EVENTS 0
  #endif
#endif

#if !defined(CTX_LIMIT_FORMATS)
#define CTX_LIMIT_FORMATS       0
#endif

#if !defined(CTX_ENABLE_FLOAT)
#define CTX_ENABLE_FLOAT        0
#endif

/* By default ctx includes all pixel formats. On microcontrollers it can be
 * useful to slim down code and runtime size by only defining the formats
 * that are used: set CTX_LIMIT_FORMATS to 1 and manually add CTX_ENABLE_
 * flags for each of them.
 */
#if !CTX_LIMIT_FORMATS

/* grayscale formats */
#define CTX_ENABLE_GRAY1                1
#define CTX_ENABLE_GRAY2                1
#define CTX_ENABLE_GRAY4                1
#define CTX_ENABLE_GRAY8                1
#define CTX_ENABLE_GRAYA8               1
#define CTX_ENABLE_GRAYF                1
#define CTX_ENABLE_GRAYAF               1

/* RGB formats */
#define CTX_ENABLE_RGB8                 1
#define CTX_ENABLE_RGBA8                1
#define CTX_ENABLE_BGRA8                1
#define CTX_ENABLE_BGR8                 1
#define CTX_ENABLE_RGB332               1
#define CTX_ENABLE_RGB565               1
#define CTX_ENABLE_RGB565_BYTESWAPPED   1
#define CTX_ENABLE_RGBAF                1
/* float support is forced on, replacing any earlier value */
#undef CTX_ENABLE_FLOAT
#define CTX_ENABLE_FLOAT                1
#define CTX_ENABLE_YUV420               1

/* CMYK formats, only when CMYK support is enabled */
#if CTX_ENABLE_CMYK
#define CTX_ENABLE_CMYK8                1
#define CTX_ENABLE_CMYKA8               1
#define CTX_ENABLE_CMYKAF               1
#endif

#elif CTX_NATIVE_GRAYA8
/* limited formats, but native grayscale rasterization was requested */
#define CTX_ENABLE_GRAYA8               1
#define CTX_ENABLE_GRAY8                1
#endif

/* When enabled, pure purple is transparent in RGB565 - for a ~15% overall
 * performance hit. */
#if !defined(CTX_RGB565_ALPHA)
#define CTX_RGB565_ALPHA                0
#endif

/* When enabled, pure purple is transparent in RGB332 - for a ~15% overall
 * performance hit. */
#if !defined(CTX_RGB332_ALPHA)
#define CTX_RGB332_ALPHA                0
#endif

/* How many font-strings to cache the resolution for, in a static
 * hash-table. */
#if !defined(CTX_RESOLVED_FONTS)
#define CTX_RESOLVED_FONTS 8
#endif

/* By including ctx-font-regular.h or ctx-font-mono.h, the built-in fonts
 * using ctx drawlist encoding are enabled. The ctx font engine defaults to
 * on unless CTX_NO_FONTS is defined.
 */
#if !defined(CTX_NO_FONTS) && !defined(CTX_FONT_ENGINE_CTX)
#define CTX_FONT_ENGINE_CTX        1
#endif

#if !defined(CTX_ONE_FONT_ENGINE)
#define CTX_ONE_FONT_ENGINE 0
#endif


#if !defined(CTX_FONT_ENGINE_CTX_FS)
#define CTX_FONT_ENGINE_CTX_FS 0
#endif

/* harfbuzz support defaults to on when harfbuzz has been included before
 * ctx (detected through its HB_H guard), and to off otherwise. */
#if !defined(CTX_HARFBUZZ)
  #if defined(HB_H)
    #define CTX_HARFBUZZ 1
  #else
    #define CTX_HARFBUZZ 0
  #endif
#endif


/* The harfbuzz font engine defaults to on with harfbuzz; without harfbuzz
 * it is defined to 0 unconditionally. */
#if !CTX_HARFBUZZ
#define CTX_FONT_ENGINE_HARFBUZZ   0
#elif !defined(CTX_FONT_ENGINE_HARFBUZZ)
#define CTX_FONT_ENGINE_HARFBUZZ   1
#endif

/* Final say on CTX_BABL: without babl's _BABL_H guard it is forced to 0,
 * whatever it was set to before; with babl included it keeps an explicit
 * value and otherwise becomes 1. */
#if !defined(_BABL_H)
#undef CTX_BABL
#define CTX_BABL 0
#elif !defined(CTX_BABL)
#define CTX_BABL 1
#endif

#if !defined(CTX_ALWAYS_USE_NEAREST_FOR_SCALE1)
#define CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 0
#endif

/* Include the bitpack packer; off by default, which keeps code size down. */
#if !defined(CTX_BITPACK_PACKER)
#define CTX_BITPACK_PACKER 0
#endif

/* Enable the RGBA8 intermediate format whenever one of the indirectly
 * implemented pixel formats is enabled; any previous value of
 * CTX_ENABLE_RGBA8 is replaced by 1.
 */
#if CTX_ENABLE_GRAY1 || CTX_ENABLE_GRAY2 || CTX_ENABLE_GRAY4 || \
    CTX_ENABLE_RGB565 || CTX_ENABLE_RGB565_BYTESWAPPED || \
    CTX_ENABLE_RGB8 || CTX_ENABLE_RGB332
  #undef CTX_ENABLE_RGBA8
  #define CTX_ENABLE_RGBA8  1
#endif

/* Any enabled floating point pixel format forces CTX_ENABLE_FLOAT to 1,
 * replacing whatever value it had. The check is on the format macros being
 * defined, not on their value.
 *
 * NOTE(review): CTX_ENABLE_CMYKF may still get defined further down (as the
 * intermediate for CMYK8/CMYKA8), i.e. after this check has run - confirm
 * that this ordering is intended.
 */
#if defined(CTX_ENABLE_CMYKF)  || defined(CTX_ENABLE_GRAYF) || \
    defined(CTX_ENABLE_GRAYAF) || defined(CTX_ENABLE_RGBAF) || \
    defined(CTX_ENABLE_CMYKAF)
  #undef CTX_ENABLE_FLOAT
  #define CTX_ENABLE_FLOAT 1
#endif


/* Enable cmykf, which is the cmyk intermediate format, as soon as one of
 * the 8 bit CMYK formats is defined.
 */
#if defined(CTX_ENABLE_CMYK8) || defined(CTX_ENABLE_CMYKA8)
  #undef CTX_ENABLE_CMYKF
  #define CTX_ENABLE_CMYKF  1
#endif

/* NOTE(review): CTX_ENABLE_CMYKF8 is not set anywhere in the surrounding
 * configuration - possibly a misspelling of another CMYK format macro;
 * confirm before relying on it. */
#if defined(CTX_ENABLE_CMYKF8)
  #undef CTX_ENABLE_CMYK
  #define CTX_ENABLE_CMYK   1
#endif

/* pi as a single precision constant; defined unconditionally */
#define CTX_PI                              3.141592653589793f
#if !defined(CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS)
#define CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS  (128)
#endif

/* Capacity limits and cache sizes; each keeps a value supplied by the
 * includer and otherwise falls back to the default below. */
#if !defined(CTX_MAX_FONTS)
#define CTX_MAX_FONTS            32
#endif

#if !defined(CTX_GLYPH_CACHE)
#define CTX_GLYPH_CACHE 1
#endif

#if !defined(CTX_GLYPH_CACHE_SIZE)
#define CTX_GLYPH_CACHE_SIZE     128
#endif

#if !defined(CTX_MAX_STATES)
#define CTX_MAX_STATES           16
#endif

#if !defined(CTX_MAX_EDGES)
#define CTX_MAX_EDGES            255
#endif

#if !defined(CTX_MAX_PENDING)
#define CTX_MAX_PENDING          128
#endif

#if !defined(CTX_MAX_TEXTURES)
#define CTX_MAX_TEXTURES         32
#endif

#if !defined(CTX_HASH_ROWS)
#define CTX_HASH_ROWS            6
#endif
#if !defined(CTX_HASH_COLS)
#define CTX_HASH_COLS            5
#endif

#if !defined(CTX_INLINE_FILL_RULE)
#define CTX_INLINE_FILL_RULE 1
#endif

/* runtime is max of cores/2 and this */
#if !defined(CTX_MAX_THREADS)
#define CTX_MAX_THREADS          8
#endif

#if !defined(CTX_FRAGMENT_SPECIALIZE)
#define CTX_FRAGMENT_SPECIALIZE 1
#endif

/* Increasing this to 2048 removes artifacts in the top half of the
 * res-diagram - but reduces the maximum available buffer width. */
#define CTX_RASTERIZER_EDGE_MULTIPLIER  2048

/* Normalise CTX_IMPLEMENTATION to a plain 0/1 so it can be used in #if:
 * merely defining it (with any value, or none) yields 1, otherwise 0. */
#if defined(CTX_IMPLEMENTATION)
#undef CTX_IMPLEMENTATION
#define CTX_IMPLEMENTATION 1
#else
#define CTX_IMPLEMENTATION 0
#endif

#if !defined(CTX_MAX_SCANLINE_LENGTH)
#define CTX_MAX_SCANLINE_LENGTH 4096
#endif


/* was 128 - each kb is kind of big */
#if !defined(CTX_MAX_CBS)
#define CTX_MAX_CBS              64
#endif

/* Causes a CTX_MAX_SCANLINE_LENGTH buffer of 255 bytes to be part of the
 * rasterizer.
 * NOTE(review): the lower-case `static_` prefix is unlike every other
 * option in this file - possibly the result of a search/replace; confirm
 * the spelling that the rest of the code tests for before renaming. */
#if !defined(static_OPAQUE)
#define static_OPAQUE 1
#endif

/* Normalise an explicitly set CTX_RASTERIZER to 1 when it is non-zero, or
 * when it is 0 but a backend that needs the rasterizer (SDL, fb, headless)
 * is enabled. An undefined CTX_RASTERIZER is left undefined, and an explicit
 * 0 without such a backend stays 0. */
#if defined(CTX_RASTERIZER) && \
    (CTX_RASTERIZER || CTX_SDL || CTX_FB || CTX_HEADLESS)
#undef CTX_RASTERIZER
#define CTX_RASTERIZER 1
#endif

/* The interactive backends (SDL, fb, headless) require event support, so
 * CTX_EVENTS is forced to 1 when any of them is enabled.
 *
 * The #undef is unconditional: guarding it with `#if CTX_EVENTS` skipped it
 * when CTX_EVENTS was defined as 0, making the following #define an
 * incompatible macro redefinition. Undefining an undefined macro is fine.
 */
#if CTX_SDL || CTX_FB || CTX_HEADLESS
#undef CTX_EVENTS
#define CTX_EVENTS 1
#endif




/* The headless backend is switched on implicitly by the fb, SDL and KMS
 * backends unless CTX_HEADLESS was set explicitly; otherwise it stays
 * undefined here. */
#if !defined(CTX_HEADLESS) && (CTX_FB || CTX_SDL || CTX_KMS)
#define CTX_HEADLESS 1
#endif


#if !defined(CTX_GRADIENT_CACHE_ELEMENTS)
#define CTX_GRADIENT_CACHE_ELEMENTS 256
#endif

#if !defined(CTX_PARSER_MAX_ARGS)
#define CTX_PARSER_MAX_ARGS 20
#endif

/* the dash array limit follows the parser argument limit by default */
#if !defined(CTX_MAX_DASHES)
#define CTX_MAX_DASHES CTX_PARSER_MAX_ARGS
#endif

#if !defined(CTX_SCREENSHOT)
#define CTX_SCREENSHOT 0
#endif

#if !defined(CTX_ALSA)
#define CTX_ALSA 0
#endif

#if !defined(CTX_AUDIO)
#define CTX_AUDIO 0
#endif

/* ALSA is switched back off when audio as a whole is disabled */
#if CTX_AUDIO==0 && CTX_ALSA
#undef CTX_ALSA
#define CTX_ALSA 0
#endif

#if !defined(CTX_CURL)
#define CTX_CURL 0
#endif

/* On ESP platforms the FreeRTOS headers are pulled in regardless of the
 * threading configuration. */
#if ESP_PLATFORM
#include <freertos/FreeRTOS.h>
#include <freertos/task.h>
#endif

/* Threading shim.
 *
 * The rest of the code uses names mirroring the C11 <threads.h> API
 * (mtx_*, cnd_*, thrd_*). They are provided here as macros:
 *   - with CTX_THREADS: mapped onto pthreads,
 *   - without CTX_THREADS on a pico build: mutexes map onto pico/mutex.h,
 *     condition variables and threads are no-ops,
 *   - otherwise: everything is a no-op and the types are size_t
 *     placeholders.
 * NOTE(review): because these are macros they would clash with a real
 * <threads.h> if that header were included as well.
 */
#if CTX_THREADS
#if ESP_PLATFORM
#include <pthread.h>

/* older ESP-IDF releases (before 5.2.0) additionally get esp_pthread.h */
#if ESP_IDF_VERSION < ESP_IDF_VERSION_VAL(5, 2, 0)
#include <esp_pthread.h>
#endif

#else
#include <pthread.h>
#endif
/* pthread mapping; mtx_init(m, mtx_plain) becomes
 * pthread_mutex_init(m, NULL), i.e. default mutex attributes */
#define mtx_lock pthread_mutex_lock
#define mtx_unlock pthread_mutex_unlock
#define mtx_t pthread_mutex_t
#define cnd_t pthread_cond_t
#define mtx_plain NULL
#define mtx_init pthread_mutex_init
#define cnd_init(a) pthread_cond_init(a,NULL)
#define cnd_wait pthread_cond_wait
#define cnd_broadcast pthread_cond_broadcast
/* the thread is created with default attributes (NULL) */
#define thrd_create(tid, tiled_render_fun, args) pthread_create(tid, NULL, tiled_render_fun, args)
#define thrd_t pthread_t
#else

#if PICO_BUILD
 
#include <pico/mutex.h>

/* pico mutex mapping; the second mtx_init argument is ignored */
#define mtx_t          mutex_t
#define mtx_plain      NULL
#define mtx_init(a,b)  mutex_init(a)
#define mtx_lock(a)    mutex_enter_blocking(a)
#define mtx_unlock(a)  mutex_exit(a)


#else

/* no threading at all: locking expands to nothing */
#define mtx_lock(a)
#define mtx_unlock(a)
#define mtx_t size_t
#define mtx_init(a,b)
#define mtx_plain 0

#endif

/* without CTX_THREADS, condition variables expand to nothing and
 * thrd_create() evaluates to 0 without starting anything */
#define cnd_t size_t
#define cnd_init(a)
#define cnd_wait(a,b)
#define cnd_broadcast(c)
#define thrd_create(tid, tiled_render_fun, args) 0
#define thrd_t size_t

#endif

/* CTX_SIMD_SUFFIX(symbol) decorates a symbol name for the current build.
 * When the includer supplies it, this is a SIMD build (CTX_SIMD_BUILD 1)
 * and an already defined CTX_COMPOSITE is normalised to 1. Otherwise the
 * generic variant is built: symbols get a `_generic` suffix and
 * CTX_SIMD_BUILD is 0.
 */
#if defined(CTX_SIMD_SUFFIX)

#define CTX_SIMD_BUILD 1
#if defined(CTX_COMPOSITE)
#undef CTX_COMPOSITE
#define CTX_COMPOSITE 1
#endif

#else

#define CTX_SIMD_BUILD 0
#define CTX_SIMD_SUFFIX(symbol) symbol##_generic

#endif


/* Compositing code defaults to being built exactly when the rasterizer is
 * built; an explicit CTX_COMPOSITE always wins. */
#if !defined(CTX_COMPOSITE)
  #if CTX_RASTERIZER
    #define CTX_COMPOSITE 1
  #else
    #define CTX_COMPOSITE 0
  #endif
#endif

#if !defined(CTX_MAX_GRADIENT_STOPS)
#define CTX_MAX_GRADIENT_STOPS 16
#endif

#if !defined(CTX_BRANCH_HINTS)
#define CTX_BRANCH_HINTS  0
#endif

/* CTX_WASM reflects whether this is an emscripten build */
#if defined(EMSCRIPTEN)
#define CTX_WASM 1
#else
#define CTX_WASM 0
#endif

/* becomes max clients.. */
#if !defined(CTX_MAX_LISTEN_FDS)
#define CTX_MAX_LISTEN_FDS 128
#endif

/* WASM builds use a fixed configuration: no threads and no headless
 * backend, with events, parser and rasterizer forced on. */
#if CTX_WASM
  #undef CTX_THREADS
  #define CTX_THREADS 0
  #undef CTX_HEADLESS
  #define CTX_HEADLESS 0
  #undef CTX_EVENTS
  #define CTX_EVENTS 1
  #undef CTX_PARSER
  #define CTX_PARSER 1
  #undef CTX_RASTERIZER
  #define CTX_RASTERIZER 1
#endif

#if !defined(CTX_PDF)
#define CTX_PDF 0
#endif

/* Trim the bundled miniz: without audio the inflate APIs are dropped, and
 * the deflate APIs too unless image writing needs them. Archive and stdio
 * support are always left out. */
#if CTX_AUDIO==0
  #if !CTX_IMAGE_WRITE
    #define MINIZ_NO_DEFLATE_APIS
  #endif
  #define MINIZ_NO_INFLATE_APIS
#endif

#define MINIZ_NO_ARCHIVE_APIS
#define MINIZ_NO_STDIO


//#define uncompress tinf_uncompress
//#define Z_OK TINF_OK
//#define Z_BUF_ERROR TINF_BUF_ERROR
//#define Z_DATA_ERROR TINF_DATA_ERROR

#if !defined(CTX_RAW_KB_EVENTS)
#define CTX_RAW_KB_EVENTS 0
#endif


#if !defined(CTX_BAREMETAL)
#define CTX_BAREMETAL 0
#endif


/* When building the implementation, configure the bundled squoze unless the
 * includer has already set SQUOZE_IMPLEMENTATION. */
#if CTX_IMPLEMENTATION && !defined(SQUOZE_IMPLEMENTATION)
#define SQUOZE_IMPLEMENTATION         1
#define SQUOZE_LIMIT_IMPLEMENTATIONS  1
#define SQUOZE_IMPLEMENTATION_32_UTF8 1
#define SQUOZE_USE_INTERN             0
#endif

#if !defined(CTX_PTY)
#define CTX_PTY 1
#endif

/* XXX : these code paths can crash in fuzzing */
#if !defined(CTX_STROKE_1PX)
#define CTX_STROKE_1PX    0
#endif

#if !defined(CTX_PICO)
#define CTX_PICO 0
#endif


#if !defined(CTX_GSTATE_PROTECT)
#define CTX_GSTATE_PROTECT  1
#endif

/* Per-section optimisation level overrides; these only apply with gcc,
 * not clang. */
#if !defined(CTX_COMPOSITE_O3)
#define CTX_COMPOSITE_O3 0
#endif

#if !defined(CTX_COMPOSITE_O2)
#define CTX_COMPOSITE_O2 0
#endif

#if !defined(CTX_RASTERIZER_O3)
#define CTX_RASTERIZER_O3 0
#endif

#if !defined(CTX_RASTERIZER_O2)
#define CTX_RASTERIZER_O2 0
#endif

/* the KMS and fb backends force raw keyboard events on */
#if CTX_KMS || CTX_FB
  #undef CTX_RAW_KB_EVENTS
  #define CTX_RAW_KB_EVENTS 1
#endif

#if !defined(CTX_YUV_LUTS)
#define  CTX_YUV_LUTS 0
#endif


#if !defined(CTX_VT_STYLE_SIZE)
#define CTX_VT_STYLE_SIZE   64
#endif

/* Default for the ctx_assert() switch. Note that ctx_assert() itself is
 * defined earlier in this file, before this default takes effect. */
#if !defined(CTX_ASSERT)
#define CTX_ASSERT               0
#endif


#if !defined(CTX_SCANBIN)
#define CTX_SCANBIN 0
#endif

/* name of the function used for loading files; can be overridden */
#if !defined(CTX_LOAD_FILE)
#define CTX_LOAD_FILE ___ctx_file_get_contents
#endif

#if !defined(CTX_MAGIC)
#define CTX_MAGIC 0
#endif

#if !defined(CTX_CSS)
#define CTX_CSS 0
#endif

#if !defined(CTX_SVG_FREE_AGE)
#define CTX_SVG_FREE_AGE 62
#endif


/* Whether a drawlist is kept per terminal-tab; this causes more memory
 * usage, but should be faster with many tabs with graphical clients.
 */
#if !defined(CTX_VT_DRAWLIST)
#define CTX_VT_DRAWLIST 0
#endif

/* When enabled, tabs that have the capability to launch sub-clients can do
 * so with an APC sequence.
 */
#if !defined(CTX_VT_LAUNCH)
#define CTX_VT_LAUNCH 0
#endif


#if !defined(CTX_VT_LOG)
#define CTX_VT_LOG 0
#endif

#if !defined(CTX_VT_SIXELS)
#define CTX_VT_SIXELS 1
#endif

#if !defined(CTX_VT_GFX)
#define CTX_VT_GFX  1
#endif

#if !defined(CTX_FB_KDSETMODE)
#define CTX_FB_KDSETMODE 1
#endif

#if !defined(CTX_TYPING_POINTER_IGNORE_MS)
#define CTX_TYPING_POINTER_IGNORE_MS 700
#endif

/* fixed point scale and the matching shift: 1024 == 1 << 10 */
#define CTX_FIX_SCALE 1024
#define CTX_FIX_SHIFT 10

/* Placeholder wifi credentials; meant to be overridden by the includer. */
#if !defined(CTX_WIFI_NAME)
#define CTX_WIFI_NAME     "test"
#endif
#if !defined(CTX_WIFI_PASSWORD)
#define CTX_WIFI_PASSWORD "testtesttest"
#endif

/* when <assert.h> has not been included, assert() compiles to nothing */
#if !defined(assert)
#define assert(a)
#endif

#if !defined(CTX_NET)
#define CTX_NET 0
#endif

#if !defined(CTX_FONTGEN)
#define CTX_FONTGEN 0
#endif

#if 0
#if CTX_FONT_ENGINE_HARFBUZZ==0
#undef CTX_FONTGEN
#define CTX_FONTGEN 0
#endif
#endif


#if !defined(CTX_DECOMPRESSOR)
#define CTX_DECOMPRESSOR 0
#endif

#if !defined(CTX_COMPRESS)
#define CTX_COMPRESS 0
#endif

/* Needle size used by the compressor. Compressing tiger.ctxc with only the
 * current frame as reference gives these sizes:
 *   5 : 56760
 *   6 : 55493
 *   7 : 56188
 *   8 : 58253
 */
#if !defined(CTX_COMPRESS_NEEDLE_SIZE)
#define CTX_COMPRESS_NEEDLE_SIZE 6
#endif

/* Hash table size used by the compressor; smaller would be good for memory
 * constrained devices where we still want this. Same benchmark with tiger:
 *   16xxx - 55324
 *    8192 - 55385
 *    4096 - 55493
 *    2048 - 55391
 *    1024 - 55632
 *     512 - 56269
 */
#if !defined(CTX_COMPRESS_HT_SIZE)
#define CTX_COMPRESS_HT_SIZE  (1024)
#endif

/* networking is switched off when the formatter is not built */
#if CTX_FORMATTER==0
  #undef CTX_NET
  #define CTX_NET 0
#endif

/* Feature test macros.
 * NOTE(review): system headers are already included near the top of this
 * file, so these may come too late to influence them - confirm. */
#if !defined(_DEFAULT_SOURCE)
#define _DEFAULT_SOURCE
#endif
#if !defined(_XOPEN_SOURCE)
#define _XOPEN_SOURCE 600
#endif

#ifndef CTX_STRING_H
#define CTX_STRING_H

/* Growable string used by the ctx_string_* functions.
 * NOTE(review): field meanings below are read off the names and the
 * accessor prototypes - confirm against the implementation. */
typedef struct _CtxString
{
  char *str;              /* character data */
  int   length;           /* presumably the length in bytes */
  int   utf8_length;      /* presumably the length in code points */
  int   allocated_length; /* presumably the capacity of str */
  int   is_line;
} CtxString;

/* CtxString API. Only declarations are visible here; the grouping comments
 * below are read off the names and signatures and should be confirmed
 * against the implementation. */

/* construction and destruction
 * NOTE(review): ctx_string_dissolve presumably releases the CtxString and
 * hands the character buffer to the caller; freealloc of ctx_string_free
 * presumably selects whether the character buffer is freed too - confirm
 * ownership rules before relying on either. */
CtxString   *ctx_string_new_with_size  (const char *initial, int initial_size);
CtxString   *ctx_string_new            (const char *initial);
CtxString   *ctx_string_new_printf (const char *format, ...);
char       *ctx_string_dissolve       (CtxString *string);
void        ctx_string_free           (CtxString *string, int freealloc);
/* accessors */
const char *ctx_string_get            (CtxString *string);
uint32_t    ctx_string_get_unichar    (CtxString *string, int pos);
int         ctx_string_get_length     (CtxString *string);
int         ctx_string_get_utf8length (CtxString *string);
/* replacing and appending content */
void        ctx_string_set            (CtxString *string, const char *new_string);
void        ctx_string_clear          (CtxString *string);
void        ctx_string_append_str     (CtxString *string, const char *str);
void        ctx_string_append_byte    (CtxString *string, char  val);
void        ctx_string_append_string  (CtxString *string, CtxString *string2);
void        ctx_string_append_unichar (CtxString *string, unsigned int unichar);
void        ctx_string_append_data    (CtxString *string, const char *data, int len);

void        ctx_string_pre_alloc       (CtxString *string, int size);
void        ctx_string_append_utf8char (CtxString *string, const char *str);
void        ctx_string_append_printf  (CtxString *string, const char *format, ...);
/* positional editing; NOTE(review): pos is presumably counted in code
 * points rather than bytes - confirm */
void        ctx_string_replace_utf8   (CtxString *string, int pos, const char *new_glyph);
void        ctx_string_insert_utf8    (CtxString *string, int pos, const char *new_glyph);

void        ctx_string_insert_unichar (CtxString *string, int pos, uint32_t unichar);
void        ctx_string_replace_unichar (CtxString *string, int pos, uint32_t unichar);
void        ctx_string_remove         (CtxString *string, int pos);
/* NOTE(review): presumably returns a newly allocated formatted string that
 * the caller frees - confirm */
char       *ctx_strdup_printf         (const char *format, ...);
void        ctx_string_append_int     (CtxString *string, int val);
void        ctx_string_append_float   (CtxString *string, float val);

/* boolean convenience constants, unless something else already provided
 * them */
#if !defined(TRUE)
#define TRUE 1
#endif
#if !defined(FALSE)
#define FALSE 0
#endif

#endif
#ifndef _CTX_INTERNAL_FONT_
#define _CTX_INTERNAL_FONT_

#ifndef CTX_FONT_ascii
/* glyph index: 
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghi
  jklmnopqrstuvwxyz{|}~  */
static const struct __attribute__ ((packed)) {uint8_t code; uint32_t a; uint32_t b;}
ctx_font_ascii[]={
{15, 0x00000000, 0x000009b7},/* length:2487 CTX_SUBDIV:8 CTX_BAKE_FONT_SIZE:160 */
{'(', 0x00000010, 0x00000002},/* Roboto Regular*/
{32, 0x6f626f52, 0x52206f74},
{'e', 0x616c7567, 0x00000072},
{')', 0x00000010, 0x00000002},
{'(', 0x0000004b, 0x00000009},/* Apache Licence, Version 2.0
                                Copyright 2014 Christian Robertson - Apache 2*/
{32, 0x63617041, 0x4c206568},
{'i', 0x636e6563, 0x56202c65},
{'e', 0x6f697372, 0x2e32206e},
{'0', 0x706f430a, 0x67697279},
{'h', 0x30322074, 0x43203431},
{'h', 0x74736972, 0x206e6169},
{'R', 0x7265626f, 0x6e6f7374},
{32, 0x7041202d, 0x65686361},
{32, 0x00000032, 0x00000000},
{')', 0x0000004b, 0x00000009},
{'@', 0x00000020, 0x000027b0},/*                 x-advance: 39.687500 */
{'M', 0x00000000, 0x00000000},
{'[', 0x00540020, 0xfffffffb},/*kerning    T : -0.019531 */
{'@', 0x00000021, 0x00002940},/*        !        x-advance: 41.250000 */
{'M', 0x41e1a000, 0xc2e38000},
{'l', 0xbf820000, 0x42a34800},
{'4', 0x0000ff98, 0xfd73fff8},
{'l', 0x41728000, 0x00000000},
{'M', 0x41494000, 0xc0e88000},
{'8', 0xd111e400, 0xed32ed11},
{'8', 0x13320021, 0x2f111311},
{'8', 0x2eef1a00, 0x13ce13ef},
{'8', 0xedce00df, 0xd2efedef},
{'@', 0x00000022, 0x00003340},/*        "        x-advance: 51.250000 */
{'M', 0x41adc000, 0xc2dac000},
{'l', 0xc0160000, 0x41df2000},
{'l', 0xc10ac000, 0x00000000},
{'4', 0xfecc0000, 0x00000058},
{'l', 0x00000000, 0x412a0000},
{'M', 0x422b4000, 0xc2dac000},
{'l', 0xc0160000, 0x41df2000},
{'l', 0xc10ac000, 0x00000000},
{'l', 0x00000000, 0xc21a1000},
{'l', 0x41304000, 0x00000000},
{'l', 0x00000000, 0x412a0000},
{'[', 0x00220022, 0xfffffff3},/*kerning  " " : -0.050781 */
{'[', 0x00270022, 0xfffffff3},/*kerning  " ' : -0.050781 */
{'[', 0x00410022, 0xfffffff1},/*kerning  " A : -0.058594 */
{'[', 0x00610022, 0xfffffffa},/*kerning  " a : -0.023438 */
{'[', 0x00630022, 0xfffffff9},/*kerning  " c : -0.027344 */
{'[', 0x00640022, 0xfffffff9},/*kerning  " d : -0.027344 */
{'[', 0x00650022, 0xfffffff9},/*kerning  " e : -0.027344 */
{'[', 0x00670022, 0xfffffff9},/*kerning  " g : -0.027344 */
{'[', 0x006d0022, 0xfffffffe},/*kerning  " m : -0.007812 */
{'[', 0x006e0022, 0xfffffffe},/*kerning  " n : -0.007812 */
{'[', 0x006f0022, 0xfffffff9},/*kerning  " o : -0.027344 */
{'[', 0x00700022, 0xfffffffe},/*kerning  " p : -0.007812 */
{'[', 0x00710022, 0xfffffff9},/*kerning  " q : -0.027344 */
{'[', 0x00730022, 0xfffffff6},/*kerning  " s : -0.039062 */
{'[', 0x00770022, 0x00000001},/*kerning  " w : 0.003906 */
{'@', 0x00000023, 0x00006284},/*        #        x-advance: 98.515625 */
{'M', 0x42566000, 0x80000000},
{'l', 0x40c80000, 0xc2002000},
{'l', 0xc1a50000, 0x00000000},
{'l', 0xc0c80000, 0x42002000},
{'l', 0xc132c000, 0x80000000},
{'l', 0x40c80000, 0xc2002000},
{'l', 0xc1960000, 0x00000000},
{'l', 0x00000000, 0xc12c8000},
{'l', 0x41a6e000, 0x00000000},
{'l', 0x40aa0000, 0xc1dca000},
{'l', 0xc1a1e000, 0x00000000},
{'l', 0x00000000, 0xc12dc000},
{'l', 0x41b2c000, 0x00000000},
{'l', 0x40ca8000, 0xc2020000},
{'l', 0x41340000, 0x00000000},
{'l', 0xc0ca8000, 0x42020000},
{'l', 0x41a50000, 0x00000000},
{'l', 0x40ca8000, 0xc2020000},
{'l', 0x4132c000, 0x00000000},
{'l', 0xc0ca8000, 0x42020000},
{'l', 0x417dc000, 0x00000000},
{'l', 0x00000000, 0x412dc000},
{'l', 0xc18fc000, 0x00000000},
{'l', 0xc0ac8000, 0x41dca000},
{'l', 0x418c0000, 0x00000000},
{'l', 0x00000000, 0x412c8000},
{'4', 0x0000ff64, 0x0100ffce},
{'l', 0xc132c000, 0x80000000},
{'M', 0x42255000, 0xc22b4000},
{'l', 0x41a50000, 0x00000000},
{'l', 0x40ac8000, 0xc1dca000},
{'l', 0xc1a50000, 0x00000000},
{'l', 0xc0ac8000, 0x41dca000},
{'@', 0x00000024, 0x000059ec},/*        $        x-advance: 89.921875 */
{'M', 0x42a2d000, 0xc1eba000},
{'q', 0x00000000, 0x41598000},
{0, 0xc102a000, 0x41ab9000},
{'9', 0x003effbf, 0x004aff51},
{'4', 0x00770000, 0x0000ffa3},
{'l', 0x00000000, 0xc16d8000},
{'q', 0xc1408000, 0xbfa00000},
{0, 0xc1ac3000, 0xc112e000},
{'9', 0xffc1ffb5, 0xff33ffb5},
{'l', 0x41688000, 0x00000000},
{'q', 0x00000000, 0x41534000},
{0, 0x40e24000, 0x418ed000},
{'q', 0x40e24000, 0x4094c000},
{0, 0x416ba000, 0x4094c000},
{'q', 0x41278000, 0x00000000},
{0, 0x4181b000, 0xc09ec000},
{'8', 0x962dd92d, 0x9fd8c800},
{'q', 0xc0a3c000, 0xc0a3c000},
{0, 0xc18cf000, 0xc111a000},
{'q', 0xc16c4000, 0xc0960000},
{0, 0xc1b7c000, 0xc146c000},
{'q', 0xc1034000, 0xc0f78000},
{0, 0xc1034000, 0xc1aaa000},
{'q', 0x00000000, 0xc14f8000},
{0, 0x40f14000, 0xc1a87000},
{'9', 0xffc0003c, 0xffb300a3},
{'4', 0xff780000, 0x0000005d},
{'l', 0x00000000, 0x41898000},
{'q', 0x41520000, 0x3fdc0000},
{0, 0x41a37000, 0x412fa000},
{'9', 0x004a003a, 0x00ca003a},
{'l', 0xc1660000, 0x00000000},
{'q', 0x00000000, 0xc1278000},
{0, 0xc0a00000, 0xc189d000},
{'q', 0xc0a00000, 0xc0d84000},
{0, 0xc1660000, 0xc0d84000},
{'q', 0xc11d8000, 0x00000000},
{0, 0xc167e000, 0x40a14000},
{'8', 0x68db28db, 0x61243b00},
{'q', 0x40938000, 0x40988000},
{0, 0x419b5000, 0x411c4000},
{'q', 0x416ce000, 0x40a00000},
{0, 0x41b2c000, 0x41476000},
{'q', 0x40f14000, 0x40eec000},
{0, 0x40f14000, 0x41a5f000},
{'@', 0x00000025, 0x00007530},/*        %        x-advance: 117.187500 */
{'M', 0x41034000, 0xc2b7c000},
{'q', 0x00000000, 0xc11b0000},
{0, 0x40c80000, 0xc184d000},
{'q', 0x40c80000, 0xc0dd4000},
{0, 0x41884000, 0xc0dd4000},
{'q', 0x412f0000, 0x00000000},
{0, 0x41893000, 0x40dd4000},
{'9', 0x00370031, 0x00840031},
{'l', 0x00000000, 0x40c08000},
{'q', 0x00000000, 0x41188000},
{0, 0xc0c58000, 0x41839000},
{'q', 0xc0c58000, 0x40dd4000},
{0, 0xc1884000, 0x40dd4000},
{'q', 0xc12dc000, 0x00000000},
{0, 0xc1893000, 0xc0dd4000},
{'9', 0xffc9ffce, 0xff7dffce},
{'l', 0x00000000, 0xc0c08000},
{'M', 0x41988000, 0xc2abb800},
{'8', 0x4d182b00, 0x224b2218},
{'8', 0xde4a0032, 0xb318de18},
{'l', 0x00000000, 0xc0c08000},
{'8', 0xb2e8d500, 0xdeb5dee8},
{'8', 0x22b600ce, 0x4ee822e8},
{'l', 0x00000000, 0x40c08000},
{'M', 0x42b4f000, 0xc2c32800},
{'l', 0xc25e3000, 0x42b1d000},
{'4', 0xffd7ffbf, 0xfd3901bc},
{'l', 0x41020000, 0x40a50000},
{'M', 0x427e6000, 0xc1df2000},
{'q', 0x00000000, 0xc119c000},
{0, 0x40c80000, 0xc1843000},
{'q', 0x40c80000, 0xc0dd4000},
{0, 0x41884000, 0xc0dd4000},
{'q', 0x412f0000, 0x00000000},
{0, 0x41893000, 0x40dd4000},
{'9', 0x00370031, 0x00840031},
{'l', 0x00000000, 0x40c30000},
{'q', 0x00000000, 0x4119c000},
{0, 0xc0c58000, 0x41843000},
{'q', 0xc0c58000, 0x40dd4000},
{0, 0xc1884000, 0x40dd4000},
{'q', 0xc12f0000, 0x00000000},
{0, 0xc1898000, 0xc0dd4000},
{'9', 0xffc9ffce, 0xff7cffce},
{'l', 0x00000000, 0xc0c30000},
{'M', 0x4294e800, 0xc1ae6000},
{'8', 0x4e182b00, 0x224b2218},
{'8', 0xde4b0032, 0xb218de18},
{'l', 0x00000000, 0xc0c30000},
{'8', 0xb2e8d400, 0xdeb5dee8},
{'8', 0x22b600ce, 0x4ee822e8},
{'l', 0x00000000, 0x40c30000},
{'@', 0x00000026, 0x00006388},/*        &        x-advance: 99.531250 */
{'M', 0x42a32000, 0x80000000},
{'l', 0xc0f00000, 0xc10fc000},
{'q', 0xc0bb8000, 0x40a78000},
{0, 0xc15c0000, 0x40fc8000},
{'q', 0xc0fc8000, 0x402a0000},
{0, 0xc17b4000, 0x402a0000},
{'q', 0xc1884000, 0x00000000},
{0, 0xc1d70000, 0xc1110000},
{'q', 0xc11d8000, 0xc1110000},
{0, 0xc11d8000, 0xc1b90000},
{'q', 0x00000000, 0xc1228000},
{0, 0x40c44000, 0xc1893000},
{'q', 0x40c44000, 0xc0dfc000},
{0, 0x417e6000, 0xc15de000},
{'8', 0x97b6c8d2, 0x99e5cfe5},
{'q', 0x00000000, 0xc1548000},
{0, 0x40fa0000, 0xc1a37000},
{'q', 0x40fa0000, 0xc0e4c000},
{0, 0x41a5a000, 0xc0e4c000},
{'q', 0x41494000, 0x00000000},
{0, 0x419e7000, 0x40e4c000},
{'q', 0x40e74000, 0x40e4c000},
{0, 0x40e74000, 0x4186b000},
{'8', 0x6edf4100, 0x57a62ddf},
{'4', 0x0032ffbc, 0x00f200ca},
{'9', 0xffaf002a, 0xff4c002a},
{'l', 0x414f8000, 0x00000000},
{'9', 0x009f0000, 0x0108ffb4},
{'4', 0x009b0082, 0x0000ff76},
{'M', 0x41fd2000, 0xc2b06800},
{'9', 0x003a0000, 0x00960049},
{'l', 0x4105c000, 0xc0be0000},
{'8', 0xc93ce429, 0xb813e613},
{'8', 0xbde4dc00, 0xe2afe2e4},
{'8', 0x24ad00c9, 0x55e424e4},
{'M', 0x41b2c000, 0xc1f50000},
{'q', 0x00000000, 0x41048000},
{0, 0x40af0000, 0x4164c000},
{'q', 0x40af0000, 0x40c08000},
{0, 0x41866000, 0x40c08000},
{'9', 0x0000005c, 0xffbb00a8},
{'4', 0xfef7ff23, 0x0013ffe6},
{'8', 0x5bae31bd, 0x3df129f1},
{'@', 0x00000027, 0x00001bf8},/*        '        x-advance: 27.968750 */
{'M', 0x419ec000, 0xc2f00000},
{'l', 0x00000000, 0x41098000},
{'l', 0xbfd20000, 0x41e60000},
{'l', 0xc1214000, 0x00000000},
{'l', 0x3da00000, 0xc2156000},
{'l', 0x413a4000, 0x00000000},
{'[', 0x00220027, 0xfffffff3},/*kerning  ' " : -0.050781 */
{'[', 0x00270027, 0xfffffff3},/*kerning  ' ' : -0.050781 */
{'[', 0x00410027, 0xfffffff1},/*kerning  ' A : -0.058594 */
{'[', 0x00610027, 0xfffffffa},/*kerning  ' a : -0.023438 */
{'[', 0x00630027, 0xfffffff9},/*kerning  ' c : -0.027344 */
{'[', 0x00640027, 0xfffffff9},/*kerning  ' d : -0.027344 */
{'[', 0x00650027, 0xfffffff9},/*kerning  ' e : -0.027344 */
{'[', 0x00670027, 0xfffffff9},/*kerning  ' g : -0.027344 */
{'[', 0x006d0027, 0xfffffffe},/*kerning  ' m : -0.007812 */
{'[', 0x006e0027, 0xfffffffe},/*kerning  ' n : -0.007812 */
{'[', 0x006f0027, 0xfffffff9},/*kerning  ' o : -0.027344 */
{'[', 0x00700027, 0xfffffffe},/*kerning  ' p : -0.007812 */
{'[', 0x00710027, 0xfffffff9},/*kerning  ' q : -0.027344 */
{'[', 0x00730027, 0xfffffff6},/*kerning  ' s : -0.039062 */
{'[', 0x00770027, 0x00000001},/*kerning  ' w : 0.003906 */
{'@', 0x00000028, 0x000036c4},/*        (        x-advance: 54.765625 */
{'M', 0x41278000, 0xc2390000},
{'q', 0x00000000, 0xc1b54000},
{0, 0x40cf8000, 0xc21e7000},
{'q', 0x40cf8000, 0xc187a000},
{0, 0x41764000, 0xc1dd4000},
{'9', 0xffab0047, 0xff8a0082},
{'l', 0x40430000, 0x41188000},
{'q', 0xc1138000, 0x40de8000},
{0, 0xc1901000, 0x41c67000},
{'q', 0xc10ca000, 0x418ed000},
{0, 0xc10ca000, 0x4242d800},
{'q', 0x00000000, 0x41ed8000},
{0, 0x410ca000, 0x423ef000},
{'9', 0x00900046, 0x00cc0090},
{'l', 0xc0430000, 0x410c0000},
{'q', 0xc0ed8000, 0xc0820000},
{0, 0xc182a000, 0xc16c4000},
{'q', 0xc10e8000, 0xc12b4000},
{0, 0xc1764000, 0xc1dcf000},
{'q', 0xc0cf8000, 0xc1875000},
{0, 0xc0cf8000, 0xc2225800},
{'[', 0x00560028, 0x00000002},/*kerning  ( V : 0.007812 */
{'[', 0x00570028, 0x00000002},/*kerning  ( W : 0.007812 */
{'[', 0x00590028, 0x00000002},/*kerning  ( Y : 0.007812 */
{'@', 0x00000029, 0x000037b4},/*        )        x-advance: 55.703125 */
{'M', 0x42313000, 0xc235e000},
{'q', 0x00000000, 0x41b68000},
{0, 0xc0cf8000, 0x421f1000},
{'q', 0xc0cf8000, 0x4187a000},
{0, 0xc176e000, 0x41dd4000},
{'9', 0x0055ffb9, 0x0076ff7e},
{'l', 0xc0430000, 0xc10c0000},
{'q', 0x41124000, 0xc0de8000},
{0, 0x418fc000, 0xc1c9e000},
{'q', 0x410d4000, 0xc1924000},
{0, 0x410d4000, 0xc2449000},
{'q', 0x00000000, 0xc19e2000},
{0, 0xc08c0000, 0xc209f800},
{'q', 0xc08c0000, 0xc16ba000},
{0, 0xc12a0000, 0xc1c35000},
{'9', 0xffb3ffce, 0xff8fffa2},
{'l', 0x40430000, 0xc10d4000},
{'q', 0x40eb0000, 0x40848000},
{0, 0x41825000, 0x416d8000},
{'q', 0x410f2000, 0x412b4000},
{0, 0x4176e000, 0x41dcf000},
{'q', 0x40cf8000, 0x41875000},
{0, 0x40cf8000, 0x4221b800},
{'@', 0x0000002a, 0x000044e8},/*        *        x-advance: 68.906250 */
{'M', 0x41214000, 0xc25d4000},
{'l', 0x417b4000, 0xc1ac8000},
{'l', 0xc1bcc000, 0xc0e10000},
{'l', 0x406b0000, 0xc13b8000},
{'l', 0x41bcc000, 0x410ac000},
{'l', 0xbf340000, 0xc1d70000},
{'l', 0x413e0000, 0x00000000},
{'l', 0xbf480000, 0x41dac000},
{'l', 0x41ba4000, 0xc10ac000},
{'l', 0x40660000, 0x413f4000},
{'l', 0xc1bfe000, 0x40e38000},
{'l', 0x41764000, 0x41a96000},
{'l', 0xc11b0000, 0x40e88000},
{'l', 0xc1674000, 0xc1b40000},
{'l', 0xc1624000, 0x41afa000},
{'l', 0xc11c4000, 0xc0e38000},
{'@', 0x0000002b, 0x00005ac8},/*        +        x-advance: 90.781250 */
{'M', 0x42a82000, 0xc23db000},
{'l', 0xc1fdc000, 0x00000000},
{'l', 0x00000000, 0x42101000},
{'l', 0xc1674000, 0x00000000},
{'l', 0x00000000, 0xc2101000},
{'l', 0xc1fe6000, 0x00000000},
{'l', 0x00000000, 0xc1598000},
{'l', 0x41fe6000, 0x00000000},
{'l', 0x00000000, 0xc204d000},
{'l', 0x41674000, 0x00000000},
{'l', 0x00000000, 0x4204d000},
{'l', 0x41fdc000, 0x00000000},
{'l', 0x00000000, 0x41598000},
{'@', 0x0000002c, 0x00001f7c},/*        ,        x-advance: 31.484375 */
{'M', 0x41c12000, 0xc1898000},
{'l', 0x00000000, 0x413a4000},
{'q', 0x00000000, 0x40e38000},
{0, 0xc0660000, 0x4170a000},
{'9', 0x003fffe4, 0x0069ffb0},
{'l', 0xc1034000, 0xc0b68000},
{'9', 0xffad003c, 0xff55003d},
{'l', 0x00000000, 0xc14d0000},
{'l', 0x41624000, 0x00000000},
{'[', 0x0022002c, 0xffffffeb},/*kerning  , " : -0.082031 */
{'[', 0x0027002c, 0xffffffeb},/*kerning  , ' : -0.082031 */
{'@', 0x0000002d, 0x00002c38},/*        -        x-advance: 44.218750 */
{'M', 0x42246000, 0xc2593000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc2188000, 0x00000000},
{'l', 0x00000000, 0xc13e0000},
{'l', 0x42188000, 0x00000000},
{'@', 0x0000002e, 0x00002a30},/*        .        x-advance: 42.187500 */
{'M', 0x41340000, 0xc0f50000},
{'8', 0xcf12e300, 0xec35ec12},
{'8', 0x14350023, 0x31121412},
{'8', 0x30ee1c00, 0x14cb14ee},
{'8', 0xeccb00dd, 0xd0eeecee},
{'[', 0x0022002e, 0xffffffeb},/*kerning  . " : -0.082031 */
{'[', 0x0027002e, 0xffffffeb},/*kerning  . ' : -0.082031 */
{'@', 0x0000002f, 0x00004204},/*        /        x-advance: 66.015625 */
{'M', 0x42755000, 0xc2e38000},
{'l', 0xc23db000, 0x42f70800},
{'l', 0xc146c000, 0x00000000},
{'l', 0x423e0000, 0xc2f70800},
{'l', 0x41458000, 0x00000000},
{'[', 0x002f002f, 0xffffffe4},/*kerning  / / : -0.109375 */
{'@', 0x00000030, 0x000059ec},/*        0        x-advance: 89.921875 */
{'M', 0x42a1b800, 0xc2426000},
{'q', 0x00000000, 0x41dfc000},
{0, 0xc11a6000, 0x421c4000},
{'q', 0xc11a6000, 0x41318000},
{0, 0xc1d1b000, 0x41318000},
{'q', 0xc180c000, 0x00000000},
{0, 0xc1cf3000, 0xc12c8000},
{'9', 0xffaaffb2, 0xfed4ffb0},
{'l', 0x00000000, 0xc1992000},
{'q', 0x00000000, 0xc1df2000},
{0, 0x411c4000, 0xc21a8800},
{'q', 0x411c4000, 0xc12be000},
{0, 0x41d0c000, 0xc12be000},
{'q', 0x4182a000, 0x00000000},
{0, 0x41d02000, 0x4126e000},
{'9', 0x0053004d, 0x01290050},
{'l', 0x00000000, 0x41992000},
{'M', 0x4284a800, 0xc288b800},
{'q', 0x00000000, 0xc199c000},
{0, 0xc0af0000, 0xc1d98000},
{'q', 0xc0af0000, 0xc0ff0000},
{0, 0xc1802000, 0xc0ff0000},
{'q', 0xc1228000, 0x00000000},
{0, 0xc17aa000, 0x40f78000},
{'9', 0x003dffd4, 0x00d2ffd3},
{'l', 0x00000000, 0x41b9a000},
{'q', 0x00000000, 0x41988000},
{0, 0x40b2c000, 0x41dc0000},
{'q', 0x40b2c000, 0x41070000},
{0, 0x417e6000, 0x41070000},
{'q', 0x4128c000, 0x00000000},
{0, 0x417e6000, 0xc1052000},
{'q', 0x40ab4000, 0xc1052000},
{0, 0x40adc000, 0xc1d7f000},
{'l', 0x00000000, 0xc1b5e000},
{'@', 0x00000031, 0x000059ec},/*        1        x-advance: 89.921875 */
{'M', 0x4263d000, 0xc2e4c000},
{'l', 0x00000000, 0x42e4c000},
{'l', 0xc1674000, 0x80000000},
{'l', 0x00000000, 0xc2c0a800},
{'l', 0xc1e92000, 0x412a0000},
{'l', 0x00000000, 0xc150c000},
{'l', 0x42255000, 0xc17a0000},
{'l', 0x40110000, 0x00000000},
{'@', 0x00000032, 0x000059ec},/*        2        x-advance: 89.921875 */
{'M', 0x42a7f800, 0xc13e0000},
{'l', 0x00000000, 0x413e0000},
{'4', 0x0000fdad, 0xffad0000},
{'l', 0x421a6000, 0xc22be000},
{'q', 0x41188000, 0xc12c8000},
{0, 0x414da000, 0xc188e000},
{'q', 0x40548000, 0xc0ca8000},
{0, 0x40548000, 0xc14bc000},
{'q', 0x00000000, 0xc105c000},
{0, 0xc0a64000, 0xc164c000},
{'q', 0xc0a64000, 0xc0be0000},
{0, 0xc16a6000, 0xc0be0000},
{'q', 0xc1368000, 0x00000000},
{0, 0xc1884000, 0x40cf8000},
{'9', 0x0033ffd3, 0x0085ffd3},
{'l', 0xc1674000, 0x00000000},
{'q', 0x00000000, 0xc1660000},
{0, 0x41174000, 0xc1c58000},
{'q', 0x41174000, 0xc1250000},
{0, 0x41dd4000, 0xc1250000},
{'q', 0x4180c000, 0x00000000},
{0, 0x41c99000, 0x4105c000},
{'q', 0x4111a000, 0x4105c000},
{0, 0x4111a000, 0x41b04000},
{'q', 0x00000000, 0x41200000},
{0, 0xc0c58000, 0x41a0f000},
{'9', 0x0050ffcf, 0x009fff87},
{'l', 0xc1f3c000, 0x42043000},
{'l', 0x42642000, 0x00000000},
{'@', 0x00000033, 0x000059ec},/*        3        x-advance: 89.921875 */
{'M', 0x41f46000, 0xc2507000},
{'4', 0xffa10000, 0x00000055},
{'q', 0x4132c000, 0xbda00000},
{0, 0x41852000, 0xc0b40000},
{'q', 0x40af0000, 0xc0b18000},
{0, 0x40af0000, 0xc15d4000},
{'q', 0x00000000, 0xc1a00000},
{0, 0xc19ec000, 0xc1a00000},
{'q', 0xc1138000, 0x00000000},
{0, 0xc16ec000, 0x40a8c000},
{'9', 0x002affd3, 0x0071ffd3},
{'l', 0xc1674000, 0x00000000},
{'q', 0x00000000, 0xc150c000},
{0, 0x411a6000, 0xc1b18000},
{'q', 0x411a6000, 0xc1124000},
{0, 0x41cb7000, 0xc1124000},
{'q', 0x41764000, 0x00000000},
{0, 0x41c71000, 0x4102a000},
{'q', 0x4117e000, 0x4102a000},
{0, 0x4117e000, 0x41c03000},
{'8', 0x6cde3200, 0x599339de},
{'q', 0x41354000, 0x406b0000},
{0, 0x41778000, 0x41354000},
{'q', 0x40848000, 0x40f50000},
{0, 0x40848000, 0x41764000},
{'q', 0x00000000, 0x417f0000},
{0, 0xc1250000, 0x41c4e000},
{'q', 0xc1250000, 0x410ac000},
{0, 0xc1cda000, 0x410ac000},
{'q', 0xc16d8000, 0x00000000},
{0, 0xc1cbc000, 0xc103e000},
{'9', 0xffbfffab, 0xff45ffab},
{'l', 0x41674000, 0x00000000},
{'q', 0x00000000, 0x410fc000},
{0, 0x40ba4000, 0x41660000},
{'q', 0x40ba4000, 0x40ac8000},
{0, 0x417d2000, 0x40ac8000},
{'q', 0x411ec000, 0x00000000},
{0, 0x417b4000, 0xc0a78000},
{'q', 0x40b90000, 0xc0a78000},
{0, 0x40b90000, 0xc17dc000},
{'q', 0x00000000, 0xc12a0000},
{0, 0xc0d20000, 0xc17aa000},
{'q', 0xc0d20000, 0xc0a14000},
{0, 0xc18ca000, 0xc0a14000},
{'l', 0xc1250000, 0x00000000},
{'@', 0x00000034, 0x000059ec},/*        4        x-advance: 89.921875 */
{'M', 0x40848000, 0xc20bb000},
{'l', 0x424bc000, 0xc29da800},
{'l', 0x41764000, 0x00000000},
{'l', 0x00000000, 0x4296f000},
{'l', 0x417dc000, 0x00000000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc17dc000, 0x00000000},
{'l', 0x00000000, 0x41d34000},
{'l', 0xc1674000, 0x80000000},
{'4', 0xff2d0000, 0x0000fe61},
{'l', 0x00000000, 0xc1084000},
{'M', 0x41a46000, 0xc2192000},
{'l', 0x420de000, 0x00000000},
{'l', 0x00000000, 0xc25f7000},
{'l', 0xbfe60000, 0x404d0000},
{'l', 0xc206b000, 0x4252a000},
{'@', 0x00000035, 0x000059ec},/*        5        x-advance: 89.921875 */
{'M', 0x41dde000, 0xc2589000},
{'l', 0xc1390000, 0xc03e0000},
{'l', 0x40b68000, 0xc2629000},
{'l', 0x42697000, 0x00000000},
{'4', 0x006a0000, 0x0000fe90},
{'l', 0xc05c0000, 0x41f78000},
{'q', 0x41098000, 0xc09d8000},
{0, 0x4197e000, 0xc09d8000},
{'q', 0x417b4000, 0x00000000},
{0, 0x41c67000, 0x4125a000},
{'q', 0x4111a000, 0x4125a000},
{0, 0x4111a000, 0x41de3000},
{'q', 0x00000000, 0x4182a000},
{0, 0xc10e8000, 0x41d89000},
{'q', 0xc10e8000, 0x412be000},
{0, 0xc1d8e000, 0x412be000},
{'q', 0xc15d4000, 0x00000000},
{0, 0xc1bf4000, 0xc0f8c000},
{'9', 0xffc2ffb0, 0xff43ffa3},
{'l', 0x415c0000, 0x00000000},
{'q', 0x402f0000, 0x419ce000},
{0, 0x41ae6000, 0x419ce000},
{'q', 0x4123c000, 0x00000000},
{0, 0x417dc000, 0xc0de8000},
{'q', 0x40b40000, 0xc0de8000},
{0, 0x40b40000, 0xc196a000},
{'q', 0x00000000, 0xc12b4000},
{0, 0xc0bcc000, 0xc1901000},
{'q', 0xc0bcc000, 0xc0e9c000},
{0, 0xc186b000, 0xc0e9c000},
{'8', 0x0fa800c6, 0x2ac40fe2},
{'@', 0x00000036, 0x000059ec},/*        6        x-advance: 89.921875 */
{'M', 0x42a48800, 0xc2147000},
{'q', 0x00000000, 0x41816000},
{0, 0xc1106000, 0x41db6000},
{'q', 0xc1106000, 0x41340000},
{0, 0xc1d25000, 0x41340000},
{'q', 0xc1444000, 0x00000000},
{0, 0xc1a32000, 0xc0d0c000},
{'q', 0xc1020000, 0xc0d0c000},
{0, 0xc1426000, 0xc1848000},
{'9', 0xffb0ffe0, 0xff5cffe0},
{'l', 0x00000000, 0xc0d98000},
{'q', 0x00000000, 0xc1802000},
{0, 0x408fc000, 0xc1f78000},
{'q', 0x408fc000, 0xc16ec000},
{0, 0x41825000, 0xc1c3f000},
{'9', 0xffb4005e, 0xffb4010e},
{'4', 0x0000000a, 0x00620000},
{'q', 0xc1728000, 0x00000000},
{0, 0xc1bc7000, 0x40aa0000},
{'q', 0xc1066000, 0x40aa0000},
{0, 0xc1480000, 0x415ca000},
{'q', 0xc0834000, 0x4107a000},
{0, 0xc09ec000, 0x41915000},
{'q', 0x41110000, 0xc123c000},
{0, 0x41c3a000, 0xc123c000},
{'q', 0x41354000, 0x00000000},
{0, 0x41947000, 0x40af0000},
{'q', 0x40e74000, 0x40af0000},
{0, 0x412aa000, 0x4161a000},
{'9', 0x0045001b, 0x008f001b},
{'M', 0x41c76000, 0xc2269000},
{'q', 0x00000000, 0x41728000},
{0, 0x40d84000, 0x41b9a000},
{'q', 0x40d84000, 0x4100c000},
{0, 0x41746000, 0x4100c000},
{'q', 0x41200000, 0x00000000},
{0, 0x41782000, 0xc0e9c000},
{'q', 0x40b04000, 0xc0e9c000},
{0, 0x40b04000, 0xc195b000},
{'q', 0x00000000, 0xc1228000},
{0, 0xc0a28000, 0xc191a000},
{'q', 0xc0a28000, 0xc100c000},
{0, 0xc17a0000, 0xc100c000},
{'8', 0x249200c4, 0x58bc24cf},
{'l', 0x00000000, 0x40af0000},
{'@', 0x00000037, 0x000059ec},/*        7        x-advance: 89.921875 */
{'M', 0x42a5f000, 0xc2e38000},
{'l', 0x00000000, 0x41020000},
{'l', 0xc23c7000, 0x42d34000},
{'l', 0xc173c000, 0x80000000},
{'l', 0x423c2000, 0xc2cbc000},
{'l', 0xc2764000, 0x00000000},
{'l', 0x00000000, 0xc13e0000},
{'l', 0x4299c000, 0x00000000},
{'@', 0x00000038, 0x000059ec},/*        8        x-advance: 89.921875 */
{'M', 0x42a23000, 0xc1f64000},
{'q', 0x00000000, 0x417a0000},
{0, 0xc126e000, 0x41bfe000},
{'q', 0xc126e000, 0x4105c000},
{0, 0xc1cdf000, 0x4105c000},
{'q', 0xc1750000, 0x00000000},
{0, 0xc1cdf000, 0xc105c000},
{'q', 0xc126e000, 0xc105c000},
{0, 0xc126e000, 0xc1bfe000},
{'q', 0x00000000, 0xc1188000},
{0, 0x40a3c000, 0xc186b000},
{'8', 0xa86ec628, 0xada1e3c4},
{'q', 0xc08d4000, 0xc0d70000},
{0, 0xc08d4000, 0xc1714000},
{'q', 0x00000000, 0xc16ec000},
{0, 0x4117e000, 0xc1b86000},
{'q', 0x4117e000, 0xc1020000},
{0, 0x41c03000, 0xc1020000},
{'q', 0x4169c000, 0x00000000},
{0, 0x41c0d000, 0x41020000},
{'q', 0x4117e000, 0x41020000},
{0, 0x4117e000, 0x41b86000},
{'q', 0x00000000, 0x41070000},
{0, 0xc0910000, 0x4171e000},
{'q', 0xc0910000, 0x40d5c000},
{0, 0xc141c000, 0x4125a000},
{'q', 0x410d4000, 0x40700000},
{0, 0x41610000, 0x41318000},
{'9', 0x003a0029, 0x00860029},
{'M', 0x427ff000, 0xc2a7d000},
{'q', 0x00000000, 0xc1084000},
{0, 0xc0aa0000, 0xc1606000},
{'8', 0xd492d4d6, 0x2a9200bc},
{'q', 0xc0a78000, 0x40a8c000},
{0, 0xc0a78000, 0x41642000},
{'q', 0x00000000, 0x410d4000},
{0, 0x40a78000, 0x41624000},
{'8', 0x2a6e2a29, 0xd66e0044},
{'9', 0xffd6002a, 0xff8f002a},
{'M', 0x42852000, 0xc1f8c000},
{'q', 0x00000000, 0xc1188000},
{0, 0xc0c1c000, 0xc178c000},
{'q', 0xc0c1c000, 0xc0c08000},
{0, 0xc17be000, 0xc0c08000},
{'q', 0xc11ec000, 0x00000000},
{0, 0xc17be000, 0x40c08000},
{'q', 0xc0ba4000, 0x40c08000},
{0, 0xc0ba4000, 0x4178c000},
{'q', 0x00000000, 0x411d8000},
{0, 0x40ba4000, 0x41750000},
{'q', 0x40ba4000, 0x40af0000},
{0, 0x417e6000, 0x40af0000},
{'q', 0x41214000, 0x00000000},
{0, 0x417dc000, 0xc0af0000},
{'q', 0x40b90000, 0xc0af0000},
{0, 0x40b90000, 0xc1750000},
{'@', 0x00000039, 0x000059ec},/*        9        x-advance: 89.921875 */
{'M', 0x429ec000, 0xc2802000},
{'q', 0x00000000, 0x41318000},
{0, 0xbff50000, 0x41b31000},
{'q', 0xbff50000, 0x4134a000},
{0, 0xc0ed8000, 0x41a64000},
{'q', 0xc0b04000, 0x4117e000},
{0, 0xc1820000, 0x41746000},
{'9', 0x002effab, 0x002eff11},
{'l', 0x00000000, 0xc1444000},
{'q', 0x418a2000, 0x00000000},
{0, 0x41cd5000, 0xc0ac8000},
{'q', 0x41066000, 0xc0ac8000},
{0, 0x413b8000, 0xc161a000},
{'q', 0x40548000, 0xc10b6000},
{0, 0x406d8000, 0xc1947000},
{'8', 0x46aa2bdc, 0x1a921ace},
{'q', 0xc1340000, 0x00000000},
{0, 0xc1938000, 0xc0b40000},
{'q', 0xc0e60000, 0xc0b40000},
{0, 0xc12a0000, 0xc1656000},
{'q', 0xc05c0000, 0xc10b6000},
{0, 0xc05c0000, 0xc1901000},
{'q', 0x00000000, 0xc1820000},
{0, 0x410f2000, 0xc1de3000},
{'q', 0x410f2000, 0xc1386000},
{0, 0x41d2f000, 0xc1386000},
{'q', 0x414f8000, 0x00000000},
{0, 0x41a7d000, 0x40d70000},
{'q', 0x41002000, 0x40d70000},
{0, 0x413a4000, 0x418a2000},
{'9', 0x0054001d, 0x00b0001d},
{'l', 0x00000000, 0x40a78000},
{'M', 0x41b04000, 0xc2999800},
{'q', 0x00000000, 0x41228000},
{0, 0x40a3c000, 0x41938000},
{'q', 0x40a3c000, 0x41048000},
{0, 0x41782000, 0x41048000},
{'8', 0xdd6b003b, 0xa846dd30},
{'l', 0x00000000, 0xc0b68000},
{'q', 0x00000000, 0xc178c000},
{0, 0xc0d34000, 0xc1bea000},
{'q', 0xc0d34000, 0xc1048000},
{0, 0xc1746000, 0xc1048000},
{'q', 0xc1214000, 0x00000000},
{0, 0xc1796000, 0x40f3c000},
{'q', 0xc0b04000, 0x40f3c000},
{0, 0xc0b04000, 0x41979000},
{'@', 0x0000003a, 0x000026c0},/*        :        x-advance: 38.750000 */
{'M', 0x41264000, 0xc0f50000},
{'8', 0xcf12e300, 0xec35ec12},
{'8', 0x14350023, 0x31121412},
{'8', 0x30ee1c00, 0x14cb14ee},
{'8', 0xeccb00dd, 0xd0eeecee},
{'M', 0x41278000, 0xc2994800},
{'8', 0xcf12e300, 0xec35ec12},
{'8', 0x14350023, 0x31121412},
{'8', 0x30ee1c00, 0x14cb14ee},
{'8', 0xeccb00dd, 0xd0eeecee},
{'@', 0x0000003b, 0x000021d4},/*        ;        x-advance: 33.828125 */
{'M', 0x41098000, 0xc2994800},
{'8', 0xcf12e300, 0xec35ec12},
{'8', 0x14350023, 0x31121412},
{'8', 0x30ee1c00, 0x14cb14ee},
{'8', 0xeccb00dd, 0xd0eeecee},
{'M', 0x41c8a000, 0xc1898000},
{'l', 0x00000000, 0x413a4000},
{'q', 0x00000000, 0x40e38000},
{0, 0xc0660000, 0x4170a000},
{'9', 0x003fffe4, 0x0069ffb0},
{'l', 0xc1034000, 0xc0b68000},
{'9', 0xffad003c, 0xff55003d},
{'l', 0x00000000, 0xc14d0000},
{'l', 0x41624000, 0x00000000},
{'@', 0x0000003c, 0x00005154},/*        <        x-advance: 81.328125 */
{'M', 0x428b1000, 0xc1750000},
{'l', 0xc27fa000, 0xc1ece000},
{'l', 0x00000000, 0xc1354000},
{'l', 0x427fa000, 0xc1ec4000},
{'l', 0x00000000, 0x41750000},
{'l', 0xc2430000, 0x41a0a000},
{'l', 0x42430000, 0x419e2000},
{'l', 0x00000000, 0x41750000},
{'@', 0x0000003d, 0x000057d0},/*        =        x-advance: 87.812500 */
{'M', 0x429a1000, 0xc2985800},
{'l', 0x00000000, 0x41494000},
{'4', 0x0000fdf7, 0xff9c0000},
{'l', 0x42825000, 0x00000000},
{'M', 0x429a1000, 0xc22f0000},
{'l', 0x00000000, 0x41494000},
{'l', 0xc2825000, 0x00000000},
{'l', 0x00000000, 0xc1494000},
{'l', 0x42825000, 0x00000000},
{'@', 0x0000003e, 0x000053ac},/*        >        x-advance: 83.671875 */
{'M', 0x4128c000, 0xc2abe000},
{'l', 0x42857000, 0x41ec4000},
{'l', 0x00000000, 0x41368000},
{'l', 0xc2857000, 0x41ece000},
{'l', 0x00000000, 0xc16ec000},
{'l', 0x424ee000, 0xc1a46000},
{'l', 0xc24ee000, 0xc1a14000},
{'l', 0x00000000, 0xc16ec000},
{'@', 0x0000003f, 0x00004ba0},/*        ?        x-advance: 75.625000 */
{'M', 0x4229b000, 0xc2002000},
{'l', 0xc1688000, 0x00000000},
{'q', 0x3da00000, 0xc1354000},
{0, 0x40480000, 0xc187a000},
{'8', 0x9753d318, 0xaf4ad529},
{'8', 0x9a20da20, 0x9bdec000},
{'8', 0xdb9ddbde, 0x1d9e00c9},
{'9', 0x001dffd5, 0x005effd5},
{'l', 0xc1674000, 0x00000000},
{'q', 0x3e200000, 0xc1534000},
{0, 0x4116a000, 0xc1a5a000},
{'q', 0x41142000, 0xc0f00000},
{0, 0x41b63000, 0xc0f00000},
{'q', 0x416ec000, 0x00000000},
{0, 0x41b8b000, 0x40ff0000},
{'q', 0x4102a000, 0x40ff0000},
{0, 0x4102a000, 0x41adc000},
{'q', 0x00000000, 0x41278000},
{0, 0xc0c58000, 0x41965000},
{'q', 0xc0c58000, 0x41052000},
{0, 0xc1570000, 0x4170a000},
{'9', 0x0035ffc6, 0x009effc6},
{'M', 0x41da2000, 0xc0e88000},
{'8', 0xd111e400, 0xed32ed11},
{'8', 0x13330021, 0x2f111311},
{'8', 0x2eef1a00, 0x13cd13ef},
{'8', 0xedce00df, 0xd2efedef},
{'@', 0x00000040, 0x00008fac},/*        @        x-advance: 143.671875 */
{'M', 0x42c5a800, 0x41e38000},
{'q', 0xc0af0000, 0x40610000},
{0, 0xc15a2000, 0x40a78000},
{'q', 0xc102a000, 0x3fdc0000},
{0, 0xc176e000, 0x3fdc0000},
{'q', 0xc1ed8000, 0x00000000},
{0, 0xc2379800, 0xc19c9000},
{'q', 0xc181b000, 0xc19c9000},
{0, 0xc16ce000, 0xc254a800},
{'q', 0x3f700000, 0xc1ab4000},
{0, 0x41156000, 0xc218f800},
{'q', 0x41066000, 0xc186b000},
{0, 0x41b86000, 0xc1d43000},
{'q', 0x416a6000, 0xc11b0000},
{0, 0x42090800, 0xc11b0000},
{'q', 0x41f0a000, 0x00000000},
{0, 0x4236a800, 0x419ce000},
{'q', 0x41796000, 0x419ce000},
{0, 0x41642000, 0x42539000},
{'q', 0xbec80000, 0x4119c000},
{0, 0xc0688000, 0x41988000},
{'q', 0xc04f8000, 0x41174000},
{0, 0xc1228000, 0x417aa000},
{'q', 0xc0dd4000, 0x40c6c000},
{0, 0xc1915000, 0x40c6c000},
{'q', 0xc16b0000, 0x00000000},
{0, 0xc1960000, 0xc1548000},
{'q', 0xc1070000, 0x41548000},
{0, 0xc1a78000, 0x41548000},
{'q', 0xc137c000, 0x00000000},
{0, 0xc187a000, 0xc1174000},
{'q', 0xc0af0000, 0xc1174000},
{0, 0xc0820000, 0xc1c6c000},
{'q', 0x3fe60000, 0xc1a46000},
{0, 0x413c2000, 0xc2027800},
{'q', 0x411f6000, 0xc1412000},
{0, 0x41b09000, 0xc1412000},
{'8', 0x136a0043, 0x2e491326},
{'l', 0xc07f0000, 0x422d2000},
{'q', 0xbf820000, 0x41368000},
{0, 0x40250000, 0x416c4000},
{'q', 0x40660000, 0x40570000},
{0, 0x40ed8000, 0x40570000},
{'q', 0x41174000, 0x00000000},
{0, 0x41692000, 0xc1138000},
{'q', 0x40a3c000, 0xc1138000},
{0, 0x40b54000, 0xc1b36000},
{'q', 0x3faa0000, 0xc1e92000},
{0, 0xc12d2000, 0xc236d000},
{'q', 0xc1426000, 0xc1848000},
{0, 0xc21d5800, 0xc1848000},
{'q', 0xc1c4e000, 0x00000000},
{0, 0xc21ce000, 0x418e8000},
{'q', 0xc169c000, 0x418e8000},
{0, 0xc17c8000, 0x4239a000},
{'q', 0xbfb40000, 0x41e9c000},
{0, 0x413ae000, 0x42386000},
{'q', 0x41516000, 0x41870000},
{0, 0x42180800, 0x41870000},
{'8', 0xf36f0037, 0xdd5ef338},
{'l', 0x403e0000, 0x410e8000},
{'M', 0x425d9000, 0xc202a000},
{'q', 0xbf820000, 0x41304000},
{0, 0x40098000, 0x41884000},
{'8', 0x30503019, 0xe444001f},
{'9', 0xffe40024, 0xffa1003d},
{'4', 0xfffb0000, 0xfec7001c},
{'q', 0xc08c0000, 0xc0110000},
{0, 0xc1160000, 0xc0110000},
{'q', 0xc1084000, 0x00000000},
{0, 0xc16ce000, 0x410c0000},
{'q', 0xc0c94000, 0x410c0000},
{0, 0xc0fb4000, 0x41cee000},
{'@', 0x00000041, 0x00006860},/*        A        x-advance: 104.375000 */
{'M', 0x40110000, 0x80000000},
{'l', 0x422d7000, 0xc2e38000},
{'l', 0x41534000, 0x00000000},
{'l', 0x422e1000, 0x42e38000},
{'l', 0xc1778000, 0x80000000},
{'l', 0xc12dc000, 0xc1ee2000},
{'4', 0x0000fe84, 0x00eeffaa},
{'l', 0xc1764000, 0x80000000},
{'M', 0x4203e000, 0xc2287000},
{'l', 0x421a6000, 0x00000000},
{'l', 0xc19a6000, 0xc2543000},
{'l', 0xc19a6000, 0x42543000},
{'[', 0x00220041, 0xfffffff1},/*kerning  A " : -0.058594 */
{'[', 0x00270041, 0xfffffff1},/*kerning  A ' : -0.058594 */
{'[', 0x003f0041, 0xfffffff9},/*kerning  A ? : -0.027344 */
{'[', 0x00430041, 0xffffffff},/*kerning  A C : -0.003906 */
{'[', 0x00470041, 0xffffffff},/*kerning  A G : -0.003906 */
{'[', 0x004f0041, 0xffffffff},/*kerning  A O : -0.003906 */
{'[', 0x00510041, 0xffffffff},/*kerning  A Q : -0.003906 */
{'[', 0x00540041, 0xfffffff0},/*kerning  A T : -0.062500 */
{'[', 0x00550041, 0xfffffffe},/*kerning  A U : -0.007812 */
{'[', 0x00560041, 0xfffffff5},/*kerning  A V : -0.042969 */
{'[', 0x00570041, 0xfffffff8},/*kerning  A W : -0.031250 */
{'[', 0x00590041, 0xfffffff5},/*kerning  A Y : -0.042969 */
{'[', 0x006f0041, 0xffffffff},/*kerning  A o : -0.003906 */
{'[', 0x00740041, 0xfffffffe},/*kerning  A t : -0.007812 */
{'[', 0x00750041, 0xffffffff},/*kerning  A u : -0.003906 */
{'[', 0x00760041, 0xfffffffa},/*kerning  A v : -0.023438 */
{'[', 0x00770041, 0xfffffffc},/*kerning  A w : -0.015625 */
{'[', 0x00790041, 0xfffffffa},/*kerning  A y : -0.023438 */
{'[', 0x007a0041, 0x00000001},/*kerning  A z : 0.003906 */
{'@', 0x00000042, 0x000063b0},/*        B        x-advance: 99.687500 */
{'M', 0x42b54000, 0xc2034000},
{'q', 0x00000000, 0x417f0000},
{0, 0xc1246000, 0x41c30000},
{'9', 0x0043ffae, 0x0043ff26},
{'4', 0x0000fec2, 0xfc720000},
{'l', 0x42151000, 0x00000000},
{'q', 0x418c0000, 0x00000000},
{0, 0x41da7000, 0x40e74000},
{'q', 0x411ce000, 0x40e74000},
{0, 0x411ce000, 0x41b8b000},
{'q', 0x00000000, 0x40fc8000},
{0, 0xc08fc000, 0x41606000},
{'q', 0xc08fc000, 0x40c44000},
{0, 0xc144e000, 0x4117e000},
{'q', 0x4119c000, 0x402a0000},
{0, 0x416ba000, 0x41214000},
{'9', 0x003b0028, 0x00880028},
{'M', 0x41e24000, 0xc2cad000},
{'4', 0x01200000, 0x000000b5},
{'q', 0x411b0000, 0x00000000},
{0, 0x417a0000, 0xc09b0000},
{'q', 0x40be0000, 0xc09b0000},
{0, 0x40be0000, 0xc1570000},
{'9', 0xff750000, 0xff72ff58},
{'l', 0xc1b90000, 0x00000000},
{'M', 0x42974000, 0xc202a000},
{'q', 0x00000000, 0xc1188000},
{0, 0xc0a64000, 0xc170a000},
{'9', 0xffd4ffd7, 0xffd4ff7b},
{'4', 0x0000ff35, 0x01470000},
{'l', 0x41c62000, 0x00000000},
{'q', 0x412b4000, 0x00000000},
{0, 0x41852000, 0xc0b04000},
{'q', 0x40be0000, 0xc0b04000},
{0, 0x40be0000, 0xc16e2000},
{'[', 0x00540042, 0xfffffffd},/*kerning  B T : -0.011719 */
{'[', 0x00560042, 0xfffffffd},/*kerning  B V : -0.011719 */
{'[', 0x00590042, 0xfffffff9},/*kerning  B Y : -0.027344 */
{'@', 0x00000043, 0x00006824},/*        C        x-advance: 104.140625 */
{'M', 0x42a3c000, 0xc210b000},
{'l', 0x41700000, 0x00000000},
{'q', 0xbfdc0000, 0x41816000},
{0, 0xc1430000, 0x41d7a000},
{'q', 0xc1278000, 0x412c8000},
{0, 0xc1f78000, 0x412c8000},
{'q', 0xc19ec000, 0x00000000},
{0, 0xc1fff000, 0xc162e000},
{'9', 0xff8fff9f, 0xfed3ff9d},
{'l', 0x00000000, 0xc1430000},
{'q', 0x00000000, 0xc1bf4000},
{0, 0x4144e000, 0xc2197000},
{'q', 0x4144e000, 0xc1674000},
{0, 0x42059800, 0xc1674000},
{'q', 0x419a6000, 0x00000000},
{0, 0x41ed8000, 0x412a0000},
{'9', 0x00550053, 0x00db0060},
{'l', 0xc1700000, 0x00000000},
{'q', 0xbfdc0000, 0xc13e0000},
{0, 0xc0f50000, 0xc1965000},
{'q', 0xc0be0000, 0xc0dd4000},
{0, 0xc1992000, 0xc0dd4000},
{'q', 0xc1714000, 0x00000000},
{0, 0xc1b72000, 0x41318000},
{'9', 0x0058ffc2, 0x00e9ffc2},
{'l', 0x00000000, 0x4137c000},
{'q', 0x00000000, 0x41866000},
{0, 0x40e38000, 0x41e51000},
{'q', 0x40e38000, 0x413d6000},
{0, 0x41b22000, 0x413d6000},
{'q', 0x41660000, 0x00000000},
{0, 0x41a23000, 0xc0d70000},
{'q', 0x40bcc000, 0xc0d70000},
{0, 0x40fb4000, 0xc1960000},
{'[', 0x00290043, 0xfffffffd},/*kerning  C ) : -0.011719 */
{'[', 0x00540043, 0xfffffffd},/*kerning  C T : -0.011719 */
{'[', 0x005d0043, 0xffffffff},/*kerning  C ] : -0.003906 */
{'[', 0x007d0043, 0xfffffffe},/*kerning  C } : -0.007812 */
{'@', 0x00000044, 0x00006900},/*        D        x-advance: 105.000000 */
{'M', 0x41534000, 0x80000000},
{'4', 0xfc720000, 0x00000101},
{'q', 0x41b36000, 0x00000000},
{0, 0x42112800, 0x4166a000},
{'9', 0x0073006e, 0x0138006e},
{'l', 0x00000000, 0x40d98000},
{'q', 0x00000000, 0x41c58000},
{0, 0xc15f2000, 0x421c4000},
{'9', 0x0073ff91, 0x0073fed4},
{'l', 0xc1f64000, 0x80000000},
{'M', 0x41e24000, 0xc2cad000},
{'4', 0x02c90000, 0x0000007d},
{'q', 0x4191a000, 0x00000000},
{0, 0x41db6000, 0xc1340000},
{'9', 0xffa60049, 0xff110049},
{'l', 0x00000000, 0xc0de8000},
{'q', 0x00000000, 0xc19ce000},
{0, 0xc1138000, 0xc1f28000},
{'q', 0xc1138000, 0xc12b4000},
{0, 0xc1d02000, 0xc12b4000},
{'l', 0xc188e000, 0x00000000},
{'[', 0x002c0044, 0xfffffff3},/*kerning  D , : -0.050781 */
{'[', 0x002e0044, 0xfffffff3},/*kerning  D . : -0.050781 */
{'[', 0x00410044, 0xfffffffe},/*kerning  D A : -0.007812 */
{'[', 0x00540044, 0xfffffffd},/*kerning  D T : -0.011719 */
{'[', 0x00560044, 0xfffffffe},/*kerning  D V : -0.007812 */
{'[', 0x00580044, 0xfffffffe},/*kerning  D X : -0.007812 */
{'[', 0x00590044, 0xfffffffb},/*kerning  D Y : -0.019531 */
{'[', 0x005a0044, 0xfffffffe},/*kerning  D Z : -0.007812 */
{'@', 0x00000045, 0x00005af0},/*        E        x-advance: 90.937500 */
{'M', 0x41534000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x428ef800, 0x00000000},
{'l', 0x00000000, 0x41458000},
{'l', 0xc261a000, 0x00000000},
{'l', 0x00000000, 0x42124000},
{'l', 0x4244e000, 0x00000000},
{'l', 0x00000000, 0x41444000},
{'l', 0xc244e000, 0x00000000},
{'l', 0x00000000, 0x42214000},
{'l', 0x4264c000, 0x00000000},
{'l', 0x00000000, 0x41444000},
{'l', 0xc2908800, 0x80000000},
{'[', 0x00540045, 0x00000002},/*kerning  E T : 0.007812 */
{'[', 0x00630045, 0xfffffffe},/*kerning  E c : -0.007812 */
{'[', 0x00640045, 0xfffffffe},/*kerning  E d : -0.007812 */
{'[', 0x00650045, 0xfffffffe},/*kerning  E e : -0.007812 */
{'[', 0x00660045, 0xfffffffe},/*kerning  E f : -0.007812 */
{'[', 0x00670045, 0xfffffffe},/*kerning  E g : -0.007812 */
{'[', 0x006f0045, 0xfffffffe},/*kerning  E o : -0.007812 */
{'[', 0x00710045, 0xfffffffe},/*kerning  E q : -0.007812 */
{'[', 0x00750045, 0xfffffffe},/*kerning  E u : -0.007812 */
{'[', 0x00760045, 0xfffffffd},/*kerning  E v : -0.011719 */
{'[', 0x00770045, 0xfffffffe},/*kerning  E w : -0.007812 */
{'[', 0x00790045, 0xfffffffd},/*kerning  E y : -0.011719 */
{'@', 0x00000046, 0x00005870},/*        F        x-advance: 88.437500 */
{'M', 0x41534000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x428cf000, 0x00000000},
{'l', 0x00000000, 0x41458000},
{'l', 0xc25d9000, 0x00000000},
{'l', 0x00000000, 0x421b5000},
{'l', 0x423ea000, 0x00000000},
{'l', 0x00000000, 0x41458000},
{'l', 0xc23ea000, 0x00000000},
{'l', 0x00000000, 0x4248f000},
{'l', 0xc1714000, 0x80000000},
{'[', 0x002c0046, 0xffffffe3},/*kerning  F , : -0.113281 */
{'[', 0x002e0046, 0xffffffe3},/*kerning  F . : -0.113281 */
{'[', 0x00410046, 0xffffffeb},/*kerning  F A : -0.082031 */
{'[', 0x004a0046, 0xffffffdf},/*kerning  F J : -0.128906 */
{'[', 0x00540046, 0x00000002},/*kerning  F T : 0.007812 */
{'[', 0x00610046, 0xfffffffc},/*kerning  F a : -0.015625 */
{'[', 0x00630046, 0xfffffffe},/*kerning  F c : -0.007812 */
{'[', 0x00640046, 0xfffffffe},/*kerning  F d : -0.007812 */
{'[', 0x00650046, 0xfffffffe},/*kerning  F e : -0.007812 */
{'[', 0x00670046, 0xfffffffe},/*kerning  F g : -0.007812 */
{'[', 0x006f0046, 0xfffffffe},/*kerning  F o : -0.007812 */
{'[', 0x00710046, 0xfffffffe},/*kerning  F q : -0.007812 */
{'[', 0x00720046, 0xfffffffd},/*kerning  F r : -0.011719 */
{'[', 0x00750046, 0xfffffffe},/*kerning  F u : -0.007812 */
{'[', 0x00760046, 0xfffffffd},/*kerning  F v : -0.011719 */
{'[', 0x00790046, 0xfffffffd},/*kerning  F y : -0.011719 */
{'@', 0x00000047, 0x00006cfc},/*        G        x-advance: 108.984375 */
{'M', 0x42c28800, 0xc2629000},
{'l', 0x00000000, 0x42269000},
{'q', 0xc04d0000, 0x40960000},
{0, 0xc1430000, 0x412a0000},
{'q', 0xc10fc000, 0x40be0000},
{0, 0xc1e06000, 0x40be0000},
{'q', 0xc1a64000, 0x00000000},
{0, 0xc2089000, 0xc1642000},
{'9', 0xff8eff96, 0xfec2ff96},
{'l', 0x00000000, 0xc10d4000},
{'q', 0x00000000, 0xc1cb2000},
{0, 0x413e0000, 0xc21ec000},
{'q', 0x413e0000, 0xc164c000},
{0, 0x4207a000, 0xc164c000},
{'q', 0x419c4000, 0x00000000},
{0, 0x41ed3000, 0x411d8000},
{'9', 0x004e0050, 0x00c80062},
{'l', 0xc1714000, 0x00000000},
{'q', 0xbfc80000, 0xc1138000},
{0, 0xc0f64000, 0xc17dc000},
{'q', 0xc0c44000, 0xc0d48000},
{0, 0xc1997000, 0xc0d48000},
{'q', 0xc17b4000, 0x00000000},
{0, 0xc1b90000, 0x4130e000},
{'9', 0x0058ffc5, 0x00f0ffc4},
{'l', 0x00000000, 0x41160000},
{'q', 0x00000000, 0x419d8000},
{0, 0x410e8000, 0x41f5f000},
{'q', 0x410e8000, 0x4130e000},
{0, 0x41bc2000, 0x4130e000},
{'q', 0x4141c000, 0x00000000},
{0, 0x418c0000, 0xc0368000},
{'9', 0xffea002b, 0xffd5003d},
{'l', 0x00000000, 0xc1cbc000},
{'l', 0xc1d2a000, 0x00000000},
{'l', 0x00000000, 0xc1430000},
{'l', 0x42255000, 0x00000000},
{'@', 0x00000048, 0x00007224},/*        H        x-advance: 114.140625 */
{'M', 0x42ab4000, 0x80000000},
{'l', 0x00000000, 0xc2525000},
{'l', 0xc2656000, 0x00000000},
{'l', 0x00000000, 0x42525000},
{'l', 0xc1714000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x41714000, 0x00000000},
{'l', 0x00000000, 0x4243a000},
{'l', 0x42656000, 0x00000000},
{'l', 0x00000000, 0xc243a000},
{'l', 0x41700000, 0x00000000},
{'l', 0x00000000, 0x42e38000},
{'l', 0xc1700000, 0x80000000},
{'[', 0x00410048, 0x00000002},/*kerning  H A : 0.007812 */
{'[', 0x00540048, 0xfffffffd},/*kerning  H T : -0.011719 */
{'[', 0x00580048, 0x00000002},/*kerning  H X : 0.007812 */
{'[', 0x00590048, 0xfffffffd},/*kerning  H Y : -0.011719 */
{'@', 0x00000049, 0x00002b84},/*        I        x-advance: 43.515625 */
{'M', 0x41eb0000, 0xc2e38000},
{'l', 0x00000000, 0x42e38000},
{'l', 0xc1714000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x41714000, 0x00000000},
{'[', 0x00410049, 0x00000002},/*kerning  I A : 0.007812 */
{'[', 0x00540049, 0xfffffffd},/*kerning  I T : -0.011719 */
{'[', 0x00580049, 0x00000002},/*kerning  I X : 0.007812 */
{'[', 0x00590049, 0xfffffffd},/*kerning  I Y : -0.011719 */
{'@', 0x0000004a, 0x00005848},/*        J        x-advance: 88.281250 */
{'M', 0x4273c000, 0xc2e38000},
{'4', 0x00000078, 0x02840000},
{'q', 0x00000000, 0x41870000},
{0, 0xc121e000, 0x41ce9000},
{'q', 0xc121e000, 0x410f2000},
{0, 0xc1cdf000, 0x410f2000},
{'q', 0xc17b4000, 0x00000000},
{0, 0xc1ce9000, 0xc1020000},
{'9', 0xffbfffb0, 0xff39ffb0},
{'l', 0x41714000, 0x00000000},
{'q', 0x00000000, 0x412c8000},
{0, 0x40ba4000, 0x417c8000},
{'q', 0x40ba4000, 0x40a00000},
{0, 0x4170a000, 0x40a00000},
{'q', 0x410e8000, 0x00000000},
{0, 0x416e2000, 0xc0b68000},
{'q', 0x40bf4000, 0xc0b68000},
{0, 0x40bf4000, 0xc1866000},
{'l', 0x00000000, 0xc2a11800},
{'[', 0x0041004a, 0xfffffffe},/*kerning  J A : -0.007812 */
{'@', 0x0000004b, 0x00006464},/*        K        x-advance: 100.390625 */
{'M', 0x42a4d800, 0x80000000},
{'l', 0xc220f000, 0xc253e000},
{'l', 0xc15e8000, 0x41674000},
{'l', 0x00000000, 0x421a1000},
{'l', 0xc1714000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x41714000, 0x00000000},
{'l', 0x00000000, 0x42606000},
{'l', 0x424a3000, 0xc2606000},
{'l', 0x41910000, 0x00000000},
{'l', 0xc232c000, 0x4248f000},
{'l', 0x42408000, 0x427e1000},
{'l', 0xc18fc000, 0x80000000},
{'[', 0x002d004b, 0xfffffff8},/*kerning  K - : -0.031250 */
{'[', 0x0043004b, 0xfffffffd},/*kerning  K C : -0.011719 */
{'[', 0x0047004b, 0xfffffffd},/*kerning  K G : -0.011719 */
{'[', 0x004f004b, 0xfffffffd},/*kerning  K O : -0.011719 */
{'[', 0x0051004b, 0xfffffffd},/*kerning  K Q : -0.011719 */
{'[', 0x0063004b, 0xfffffffd},/*kerning  K c : -0.011719 */
{'[', 0x0064004b, 0xfffffffd},/*kerning  K d : -0.011719 */
{'[', 0x0065004b, 0xfffffffd},/*kerning  K e : -0.011719 */
{'[', 0x0067004b, 0xfffffffd},/*kerning  K g : -0.011719 */
{'[', 0x006d004b, 0xfffffffe},/*kerning  K m : -0.007812 */
{'[', 0x006e004b, 0xfffffffe},/*kerning  K n : -0.007812 */
{'[', 0x006f004b, 0xfffffffd},/*kerning  K o : -0.011719 */
{'[', 0x0070004b, 0xfffffffe},/*kerning  K p : -0.007812 */
{'[', 0x0071004b, 0xfffffffd},/*kerning  K q : -0.011719 */
{'[', 0x0075004b, 0xfffffffe},/*kerning  K u : -0.007812 */
{'[', 0x0076004b, 0xfffffffb},/*kerning  K v : -0.019531 */
{'[', 0x0077004b, 0xfffffff8},/*kerning  K w : -0.031250 */
{'[', 0x0079004b, 0xfffffffb},/*kerning  K y : -0.019531 */
{'@', 0x0000004c, 0x0000562c},/*        L        x-advance: 86.171875 */
{'M', 0x42a46000, 0xc1444000},
{'l', 0x00000000, 0x41444000},
{'l', 0xc289f800, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x41714000, 0x00000000},
{'l', 0x00000000, 0x42caf800},
{'l', 0x4257a000, 0x00000000},
{'[', 0x0022004c, 0xffffffd6},/*kerning  L " : -0.164062 */
{'[', 0x0027004c, 0xffffffd6},/*kerning  L ' : -0.164062 */
{'[', 0x0041004c, 0x00000002},/*kerning  L A : 0.007812 */
{'[', 0x0043004c, 0xfffffff8},/*kerning  L C : -0.031250 */
{'[', 0x0047004c, 0xfffffff8},/*kerning  L G : -0.031250 */
{'[', 0x004f004c, 0xfffffff8},/*kerning  L O : -0.031250 */
{'[', 0x0051004c, 0xfffffff8},/*kerning  L Q : -0.031250 */
{'[', 0x0054004c, 0xffffffdd},/*kerning  L T : -0.136719 */
{'[', 0x0055004c, 0xfffffffa},/*kerning  L U : -0.023438 */
{'[', 0x0056004c, 0xffffffea},/*kerning  L V : -0.085938 */
{'[', 0x0057004c, 0xffffffee},/*kerning  L W : -0.070312 */
{'[', 0x0059004c, 0xffffffe2},/*kerning  L Y : -0.117188 */
{'[', 0x0075004c, 0xfffffffb},/*kerning  L u : -0.019531 */
{'[', 0x0076004c, 0xfffffff0},/*kerning  L v : -0.062500 */
{'[', 0x0077004c, 0xfffffff5},/*kerning  L w : -0.042969 */
{'[', 0x0079004c, 0xfffffff0},/*kerning  L y : -0.062500 */
{'@', 0x0000004d, 0x00008bb0},/*        M        x-advance: 139.687500 */
{'M', 0x428bb000, 0xc1a82000},
{'l', 0x4214c000, 0xc2b97800},
{'l', 0x419b0000, 0x00000000},
{'l', 0x00000000, 0x42e38000},
{'l', 0xc1700000, 0x80000000},
{'l', 0x00000000, 0xc2318000},
{'l', 0x3fbe0000, 0xc23e5000},
{'l', 0xc215b000, 0x42b7e800},
{'l', 0xc1368000, 0x80000000},
{'l', 0xc2156000, 0xc2b83800},
{'l', 0x3fbe0000, 0x423ef000},
{'l', 0x00000000, 0x42318000},
{'l', 0xc1700000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x419b0000, 0x00000000},
{'l', 0x42151000, 0x42b97800},
{'[', 0x0041004d, 0x00000002},/*kerning  M A : 0.007812 */
{'[', 0x0054004d, 0xfffffffd},/*kerning  M T : -0.011719 */
{'[', 0x0058004d, 0x00000002},/*kerning  M X : 0.007812 */
{'[', 0x0059004d, 0xfffffffd},/*kerning  M Y : -0.011719 */
{'@', 0x0000004e, 0x00007224},/*        N        x-advance: 114.140625 */
{'M', 0x42c96800, 0xc2e38000},
{'l', 0x00000000, 0x42e38000},
{'l', 0xc1728000, 0x80000000},
{'l', 0xc2651000, 0xc2af7800},
{'l', 0x00000000, 0x42af7800},
{'l', 0xc1714000, 0x80000000},
{'l', 0x00000000, 0xc2e38000},
{'l', 0x41714000, 0x00000000},
{'l', 0x42660000, 0x42aff000},
{'l', 0x00000000, 0xc2aff000},
{'l', 0x416ec000, 0x00000000},
{'[', 0x0041004e, 0x00000002},/*kerning  N A : 0.007812 */
{'[', 0x0054004e, 0xfffffffd},/*kerning  N T : -0.011719 */
{'[', 0x0058004e, 0x00000002},/*kerning  N X : 0.007812 */
{'[', 0x0059004e, 0xfffffffd},/*kerning  N Y : -0.011719 */
{'@', 0x0000004f, 0x00006e14},/*        O        x-advance: 110.078125 */
{'M', 0x42c99000, 0xc2552000},
{'q', 0x00000000, 0x41cd0000},
{0, 0xc146c000, 0x4220f000},
{'q', 0xc146c000, 0x4169c000},
{0, 0xc204d000, 0x4169c000},
{'q', 0xc1a1e000, 0x00000000},
{0, 0xc2043000, 0xc169c000},
{'9', 0xff8cff9a, 0xfebfff9a},
{'l', 0x00000000, 0xc0e60000},
{'q', 0x00000000, 0xc1cc6000},
{0, 0x414bc000, 0xc220c800},
{'q', 0x414bc000, 0xc16a6000},
{0, 0x4203e000, 0xc16a6000},
{'q', 0x41a50000, 0x00000000},
{0, 0x42045800, 0x4166a000},
{'9', 0x00730063, 0x013b0065},
{'l', 0x00000000, 0x41020000},
{'M', 0x42abb800, 0xc2728000},
{'q', 0x00000000, 0xc1a28000},
{0, 0xc1020000, 0xc1f8c000},
{'q', 0xc1020000, 0xc12c8000},
{0, 0xc1b5e000, 0xc12c8000},
{'q', 0xc15fc000, 0x00000000},
{0, 0xc1b31000, 0x412c8000},
{'9', 0x0056ffbd, 0x00f8ffbd},
{'l', 0x00000000, 0x40eb0000},
{'q', 0x00000000, 0x41a3c000},
{0, 0x4107a000, 0x41fa5000},
{'q', 0x4107a000, 0x412d2000},
{0, 0x41b3b000, 0x412d2000},
{'q', 0x416b0000, 0x00000000},
{0, 0x41b59000, 0xc12d2000},
{'q', 0x41002000, 0xc12d2000},
{0, 0x41002000, 0xc1fa5000},
{'l', 0x00000000, 0xc0eb0000},
{'[', 0x002c004f, 0xfffffff3},/*kerning  O , : -0.050781 */
{'[', 0x002e004f, 0xfffffff3},/*kerning  O . : -0.050781 */
{'[', 0x0041004f, 0xfffffffe},/*kerning  O A : -0.007812 */
{'[', 0x0054004f, 0xfffffffd},/*kerning  O T : -0.011719 */
{'[', 0x0056004f, 0xfffffffe},/*kerning  O V : -0.007812 */
{'[', 0x0058004f, 0xfffffffe},/*kerning  O X : -0.007812 */
{'[', 0x0059004f, 0xfffffffb},/*kerning  O Y : -0.019531 */
{'[', 0x005a004f, 0xfffffffe},/*kerning  O Z : -0.007812 */
{'@', 0x00000050, 0x000064f0},/*        P        x-advance: 100.937500 */
{'M', 0x41e24000, 0xc2327000},
{'l', 0x00000000, 0x42327000},
{'4', 0x0000ff88, 0xfc720000},
{'l', 0x4227d000, 0x00000000},
{'q', 0x419b0000, 0x00000000},
{0, 0x41ed3000, 0x411c4000},
{'q', 0x41246000, 0x411c4000},
{0, 0x41246000, 0x41c76000},
{'q', 0x00000000, 0x4183e000},
{0, 0xc1246000, 0x41cbc000},
{'9', 0x0047ffae, 0x0047ff13},
{'l', 0xc1d70000, 0x00000000},
{'M', 0x41e24000, 0xc2cad000},
{'4', 0x01640000, 0x000000d7},
{'q', 0x41548000, 0x00000000},
{0, 0x41988000, 0xc0c58000},
{'q', 0x40b90000, 0xc0c58000},
{0, 0x40b90000, 0xc17dc000},
{'q', 0x00000000, 0xc10c0000},
{0, 0xc0b90000, 0xc17a0000},
{'q', 0xc0b90000, 0xc0dc0000},
{0, 0xc1988000, 0xc0dc0000},
{'l', 0xc1d70000, 0x00000000},
{'[', 0x002c0050, 0xffffffd7},/*kerning  P , : -0.160156 */
{'[', 0x002e0050, 0xffffffd7},/*kerning  P . : -0.160156 */
{'[', 0x00410050, 0xffffffef},/*kerning  P A : -0.066406 */
{'[', 0x004a0050, 0xffffffe7},/*kerning  P J : -0.097656 */
{'[', 0x00580050, 0xfffffffd},/*kerning  P X : -0.011719 */
{'[', 0x005a0050, 0xfffffffd},/*kerning  P Z : -0.011719 */
{'[', 0x00610050, 0xffffffff},/*kerning  P a : -0.003906 */
{'[', 0x00630050, 0xffffffff},/*kerning  P c : -0.003906 */
{'[', 0x00640050, 0xffffffff},/*kerning  P d : -0.003906 */
{'[', 0x00650050, 0xffffffff},/*kerning  P e : -0.003906 */
{'[', 0x00670050, 0xffffffff},/*kerning  P g : -0.003906 */
{'[', 0x006f0050, 0xffffffff},/*kerning  P o : -0.003906 */
{'[', 0x00710050, 0xffffffff},/*kerning  P q : -0.003906 */
{'[', 0x00740050, 0x00000001},/*kerning  P t : 0.003906 */
{'[', 0x00760050, 0x00000001},/*kerning  P v : 0.003906 */
{'[', 0x00790050, 0x00000001},/*kerning  P y : 0.003906 */
{'@', 0x00000051, 0x00006e14},/*        Q        x-advance: 110.078125 */
{'M', 0x42c8f000, 0x411d8000},
{'4', 0x004bffaf, 0xff68ff41},
{'q', 0xc0b40000, 0x3fb40000},
{0, 0xc13e0000, 0x3fb40000},
{'q', 0xc1a1e000, 0x00000000},
{0, 0xc2043000, 0xc169c000},
{'9', 0xff8cff9a, 0xfebfff9a},
{'l', 0x00000000, 0xc0e60000},
{'q', 0x00000000, 0xc1cc6000},
{0, 0x414bc000, 0xc220c800},
{'q', 0x414bc000, 0xc16a6000},
{0, 0x4203e000, 0xc16a6000},
{'q', 0x41a50000, 0x00000000},
{0, 0x42045800, 0x4166a000},
{'9', 0x00730063, 0x013b0065},
{'l', 0x00000000, 0x41020000},
{'q', 0x00000000, 0x41834000},
{0, 0xc0a78000, 0x41e33000},
{'9', 0x005fffd7, 0x0095ff8c},
{'l', 0x41a1e000, 0x41802000},
{'M', 0x42aa5000, 0xc2728000},
{'q', 0x00000000, 0xc1a28000},
{0, 0xc1020000, 0xc1f8c000},
{'q', 0xc1020000, 0xc12c8000},
{0, 0xc1b5e000, 0xc12c8000},
{'q', 0xc15fc000, 0x00000000},
{0, 0xc1b31000, 0x412c8000},
{'9', 0x0056ffbd, 0x00f8ffbd},
{'l', 0x00000000, 0x40eb0000},
{'q', 0x00000000, 0x41a3c000},
{0, 0x4107a000, 0x41fa5000},
{'q', 0x4107a000, 0x412d2000},
{0, 0x41b3b000, 0x412d2000},
{'q', 0x416b0000, 0x00000000},
{0, 0x41b59000, 0xc12d2000},
{'q', 0x41002000, 0xc12d2000},
{0, 0x41002000, 0xc1fa5000},
{'l', 0x00000000, 0xc0eb0000},
{'[', 0x00540051, 0xfffffffb},/*kerning  Q T : -0.019531 */
{'[', 0x00560051, 0xfffffffd},/*kerning  Q V : -0.011719 */
{'[', 0x00570051, 0xfffffffe},/*kerning  Q W : -0.007812 */
{'[', 0x00590051, 0xfffffffc},/*kerning  Q Y : -0.015625 */
{'@', 0x00000052, 0x00006298},/*        R        x-advance: 98.593750 */
{'M', 0x429f6000, 0x80000000},
{'l', 0xc1c58000, 0xc2386000},
{'l', 0xc1d5c000, 0x00000000},
{'l', 0x00000000, 0x42386000},
{'4', 0x0000ff88, 0xfc720000},
{'l', 0x4216a000, 0x00000000},
{'q', 0x41992000, 0x00000000},
{0, 0x41ebf000, 0x410c0000},
{'q', 0x4125a000, 0x410c0000},
{0, 0x4125a000, 0x41cb2000},
{'q', 0x00000000, 0x412a0000},
{0, 0xc0b7c000, 0x41947000},
{'9', 0x003fffd3, 0x005eff81},
{'4', 0x018200d5, 0x00070000},
{'l', 0xc180c000, 0x80000000},
{'M', 0x41e24000, 0xc2cad000},
{'4', 0x01580000, 0x000000b8},
{'q', 0x4141c000, 0x00000000},
{0, 0x4191f000, 0xc0c6c000},
{'q', 0x40c44000, 0xc0c6c000},
{0, 0x40c44000, 0xc170a000},
{'q', 0x00000000, 0xc11d8000},
{0, 0xc0bcc000, 0xc17d2000},
{'q', 0xc0bcc000, 0xc0bf4000},
{0, 0xc1979000, 0xc0bf4000},
{'l', 0xc1b4a000, 0x00000000},
{'[', 0x00540052, 0xfffffff6},/*kerning  R T : -0.039062 */
{'[', 0x00560052, 0xfffffffe},/*kerning  R V : -0.007812 */
{'[', 0x00590052, 0xfffffffa},/*kerning  R Y : -0.023438 */
{'@', 0x00000053, 0x00005f00},/*        S        x-advance: 95.000000 */
{'M', 0x4293a800, 0xc1e60000},
{'q', 0x00000000, 0xc0fa0000},
{0, 0xc0a8c000, 0xc1480000},
{'q', 0xc0a8c000, 0xc0960000},
{0, 0xc1b13000, 0xc11a6000},
{'q', 0xc1870000, 0xc09ec000},
{0, 0xc1d57000, 0xc14a8000},
{'q', 0xc11ce000, 0xc0f64000},
{0, 0xc11ce000, 0xc1a69000},
{'q', 0x00000000, 0xc1548000},
{0, 0x41296000, 0xc1b0e000},
{'q', 0x41296000, 0xc10d4000},
{0, 0x41e0b000, 0xc10d4000},
{'q', 0x4199c000, 0x00000000},
{0, 0x41ed3000, 0x4128c000},
{'9', 0x00540053, 0x00be0053},
{'l', 0xc1700000, 0x00000000},
{'q', 0x00000000, 0xc1188000},
{0, 0xc0c58000, 0xc17c8000},
{'q', 0xc0c58000, 0xc0c80000},
{0, 0xc1974000, 0xc0c80000},
{'q', 0xc1408000, 0x00000000},
{0, 0xc18e8000, 0x40a8c000},
{'8', 0x69d22ad2, 0x5e313800},
{'q', 0x40c44000, 0x40974000},
{0, 0x419fb000, 0x410a2000},
{'q', 0x419e2000, 0x40b18000},
{0, 0x41e6a000, 0x415b6000},
{'q', 0x41110000, 0x4102a000},
{0, 0x41110000, 0x41ab9000},
{'q', 0x00000000, 0x415e8000},
{0, 0xc12dc000, 0x41b18000},
{'q', 0xc12dc000, 0x41048000},
{0, 0xc1e6a000, 0x41048000},
{'q', 0xc1278000, 0x00000000},
{0, 0xc1a2d000, 0xc07a0000},
{'q', 0xc11e2000, 0xc07a0000},
{0, 0xc1820000, 0xc139a000},
{'9', 0xffc3ffce, 0xff68ffce},
{'l', 0x41700000, 0x00000000},
{'q', 0x00000000, 0x413a4000},
{0, 0x41084000, 0x4187a000},
{'q', 0x41084000, 0x40aa0000},
{0, 0x419ba000, 0x40aa0000},
{'q', 0x413cc000, 0x00000000},
{0, 0x41915000, 0xc09c4000},
{'q', 0x40cbc000, 0xc09c4000},
{0, 0x40cbc000, 0xc152a000},
{'@', 0x00000054, 0x00005f78},/*        T        x-advance: 95.468750 */
{'M', 0x407a0000, 0xc2cad000},
{'l', 0x00000000, 0xc1458000},
{'l', 0x42afc800, 0x00000000},
{'l', 0x00000000, 0x41458000},
{'l', 0xc2124000, 0x00000000},
{'l', 0x00000000, 0x42cad000},
{'l', 0xc16d8000, 0x80000000},
{'l', 0x00000000, 0xc2cad000},
{'l', 0xc211f000, 0x00000000},
{'[', 0x00200054, 0xfffffffb},/*kerning  T   : -0.019531 */
{'[', 0x002c0054, 0xffffffe5},/*kerning  T , : -0.105469 */
{'[', 0x002d0054, 0xffffffe3},/*kerning  T - : -0.113281 */
{'[', 0x002e0054, 0xffffffe5},/*kerning  T . : -0.105469 */
{'[', 0x00410054, 0xfffffff6},/*kerning  T A : -0.039062 */
{'[', 0x00430054, 0xfffffffd},/*kerning  T C : -0.011719 */
{'[', 0x00470054, 0xfffffffd},/*kerning  T G : -0.011719 */
{'[', 0x004a0054, 0xffffffe2},/*kerning  T J : -0.117188 */
{'[', 0x004f0054, 0xfffffffd},/*kerning  T O : -0.011719 */
{'[', 0x00510054, 0xfffffffd},/*kerning  T Q : -0.011719 */
{'[', 0x00530054, 0xfffffffe},/*kerning  T S : -0.007812 */
{'[', 0x00540054, 0x00000002},/*kerning  T T : 0.007812 */
{'[', 0x00560054, 0x00000002},/*kerning  T V : 0.007812 */
{'[', 0x00570054, 0x00000001},/*kerning  T W : 0.003906 */
{'[', 0x00590054, 0x00000002},/*kerning  T Y : 0.007812 */
{'[', 0x00610054, 0xfffffff2},/*kerning  T a : -0.054688 */
{'[', 0x00630054, 0xfffffff4},/*kerning  T c : -0.046875 */
{'[', 0x00640054, 0xfffffff4},/*kerning  T d : -0.046875 */
{'[', 0x00650054, 0xfffffff4},/*kerning  T e : -0.046875 */
{'[', 0x00670054, 0xfffffff4},/*kerning  T g : -0.046875 */
{'[', 0x006d0054, 0xfffffff3},/*kerning  T m : -0.050781 */
{'[', 0x006e0054, 0xfffffff3},/*kerning  T n : -0.050781 */
{'[', 0x006f0054, 0xfffffff4},/*kerning  T o : -0.046875 */
{'[', 0x00700054, 0xfffffff3},/*kerning  T p : -0.050781 */
{'[', 0x00710054, 0xfffffff4},/*kerning  T q : -0.046875 */
{'[', 0x00720054, 0xfffffff7},/*kerning  T r : -0.035156 */
{'[', 0x00730054, 0xfffffff2},/*kerning  T s : -0.054688 */
{'[', 0x00750054, 0xfffffff4},/*kerning  T u : -0.046875 */
{'[', 0x00760054, 0xfffffff7},/*kerning  T v : -0.035156 */
{'[', 0x00770054, 0xfffffff9},/*kerning  T w : -0.027344 */
{'[', 0x00780054, 0xfffffff7},/*kerning  T x : -0.035156 */
{'[', 0x00790054, 0xfffffff7},/*kerning  T y : -0.035156 */
{'[', 0x007a0054, 0xfffffff9},/*kerning  T z : -0.027344 */
{'@', 0x00000055, 0x000067c0},/*        U        x-advance: 103.750000 */
{'M', 0x429c9000, 0xc2e38000},
{'4', 0x00000078, 0x02670000},
{'q', 0x00000000, 0x4199c000},
{0, 0xc1458000, 0x41e65000},
{'q', 0xc1458000, 0x41192000},
{0, 0xc1e74000, 0x41192000},
{'q', 0xc18b6000, 0x00000000},
{0, 0xc1ea1000, 0xc1192000},
{'9', 0xffb4ffa2, 0xff1affa2},
{'4', 0xfd990000, 0x00000077},
{'l', 0x00000000, 0x4299e800},
{'q', 0x00000000, 0x4155c000},
{0, 0x40e74000, 0x419dd000},
{'q', 0x40e74000, 0x40cbc000},
{0, 0x41979000, 0x40cbc000},
{'q', 0x413e0000, 0x00000000},
{0, 0x41988000, 0xc0cbc000},
{'q', 0x40e60000, 0xc0cbc000},
{0, 0x40e60000, 0xc19dd000},
{'l', 0x00000000, 0xc299e800},
{'[', 0x00410055, 0xfffffffe},/*kerning  U A : -0.007812 */
{'@', 0x00000056, 0x000065e0},/*        V        x-advance: 101.875000 */
{'M', 0x42c78800, 0xc2e38000},
{'l', 0xc2287000, 0x42e38000},
{'l', 0xc155c000, 0x80000000},
{'l', 0xc2282000, 0xc2e38000},
{'l', 0x4182a000, 0x00000000},
{'l', 0x42011000, 0x42bb8000},
{'l', 0x42025000, 0xc2bb8000},
{'l', 0x4182a000, 0x00000000},
{'[', 0x00290056, 0x00000002},/*kerning  V ) : 0.007812 */
{'[', 0x002c0056, 0xffffffe4},/*kerning  V , : -0.109375 */
{'[', 0x002d0056, 0xfffffffc},/*kerning  V - : -0.015625 */
{'[', 0x002e0056, 0xffffffe4},/*kerning  V . : -0.109375 */
{'[', 0x00410056, 0xfffffff7},/*kerning  V A : -0.035156 */
{'[', 0x00430056, 0xffffffff},/*kerning  V C : -0.003906 */
{'[', 0x00470056, 0xffffffff},/*kerning  V G : -0.003906 */
{'[', 0x004f0056, 0xffffffff},/*kerning  V O : -0.003906 */
{'[', 0x00510056, 0xffffffff},/*kerning  V Q : -0.003906 */
{'[', 0x005d0056, 0x00000002},/*kerning  V ] : 0.007812 */
{'[', 0x00610056, 0xfffffffb},/*kerning  V a : -0.019531 */
{'[', 0x00630056, 0xfffffffb},/*kerning  V c : -0.019531 */
{'[', 0x00640056, 0xfffffffb},/*kerning  V d : -0.019531 */
{'[', 0x00650056, 0xfffffffb},/*kerning  V e : -0.019531 */
{'[', 0x00670056, 0xfffffffb},/*kerning  V g : -0.019531 */
{'[', 0x006f0056, 0xfffffffb},/*kerning  V o : -0.019531 */
{'[', 0x00710056, 0xfffffffb},/*kerning  V q : -0.019531 */
{'[', 0x00720056, 0xfffffffd},/*kerning  V r : -0.011719 */
{'[', 0x00750056, 0xfffffffd},/*kerning  V u : -0.011719 */
{'[', 0x00760056, 0xffffffff},/*kerning  V v : -0.003906 */
{'[', 0x00790056, 0xffffffff},/*kerning  V y : -0.003906 */
{'[', 0x007d0056, 0x00000002},/*kerning  V } : 0.007812 */
{'@', 0x00000057, 0x00008df4},/*        W        x-advance: 141.953125 */
{'M', 0x430a8400, 0xc2e38000},
{'l', 0xc1dc0000, 0x42e38000},
{'l', 0xc15ac000, 0x80000000},
{'l', 0xc1bb8000, 0xc2a5c800},
{'l', 0xbfe60000, 0xc10ac000},
{'l', 0xbfe60000, 0x410ac000},
{'l', 0xc1c26000, 0x42a5c800},
{'l', 0xc15ac000, 0x80000000},
{'l', 0xc1dca000, 0xc2e38000},
{'l', 0x41700000, 0x00000000},
{'l', 0x418fc000, 0x429ba000},
{'l', 0x40110000, 0x41714000},
{'l', 0x40480000, 0xc1584000},
{'l', 0x41b4a000, 0xc29ec000},
{'l', 0x41494000, 0x00000000},
{'l', 0x41afa000, 0x429ec000},
{'l', 0x404d0000, 0x415d4000},
{'l', 0x401b0000, 0xc1778000},
{'l', 0x418ca000, 0xc29b7800},
{'l', 0x41714000, 0x00000000},
{'[', 0x00290057, 0x00000001},/*kerning  W ) : 0.003906 */
{'[', 0x002c0057, 0xfffffff1},/*kerning  W , : -0.058594 */
{'[', 0x002d0057, 0xfffffff9},/*kerning  W - : -0.027344 */
{'[', 0x002e0057, 0xfffffff1},/*kerning  W . : -0.058594 */
{'[', 0x00410057, 0xfffffffb},/*kerning  W A : -0.019531 */
{'[', 0x00540057, 0x00000001},/*kerning  W T : 0.003906 */
{'[', 0x005d0057, 0x00000001},/*kerning  W ] : 0.003906 */
{'[', 0x00610057, 0xfffffffc},/*kerning  W a : -0.015625 */
{'[', 0x00630057, 0xfffffffd},/*kerning  W c : -0.011719 */
{'[', 0x00640057, 0xfffffffd},/*kerning  W d : -0.011719 */
{'[', 0x00650057, 0xfffffffd},/*kerning  W e : -0.011719 */
{'[', 0x00670057, 0xfffffffd},/*kerning  W g : -0.011719 */
{'[', 0x006f0057, 0xfffffffd},/*kerning  W o : -0.011719 */
{'[', 0x00710057, 0xfffffffd},/*kerning  W q : -0.011719 */
{'[', 0x00720057, 0xfffffffe},/*kerning  W r : -0.007812 */
{'[', 0x00750057, 0xfffffffe},/*kerning  W u : -0.007812 */
{'[', 0x007d0057, 0x00000001},/*kerning  W } : 0.003906 */
{'@', 0x00000058, 0x00006450},/*        X        x-advance: 100.312500 */
{'M', 0x41b7c000, 0xc2e38000},
{'l', 0x41dac000, 0x422e6000},
{'l', 0x41dac000, 0xc22e6000},
{'l', 0x418ca000, 0x00000000},
{'l', 0xc20fc000, 0x42615000},
{'l', 0x42133000, 0x4265b000},
{'l', 0xc18de000, 0x80000000},
{'l', 0xc1e06000, 0xc231d000},
{'l', 0xc1e06000, 0x4231d000},
{'l', 0xc18de000, 0x80000000},
{'l', 0x42133000, 0xc265b000},
{'l', 0xc20fc000, 0xc2615000},
{'l', 0x418ca000, 0x00000000},
{'[', 0x002d0058, 0xfffffffb},/*kerning  X - : -0.019531 */
{'[', 0x00430058, 0xfffffffd},/*kerning  X C : -0.011719 */
{'[', 0x00470058, 0xfffffffd},/*kerning  X G : -0.011719 */
{'[', 0x004f0058, 0xfffffffd},/*kerning  X O : -0.011719 */
{'[', 0x00510058, 0xfffffffd},/*kerning  X Q : -0.011719 */
{'[', 0x00560058, 0x00000001},/*kerning  X V : 0.003906 */
{'[', 0x00630058, 0xfffffffd},/*kerning  X c : -0.011719 */
{'[', 0x00640058, 0xfffffffd},/*kerning  X d : -0.011719 */
{'[', 0x00650058, 0xfffffffd},/*kerning  X e : -0.011719 */
{'[', 0x00670058, 0xfffffffd},/*kerning  X g : -0.011719 */
{'[', 0x006f0058, 0xfffffffe},/*kerning  X o : -0.007812 */
{'[', 0x00710058, 0xfffffffd},/*kerning  X q : -0.011719 */
{'[', 0x00750058, 0xfffffffe},/*kerning  X u : -0.007812 */
{'[', 0x00760058, 0xfffffffd},/*kerning  X v : -0.011719 */
{'[', 0x00790058, 0xfffffffd},/*kerning  X y : -0.011719 */
{'@', 0x00000059, 0x00006018},/*        Y        x-advance: 96.093750 */
{'M', 0x41938000, 0xc2e38000},
{'l', 0x41ec4000, 0x42647000},
{'l', 0x41ece000, 0xc2647000},
{'l', 0x4188e000, 0x00000000},
{'l', 0xc21ce000, 0x428e8000},
{'l', 0x00000000, 0x422a0000},
{'l', 0xc1714000, 0x80000000},
{'l', 0x00000000, 0xc22a0000},
{'l', 0xc21ce000, 0xc28e8000},
{'l', 0x418a2000, 0x00000000},
{'[', 0x00260059, 0xfffffffd},/*kerning  Y & : -0.011719 */
{'[', 0x00290059, 0x00000002},/*kerning  Y ) : 0.007812 */
{'[', 0x002a0059, 0xfffffffa},/*kerning  Y * : -0.023438 */
{'[', 0x002c0059, 0xffffffe6},/*kerning  Y , : -0.101562 */
{'[', 0x002d0059, 0xfffffffa},/*kerning  Y - : -0.023438 */
{'[', 0x002e0059, 0xffffffe6},/*kerning  Y . : -0.101562 */
{'[', 0x00410059, 0xfffffff5},/*kerning  Y A : -0.042969 */
{'[', 0x00430059, 0xfffffffd},/*kerning  Y C : -0.011719 */
{'[', 0x00470059, 0xfffffffd},/*kerning  Y G : -0.011719 */
{'[', 0x004a0059, 0xfffffff4},/*kerning  Y J : -0.046875 */
{'[', 0x004f0059, 0xfffffffd},/*kerning  Y O : -0.011719 */
{'[', 0x00510059, 0xfffffffd},/*kerning  Y Q : -0.011719 */
{'[', 0x00530059, 0xfffffffe},/*kerning  Y S : -0.007812 */
{'[', 0x00540059, 0x00000002},/*kerning  Y T : 0.007812 */
{'[', 0x00550059, 0xfffffff4},/*kerning  Y U : -0.046875 */
{'[', 0x00560059, 0x00000002},/*kerning  Y V : 0.007812 */
{'[', 0x00570059, 0x00000002},/*kerning  Y W : 0.007812 */
{'[', 0x00580059, 0x00000001},/*kerning  Y X : 0.003906 */
{'[', 0x00590059, 0x00000002},/*kerning  Y Y : 0.007812 */
{'[', 0x005d0059, 0x00000002},/*kerning  Y ] : 0.007812 */
{'[', 0x00610059, 0xfffffff7},/*kerning  Y a : -0.035156 */
{'[', 0x00630059, 0xfffffff8},/*kerning  Y c : -0.031250 */
{'[', 0x00640059, 0xfffffff8},/*kerning  Y d : -0.031250 */
{'[', 0x00650059, 0xfffffff8},/*kerning  Y e : -0.031250 */
{'[', 0x00660059, 0xfffffffe},/*kerning  Y f : -0.007812 */
{'[', 0x00670059, 0xfffffff8},/*kerning  Y g : -0.031250 */
{'[', 0x006d0059, 0xfffffffb},/*kerning  Y m : -0.019531 */
{'[', 0x006e0059, 0xfffffffb},/*kerning  Y n : -0.019531 */
{'[', 0x006f0059, 0xfffffff8},/*kerning  Y o : -0.031250 */
{'[', 0x00700059, 0xfffffffb},/*kerning  Y p : -0.019531 */
{'[', 0x00710059, 0xfffffff8},/*kerning  Y q : -0.031250 */
{'[', 0x00720059, 0xfffffffb},/*kerning  Y r : -0.019531 */
{'[', 0x00730059, 0xfffffff9},/*kerning  Y s : -0.027344 */
{'[', 0x00740059, 0xfffffffe},/*kerning  Y t : -0.007812 */
{'[', 0x00750059, 0xfffffffc},/*kerning  Y u : -0.015625 */
{'[', 0x00760059, 0xfffffffe},/*kerning  Y v : -0.007812 */
{'[', 0x00780059, 0xfffffffe},/*kerning  Y x : -0.007812 */
{'[', 0x00790059, 0xfffffffe},/*kerning  Y y : -0.007812 */
{'[', 0x007a0059, 0xfffffffd},/*kerning  Y z : -0.011719 */
{'[', 0x007d0059, 0x00000002},/*kerning  Y } : 0.007812 */
{'@', 0x0000005a, 0x00005fdc},/*        Z        x-advance: 95.859375 */
{'M', 0x40fa0000, 0xc2cad000},
{'l', 0x00000000, 0xc1458000},
{'l', 0x429e7000, 0x00000000},
{'l', 0x00000000, 0x412f0000},
{'l', 0xc27a0000, 0x42b51800},
{'l', 0x42820000, 0x00000000},
{'l', 0x00000000, 0x41444000},
{'l', 0xc2a57800, 0x80000000},
{'l', 0x00000000, 0xc1340000},
{'l', 0x42796000, 0xc2b45000},
{'l', 0xc2755000, 0x00000000},
{'[', 0x0041005a, 0x00000001},/*kerning  Z A : 0.003906 */
{'[', 0x0043005a, 0xfffffffd},/*kerning  Z C : -0.011719 */
{'[', 0x0047005a, 0xfffffffd},/*kerning  Z G : -0.011719 */
{'[', 0x004f005a, 0xfffffffd},/*kerning  Z O : -0.011719 */
{'[', 0x0051005a, 0xfffffffd},/*kerning  Z Q : -0.011719 */
{'[', 0x0063005a, 0xfffffffe},/*kerning  Z c : -0.007812 */
{'[', 0x0064005a, 0xfffffffe},/*kerning  Z d : -0.007812 */
{'[', 0x0065005a, 0xfffffffe},/*kerning  Z e : -0.007812 */
{'[', 0x0067005a, 0xfffffffe},/*kerning  Z g : -0.007812 */
{'[', 0x006f005a, 0xfffffffe},/*kerning  Z o : -0.007812 */
{'[', 0x0071005a, 0xfffffffe},/*kerning  Z q : -0.007812 */
{'[', 0x0075005a, 0xfffffffe},/*kerning  Z u : -0.007812 */
{'[', 0x0076005a, 0xfffffffd},/*kerning  Z v : -0.011719 */
{'[', 0x0077005a, 0xfffffffd},/*kerning  Z w : -0.011719 */
{'[', 0x0079005a, 0xfffffffd},/*kerning  Z y : -0.011719 */
{'@', 0x0000005b, 0x00002a6c},/*        [        x-advance: 42.421875 */
{'M', 0x42237000, 0xc3020000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc16ec000, 0x00000000},
{'l', 0x00000000, 0x4302a000},
{'l', 0x416ec000, 0x00000000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc1eb0000, 0x00000000},
{'l', 0x00000000, 0xc31a6000},
{'l', 0x41eb0000, 0x00000000},
{'[', 0x004a005b, 0xfffffffe},/*kerning  [ J : -0.007812 */
{'[', 0x0055005b, 0xfffffffe},/*kerning  [ U : -0.007812 */
{'@', 0x0000005c, 0x000041b4},/*       \         x-advance: 65.703125 */
{'M', 0x424ad000, 0x411c4000},
{'l', 0xc23e0000, 0xc2f70800},
{'l', 0x415c0000, 0x00000000},
{'l', 0x423e0000, 0x42f70800},
{'l', 0xc15c0000, 0x00000000},
{'@', 0x0000005d, 0x00002a6c},/*        ]        x-advance: 42.421875 */
{'M', 0x3f480000, 0xc2ec4000},
{'l', 0x00000000, 0xc13e0000},
{'l', 0x41ec4000, 0x00000000},
{'l', 0x00000000, 0x431a6000},
{'l', 0xc1ec4000, 0x00000000},
{'l', 0x00000000, 0xc13e0000},
{'l', 0x41700000, 0x00000000},
{'l', 0x00000000, 0xc302a000},
{'l', 0xc1700000, 0x00000000},
{'@', 0x0000005e, 0x000042e0},/*        ^        x-advance: 66.875000 */
{'M', 0x40a00000, 0xc263d000},
{'l', 0x41bae000, 0xc2633000},
{'l', 0x41200000, 0x00000000},
{'l', 0x41ba4000, 0x42633000},
{'l', 0xc1570000, 0x00000000},
{'l', 0xc16d8000, 0xc216f000},
{'l', 0xc16ec000, 0x4216f000},
{'l', 0xc1570000, 0x00000000},
{'@', 0x0000005f, 0x00004830},/*        _        x-advance: 72.187500 */
{'M', 0x428fe800, 0x80000000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc28f4800, 0x00000000},
{'l', 0x00000000, 0xc13e0000},
{'l', 0x428f4800, 0x80000000},
{'@', 0x00000060, 0x00003174},/*        `        x-advance: 49.453125 */
{'M', 0x41afa000, 0xc2f00000},
{'l', 0x41714000, 0x41b7c000},
{'l', 0xc146c000, 0x00000000},
{'l', 0xc1a14000, 0xc1b7c000},
{'l', 0x418c0000, 0x00000000},
{'@', 0x00000061, 0x00005708},/*        a        x-advance: 87.031250 */
{'M', 0x427cd000, 0x80000000},
{'8', 0xb9f0e8f5, 0x3baf22e0},
{'q', 0xc0c30000, 0x40458000},
{0, 0xc15e8000, 0x40458000},
{'q', 0xc14f8000, 0x00000000},
{0, 0xc1a64000, 0xc0e88000},
{'q', 0xc0fa0000, 0xc0e88000},
{0, 0xc0fa0000, 0xc18e8000},
{'q', 0x00000000, 0xc1598000},
{0, 0x41250000, 0xc1a55000},
{'9', 0xffc80052, 0xffc800dd},
{'4', 0x00000072, 0xffcb0000},
{'8', 0xa1ddc400, 0xdd96dddd},
{'8', 0x209700bf, 0x4ad820d8},
{'l', 0xc1674000, 0x00000000},
{'q', 0x00000000, 0xc1110000},
{0, 0x41124000, 0xc1884000},
{'q', 0x41124000, 0xc0ff0000},
{0, 0x41c3a000, 0xc0ff0000},
{'q', 0x415c0000, 0x00000000},
{0, 0x41b4a000, 0x40e10000},
{'9', 0x00380046, 0x00aa0046},
{'l', 0x00000000, 0x4217e000},
{'9', 0x005d0000, 0x00940017},
{'4', 0x000a0000, 0x0000ff88},
{'M', 0x421ce000, 0xc1318000},
{'8', 0xe16c003e, 0xbb40e12d},
{'4', 0xff750000, 0x0000ff95},
{'q', 0xc1c30000, 0x3ef00000},
{0, 0xc1c30000, 0x417a0000},
{'8', 0x51203000, 0x21602120},
{'[', 0x00220061, 0xfffffff8},/*kerning  a " : -0.031250 */
{'[', 0x00270061, 0xfffffff8},/*kerning  a ' : -0.031250 */
{'[', 0x00760061, 0xffffffff},/*kerning  a v : -0.003906 */
{'[', 0x00790061, 0xffffffff},/*kerning  a y : -0.003906 */
{'@', 0x00000062, 0x000059d8},/*        b        x-advance: 89.843750 */
{'M', 0x42a52800, 0xc2255000},
{'q', 0x00000000, 0x41942000},
{0, 0xc1098000, 0x41f5a000},
{'q', 0xc1098000, 0x41430000},
{0, 0xc1c26000, 0x41430000},
{'9', 0x0000ff80, 0xffa6ff3b},
{'l', 0xbf480000, 0x411c4000},
{'l', 0xc1548000, 0x80000000},
{'4', 0xfc400000, 0x00000074},
{'l', 0x00000000, 0x42327000},
{'q', 0x41098000, 0xc12b4000},
{0, 0x41c08000, 0xc12b4000},
{'q', 0x417f0000, 0x00000000},
{0, 0x41c3f000, 0x413e0000},
{'9', 0x005f0044, 0x00fa0044},
{'l', 0x00000000, 0x3fd20000},
{'M', 0x4237c000, 0xc293d000},
{'8', 0x1e9a00c1, 0x4ac31ed9},
{'l', 0x00000000, 0x42133000},
{'8', 0x4b3e2d16, 0x1e661e28},
{'q', 0x413b8000, 0x00000000},
{0, 0x4186b000, 0xc1106000},
{'9', 0xffb80028, 0xff540028},
{'l', 0x00000000, 0xbfd20000},
{'q', 0x00000000, 0xc1494000},
{0, 0xc09c4000, 0xc1ae1000},
{'q', 0xc09c4000, 0xc112e000},
{0, 0xc189d000, 0xc112e000},
{'[', 0x00220062, 0xfffffffd},/*kerning  b " : -0.011719 */
{'[', 0x00270062, 0xfffffffd},/*kerning  b ' : -0.011719 */
{'[', 0x00760062, 0xffffffff},/*kerning  b v : -0.003906 */
{'[', 0x00780062, 0xffffffff},/*kerning  b x : -0.003906 */
{'[', 0x00790062, 0xffffffff},/*kerning  b y : -0.003906 */
{'[', 0x007a0062, 0xffffffff},/*kerning  b z : -0.003906 */
{'@', 0x00000063, 0x000053c0},/*        c        x-advance: 83.750000 */
{'M', 0x42336000, 0xc1250000},
{'8', 0xdc6c003d, 0xa232dc2e},
{'l', 0x415c0000, 0x00000000},
{'q', 0xbf0c0000, 0x41354000},
{0, 0xc1264000, 0x419e7000},
{'q', 0xc11d8000, 0x4107a000},
{0, 0xc1ba4000, 0x4107a000},
{'q', 0xc1988000, 0x00000000},
{0, 0xc1e29000, 0xc1494000},
{'9', 0xff9cffb6, 0xff14ffb6},
{'l', 0x00000000, 0xc0520000},
{'q', 0x00000000, 0xc1884000},
{0, 0x41142000, 0xc1ece000},
{'q', 0x41142000, 0xc1494000},
{0, 0x41e29000, 0xc1494000},
{'q', 0x416d8000, 0x00000000},
{0, 0x41bfe000, 0x410ca000},
{'9', 0x00460049, 0x00ae004d},
{'l', 0xc15c0000, 0x00000000},
{'q', 0xbf0c0000, 0xc0fa0000},
{0, 0xc0bcc000, 0xc1548000},
{'q', 0xc0ab4000, 0xc0af0000},
{0, 0xc1606000, 0xc0af0000},
{'8', 0x249100b9, 0x5bc824d8},
{'9', 0x0037fff0, 0x0072fff0},
{'l', 0x00000000, 0x40520000},
{'q', 0x00000000, 0x40ed8000},
{0, 0x40020000, 0x41660000},
{'q', 0x40020000, 0x40de8000},
{0, 0x40e24000, 0x41372000},
{'q', 0x40a14000, 0x408fc000},
{0, 0x41606000, 0x408fc000},
{'[', 0x00220063, 0xffffffff},/*kerning  c " : -0.003906 */
{'[', 0x00270063, 0xffffffff},/*kerning  c ' : -0.003906 */
{'@', 0x00000064, 0x00005a3c},/*        d        x-advance: 90.234375 */
{'M', 0x42831800, 0x80000000},
{'l', 0xbf340000, 0xc1110000},
{'q', 0xc10ac000, 0x412a0000},
{0, 0xc1c12000, 0x412a0000},
{'q', 0xc16d8000, 0x00000000},
{0, 0xc1bfe000, 0xc1408000},
{'9', 0xffa0ffb7, 0xff0fffb6},
{'l', 0x00000000, 0xc0110000},
{'q', 0x00000000, 0xc19b0000},
{0, 0x4112e000, 0xc1fa0000},
{'q', 0x4112e000, 0xc13e0000},
{0, 0x41c21000, 0xc13e0000},
{'9', 0x00000077, 0x005000bb},
{'l', 0x00000000, 0xc22fa000},
{'4', 0x00000074, 0x03c00000},
{'l', 0xc1548000, 0x80000000},
{'M', 0x41afa000, 0xc2255000},
{'q', 0x00000000, 0x41494000},
{0, 0x40aa0000, 0x41acd000},
{'q', 0x40aa0000, 0x41106000},
{0, 0x41884000, 0x41106000},
{'9', 0x0000006b, 0xff9e00a0},
{'l', 0x00000000, 0xc21ba000},
{'q', 0xc0cf8000, 0xc1408000},
{0, 0xc19ec000, 0xc1408000},
{'q', 0xc13cc000, 0x00000000},
{0, 0xc1893000, 0x4112e000},
{'q', 0xc0ab4000, 0x4112e000},
{0, 0xc0ab4000, 0x41ae1000},
{'l', 0x00000000, 0x3fd20000},
{'@', 0x00000065, 0x000054d8},/*        e        x-advance: 84.843750 */
{'M', 0x429bc800, 0xc16c4000},
{'q', 0xc0848000, 0x40c80000},
{0, 0xc13b8000, 0x4134a000},
{'q', 0xc0f28000, 0x40a14000},
{0, 0xc1a0a000, 0x40a14000},
{'q', 0xc18d4000, 0x00000000},
{0, 0xc1e1f000, 0xc137c000},
{'9', 0xffa5ffac, 0xff15ffac},
{'l', 0x00000000, 0xc0520000},
{'q', 0x00000000, 0xc15d4000},
{0, 0x40a8c000, 0xc1bc7000},
{'q', 0x40a8c000, 0xc11ba000},
{0, 0x415b6000, 0xc16d8000},
{'q', 0x41070000, 0xc0a3c000},
{0, 0x418fc000, 0xc0a3c000},
{'q', 0x41910000, 0x00000000},
{0, 0x41d39000, 0x413d6000},
{'9', 0x005e0042, 0x00ec0042},
{'4', 0x00330000, 0x0000fe37},
{'q', 0x3ea00000, 0x413a4000},
{0, 0x40dd4000, 0x419e7000},
{'q', 0x40d34000, 0x4102a000},
{0, 0x41915000, 0x4102a000},
{'8', 0xe767003d, 0xbe4ae72a},
{'l', 0x410c0000, 0x40de8000},
{'M', 0x4230e000, 0xc2947000},
{'q', 0xc1098000, 0x00000000},
{0, 0xc1688000, 0x40c80000},
{'9', 0x0032ffd1, 0x008fffc5},
{'4', 0x00000152, 0xfff80000},
{'q', 0xbf0c0000, 0xc1070000},
{0, 0xc09c4000, 0xc17c8000},
{'q', 0xc08ac000, 0xc0eb0000},
{0, 0xc176e000, 0xc0eb0000},
{'[', 0x00220065, 0xffffffff},/*kerning  e " : -0.003906 */
{'[', 0x00270065, 0xffffffff},/*kerning  e ' : -0.003906 */
{'[', 0x00760065, 0xffffffff},/*kerning  e v : -0.003906 */
{'[', 0x00790065, 0xffffffff},/*kerning  e y : -0.003906 */
{'@', 0x00000066, 0x000037a0},/*        f        x-advance: 55.625000 */
{'M', 0x424a3000, 0xc292e000},
{'l', 0xc18fc000, 0x00000000},
{'l', 0x00000000, 0x4292e000},
{'l', 0xc1674000, 0x80000000},
{'l', 0x00000000, 0xc292e000},
{'0', 0xa8000096, 0xb500006a},
{'q', 0x3e200000, 0xc1570000},
{0, 0x40f14000, 0xc1a46000},
{'q', 0x40ec4000, 0xc0e38000},
{0, 0x41a19000, 0xc0e38000},
{'9', 0x00000026, 0x000a0050},
{'l', 0xbf480000, 0x413cc000},
{'q', 0xc0520000, 0xbf340000},
{0, 0xc0fa0000, 0xbf340000},
{'9', 0x0000ff8d, 0x007bff8b},
{'l', 0x00000000, 0x41160000},
{'l', 0x418fc000, 0x00000000},
{'l', 0x00000000, 0x41318000},
{'[', 0x00220066, 0x00000002},/*kerning  f " : 0.007812 */
{'[', 0x00270066, 0x00000002},/*kerning  f ' : 0.007812 */
{'[', 0x00290066, 0x00000002},/*kerning  f ) : 0.007812 */
{'[', 0x005d0066, 0x00000002},/*kerning  f ] : 0.007812 */
{'[', 0x00630066, 0xfffffffd},/*kerning  f c : -0.011719 */
{'[', 0x00640066, 0xfffffffd},/*kerning  f d : -0.011719 */
{'[', 0x00650066, 0xfffffffd},/*kerning  f e : -0.011719 */
{'[', 0x00670066, 0xfffffffd},/*kerning  f g : -0.011719 */
{'[', 0x00710066, 0xfffffffd},/*kerning  f q : -0.011719 */
{'[', 0x007d0066, 0x00000002},/*kerning  f } : 0.007812 */
{'@', 0x00000067, 0x000059d8},/*        g        x-advance: 89.843750 */
{'M', 0x422c8000, 0x42057000},
{'q', 0xc0d20000, 0x00000000},
{0, 0xc180c000, 0xc04d0000},
{'9', 0xffe7ffb4, 0xffa5ff87},
{'l', 0x40f28000, 0xc1098000},
{'q', 0x41160000, 0x41368000},
{0, 0x41b54000, 0x41368000},
{'q', 0x4123c000, 0x00000000},
{0, 0x4182a000, 0xc0b7c000},
{'9', 0xffd30030, 0xff7a0030},
{'l', 0x00000000, 0xc0ed8000},
{'q', 0xc1098000, 0x41214000},
{0, 0xc1bd6000, 0x41214000},
{'q', 0xc173c000, 0x00000000},
{0, 0xc1c1c000, 0xc1430000},
{'9', 0xff9fffb9, 0xff0bffb9},
{'l', 0x00000000, 0xbfd20000},
{'q', 0x00000000, 0xc19b0000},
{0, 0x410f2000, 0xc1fa0000},
{'q', 0x410f2000, 0xc13e0000},
{0, 0x41c35000, 0xc13e0000},
{'9', 0x0000007c, 0x005700c1},
{'4', 0xffb50005, 0x00000069},
{'l', 0x00000000, 0x42a57800},
{'q', 0x00000000, 0x41866000},
{0, 0xc1200000, 0x41cfd000},
{'9', 0x0049ffb0, 0x0049ff32},
{'M', 0x41b04000, 0xc2255000},
{'q', 0x00000000, 0x41494000},
{0, 0x40a8c000, 0x41acd000},
{'q', 0x40a8c000, 0x41106000},
{0, 0x4187f000, 0x41106000},
{'9', 0x0000006e, 0xff9c00a1},
{'l', 0x00000000, 0xc21a1000},
{'8', 0xbbc4d7eb, 0xe59de5da},
{'q', 0xc13cc000, 0x00000000},
{0, 0xc188e000, 0x4112e000},
{'q', 0xc0aa0000, 0x4112e000},
{0, 0xc0aa0000, 0x41ae1000},
{'l', 0x00000000, 0x3fd20000},
{'@', 0x00000068, 0x00005820},/*        h        x-advance: 88.125000 */
{'M', 0x42386000, 0xc293d000},
{'8', 0x1e9e00c9, 0x4ebe1ed6},
{'l', 0x00000000, 0x42719000},
{'l', 0xc1674000, 0x80000000},
{'4', 0xfc400000, 0x00000073},
{'l', 0x00000000, 0x42363000},
{'q', 0x4119c000, 0xc13a4000},
{0, 0x41c76000, 0xc13a4000},
{'q', 0x4146c000, 0x00000000},
{0, 0x419dd000, 0x40de8000},
{'9', 0x0037003a, 0x00ba003b},
{'4', 0x01be0000, 0x0000ff8c},
{'l', 0x00000000, 0xc25e3000},
{'q', 0x00000000, 0xc11ec000},
{0, 0xc08ac000, 0xc1624000},
{'q', 0xc08ac000, 0xc0870000},
{0, 0xc148a000, 0xc0870000},
{'[', 0x00220068, 0xfffffff3},/*kerning  h " : -0.050781 */
{'[', 0x00270068, 0xfffffff3},/*kerning  h ' : -0.050781 */
{'@', 0x00000069, 0x000026e8},/*        i        x-advance: 38.906250 */
{'M', 0x41318000, 0xc2d5e800},
{'8', 0xd111e400, 0xed32ed11},
{'8', 0x13320021, 0x2f111311},
{'8', 0x2def1a00, 0x13ce13ef},
{'8', 0xedce00df, 0xd3efedef},
{'M', 0x41d5c000, 0xc2a91000},
{'l', 0x00000000, 0x42a91000},
{'l', 0xc1688000, 0x80000000},
{'l', 0x00000000, 0xc2a91000},
{'l', 0x41688000, 0x00000000},
{'@', 0x0000006a, 0x00002648},/*        j        x-advance: 38.281250 */
{'M', 0x411ec000, 0xc2d5e800},
{'8', 0xd111e400, 0xed32ed11},
{'8', 0x13330021, 0x2f111311},
{'8', 0x2def1a00, 0x13cd13ef},
{'8', 0xedce00df, 0xd3efedef},
{'M', 0x41368000, 0xc2a91000},
{'4', 0x00000074, 0x02f40000},
{'q', 0x00000000, 0x41c08000},
{0, 0xc1b0e000, 0x41c08000},
{'9', 0x0000ffda, 0xfff6ffb9},
{'l', 0x3da00000, 0xc13a4000},
{'8', 0x05350517, 0xaa4e004c},
{'l', 0x00000000, 0xc2bf1800},
{'@', 0x0000006b, 0x00005118},/*        k        x-advance: 81.093750 */
{'M', 0x427ff000, 0x80000000},
{'l', 0xc1ea6000, 0xc21ce000},
{'l', 0xc1124000, 0x41174000},
{'l', 0x00000000, 0x41ee2000},
{'l', 0xc1688000, 0x80000000},
{'l', 0x00000000, 0xc2f00000},
{'l', 0x41688000, 0x00000000},
{'l', 0x00000000, 0x42910000},
{'l', 0x40f78000, 0xc1138000},
{'l', 0x41d20000, 0xc1de8000},
{'l', 0x418d4000, 0x00000000},
{'l', 0xc203e000, 0x420cf000},
{'l', 0x42133000, 0x42453000},
{'l', 0xc1884000, 0x80000000},
{'[', 0x0063006b, 0xfffffffe},/*kerning  k c : -0.007812 */
{'[', 0x0064006b, 0xfffffffe},/*kerning  k d : -0.007812 */
{'[', 0x0065006b, 0xfffffffe},/*kerning  k e : -0.007812 */
{'[', 0x0067006b, 0xfffffffe},/*kerning  k g : -0.007812 */
{'[', 0x0071006b, 0xfffffffe},/*kerning  k q : -0.007812 */
{'@', 0x0000006c, 0x000026e8},/*        l        x-advance: 38.906250 */
{'M', 0x41d5c000, 0xc2f00000},
{'l', 0x00000000, 0x42f00000},
{'l', 0xc1688000, 0x80000000},
{'l', 0x00000000, 0xc2f00000},
{'l', 0x41688000, 0x00000000},
{'@', 0x0000006d, 0x00008c50},/*        m        x-advance: 140.312500 */
{'M', 0x42336000, 0xc293d000},
{'9', 0x0000ff8f, 0x005fff65},
{'l', 0x00000000, 0x4277d000},
{'l', 0xc1688000, 0x80000000},
{'4', 0xfd5c0000, 0x0000006e},
{'l', 0x3ec80000, 0x41138000},
{'q', 0x41124000, 0xc12c8000},
{0, 0x41c80000, 0xc12c8000},
{'q', 0x41020000, 0x00000000},
{0, 0x4167e000, 0x404f8000},
{'8', 0x524d1932, 0xb257d021},
{'q', 0x40d84000, 0xc0700000},
{0, 0x417e6000, 0xc0700000},
{'q', 0x41598000, 0x00000000},
{0, 0x41a7d000, 0x40e88000},
{'9', 0x003a003b, 0x00ba003b},
{'4', 0x01bc0000, 0x0000ff8c},
{'l', 0x00000000, 0xc25ed000},
{'q', 0x00000000, 0xc12a0000},
{0, 0xc09c4000, 0xc166a000},
{'8', 0xe297e2d9, 0x299200ba},
{'9', 0x0029ffd8, 0x0064ffd2},
{'4', 0x01c00000, 0x0000ff8d},
{'l', 0x00000000, 0xc25e8000},
{'q', 0x00000000, 0xc11ec000},
{0, 0xc09d8000, 0xc161a000},
{'q', 0xc09d8000, 0xc085c000},
{0, 0xc1520000, 0xc085c000},
{'[', 0x0022006d, 0xfffffff3},/*kerning  m " : -0.050781 */
{'[', 0x0027006d, 0xfffffff3},/*kerning  m ' : -0.050781 */
{'@', 0x0000006e, 0x0000585c},/*        n        x-advance: 88.359375 */
{'M', 0x42386000, 0xc293d000},
{'8', 0x1e9e00c9, 0x4ebe1ed6},
{'l', 0x00000000, 0x42719000},
{'l', 0xc1674000, 0x80000000},
{'4', 0xfd5c0000, 0x0000006d},
{'l', 0x3ef00000, 0x4128c000},
{'q', 0x4119c000, 0xc141c000},
{0, 0x41c9e000, 0xc141c000},
{'q', 0x4146c000, 0x00000000},
{0, 0x419dd000, 0x40de8000},
{'9', 0x0037003a, 0x00ba003b},
{'4', 0x01be0000, 0x0000ff8c},
{'l', 0x00000000, 0xc25e3000},
{'q', 0x00000000, 0xc11ec000},
{0, 0xc08ac000, 0xc1624000},
{'q', 0xc08ac000, 0xc0870000},
{0, 0xc148a000, 0xc0870000},
{'[', 0x0022006e, 0xfffffff3},/*kerning  n " : -0.050781 */
{'[', 0x0027006e, 0xfffffff3},/*kerning  n ' : -0.050781 */
{'@', 0x0000006f, 0x00005b40},/*        o        x-advance: 91.250000 */
{'M', 0x40e60000, 0xc22c8000},
{'q', 0x00000000, 0xc192e000},
{0, 0x41250000, 0xc1f55000},
{'q', 0x41250000, 0xc144e000},
{0, 0x41e06000, 0xc144e000},
{'q', 0x418de000, 0x00000000},
{0, 0x41e06000, 0x41412000},
{'9', 0x00600052, 0x00f00054},
{'l', 0x00000000, 0x40250000},
{'q', 0x00000000, 0x4192e000},
{0, 0xc125a000, 0x41f50000},
{'q', 0xc125a000, 0x41444000},
{0, 0xc1e0b000, 0x41444000},
{'q', 0xc18e8000, 0x00000000},
{0, 0xc1e15000, 0xc1444000},
{'9', 0xff9effae, 0xff0bffae},
{'l', 0x00000000, 0xbfe60000},
{'M', 0x41ad2000, 0xc2255000},
{'q', 0x00000000, 0x41494000},
{0, 0x40bf4000, 0x41ae6000},
{'q', 0x40bf4000, 0x41138000},
{0, 0x4190b000, 0x41138000},
{'q', 0x413cc000, 0x00000000},
{0, 0x418e8000, 0xc111a000},
{'9', 0xffb80030, 0xff530030},
{'l', 0x00000000, 0xc0020000},
{'q', 0x00000000, 0xc146c000},
{0, 0xc0c08000, 0xc1ae1000},
{'q', 0xc0c08000, 0xc1156000},
{0, 0xc1906000, 0xc1156000},
{'q', 0xc13f4000, 0x00000000},
{0, 0xc18f7000, 0x41156000},
{'q', 0xc0bf4000, 0x41156000},
{0, 0xc0bf4000, 0x41ae1000},
{'l', 0x00000000, 0x3fe60000},
{'[', 0x0022006f, 0xffffffef},/*kerning  o " : -0.066406 */
{'[', 0x0027006f, 0xffffffef},/*kerning  o ' : -0.066406 */
{'[', 0x0076006f, 0xffffffff},/*kerning  o v : -0.003906 */
{'[', 0x0078006f, 0xfffffffe},/*kerning  o x : -0.007812 */
{'[', 0x0079006f, 0xffffffff},/*kerning  o y : -0.003906 */
{'[', 0x007a006f, 0xfffffffe},/*kerning  o z : -0.007812 */
{'@', 0x00000070, 0x000059d8},/*        p        x-advance: 89.843750 */
{'M', 0x42467000, 0x3fc80000},
{'9', 0x0000ff87, 0xffb2ff3f},
{'l', 0x00000000, 0x4222d000},
{'l', 0xc1688000, 0x00000000},
{'4', 0xfc580000, 0x0000006a},
{'l', 0x3f340000, 0x4114c000},
{'q', 0x410e8000, 0xc12dc000},
{0, 0x41c3a000, 0xc12dc000},
{'q', 0x41802000, 0x00000000},
{0, 0x41c44000, 0x413e0000},
{'9', 0x005f0044, 0x00fa0044},
{'l', 0x00000000, 0x3fd20000},
{'q', 0x00000000, 0x41942000},
{0, 0xc108e000, 0x41f5a000},
{'9', 0x0061ffbc, 0x0061ff3e},
{'M', 0x4234a000, 0xc293d000},
{'9', 0x0000ff97, 0x005dff63},
{'l', 0x00000000, 0x4221e000},
{'q', 0x40d48000, 0x4137c000},
{0, 0x419ec000, 0x4137c000},
{'q', 0x413b8000, 0x00000000},
{0, 0x41893000, 0xc1138000},
{'9', 0xffb7002b, 0xff52002b},
{'l', 0x00000000, 0xbfd20000},
{'q', 0x00000000, 0xc1494000},
{0, 0xc0adc000, 0xc1ae1000},
{'q', 0xc0adc000, 0xc112e000},
{0, 0xc18a7000, 0xc112e000},
{'[', 0x00220070, 0xfffffffd},/*kerning  p " : -0.011719 */
{'[', 0x00270070, 0xfffffffd},/*kerning  p ' : -0.011719 */
{'[', 0x00760070, 0xffffffff},/*kerning  p v : -0.003906 */
{'[', 0x00780070, 0xffffffff},/*kerning  p x : -0.003906 */
{'[', 0x00790070, 0xffffffff},/*kerning  p y : -0.003906 */
{'[', 0x007a0070, 0xffffffff},/*kerning  p z : -0.003906 */
{'@', 0x00000071, 0x00005af0},/*        q        x-advance: 90.937500 */
{'M', 0x429d8000, 0xc2a91000},
{'4', 0x03a80000, 0x0000ff8c},
{'l', 0x00000000, 0xc2214000},
{'q', 0xc10ac000, 0x41160000},
{0, 0xc1ba4000, 0x41160000},
{'q', 0xc17a0000, 0x00000000},
{0, 0xc1c49000, 0xc1430000},
{'9', 0xff9fffb9, 0xff0bffb9},
{'l', 0x00000000, 0xbfd20000},
{'q', 0x00000000, 0xc19b0000},
{0, 0x410e8000, 0xc1fa0000},
{'q', 0x410e8000, 0xc13e0000},
{0, 0x41c62000, 0xc13e0000},
{'9', 0x00000078, 0x005000be},
{'4', 0xffbc0005, 0x0000006a},
{'M', 0x41afa000, 0xc2255000},
{'q', 0x00000000, 0x41494000},
{0, 0x40adc000, 0x41ae6000},
{'q', 0x40adc000, 0x41138000},
{0, 0x41893000, 0x41138000},
{'9', 0x00000067, 0xffa6009d},
{'l', 0x00000000, 0xc225f000},
{'q', 0xc0d98000, 0xc1318000},
{0, 0xc19c4000, 0xc1318000},
{'q', 0xc13e0000, 0x00000000},
{0, 0xc18a7000, 0x41156000},
{'q', 0xc0adc000, 0x41156000},
{0, 0xc0adc000, 0x41aff000},
{'l', 0x00000000, 0x3fc80000},
{'@', 0x00000072, 0x00003638},/*        r        x-advance: 54.218750 */
{'M', 0x42331000, 0xc2906000},
{'9', 0x0000ff8f, 0x0061ff66},
{'l', 0x00000000, 0x42700000},
{'l', 0xc1674000, 0x80000000},
{'4', 0xfd5c0000, 0x00000070},
{'l', 0x3ea00000, 0x411b0000},
{'q', 0x40de8000, 0xc1340000},
{0, 0x41a0a000, 0xc1340000},
{'9', 0x00000020, 0x00080033},
{'l', 0xbda00000, 0x41570000},
{'q', 0xc04d0000, 0xbf200000},
{0, 0xc0e10000, 0xbf200000},
{'[', 0x00220072, 0x00000002},/*kerning  r " : 0.007812 */
{'[', 0x00270072, 0x00000002},/*kerning  r ' : 0.007812 */
{'[', 0x002c0072, 0xfffffff1},/*kerning  r , : -0.058594 */
{'[', 0x002e0072, 0xfffffff1},/*kerning  r . : -0.058594 */
{'[', 0x00610072, 0xfffffffb},/*kerning  r a : -0.019531 */
{'[', 0x00630072, 0xfffffffe},/*kerning  r c : -0.007812 */
{'[', 0x00640072, 0xfffffffe},/*kerning  r d : -0.007812 */
{'[', 0x00650072, 0xfffffffe},/*kerning  r e : -0.007812 */
{'[', 0x00660072, 0x00000001},/*kerning  r f : 0.003906 */
{'[', 0x00670072, 0xfffffffe},/*kerning  r g : -0.007812 */
{'[', 0x006f0072, 0xfffffffe},/*kerning  r o : -0.007812 */
{'[', 0x00710072, 0xfffffffe},/*kerning  r q : -0.007812 */
{'[', 0x00740072, 0x00000006},/*kerning  r t : 0.023438 */
{'[', 0x00760072, 0x00000002},/*kerning  r v : 0.007812 */
{'[', 0x00770072, 0x00000002},/*kerning  r w : 0.007812 */
{'[', 0x00790072, 0x00000002},/*kerning  r y : 0.007812 */
{'@', 0x00000073, 0x00005294},/*        s        x-advance: 82.578125 */
{'M', 0x4270f000, 0xc1b36000},
{'q', 0x00000000, 0xc0938000},
{0, 0xc0598000, 0xc1052000},
{'q', 0xc0598000, 0xc06d8000},
{0, 0xc17aa000, 0xc0c6c000},
{'q', 0xc1624000, 0xc0390000},
{0, 0xc1b40000, 0xc1066000},
{'q', 0xc105c000, 0xc0b04000},
{0, 0xc105c000, 0xc1807000},
{'q', 0x00000000, 0xc1200000},
{0, 0x4108e000, 0xc18ac000},
{'q', 0x4108e000, 0xc0eb0000},
{0, 0x41b63000, 0xc0eb0000},
{'q', 0x41728000, 0x00000000},
{0, 0x41bd6000, 0x40f78000},
{'9', 0x003d0044, 0x00950044},
{'l', 0xc1674000, 0x00000000},
{'8', 0xb1dcd700, 0xdb97dbdc},
{'8', 0x1f9900b9, 0x44e11fe1},
{'q', 0x00000000, 0x40938000},
{0, 0x40688000, 0x40eec000},
{'q', 0x40688000, 0x40368000},
{0, 0x4176e000, 0x40b04000},
{'q', 0x4178c000, 0x40610000},
{0, 0x41b95000, 0x41138000},
{'q', 0x40f3c000, 0x40b68000},
{0, 0x40f3c000, 0x4180c000},
{'q', 0x00000000, 0x4132c000},
{0, 0xc10f2000, 0x41910000},
{'q', 0xc10f2000, 0x40de8000},
{0, 0xc1bdb000, 0x40de8000},
{'q', 0xc1866000, 0x00000000},
{0, 0xc1cda000, 0xc1098000},
{'9', 0xffbcffb9, 0xff68ffb9},
{'l', 0x41688000, 0x00000000},
{'8', 0x63374703, 0x1b691b33},
{'8', 0xe56c0046, 0xbb25e525},
{'@', 0x00000074, 0x00003458},/*        t        x-advance: 52.343750 */
{'M', 0x423b3000, 0x80000000},
{'8', 0x0ca70cd9, 0xd99200c0},
{'9', 0xffd9ffd3, 0xff73ffd3},
{'l', 0x00000000, 0xc251b000},
{'l', 0xc1778000, 0x00000000},
{'l', 0x00000000, 0xc1318000},
{'l', 0x41778000, 0x00000000},
{'l', 0x00000000, 0xc1a46000},
{'l', 0x41674000, 0x00000000},
{'l', 0x00000000, 0x41a46000},
{'l', 0x417c8000, 0x00000000},
{'4', 0x00580000, 0x0000ff82},
{'l', 0x00000000, 0x42520000},
{'8', 0x42163300, 0x0e330e16},
{'q', 0x402f0000, 0x00000000},
{0, 0x40d98000, 0xbf700000},
{'l', 0x3da00000, 0x413cc000},
{'[', 0x006f0074, 0xfffffffe},/*kerning  t o : -0.007812 */
{'@', 0x00000075, 0x00005834},/*        u        x-advance: 88.203125 */
{'M', 0x427dc000, 0x80000000},
{'l', 0xbea00000, 0xc105c000},
{'q', 0xc1048000, 0x411ec000},
{0, 0xc1c58000, 0x411ec000},
{'q', 0xc146c000, 0x00000000},
{0, 0xc1a0a000, 0xc0eb0000},
{'9', 0xffc6ffc3, 0xff3fffc3},
{'4', 0xfe4c0000, 0x00000073},
{'l', 0x00000000, 0x425ac000},
{'q', 0x00000000, 0x41368000},
{0, 0x4099c000, 0x41746000},
{'q', 0x4099c000, 0x40778000},
{0, 0x412be000, 0x40778000},
{'9', 0x00000082, 0xff9e00b0},
{'l', 0x00000000, 0xc2764000},
{'l', 0x41688000, 0x00000000},
{'l', 0x00000000, 0x42a91000},
{'l', 0xc15d4000, 0x80000000},
{'@', 0x00000076, 0x00004d80},/*        v        x-advance: 77.500000 */
{'M', 0x42953800, 0xc2a91000},
{'l', 0xc1f32000, 0x42a91000},
{'l', 0xc1304000, 0x80000000},
{'l', 0xc1f50000, 0xc2a91000},
{'l', 0x416d8000, 0x00000000},
{'l', 0x41abe000, 0x42818800},
{'l', 0x41a78000, 0xc2818800},
{'l', 0x416c4000, 0x00000000},
{'[', 0x00220076, 0x00000001},/*kerning  v " : 0.003906 */
{'[', 0x00270076, 0x00000001},/*kerning  v ' : 0.003906 */
{'[', 0x002c0076, 0xfffffff3},/*kerning  v , : -0.050781 */
{'[', 0x002e0076, 0xfffffff3},/*kerning  v . : -0.050781 */
{'[', 0x00610076, 0xffffffff},/*kerning  v a : -0.003906 */
{'[', 0x00630076, 0xffffffff},/*kerning  v c : -0.003906 */
{'[', 0x00640076, 0xffffffff},/*kerning  v d : -0.003906 */
{'[', 0x00650076, 0xffffffff},/*kerning  v e : -0.003906 */
{'[', 0x00660076, 0x00000001},/*kerning  v f : 0.003906 */
{'[', 0x00670076, 0xffffffff},/*kerning  v g : -0.003906 */
{'[', 0x006f0076, 0xffffffff},/*kerning  v o : -0.003906 */
{'[', 0x00710076, 0xffffffff},/*kerning  v q : -0.003906 */
{'@', 0x00000077, 0x0000783c},/*        w        x-advance: 120.234375 */
{'M', 0x42e8f800, 0xc2a91000},
{'l', 0xc1c44000, 0x42a91000},
{'l', 0xc13b8000, 0x80000000},
{'l', 0xc1a46000, 0xc27ff000},
{'l', 0xc1a00000, 0x427ff000},
{'l', 0xc13cc000, 0x80000000},
{'l', 0xc1c44000, 0xc2a91000},
{'l', 0x41674000, 0x00000000},
{'l', 0x41852000, 0x427c8000},
{'l', 0x419d8000, 0xc27c8000},
{'l', 0x413a4000, 0x00000000},
{'l', 0x41a00000, 0x4280e800},
{'l', 0x4182a000, 0xc280e800},
{'l', 0x41660000, 0x00000000},
{'[', 0x002c0077, 0xfffffff1},/*kerning  w , : -0.058594 */
{'[', 0x002e0077, 0xfffffff1},/*kerning  w . : -0.058594 */
{'@', 0x00000078, 0x00004f60},/*        x        x-advance: 79.375000 */
{'M', 0x41a64000, 0xc2a91000},
{'l', 0x41942000, 0x41f64000},
{'l', 0x41960000, 0xc1f64000},
{'l', 0x4187a000, 0x00000000},
{'l', 0xc1dd4000, 0x4226e000},
{'l', 0x41e42000, 0x422b4000},
{'l', 0xc185c000, 0x80000000},
{'l', 0xc19c4000, 0xc1fdc000},
{'l', 0xc19c4000, 0x41fdc000},
{'l', 0xc1866000, 0x80000000},
{'l', 0x41e38000, 0xc22b4000},
{'l', 0xc1dca000, 0xc226e000},
{'l', 0x41852000, 0x00000000},
{'[', 0x00630078, 0xfffffffe},/*kerning  x c : -0.007812 */
{'[', 0x00640078, 0xfffffffe},/*kerning  x d : -0.007812 */
{'[', 0x00650078, 0xfffffffe},/*kerning  x e : -0.007812 */
{'[', 0x00670078, 0xfffffffe},/*kerning  x g : -0.007812 */
{'[', 0x006f0078, 0xfffffffe},/*kerning  x o : -0.007812 */
{'[', 0x00710078, 0xfffffffe},/*kerning  x q : -0.007812 */
{'@', 0x00000079, 0x00004bb4},/*        y        x-advance: 75.703125 */
{'M', 0x421f6000, 0x4150c000},
{'9', 0x009effc4, 0x00a8ff4b},
{'l', 0xc01b0000, 0x3da00000},
{'9', 0x0000ffdc, 0xfff6ffbf},
{'4', 0xffa30000, 0x0002001f},
{'8', 0xe85b003a, 0xaa36e821},
{'l', 0x40480000, 0xc1098000},
{'l', 0xc1f14000, 0xc2a73000},
{'l', 0x417c8000, 0x00000000},
{'l', 0x41a96000, 0x427d2000},
{'l', 0x419ce000, 0xc27d2000},
{'l', 0x41778000, 0x00000000},
{'l', 0xc207a000, 0x42c32800},
{'[', 0x00220079, 0x00000001},/*kerning  y " : 0.003906 */
{'[', 0x00270079, 0x00000001},/*kerning  y ' : 0.003906 */
{'[', 0x002c0079, 0xfffffff3},/*kerning  y , : -0.050781 */
{'[', 0x002e0079, 0xfffffff3},/*kerning  y . : -0.050781 */
{'[', 0x00610079, 0xffffffff},/*kerning  y a : -0.003906 */
{'[', 0x00630079, 0xffffffff},/*kerning  y c : -0.003906 */
{'[', 0x00640079, 0xffffffff},/*kerning  y d : -0.003906 */
{'[', 0x00650079, 0xffffffff},/*kerning  y e : -0.003906 */
{'[', 0x00660079, 0x00000001},/*kerning  y f : 0.003906 */
{'[', 0x00670079, 0xffffffff},/*kerning  y g : -0.003906 */
{'[', 0x006f0079, 0xffffffff},/*kerning  y o : -0.003906 */
{'[', 0x00710079, 0xffffffff},/*kerning  y q : -0.003906 */
{'@', 0x0000007a, 0x00004f60},/*        z        x-advance: 79.375000 */
{'M', 0x40f28000, 0xc2912800},
{'l', 0x00000000, 0xc13f4000},
{'l', 0x42804800, 0x00000000},
{'l', 0x00000000, 0x4123c000},
{'l', 0xc23c2000, 0x4279b000},
{'l', 0x42453000, 0x00000000},
{'l', 0x00000000, 0x413e0000},
{'l', 0xc2861000, 0x80000000},
{'l', 0x00000000, 0xc12a0000},
{'l', 0x423a4000, 0xc277d000},
{'l', 0xc237c000, 0x00000000},
{'[', 0x0063007a, 0xfffffffe},/*kerning  z c : -0.007812 */
{'[', 0x0064007a, 0xfffffffe},/*kerning  z d : -0.007812 */
{'[', 0x0065007a, 0xfffffffe},/*kerning  z e : -0.007812 */
{'[', 0x0067007a, 0xfffffffe},/*kerning  z g : -0.007812 */
{'[', 0x006f007a, 0xfffffffe},/*kerning  z o : -0.007812 */
{'[', 0x0071007a, 0xfffffffe},/*kerning  z q : -0.007812 */
{'@', 0x0000007b, 0x00003624},/*        {        x-advance: 54.140625 */
{'M', 0x40a00000, 0xc229b000},
{'l', 0x00000000, 0xc1354000},
{'9', 0x00000084, 0xff690084},
{'l', 0x00000000, 0xc17c8000},
{'q', 0x00000000, 0xc1458000},
{0, 0x40be0000, 0xc1b09000},
{'9', 0xffb3002f, 0xff8e00af},
{'l', 0x40430000, 0x410e8000},
{'q', 0xc114c000, 0x40390000},
{0, 0xc14d0000, 0x4125a000},
{'9', 0x003bffe4, 0x0088ffe4},
{'l', 0x00000000, 0x41778000},
{'q', 0x00000000, 0x418f2000},
{0, 0xc150c000, 0x41c76000},
{'9', 0x00370068, 0x00c80068},
{'l', 0x00000000, 0x41750000},
{'q', 0x00000000, 0x4119c000},
{0, 0x40610000, 0x41884000},
{'9', 0x003b001c, 0x00520066},
{'l', 0xc0430000, 0x410fc000},
{'q', 0xc17f0000, 0xc0910000},
{0, 0xc1af0000, 0xc164c000},
{'9', 0xffb2ffd1, 0xff50ffd1},
{'l', 0x00000000, 0xc17a0000},
{'q', 0x00000000, 0xc1988000},
{0, 0xc1848000, 0xc1988000},
{'[', 0x004a007b, 0xfffffffe},/*kerning  { J : -0.007812 */
{'[', 0x0055007b, 0xfffffffe},/*kerning  { U : -0.007812 */
{'@', 0x0000007c, 0x00002710},/*        |        x-advance: 39.062500 */
{'M', 0x41cb2000, 0xc2e38000},
{'l', 0x00000000, 0x4306d800},
{'l', 0xc13a4000, 0x00000000},
{'l', 0x00000000, 0xc306d800},
{'l', 0x413a4000, 0x00000000},
{'@', 0x0000007d, 0x00003624},/*        }        x-advance: 54.140625 */
{'M', 0x4243f000, 0xc2570000},
{'l', 0x00000000, 0x41354000},
{'9', 0x0000ff7c, 0x0097ff7c},
{'l', 0x00000000, 0x417c8000},
{'q', 0x00000000, 0x41444000},
{0, 0xc0bcc000, 0x41b04000},
{'9', 0x004effd1, 0x0072ff51},
{'l', 0xc0430000, 0xc10fc000},
{'q', 0x41138000, 0xc0390000},
{0, 0x414c6000, 0xc1250000},
{'9', 0xffc5001c, 0xff78001c},
{'l', 0x00000000, 0xc178c000},
{'q', 0x00000000, 0xc192e000},
{0, 0x41638000, 0xc1c62000},
{'9', 0xffcdff8f, 0xff3aff8f},
{'l', 0x00000000, 0xc178c000},
{'q', 0x00000000, 0xc119c000},
{0, 0xc0610000, 0xc1889000},
{'9', 0xffc5ffe4, 0xffaeff9a},
{'l', 0x40430000, 0xc10e8000},
{'q', 0x41802000, 0x40910000},
{0, 0x41af5000, 0x41642000},
{'9', 0x004d002f, 0x00b0002f},
{'l', 0x00000000, 0x417f0000},
{'q', 0x00000000, 0x41960000},
{0, 0x41848000, 0x41960000},
{'@', 0x0000007e, 0x00006cd4},/*        ~        x-advance: 108.828125 */
{'M', 0x42ad9800, 0xc272d000},
{'l', 0x413e0000, 0xbda00000},
{'q', 0x00000000, 0x41408000},
{0, 0xc0e24000, 0x41a55000},
{'q', 0xc0e24000, 0x410a2000},
{0, 0xc191f000, 0x410a2000},
{'8', 0xeba100ca, 0xc0a6ebd7},
{'8', 0xd3c6e3e1, 0xf0c6f0e6},
{'8', 0x21b600d1, 0x5ce621e6},
{'l', 0xc1494000, 0x3e200000},
{'q', 0x00000000, 0xc1444000},
{0, 0x40e24000, 0xc1a3c000},
{'q', 0x40e24000, 0xc1034000},
{0, 0x41915000, 0xc1034000},
{'8', 0x165f0035, 0x3f591629},
{'8', 0x3446292f, 0x0a300a16},
{'8', 0xdc4d0030, 0xa11ddc1d},
};
#define ctx_font_ascii_name "Roboto Regular"
#endif
#endif //_CTX_INTERNAL_FONT_
#ifndef __CTX_LIST__
#define __CTX_LIST__

#include <stdlib.h>

#ifndef CTX_EXTERNAL_MALLOC
/* Default reallocation hook: resizes @mem to @new_size bytes using the C
 * library realloc().  @old_size is accepted for interface compatibility
 * with replacement allocators and is not used by this implementation.
 * Returns whatever realloc() returns.
 */
static inline void *ctx_realloc (void *mem, size_t old_size, size_t new_size)
{
  (void) old_size; /* deliberately unused here */
  void *resized = realloc (mem, new_size);
  return resized;
}

/* Default allocation hook: returns @size bytes obtained from the C library
 * malloc(), or whatever malloc() returns on failure.
 */
static inline void *ctx_malloc (size_t size)
{
  void *mem = malloc (size);
  return mem;
}

/* Default release hook: hands @mem back to the C library free(). */
static inline void ctx_free (void *mem)
{
  free (mem);
}

/* Default zero-initialising allocation hook.  Both arguments are forwarded
 * to the C library calloc() in the order given, so the first one acts as
 * calloc()'s element count and the second as its element size; the list
 * code in this file calls it as ctx_calloc (1, sizeof (T)).
 */
static inline void *ctx_calloc (size_t size, size_t count)
{
  void *mem = calloc (size, count);
  return mem;
}

#endif

/* The whole ctx_list implementation is in the header and will be inlined
 * wherever it is used.
 */
/* Node of a singly linked list; an empty list is represented by a NULL
 * CtxList pointer.
 */
struct _CtxList {
  void *data;          /* payload pointer stored in this node */
  CtxList *next;       /* following node, NULL at the end of the list */
  /* optional callback; ctx_list_remove() invokes it with
   * (data, freefunc_data) before releasing the node */
  void (*freefunc)(void *data, void *freefunc_data);
  void *freefunc_data; /* second argument handed to freefunc */
};

/**
 * ctx_list_prepend_full:
 * @list: address of the list head pointer
 * @data: payload pointer to store
 * @freefunc: callback stored in the node, may be NULL
 * @freefunc_data: extra argument stored for @freefunc
 *
 * Allocates a node carrying @data and the given free callback, and makes it
 * the new head of @list.
 */
static inline void ctx_list_prepend_full (CtxList **list, void *data,
    void (*freefunc)(void *data, void *freefunc_data),
    void *freefunc_data)
{
  CtxList *node = (CtxList*) ctx_calloc (1, sizeof (CtxList));

  node->data          = data;
  node->freefunc      = freefunc;
  node->freefunc_data = freefunc_data;

  /* link in front of the current head */
  node->next = *list;
  *list      = node;
}

/**
 * ctx_list_length:
 * @list: head of the list, may be NULL
 *
 * Walks the list and returns the number of nodes in it (0 for NULL).
 */
static inline int ctx_list_length (CtxList *list)
{
  int count = 0;
  while (list)
    {
      count++;
      list = list->next;
    }
  return count;
}

/**
 * ctx_list_prepend:
 * @list: address of the list head pointer
 * @data: payload pointer to store
 *
 * Adds a node carrying @data, without a free callback, at the front of
 * @list.
 */
static inline void ctx_list_prepend (CtxList **list, void *data)
{
  ctx_list_prepend_full (list, data, NULL, NULL);
}

/**
 * ctx_list_nth:
 * @list: head of the list, may be NULL
 * @no: zero based index of the wanted node
 *
 * Returns the node at position @no, or NULL when the list ends before that
 * position is reached.
 */
static inline CtxList *ctx_list_nth (CtxList *list, int no)
{
  for (; no != 0 && list != NULL; no--)
    list = list->next;
  return list;
}

/**
 * ctx_list_nth_data:
 * @list: head of the list, may be NULL
 * @no: zero based index of the wanted node
 *
 * Returns the data pointer of the node found by ctx_list_nth(), or NULL when
 * there is no such node.
 */
static inline void *ctx_list_nth_data (CtxList *list, int no)
{
  CtxList *node = ctx_list_nth (list, no);
  return node ? node->data : NULL;
}


/**
 * ctx_list_insert_before:
 * @list: address of the list head pointer
 * @sibling: node the new node should precede
 * @data: payload pointer to store
 *
 * Inserts a new node carrying @data in front of @sibling.  When the list is
 * empty or @sibling is its head the node is prepended.  When @sibling is not
 * found while walking the list (this includes a NULL @sibling) the new node
 * is attached after the last node, with its next pointer set to @sibling.
 */
static inline void
ctx_list_insert_before (CtxList **list, CtxList *sibling,
                       void *data)
{
  CtxList *head = *list;

  if (!head || head == sibling)
    {
      ctx_list_prepend (list, data);
      return;
    }

  /* find the node directly in front of sibling, or the tail of the list */
  CtxList *before = head;
  while (before->next && before->next != sibling)
    before = before->next;

  CtxList *node = (CtxList*) ctx_calloc (1, sizeof (CtxList));
  node->data   = data;
  node->next   = sibling;
  before->next = node;
}

/**
 * ctx_list_remove_link:
 * @list: address of the list head pointer
 * @link: node to unlink, may be NULL
 *
 * Unlinks @link from @list without releasing it: the node is neither freed
 * nor is its freefunc called, and its next pointer is reset to NULL.  Does
 * nothing when @link is NULL or is not part of the list.
 */
static inline void ctx_list_remove_link (CtxList **list, CtxList *link)
{
  /* a NULL link would otherwise compare equal to an empty list head and be
   * dereferenced */
  if (!link)
    return;

  /* slot is the pointer (head or some node's next) that refers to the
   * node under inspection, so head and interior removal are the same case */
  for (CtxList **slot = list; *slot; slot = &(*slot)->next)
    if (*slot == link)
      {
        *slot = link->next;
        link->next = NULL;
        return;
      }
}

/* Removes the first link whose data pointer equals @data.
 *
 * If the link carries a freefunc it is called as
 * freefunc (data, freefunc_data) before the link is unlinked and released
 * with ctx_free().  Only the first match is removed.  An empty list or a
 * list without a match is left untouched (the empty-list case previously
 * dereferenced a NULL head).
 */
static inline void ctx_list_remove (CtxList **list, void *data)
{
  CtxList **slot;
  CtxList  *victim;

  if (list == NULL)
    return;

  /* slot addresses the pointer leading to the candidate link, which lets
   * head and interior links be unlinked the same way */
  for (slot = list; *slot; slot = &(*slot)->next)
    if ((*slot)->data == data)
      break;

  victim = *slot;
  if (victim == NULL)
    return;

  if (victim->freefunc)
    victim->freefunc (victim->data, victim->freefunc_data);
  *slot = victim->next;
  ctx_free (victim);
}

/* Releases every link of *list, head first.  For each link its freefunc,
 * when set, is called with (data, freefunc_data) before the link itself is
 * handed to ctx_free().  *list is NULL afterwards.
 */
static inline void ctx_list_free (CtxList **list)
{
  while (*list)
    {
      CtxList *head = *list;

      if (head->freefunc)
        head->freefunc (head->data, head->freefunc_data);
      *list = head->next;
      ctx_free (head);
    }
}

/* Reverses *list in place by re-pointing each link's ->next.
 *
 * No links are allocated or freed, so every link keeps its data, freefunc
 * and freefunc_data.  (Rebuilding the list and freeing the old one would
 * run the free functions on data the reversed list still refers to.)
 */
static inline void
ctx_list_reverse (CtxList **list)
{
  CtxList *reversed = NULL;
  CtxList *node     = *list;

  while (node)
    {
      CtxList *following = node->next;
      node->next = reversed;
      reversed   = node;
      node       = following;
    }
  *list = reversed;
}

/* Returns the data pointer of the final link of @list, or NULL for an
 * empty list.  Note that the data, not the link, is returned.
 */
static inline void *ctx_list_last (CtxList *list)
{
  if (list == NULL)
    return NULL;

  while (list->next)
    list = list->next;
  return list->data;
}

/* Attaches @list_b behind the last link of *list; when *list is empty it
 * simply becomes @list_b.  The links of @list_b are shared, not copied.
 */
static inline void ctx_list_concat (CtxList **list, CtxList *list_b)
{
  CtxList **tail = list;

  /* advance to the NULL pointer that terminates the list */
  while (*tail)
    tail = &(*tail)->next;
  *tail = list_b;
}

/* Creates a link for @data carrying @freefunc / @freefunc_data and attaches
 * it at the tail of *list through ctx_list_concat().
 */
static inline void ctx_list_append_full (CtxList **list, void *data,
    void (*freefunc)(void *data, void *freefunc_data),
    void *freefunc_data)
{
  CtxList *node = (CtxList*) ctx_calloc (1, sizeof (CtxList));

  node->freefunc_data = freefunc_data;
  node->freefunc      = freefunc;
  node->data          = data;

  ctx_list_concat (list, node);
}

/* Appends @data at the tail of *list without a free function: forwards to
 * ctx_list_append_full() with NULL for both freefunc and freefunc_data.
 */
static inline void ctx_list_append (CtxList **list, void *data)
{
  ctx_list_append_full (list, data, NULL, NULL);
}

/* Inserts @data so that it ends up at zero-based position @no.
 *
 *   no == 0 or empty list   -> the data is prepended
 *   0 < no < list length    -> the new link sits at index @no
 *   no >= list length       -> the data is appended at the tail
 *   no < 0                  -> the data is appended at the tail
 *
 * The earlier implementation tracked the link two steps behind the walk,
 * which appended for no == 1 and otherwise inserted one position early.
 */
static inline void
ctx_list_insert_at (CtxList **list,
                    int       no,
                    void     *data)
{
  if (*list == NULL || no == 0)
    {
      ctx_list_prepend (list, data);
      return;
    }

  /* locate the link that will precede the new one: index no-1, or the
   * tail when the list is shorter than that or no is negative */
  CtxList *prev = *list;
  for (int pos = 1; (no < 0 || pos < no) && prev->next; pos++)
    prev = prev->next;

  CtxList *new_ = (CtxList*)ctx_calloc (1, sizeof (CtxList));
  new_->data = data;
  new_->next = prev->next;
  prev->next = new_;
}

/* Merges two already ordered lists into one and returns its head.
 *
 * At every step the heads of both lists are compared with
 * compare (list1->data, list2->data, userdata); the link from @list1 is
 * taken when the result is >= 0, otherwise the link from @list2.  Once one
 * list runs out the remainder of the other is attached unchanged.  Links
 * are re-used, nothing is allocated.
 *
 * The merge is iterative so stack use stays constant regardless of list
 * length; the order of compare() calls matches the recursive formulation.
 */
static CtxList*
ctx_list_merge_sorted (CtxList* list1,
                       CtxList* list2,
    int(*compare)(const void *a, const void *b, void *userdata), void *userdata
)
{
  CtxList  *merged = NULL;
  CtxList **tail   = &merged;   /* where the next chosen link is attached */

  while (list1 && list2)
  {
    if (compare (list1->data, list2->data, userdata) >= 0)
    {
      *tail = list1;
      list1 = list1->next;
    }
    else
    {
      *tail = list2;
      list2 = list2->next;
    }
    tail = &(*tail)->next;
  }

  /* at most one of the inputs still has links left */
  *tail = list1 ? list1 : list2;
  return merged;
}

/* Cuts the list starting at @head into two halves.
 *
 * *list1 receives @head, *list2 the second half.  For lists of fewer than
 * two links *list2 is NULL and nothing is cut.  Otherwise one pointer
 * advances a single link for every two links the other advances; the cut
 * is made after the slower pointer, so for odd lengths the first half is
 * the longer one.
 */
static void
ctx_list_split_half (CtxList*  head,
                     CtxList** list1,
                     CtxList** list2)
{
  CtxList *middle;
  CtxList *scout;

  *list1 = head;
  if (head == NULL || head->next == NULL)
  {
    *list2 = NULL;
    return;
  }

  middle = head;
  scout  = head->next;
  while (scout && scout->next)
  {
    middle = middle->next;
    scout  = scout->next->next;
  }

  *list2 = middle->next;
  middle->next = NULL;
}

/* Sorts *head with a merge sort: the list is split with
 * ctx_list_split_half(), both halves are sorted recursively and then
 * combined by ctx_list_merge_sorted().  The resulting order is whatever
 * that merge produces for @compare (see there); @userdata is handed to
 * every compare call.
 */
static inline void ctx_list_sort (CtxList **head,
    int(*compare)(const void *a, const void *b, void *userdata),
    void *userdata)
{
  CtxList *front;
  CtxList *back;

  /* zero or one link: already sorted */
  if (!(*head && (*head)->next))
    return;

  ctx_list_split_half (*head, &front, &back);
  ctx_list_sort (&front, compare, userdata);
  ctx_list_sort (&back,  compare, userdata);
  *head = ctx_list_merge_sorted (front, back, compare, userdata);
}

/* Adds @item to the list by prepending it and then re-sorting the whole
 * list with ctx_list_sort(); @compare and @userdata are passed through
 * unchanged.  The cost is that of a full sort on every insertion.
 */
static inline void ctx_list_insert_sorted (CtxList **list,
                                           void     *item,
    int(*compare)(const void *a, const void *b, void *userdata),
                                           void     *userdata)
{
  ctx_list_prepend (list, item);
  ctx_list_sort (list, compare, userdata);
}


/* Returns the first link for which compare (link->data, needle, userdata)
 * yields 0, or NULL when no link matches.
 */
static inline CtxList *ctx_list_find_custom (CtxList *list,
                                         void    *needle,
                                         int(*compare)(const void *a, const void *b, void *userdata),
                                         void *userdata)
{
  while (list && compare (list->data, needle, userdata) != 0)
    list = list->next;
  return list;
}

#endif
 /* Copyright (C) 2020 Øyvind Kolås <pippin@gimp.org>
 */

#if CTX_FORMATTER||CTX_AUDIO

/* returns the maximum string length including terminating \0 */
int ctx_a85enc_len (int input_length);
int ctx_a85enc (const void *srcp, char *dst, int count);


#endif

#if CTX_PARSER

int ctx_a85dec (const char *src, char *dst, int count);
int ctx_a85len (const char *src, int count);
#endif
#ifndef __CTX_EXTRA_H
#define __CTX_EXTRA_H

/* CTX_INLINE: inlining qualifier used by the small helpers in this header.
 * With CTX_FORCE_INLINES set to a non-zero value it additionally carries
 * the always_inline attribute (GCC/Clang __attribute__ syntax); otherwise
 * it expands to plain `inline`.
 */
#if CTX_FORCE_INLINES
#define CTX_INLINE inline __attribute__((always_inline))
#else
#define CTX_INLINE inline
#endif

void ctx_wait_for_renderer (Ctx *ctx);

/* CTX_CLAMP: yields min when val < min, max when val > max, otherwise val.
 * The arguments can be evaluated more than once, so avoid side effects. */
#define CTX_CLAMP(val,min,max) ((val)<(min)?(min):(val)>(max)?(max):(val))
//static CTX_INLINE int   ctx_mini (const int a, const int b)     { if (a < b) return a; return b; }
//static CTX_INLINE int   ctx_maxi (const int a, const int b)     { if (a > b) return a; return b; }
/* Returns the smaller of @a and @b (@b when they are equal).  Written as an
 * arithmetic select rather than with an if; the result is the same as
 * `if (a < b) return a; return b;`.
 */
static CTX_INLINE int   ctx_mini (const int a, const int b)     {
  const int take_a = (a < b);
  return take_a * a + (!take_a) * b;
}
/* Returns the larger of @a and @b (@b when they are equal).  Written as an
 * arithmetic select rather than with an if; the result is the same as
 * `if (a > b) return a; return b;`.
 */
static CTX_INLINE int   ctx_maxi (const int a, const int b)     {
  const int take_a = (a > b);
  return take_a * a + (!take_a) * b;
}
/* Returns @a when a < b, otherwise @b. */
static CTX_INLINE float ctx_minf (const float a, const float b)
{
  return (a < b) ? a : b;
}
/* Returns @a when a > b, otherwise @b. */
static CTX_INLINE float ctx_maxf (const float a, const float b)
{
  return (a > b) ? a : b;
}
/* Limits @v to the range [min, max]: values below @min give @min, values
 * above @max give @max, everything else is returned unchanged.  The lower
 * bound is tested first.
 */
static CTX_INLINE float ctx_clampf (const float v, const float min, const float max) {
  if (v < min)
    return min;
  if (v > max)
    return max;
  return v;
}


/* Output mode selector.  The numeric values are consecutive starting at 0
 * and are spelled out here so that they are visible at a glance.
 */
typedef enum CtxOutputmode
{
  CTX_OUTPUT_MODE_QUARTER          = 0,
  CTX_OUTPUT_MODE_BRAILLE          = 1,
  CTX_OUTPUT_MODE_SIXELS           = 2,
  CTX_OUTPUT_MODE_GRAYS            = 3,
  CTX_OUTPUT_MODE_CTX              = 4,
  CTX_OUTPUT_MODE_CTX_COMPACT      = 5,
  CTX_OUTPUT_MODE_CTX_FILE         = 6,
  CTX_OUTPUT_MODE_CTX_COMPACT_FILE = 7,
  CTX_OUTPUT_MODE_UI               = 8
} CtxOutputmode;

/* Returns @a squared (a * a). */
static CTX_INLINE float ctx_pow2 (const float a) { return a * a; }
#if CTX_MATH

/* Absolute value of @x, obtained by clearing the top bit of the float's
 * 32-bit representation (assumes the sign lives in bit 31, as in IEEE-754
 * single precision).
 */
static CTX_INLINE float
ctx_fabsf (const float x)
{
  union
  {
    float f;
    uint32_t i;
  } bits;

  bits.f  = x;
  bits.i &= 0x7fffffff;   /* everything but the top bit */
  return bits.f;
}

/* Approximates 1/sqrt(x).  An initial estimate is formed directly on the
 * bit pattern of @x (0x5f3759df minus half the pattern) and then refined
 * with two Newton-Raphson steps.
 */
static CTX_INLINE float
ctx_invsqrtf (const float x)
{
  union
  {
    float f;
    uint32_t i;
  } conv;
  float y;

  conv.f = x;
  conv.i = 0x5f3759df - (conv.i >> 1);
  y = conv.f;

  y *= (1.5f - 0.5f * x * y * y);
  y *= (1.5f - 0.5f * x * y * y); /* second step for higher precision */
  return y;
}


/* Square root of @a computed as the reciprocal of ctx_invsqrtf (a); the
 * accuracy is therefore that of the ctx_invsqrtf() approximation.
 */
CTX_INLINE static float ctx_sqrtf (const float a)
{
  return 1.0f/ctx_invsqrtf (a);
}

/* Length of the vector (a, b): ctx_sqrtf() of a*a + b*b. */
CTX_INLINE static float ctx_hypotf (const float a, const float b)
{
  const float sum_of_squares = ctx_pow2 (a) + ctx_pow2 (b);
  return ctx_sqrtf (sum_of_squares);
}

/* Approximates sin(x) for @x in radians.
 *
 * The argument is first brought into [-pi, pi]:
 *   - magnitudes beyond 1000*pi are replaced by +-0.5 (a deliberate,
 *     lossy guard); this happens before any other step so that the
 *     float -> long conversions below always stay in range, also for
 *     infinities,
 *   - multiples of 2*pi are removed by truncating division,
 *   - the remaining distance is closed by adding/subtracting 2*pi.
 * The reduced value is then fed to an odd polynomial evaluated with
 * Horner's scheme.
 */
CTX_INLINE static float
ctx_sinf (float x)
{
  if (x < -CTX_PI * 1000)
  {
    x = -0.5f;
  }
  if (x > CTX_PI * 1000)
  {
    // really large numbers tend to cause practically infinite
    // loops since the > CTX_PI * 2 seemingly fails
    x = 0.5f;
  }
  if (x < -CTX_PI * 2)
    {
      /* reduce on the positive side, then restore the sign */
      x = -x;
      long ix = (long)(x / (CTX_PI * 2));
      x = x - ix * CTX_PI * 2;
      x = -x;
    }
  if (x > CTX_PI * 2)
    {
      long ix = (long)(x / (CTX_PI * 2));
      x = x - (ix * CTX_PI * 2);
    }
  while (x < -CTX_PI)
    { x += CTX_PI * 2; }
  while (x > CTX_PI)
    { x -= CTX_PI * 2; }

  /* source : http://mooooo.ooo/chebyshev-sine-approximation/ */
  const float coeffs[]=
  {
    -0.10132118f,           // x
      0.0066208798f,         // x^3
      -0.00017350505f,        // x^5
      0.0000025222919f,      // x^7
      -0.000000023317787f,    // x^9
      0.00000000013291342f
    }; // x^11
  float x2 = x*x;
  float p11 = coeffs[5];
  float p9  = p11*x2 + coeffs[4];
  float p7  = p9*x2  + coeffs[3];
  float p5  = p7*x2  + coeffs[2];
  float p3  = p5*x2  + coeffs[1];
  float p1  = p3*x2  + coeffs[0];
  return (x - CTX_PI + 0.00000008742278f) *
         (x + CTX_PI - 0.00000008742278f) * p1 * x;
}

/* Approximates the angle of the point (x, y) in radians.
 *
 * x == 0 is answered directly: +pi/2 for y > 0, 0 for y == 0 and -pi/2
 * otherwise.  In all other cases the ratio z = y/x is used:
 *   |z| <  1 : z / (1 + 0.28 z^2), shifted by +-pi when x is negative
 *              (minus for y < 0, plus otherwise)
 *   |z| >= 1 : pi/2 - z / (z^2 + 0.28), shifted by -pi when y is negative
 */
static CTX_INLINE float ctx_atan2f (const float y, const float x)
{
  float angle;
  float ratio;

  if (x == 0.0f)
    {
      if (y > 0.0f)
        return CTX_PI/2;
      if (y == 0.0f)
        return 0.0f;
      return -CTX_PI/2;
    }

  ratio = y/x;

  if (ctx_fabsf (ratio) < 1.0f)
    {
      angle = ratio / (1.0f + 0.28f*ratio*ratio);
      if (x < 0.0f)
        {
          if (y < 0.0f)
            return angle - CTX_PI;
          return angle + CTX_PI;
        }
      return angle;
    }

  angle = CTX_PI/2 - ratio / (ratio*ratio + 0.28f);
  if (y < 0.0f)
    return angle - CTX_PI;
  return angle;
}


/* Arc tangent of @a, computed as ctx_atan2f (a, 1.0f). */
static CTX_INLINE float ctx_atanf (const float a)
{
  return ctx_atan2f (a, 1.0f);
}

/* Arc sine of @x via the identity asin(x) = atan (x / sqrt (1 - x^2)),
 * using ctx_invsqrtf() for the reciprocal square root.
 */
static CTX_INLINE float ctx_asinf (const float x)
{
  const float inv_root = ctx_invsqrtf (1.0f-ctx_pow2 (x) );
  return ctx_atanf ( x * inv_root );
}

/* Arc cosine of @x, i.e. the angle of the point (x, sqrt (1 - x^2)).
 *
 * ctx_atan2f() is used instead of an arc tangent of the quotient so that
 * negative @x yields an angle in (pi/2, pi] rather than a negative one and
 * x == 0 yields pi/2 without dividing by zero.
 */
static CTX_INLINE float ctx_acosf (const float x)
{
  return ctx_atan2f ( ctx_sqrtf (1.0f-ctx_pow2 (x) ), x );
}

/* Cosine of @a (radians), computed as ctx_sinf (a + pi/2). */
CTX_INLINE static float ctx_cosf (const float a)
{
  return ctx_sinf ( (a) + CTX_PI/2.0f);
}

/* Tangent of @a (radians): ctx_sinf (a) divided by ctx_cosf (a).
 * (The operands used to be swapped, which produced the cotangent.)
 */
static CTX_INLINE float ctx_tanf (const float a)
{
  return (ctx_sinf (a) / ctx_cosf (a) );
}
/* Largest integral value not greater than @x.
 *
 * Values with a magnitude of 2^23 or more carry no fractional bits in a
 * 32-bit float and are returned unchanged, as are infinities and NaN; this
 * also keeps the int conversion below within range.  For everything else
 * the value is truncated and then lowered by one when truncation rounded
 * upwards, which is the case for negative non-integers.
 */
static CTX_INLINE float
ctx_floorf (const float x)
{
  if (!(x > -8388608.0f && x < 8388608.0f))
    return x;

  const int   truncated = (int)x;
  const float whole     = (float)truncated;
  if (whole > x)
    return whole - 1.0f;
  return whole;
}
/* Fast, coarse approximation of e^x.
 *
 * e^x is rewritten as 2^p with p = x * log2(e).  Scaling (p + bias) by
 * 2^23 and reinterpreting the resulting integer as a float places p in the
 * exponent field, with the fractional part spilling into the mantissa as a
 * linear interpolation between powers of two; the bias 126.94269504 is the
 * constant of the well known "fasterpow2" approximation.
 *
 * p is limited to [-126, 128] so the conversion to uint32_t stays in
 * range; the negated comparison also routes NaN to the lower bound.
 */
static CTX_INLINE float
ctx_expf (const float x)
{
  float p = 1.442695040f * x;   /* log2(e) * x */
  if (!(p > -126.0f))
    p = -126.0f;
  if (p > 128.0f)
    p = 128.0f;

  union { uint32_t i; float f; } v =
    {  (uint32_t)( (1 << 23) * (p + 126.94269504f)) };
  return v.f;
}

/* define more trig based on having sqrt, sin and atan2 */

#else
#if !__COSMOPOLITAN__
#include <math.h>
#endif
/* This is the #else branch of #if CTX_MATH: the ctx_*f helpers forward
 * directly to the C library function of the same name (fabsf, floorf, ...).
 */
static CTX_INLINE float ctx_fabsf (const float x)           { return fabsf (x); }
static CTX_INLINE float ctx_floorf (const float x)          { return floorf (x); }
static CTX_INLINE float ctx_asinf (const float x)           { return asinf (x); }
static CTX_INLINE float ctx_sinf (const float x)            { return sinf (x); }
static CTX_INLINE float ctx_atan2f (const float y, float x) { return atan2f (y, x); }
static CTX_INLINE float ctx_hypotf (const float a, float b) { return hypotf (a, b); }
static CTX_INLINE float ctx_acosf (const float a)           { return acosf (a); }
static CTX_INLINE float ctx_cosf (const float a)            { return cosf (a); }
static CTX_INLINE float ctx_tanf (const float a)            { return tanf (a); }
static CTX_INLINE float ctx_expf (const float p)            { return expf (p); }
static CTX_INLINE float ctx_sqrtf (const float a)           { return sqrtf (a); }
static CTX_INLINE float ctx_atanf (const float a)           { return atanf (a); }
#endif

/* Rough estimate of 1/sqrt(x): only the bit-pattern step
 * (0x5f3759df minus half the pattern of @x) is performed, no refinement
 * steps follow.
 */
static CTX_INLINE float
ctx_invsqrtf_fast (const float x)
{
  union
  {
    float f;
    uint32_t i;
  } conv;

  conv.f = x;
  conv.i = 0x5f3759df - (conv.i >> 1);
  return conv.f;
}
/* Rough square root of @a: the reciprocal of ctx_invsqrtf_fast (a). */
CTX_INLINE static float ctx_sqrtf_fast (const float a)
{
  return 1.0f/ctx_invsqrtf_fast (a);
}
/* Rough length of the vector (a, b): ctx_sqrtf_fast() of a*a + b*b. */
CTX_INLINE static float ctx_hypotf_fast (const float a, const float b)
{
  return ctx_sqrtf_fast (ctx_pow2 (a)+ctx_pow2 (b) );
}


/* Angle approximation working from a numerator and a precomputed
 * reciprocal: the ratio is z = x * y_recip.
 *
 *   |z| <  1 : z / (1 + 0.28 z^2), shifted by +-pi when @y_recip is
 *              negative (minus for x < 0, plus otherwise)
 *   |z| >= 1 : pi/2 - z / (z^2 + 0.28), shifted by -pi when @x is negative
 *
 * Unlike ctx_atan2f() there is no special handling of a zero denominator.
 */
static CTX_INLINE float ctx_atan2f_rest (
  const float x, const float y_recip)
{
  const float ratio = x * y_recip;
  float angle;

  if (ctx_fabsf (ratio) < 1.0f)
    {
      angle = ratio / (1.0f + 0.28f*ratio*ratio);
      if (y_recip < 0.0f)
        {
          if (x < 0.0f)
            return angle - CTX_PI;
          return angle + CTX_PI;
        }
      return angle;
    }

  angle = CTX_PI/2 - ratio / (ratio*ratio + 0.28f);
  if (x < 0.0f)
    return angle - CTX_PI;
  return angle;
}


/* Parses a float from @str with strtof(); when @endptr is non-NULL it
 * receives the position where parsing stopped.
 * XXX: , vs . problem in some locales
 */
static inline float _ctx_parse_float (const char *str, char **endptr)
{
  const float parsed = strtof (str, endptr);
  return parsed;
}

const char *ctx_get_string (Ctx *ctx, uint32_t hash);
void ctx_set_string (Ctx *ctx, uint32_t hash, const char *value);
void ctx_set_blob (Ctx *ctx, uint32_t hash, const void*value, int length);
typedef struct _CtxColor CtxColor;

void
ctx_matrix_translate (CtxMatrix *matrix, float x, float y);


void ctx_get_matrix (Ctx *ctx, CtxMatrix *matrix);
void ctx_set_matrix (Ctx *ctx, CtxMatrix *matrix);
int _ctx_is_rasterizer (Ctx *ctx);

int ctx_color (Ctx *ctx, const char *string);
typedef struct _CtxState CtxState;
CtxColor *ctx_color_new (void);
CtxState *ctx_get_state (Ctx *ctx);
void ctx_color_get_rgba (CtxState *state, CtxColor *color, float *out);
void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
void ctx_color_free (CtxColor *color);
void ctx_set_color (Ctx *ctx, uint32_t hash, CtxColor *color);
int  ctx_get_color (Ctx *ctx, uint32_t hash, CtxColor *color);
int  ctx_color_set_from_string (Ctx *ctx, CtxColor *color, const char *string);

int ctx_color_is_transparent (CtxColor *color);

void ctx_user_to_device          (Ctx *ctx, float *x, float *y);
void ctx_user_to_device_distance (Ctx *ctx, float *x, float *y);


void ctx_device_to_user          (Ctx *ctx, float *x, float *y);
void ctx_device_to_user_distance (Ctx *ctx, float *x, float *y);

int ctx_is_set_now (Ctx *ctx, uint32_t hash);
void ctx_set_size (Ctx *ctx, int width, int height);

/* Returns the largest absolute value among the four matrix entries
 * m[0][0], m[0][1], m[1][0] and m[1][1].
 */
static inline float ctx_matrix_get_scale (CtxMatrix *matrix)
{
  const float row0 = ctx_maxf (ctx_fabsf (matrix->m[0][0]),
                               ctx_fabsf (matrix->m[0][1]) );
  const float row1 = ctx_maxf (ctx_fabsf (matrix->m[1][0]),
                               ctx_fabsf (matrix->m[1][1]) );
  return ctx_maxf (row0, row1);
}

#if CTX_GET_CONTENTS
int
_ctx_file_get_contents (const char     *path,
                        unsigned char **contents,
                        long           *length);
#endif

#if CTX_BABL
void ctx_rasterizer_colorspace_babl (CtxState      *state,
                                     CtxColorSpace  space_slot,
                                     const Babl    *space);
#endif
void ctx_rasterizer_colorspace_icc (CtxState            *state,
                                    CtxColorSpace        space_slot,
                                    const unsigned char *icc_data,
                                    int                  icc_length);


CtxBuffer *ctx_buffer_new_bare (void);

void ctx_buffer_set_data (CtxBuffer *buffer,
                          void *data, int width, int height,
                          int stride,
                          CtxPixelFormat pixel_format,
                          void (*freefunc) (void *pixels, void *user_data),
                          void *user_data);

int ctx_textureclock (Ctx *ctx);


void ctx_list_backends(void);
int ctx_pixel_format_ebpp (CtxPixelFormat format);

#endif
#if 0
#if !__COSMOPOLITAN__
#include <stdarg.h>
#include <unistd.h>
#include <math.h>

#endif
#include "ctx.h"
#endif

/* An immediate mode toolkit for ctx. ctx expects to receive full frames of
 * data to draw and, by keeping knowledge of the contents of the previous
 * frame, avoids re-drawing unchanged areas of the display.
 *
 * 
 * TODO/BUGS:
 *   - more than one scroll per panel
 *   - horizontal scroll
 */

typedef struct _Css      Css;
typedef struct _CssPanel CssPanel;


extern int _css_key_bindings_active;
Css *css_new        (Ctx *ctx);
void css_destroy    (Css *itk);
void css_reset      (Css *itk);

CssPanel *css_panel_start (Css *itk, const char *title, int x, int y, int width, int height);
void      css_panel_end   (Css *itk);

void css_newline    (Css *itk);
void css_seperator  (Css *itk);
void css_titlebar   (Css *itk, const char *label);

void css_label      (Css *itk, const char *label);
void css_labelf     (Css *itk, const char *format, ...);


int  css_toggle     (Css *itk, const char *label, int in_val);

int  css_button     (Css *itk, const char *label);

/* this is already modernized - it is the same as css_toggle but gets rendered
 * differently
 */
int  css_radio      (Css *itk, const char *label, int set);


/* needs tweaking / expander2 - it should return the state of the expander */
int  css_expander   (Css *itk, const char *label, int *val);

/* return newly set value if any change */
int  css_choice     (Css *itk, const char *label, int in_val);
void css_choice_add (Css *itk, int value, const char *label);
void css_set_focus_no (Css *itk, int pos);

int  css_control_no (Css *itk);


/*
 * returns NULL if value is unchanged or a newly allocated string
 * when entry has been changed.
 *
 */
char *css_entry (Css *itk,
                 const char *label,
                 const char *fallback,
                 const char *val);



/* returns the new value - if it has changed due to interaction
 */
float css_slider    (Css *itk, const char *label, float value, double min, double max, double step);

/* these are utilities to keep some code a little bit shorter
 */
void css_slider_int   (Css *itk, const char *label, int *val, int min, int max, int step);
void css_slider_float (Css *itk, const char *label, float *val, float min, float max, float step);
void css_slider_uint8 (Css *itk, const char *label, uint8_t *val, uint8_t min, uint8_t max, uint8_t step);

/*  returns 1 when the value has been changed
 *
 *  this expects a string buffer to write to; maxlen is its length,
 *  including room for the terminating \0
 */
int css_entry_str_len (Css        *itk,
                       const char *label,
                       const char *fallback,
                       char       *val,
                       int         maxlen);

/* to be called on focus changes that might take focus away from
 * edited css_entry
 */
void css_entry_commit (Css *itk);
void css_lost_focus (Css *itk);

/* return new value if changed */


//void css_choice_add (Css *itk, int value, const char *label);
void        css_done          (Css *itk);
void        css_style_color   (Ctx *ctx, const char *name);
//void      css_style_color2  (Css *itk, const char *klass, const char*attr);
void        css_style_bg      (Css *itk, const char *klass);
void        css_style_fg      (Css *itk, const char *klass);
const char *css_style_string  (const char *name);
float       css_style_float   (char *name);
float       css_em            (Css *itk);

Ctx        *css_ctx            (Css *itk);
float       css_x              (Css *itk);
float       css_y              (Css *itk);
void        css_set_x          (Css *itk, float x);
void        css_set_y          (Css *itk, float y);
void        css_set_xy         (Css *itk, float x, float y);
void        css_set_edge_left  (Css *itk, float edge);
void        css_set_edge_right (Css *itk, float edge);
void        css_set_edge_top   (Css *itk, float edge);
void        css_set_edge_bottom(Css *itk, float edge);
float       css_wrap_width     (Css *itk);
float       css_height         (Css *itk);
void        css_set_height     (Css *itk, float height);
float       css_edge_left      (Css *itk);
float       css_edge_right     (Css *itk);
float       css_edge_top       (Css *itk);
float       css_edge_bottom    (Css *itk);
void        css_set_wrap_width (Css *itk, float wwidth);

/* runs until ctx_exit (itk->ctx) is called */
void        css_run_ui         (Css *itk, int (*ui_fun)(Css *itk, void *data), void *ui_data);
void        css_set_font_size  (Css *itk, float font_size);

void        css_set_scale       (Css *itk, float scale);
float       css_scale           (Css *itk);
float       css_rel_ver_advance (Css *itk);

int         css_focus_no         (Css *itk);
int         css_is_editing_entry (Css *itk);

/*
   A helper function that does css_run_ui on a ctx context that is both created
   and destroyed, this helps keep small programs tiny.

   void css_main (int (*ui_fun)(Css *itk, void *data), void *ui_data)
   {
     Ctx *ctx = ctx_new (-1, -1, NULL);
     Css *itk = css_new (ctx);
     css_run_ui (itk, ui_fun, ui_data);
     css_destroy (itk);
     ctx_destroy (ctx);
    }
 */

void css_main         (int (*ui_fun)(Css *itk, void *data), void *ui_data);
void css_key_bindings (Css *itk);

typedef struct _CtxControl CtxControl;
CtxControl *css_focused_control (Css *itk);
CtxControl *css_find_control    (Css *itk, int no);
CtxControl *css_add_control     (Css *itk,
                                 int type,
                                 const char *label,
                                 float x, float y,
                                 float width, float height);
void css_set_flag               (Css *itk, int flag, int on);


void css_panels_reset_scroll    (Css *itk);

void css_ctx_settings (Css *itk);
void css_css_settings (Css *itk);
void css_key_quit (CtxEvent *event, void *userdata, void *userdata2);

/* Control type identifiers; values are consecutive, starting at 1. */
enum {
  UI_SLIDER    = 1,
  UI_EXPANDER  = 2,
  UI_TOGGLE    = 3,
  UI_LABEL     = 4,
  UI_TITLEBAR  = 5,
  UI_BUTTON    = 6,
  UI_CHOICE    = 7,
  UI_ENTRY     = 8,
  UI_MENU      = 9,
  UI_SEPARATOR = 10,
  UI_RADIO     = 11
};

/* Bit flags; CSS_FLAG_DEFAULT is the combination of SHOW_LABEL and ACTIVE. */
enum {
  CSS_FLAG_SHOW_LABEL           = 0x1,
  CSS_FLAG_ACTIVE               = 0x2,
  CSS_FLAG_CANCEL_ON_LOST_FOCUS = 0x4,
  CSS_FLAG_DEFAULT              = (CSS_FLAG_SHOW_LABEL | CSS_FLAG_ACTIVE)
};
 // XXX : commit or cancel entry on focus change
 //


/* State kept for one panel.  css_panel_start() (declared earlier in this
 * header) takes title, x, y, width and height arguments and returns a
 * CssPanel pointer.
 * NOTE(review): the per-field remarks below are inferred from the field
 * names only - confirm against the implementation.
 */
struct _CssPanel{
  int   x;       /* presumably panel position */
  int   y;
  int   width;   /* presumably panel size */
  int   height;
  int   expanded;
  int   max_y;
  float scroll_start_y;
  float scroll;  /* see css_panel_scroll() / css_panel_set_scroll() */

  int   do_scroll_jump;
  const char *title;
};

/* One entry made of a char id and four 8-bit components r, g, b, a.
 * NOTE(review): presumably a palette color keyed by id - confirm with the
 * code that uses it.
 */
typedef struct CssPal{
  char id;
  uint8_t r;
  uint8_t g;
  uint8_t b;
  uint8_t a;
} CssPal;

/* Record describing one control.  css_add_control() (declared earlier in
 * this header) returns a pointer to such a record and takes type, label,
 * x, y, width and height arguments that share their names with fields
 * here.
 * NOTE(review): remarks marked "presumably" are inferred from names only.
 */
struct _CtxControl{
  int no;
  int ref_count;
  uint64_t flags;
  int type; /* this should be a pointer to the vfuncs/class struct
               instead - along with one optional instance data per control */
  char *label;
  void *id; /* possibly unique identifier */

  float x;
  float y;
  float width;
  float height;
  void *val;

  float value;

  char *entry_value;

  char *fallback;
  float min;   /* presumably the value range and increment, cf. the */
  float max;   /* min/max/step parameters of css_slider()           */
  float step;
};


float css_panel_scroll (Css *itk);
void css_panel_set_scroll (Css *itk, float scroll);

typedef struct _Css          Css;
typedef struct _CtxStyle     CtxStyle;

void css_start            (Css *mrg, const char *class_name, void *id_ptr);
void css_start_with_style (Css *mrg,
                           const char *style_id,
                           void       *id_ptr,
                           const char *style);

void css_start_with_stylef (Css *mrg, const char *style_id, void *id_ptr,
                            const char *format, ...);
void css_xml_render (Css *mrg,
                     char *uri_base,
                     void (*link_cb) (CtxEvent *event, void *href, void *link_data),
                     void *link_data,
                     void *(finalize)(void *listen_data, void *listen_data2, void *finalize_data),
                     void *finalize_data,
                     char *html_);

void
css_printf (Css *mrg, const char *format, ...);

void css_print_xml (Css *mrg, const char *xml);

void
css_printf_xml (Css *mrg, const char *format, ...);

void
css_print_xml (Css *mrg, const char *utf8);

// returns width added horizontally
float css_addstr (Css *mrg, const char *string, int utf8_length);

void ctx_stylesheet_add (Css *mrg, const char *css, const char *uri_base,
                         int priority, char **error);
CtxStyle *ctx_style (Css *mrg);

void css_end (Css *mrg, CtxFloatRectangle *ret_rect);

int
mrg_get_contents (Css         *mrg,
                  const char  *referer,
                  const char  *input_uri,
                  char       **contents,
                  long        *length);

int css_xml_extent (Css *mrg, uint8_t *contents, float *width, float *height, float *vb_x, float *vb_y, float *vb_width, float *vb_height);
#ifndef __CTX_CONSTANTS
#define __CTX_CONSTANTS
#define SQZ_a 195u // "a"
#define SQZ_absolute 1840437120u // "absolute"
#define SQZ_action 3696112672u // "action"
#define SQZ_addStop 220908742u // "addStop"
#define SQZ_aelig 1120987016u // "aelig"
#define SQZ_alias 2034413622u // "alias"
#define SQZ_all_scroll 1118648896u // "all-scroll"
#define SQZ_alpha 4000549904u // "alpha"
#define SQZ_alphabetic 2966120946u // "alphabetic"
#define SQZ_amp 7368131u // "amp"
#define SQZ_apos 1936683203u // "apos"
#define SQZ_aqua 1635086787u // "aqua"
#define SQZ_arc 6517443u // "arc"
#define SQZ_arcTo 3982854812u // "arcTo"
#define SQZ_aring 855903140u // "aring"
#define SQZ_auto 1869903299u // "auto"
#define SQZ_background 1071035380u // "background"
#define SQZ_background_color 609802584u // "background-color"
#define SQZ_beginPath 120180698u // "beginPath"
#define SQZ_bevel 761062270u // "bevel"
#define SQZ_bidi_override 29328268u // "bidi-override"
#define SQZ_black 271321868u // "black"
#define SQZ_blend 316843154u // "blend"
#define SQZ_blendMode 644815934u // "blendMode"
#define SQZ_blending 3694082958u // "blending"
#define SQZ_blink 2515894180u // "blink"
#define SQZ_block 2220858820u // "block"
#define SQZ_blue 1702194373u // "blue"
#define SQZ_bold 1684828101u // "bold"
#define SQZ_bolder 2370434142u // "bolder"
#define SQZ_border 1170187232u // "border"
#define SQZ_border_bottom 3975273498u // "border-bottom"
#define SQZ_border_bottom_color 3186075110u // "border-bottom-color"
#define SQZ_border_bottom_width 1124387196u // "border-bottom-width"
#define SQZ_border_box 1245790092u // "border-box"
#define SQZ_border_color 541811290u // "border-color"
#define SQZ_border_left 2975583868u // "border-left"
#define SQZ_border_left_color 3331269224u // "border-left-color"
#define SQZ_border_left_width 3310378862u // "border-left-width"
#define SQZ_border_right 4045413762u // "border-right"
#define SQZ_border_right_color 243329524u // "border-right-color"
#define SQZ_border_right_width 1360128814u // "border-right-width"
#define SQZ_border_top 1801458758u // "border-top"
#define SQZ_border_top_color 3407512826u // "border-top-color"
#define SQZ_border_top_width 4176048594u // "border-top-width"
#define SQZ_border_width 3642950774u // "border-width"
#define SQZ_both 1752461253u // "both"
#define SQZ_bottom 1302706776u // "bottom"
#define SQZ_box_sizing 3965777366u // "box-sizing"
#define SQZ_br 29381u // "br"
#define SQZ_bull 1819047365u // "bull"
#define SQZ_butt 1953789381u // "butt"
#define SQZ_c 199u // "c"
#define SQZ_cap 7365063u // "cap"
#define SQZ_cedil 1951672802u // "cedil"
#define SQZ_cell 1819043271u // "cell"
#define SQZ_cent 1953392071u // "cent"
#define SQZ_center 1006603526u // "center"
#define SQZ_circle 196442712u // "circle"
#define SQZ_class 680581762u // "class"
#define SQZ_clear 1094071360u // "clear"
#define SQZ_clip 1885957319u // "clip"
#define SQZ_closePath 3537486488u // "closePath"
#define SQZ_cmyk 1803120071u // "cmyk"
#define SQZ_cmykS 3263315852u // "cmykS"
#define SQZ_cmykSpace 2366647638u // "cmykSpace"
#define SQZ_cmyka 3355381580u // "cmyka"
#define SQZ_cmykaS 3917993734u // "cmykaS"
#define SQZ_col_resize 2272161114u // "col-resize"
#define SQZ_colgroup 4270888898u // "colgroup"
#define SQZ_color 4231809138u // "color"
#define SQZ_colorSpace 4246256736u // "colorSpace"
#define SQZ_compositingMode 3764262848u // "compositingMode"
#define SQZ_conicGradient 1669326832u // "conicGradient"
#define SQZ_context_menu 434478918u // "context-menu"
#define SQZ_copy 2037411783u // "copy"
#define SQZ_crosshair 4030082594u // "crosshair"
#define SQZ_curren 3230040072u // "curren"
#define SQZ_currentColor 3452186816u // "currentColor"
#define SQZ_cursor 4212479966u // "cursor"
#define SQZ_cursor_wait 1647783762u // "cursor-wait"
#define SQZ_curveTo 48499966u // "curveTo"
#define SQZ_cx 30919u // "cx"
#define SQZ_cy 31175u // "cy"
#define SQZ_cyan 1851881927u // "cyan"
#define SQZ_d 201u // "d"
#define SQZ_darken 2939689930u // "darken"
#define SQZ_dd 25801u // "dd"
#define SQZ_default 2280700682u // "default"
#define SQZ_defineFont 813704086u // "defineFont"
#define SQZ_defineGlyph 1628031142u // "defineGlyph"
#define SQZ_defineTexture 4030922434u // "defineTexture"
#define SQZ_defs 1936090569u // "defs"
#define SQZ_deg 6776265u // "deg"
#define SQZ_destinationAtop 1605909240u // "destinationAtop"
#define SQZ_destinationIn 4096489814u // "destinationIn"
#define SQZ_destinationOut 1966109282u // "destinationOut"
#define SQZ_destinationOver 507903672u // "destinationOver"
#define SQZ_deviceCMYK 3879736092u // "deviceCMYK"
#define SQZ_deviceRGB 911778270u // "deviceRGB"
#define SQZ_difference 3137481792u // "difference"
#define SQZ_direction 626501720u // "direction"
#define SQZ_display 512467722u // "display"
#define SQZ_div 7760329u // "div"
#define SQZ_dotted 1961166438u // "dotted"
#define SQZ_drgb 1650946761u // "drgb"
#define SQZ_drgbS 179784888u // "drgbS"
#define SQZ_drgbSpace 1000873868u // "drgbSpace"
#define SQZ_drgba 465014226u // "drgba"
#define SQZ_drgbaS 2465325300u // "drgbaS"
#define SQZ_dt 29897u // "dt"
#define SQZ_e_resize 3569673796u // "e-resize"
#define SQZ_ellipse 3790670476u // "ellipse"
#define SQZ_embed 3279610738u // "embed"
#define SQZ_end 6581963u // "end"
#define SQZ_endFrame 2645960260u // "endFrame"
#define SQZ_endGroup 2864376370u // "endGroup"
#define SQZ_euro 1869772235u // "euro"
#define SQZ_evenOdd 3373267632u // "evenOdd"
#define SQZ_evenodd 1852152574u // "evenodd"
#define SQZ_ew_resize 2458267842u // "ew-resize"
#define SQZ_extend 2652659078u // "extend"
#define SQZ_feather 4162344430u // "feather"
#define SQZ_file 1701603789u // "file"
#define SQZ_fill 1819044301u // "fill"
#define SQZ_fillRect 3070816944u // "fillRect"
#define SQZ_fillRule 2262201016u // "fillRule"
#define SQZ_fill_color 2350805940u // "fill-color"
#define SQZ_fill_rule 1151472398u // "fill-rule"
#define SQZ_first_child 439258010u // "first-child"
#define SQZ_fixed 778407114u // "fixed"
#define SQZ_float 2455282620u // "float"
#define SQZ_flow_root 518738066u // "flow-root"
#define SQZ_font 1953394637u // "font"
#define SQZ_fontSize 2620910512u // "fontSize"
#define SQZ_font_family 2798191102u // "font-family"
#define SQZ_font_size 3477901146u // "font-size"
#define SQZ_font_style 2628706306u // "font-style"
#define SQZ_font_weight 1197895732u // "font-weight"
#define SQZ_fr 29389u // "fr"
#define SQZ_fuchsia 439362132u // "fuchsia"
#define SQZ_fx 30925u // "fx"
#define SQZ_fy 31181u // "fy"
#define SQZ_g 207u // "g"
#define SQZ_globalAlpha 3833809790u // "globalAlpha"
#define SQZ_glyph 1308254186u // "glyph"
#define SQZ_gradientAddStop 2831884664u // "gradientAddStop"
#define SQZ_gradientTransform 1288938878u // "gradientTransform"
#define SQZ_gradientUnits 2968072588u // "gradientUnits"
#define SQZ_gray 2036429519u // "gray"
#define SQZ_grayS 417614710u // "grayS"
#define SQZ_graya 2560443068u // "graya"
#define SQZ_grayaS 2408801086u // "grayaS"
#define SQZ_green 1517032782u // "green"
#define SQZ_gt 29903u // "gt"
#define SQZ_hanging 72134188u // "hanging"
#define SQZ_head 1684104657u // "head"
#define SQZ_height 3762298230u // "height"
#define SQZ_hellip 4254915686u // "hellip"
#define SQZ_help 1886152145u // "help"
#define SQZ_hidden 2737189728u // "hidden"
#define SQZ_horLineTo 2754532u // "horLineTo"
#define SQZ_hr 29393u // "hr"
#define SQZ_href 1717924561u // "href"
#define SQZ_html 1819112657u // "html"
#define SQZ_http 1886680273u // "http"
#define SQZ_hue 6649297u // "hue"
#define SQZ_id 25811u // "id"
#define SQZ_identity 4029142280u // "identity"
#define SQZ_ideographic 3361616408u // "ideographic"
#define SQZ_iexcl 2255292952u // "iexcl"
#define SQZ_imageSmoothing 3109175850u // "imageSmoothing"
#define SQZ_img 6778323u // "img"
#define SQZ_inline_block 3464466506u // "inline-block"
#define SQZ_input 3954510188u // "input"
#define SQZ_inset 3128333578u // "inset"
#define SQZ_italic 396886224u // "italic"
#define SQZ_join 1852403669u // "join"
#define SQZ_justify 233888506u // "justify"
#define SQZ_kerningPair 2079485344u // "kerningPair"
#define SQZ_lab 6447577u // "lab"
#define SQZ_labS 1398956505u // "labS"
#define SQZ_laba 1633837529u // "laba"
#define SQZ_labaS 4170772618u // "labaS"
#define SQZ_laquo 64667100u // "laquo"
#define SQZ_lch 6841305u // "lch"
#define SQZ_lchS 1399350233u // "lchS"
#define SQZ_lcha 1634231257u // "lcha"
#define SQZ_lchaS 2598918966u // "lchaS"
#define SQZ_left 1952867801u // "left"
#define SQZ_letter_spacing 1326564462u // "letter-spacing"
#define SQZ_li 27097u // "li"
#define SQZ_lighten 2693650260u // "lighten"
#define SQZ_lime 1701669337u // "lime"
#define SQZ_line 1701734873u // "line"
#define SQZ_lineCap 3957741450u // "lineCap"
#define SQZ_lineDash 2886130602u // "lineDash"
#define SQZ_lineDashOffset 1904302200u // "lineDashOffset"
#define SQZ_lineHeight 1698077880u // "lineHeight"
#define SQZ_lineJoin 3891781172u // "lineJoin"
#define SQZ_lineTo 3077153258u // "lineTo"
#define SQZ_lineWidth 3851910782u // "lineWidth"
#define SQZ_line_height 3733591786u // "line-height"
#define SQZ_line_width 1869007654u // "line-width"
#define SQZ_linearGradient 905023680u // "linearGradient"
#define SQZ_linethrough 34244248u // "linethrough"
#define SQZ_link 1802398169u // "link"
#define SQZ_list_item 2339943222u // "list-item"
#define SQZ_lower 697158190u // "lower"
#define SQZ_lowerBottom 4240938844u // "lowerBottom"
#define SQZ_lt 29913u // "lt"
#define SQZ_ltr 7501017u // "ltr"
#define SQZ_magenta 578523642u // "magenta"
#define SQZ_margin 2593567380u // "margin"
#define SQZ_margin_bottom 2905482030u // "margin-bottom"
#define SQZ_margin_left 1656714300u // "margin-left"
#define SQZ_margin_right 3017942990u // "margin-right"
#define SQZ_margin_top 625296560u // "margin-top"
#define SQZ_maroon 3386542482u // "maroon"
#define SQZ_max_height 129479668u // "max-height"
#define SQZ_max_width 713333008u // "max-width"
#define SQZ_maximize 4009606768u // "maximize"
#define SQZ_mdash 2043309408u // "mdash"
#define SQZ_meta 1635018203u // "meta"
#define SQZ_middle 2223770148u // "middle"
#define SQZ_middot 435157574u // "middot"
#define SQZ_min_height 3589941308u // "min-height"
#define SQZ_min_width 570636676u // "min-width"
#define SQZ_miter 886459200u // "miter"
#define SQZ_miterLimit 1856773288u // "miterLimit"
#define SQZ_move 1702260699u // "move"
#define SQZ_moveTo 3083476356u // "moveTo"
#define SQZ_multiply 3976122014u // "multiply"
#define SQZ_n_resize 202450980u // "n-resize"
#define SQZ_navy 2037801437u // "navy"
#define SQZ_nbsp 1886610141u // "nbsp"
#define SQZ_ne_resize 2436514350u // "ne-resize"
#define SQZ_nesw_resize 366763636u // "nesw-resize"
#define SQZ_newPage 2687321890u // "newPage"
#define SQZ_newPath 4208019970u // "newPath"
#define SQZ_newState 3121230612u // "newState"
#define SQZ_no_drop 890688824u // "no-drop"
#define SQZ_none 1701736413u // "none"
#define SQZ_nonzero 2746451764u // "nonzero"
#define SQZ_normal 1883425054u // "normal"
#define SQZ_not_allowed 224860282u // "not-allowed"
#define SQZ_nowrap 3884678112u // "nowrap"
#define SQZ_ns_resize 1753032392u // "ns-resize"
#define SQZ_nw_resize 3642132534u // "nw-resize"
#define SQZ_oblique 2262696070u // "oblique"
#define SQZ_offset 1396589030u // "offset"
#define SQZ_olive 3415799870u // "olive"
#define SQZ_omega 1147793334u // "omega"
#define SQZ_opacity 2184299270u // "opacity"
#define SQZ_option 537535526u // "option"
#define SQZ_ordm 1835299551u // "ordm"
#define SQZ_oslash 1567583338u // "oslash"
#define SQZ_overflow 3253908870u // "overflow"
#define SQZ_overline 2037328102u // "overline"
#define SQZ_p 225u // "p"
#define SQZ_padding 2806228288u // "padding"
#define SQZ_padding_bottom 2393940612u // "padding-bottom"
#define SQZ_padding_left 2955272020u // "padding-left"
#define SQZ_padding_right 4039401684u // "padding-right"
#define SQZ_padding_top 439515192u // "padding-top"
#define SQZ_paint 1082699806u // "paint"
#define SQZ_para 1634886113u // "para"
#define SQZ_path 1752457697u // "path"
#define SQZ_phi 6908129u // "phi"
#define SQZ_plusmn 279695816u // "plusmn"
#define SQZ_pointer 132752978u // "pointer"
#define SQZ_polygon 1804794854u // "polygon"
#define SQZ_polyline 845479076u // "polyline"
#define SQZ_position 3883240572u // "position"
#define SQZ_pound 363756384u // "pound"
#define SQZ_pre 6648545u // "pre"
#define SQZ_pre_line 4122418998u // "pre-line"
#define SQZ_pre_wrap 837003298u // "pre-wrap"
#define SQZ_preserve 1666261276u // "preserve"
#define SQZ_print_symbols 669612738u // "print-symbols"
#define SQZ_progress 2815872894u // "progress"
#define SQZ_purple 3066163412u // "purple"
#define SQZ_quadTo 3205866160u // "quadTo"
#define SQZ_quot 1953461731u // "quot"
#define SQZ_r 229u // "r"
#define SQZ_radialGradient 83850682u // "radialGradient"
#define SQZ_raise 2772216630u // "raise"
#define SQZ_raiseTop 1913256554u // "raiseTop"
#define SQZ_raquo 3261968210u // "raquo"
#define SQZ_rect 1952671205u // "rect"
#define SQZ_rectangle 1861211308u // "rectangle"
#define SQZ_red 6579685u // "red"
#define SQZ_reg 6776293u // "reg"
#define SQZ_rel 7103973u // "rel"
#define SQZ_relArcTo 4253296276u // "relArcTo"
#define SQZ_relCurveTo 2548821600u // "relCurveTo"
#define SQZ_relHorLineTo 3243288302u // "relHorLineTo"
#define SQZ_relLineTo 1630005260u // "relLineTo"
#define SQZ_relMoveTo 429673596u // "relMoveTo"
#define SQZ_relQuadTo 2362773920u // "relQuadTo"
#define SQZ_relSmoothTo 1725151068u // "relSmoothTo"
#define SQZ_relSmoothqTo 2960208730u // "relSmoothqTo"
#define SQZ_relVerLineTo 1112835164u // "relVerLineTo"
#define SQZ_relative 979899102u // "relative"
#define SQZ_resetPath 2864022032u // "resetPath"
#define SQZ_restore 1405984258u // "restore"
#define SQZ_reverse 2464792996u // "reverse"
#define SQZ_rgb 6449125u // "rgb"
#define SQZ_rgbS 1398958053u // "rgbS"
#define SQZ_rgbSpace 1625332122u // "rgbSpace"
#define SQZ_rgba 1633839077u // "rgba"
#define SQZ_rgbaS 4158357036u // "rgbaS"
#define SQZ_right 1751820526u // "right"
#define SQZ_rotate 1488065704u // "rotate"
#define SQZ_round 3173447652u // "round"
#define SQZ_roundRectangle 3273785582u // "roundRectangle"
#define SQZ_row_resize 702013530u // "row-resize"
#define SQZ_rtl 7107813u // "rtl"
#define SQZ_rx 30949u // "rx"
#define SQZ_ry 31205u // "ry"
#define SQZ_s_resize 125328402u // "s-resize"
#define SQZ_save 1702257127u // "save"
#define SQZ_scale 2647970994u // "scale"
#define SQZ_screen 3670530854u // "screen"
#define SQZ_scroll 3099410214u // "scroll"
#define SQZ_se_resize 315956726u // "se-resize"
#define SQZ_sect 1952671207u // "sect"
#define SQZ_setFontSize 231476456u // "setFontSize"
#define SQZ_setLineCap 174619460u // "setLineCap"
#define SQZ_setLineJoin 4048631422u // "setLineJoin"
#define SQZ_setLineWidth 3926586244u // "setLineWidth"
#define SQZ_shadowBlur 3889925774u // "shadowBlur"
#define SQZ_shadowColor 291132682u // "shadowColor"
#define SQZ_shadowOffsetX 1630263752u // "shadowOffsetX"
#define SQZ_shadowOffsetY 89733304u // "shadowOffsetY"
#define SQZ_shy 7956711u // "shy"
#define SQZ_silver 2643959904u // "silver"
#define SQZ_smoothQuadTo 954100048u // "smoothQuadTo"
#define SQZ_smoothTo 174420282u // "smoothTo"
#define SQZ_solid 2770487110u // "solid"
#define SQZ_sourceAtop 864901378u // "sourceAtop"
#define SQZ_sourceIn 1369048320u // "sourceIn"
#define SQZ_sourceOut 1938332472u // "sourceOut"
#define SQZ_sourceOver 134897678u // "sourceOver"
#define SQZ_sourceTransform 1611809620u // "sourceTransform"
#define SQZ_spreadMethod 3574032566u // "spreadMethod"
#define SQZ_square 239664392u // "square"
#define SQZ_src 6517479u // "src"
#define SQZ_start 4080984002u // "start"
#define SQZ_startFrame 2128007688u // "startFrame"
#define SQZ_startGroup 4085444064u // "startGroup"
#define SQZ_static 3471421972u // "static"
#define SQZ_stop 1886352615u // "stop"
#define SQZ_stop_color 1175890462u // "stop-color"
#define SQZ_stop_opacity 250359768u // "stop-opacity"
#define SQZ_stroke 1444212908u // "stroke"
#define SQZ_strokePos 888669104u // "strokePos"
#define SQZ_strokeRect 1131907664u // "strokeRect"
#define SQZ_strokeSource 2685374474u // "strokeSource"
#define SQZ_stroke_color 1804158464u // "stroke-color"
#define SQZ_stroke_linecap 535668102u // "stroke-linecap"
#define SQZ_stroke_linejoin 1888005366u // "stroke-linejoin"
#define SQZ_stroke_miterlimit 1499817720u // "stroke-miterlimit"
#define SQZ_stroke_width 2655701078u // "stroke-width"
#define SQZ_style 3511288852u // "style"
#define SQZ_sub 6452711u // "sub"
#define SQZ_sup1 829453799u // "sup1"
#define SQZ_sup2 846231015u // "sup2"
#define SQZ_sup3 863008231u // "sup3"
#define SQZ_super 532067904u // "super"
#define SQZ_svg 6780647u // "svg"
#define SQZ_sw_resize 2121684268u // "sw-resize"
#define SQZ_syntax_highlight 1534043236u // "syntax-highlight"
#define SQZ_tab_size 945510526u // "tab-size"
#define SQZ_table 2705307032u // "table"
#define SQZ_tbody 3472781808u // "tbody"
#define SQZ_td 25833u // "td"
#define SQZ_teal 1818322409u // "teal"
#define SQZ_text 1954047465u // "text"
#define SQZ_textAlign 3594701278u // "textAlign"
#define SQZ_textBaseline 1453773018u // "textBaseline"
#define SQZ_textDirection 1179776176u // "textDirection"
#define SQZ_text_align 519203916u // "text-align"
#define SQZ_text_anchor 2618811808u // "text-anchor"
#define SQZ_text_decoration 2191990606u // "text-decoration"
#define SQZ_text_indent 1164860048u // "text-indent"
#define SQZ_text_stroke 3855125514u // "text-stroke"
#define SQZ_text_stroke_color 357760164u // "text-stroke-color"
#define SQZ_text_stroke_width 376828216u // "text-stroke-width"
#define SQZ_texture 785032878u // "texture"
#define SQZ_tfoot 3061216442u // "tfoot"
#define SQZ_th 26857u // "th"
#define SQZ_thead 460193516u // "thead"
#define SQZ_title 300059882u // "title"
#define SQZ_top 7368681u // "top"
#define SQZ_tr 29417u // "tr"
#define SQZ_trade 3023660122u // "trade"
#define SQZ_transform 3615253204u // "transform"
#define SQZ_translate 1137670376u // "translate"
#define SQZ_transparent 1911736550u // "transparent"
#define SQZ_true 1702195945u // "true"
#define SQZ_ui 27115u // "ui"
#define SQZ_underline 4021545710u // "underline"
#define SQZ_unicode_bidi 185934494u // "unicode-bidi"
#define SQZ_unmaximize 3435737582u // "unmaximize"
#define SQZ_userCMYK 622108702u // "userCMYK"
#define SQZ_userRGB 4035904520u // "userRGB"
#define SQZ_verLineTo 1200482574u // "verLineTo"
#define SQZ_version 3945712782u // "version"
#define SQZ_vertical_align 3047242218u // "vertical-align"
#define SQZ_vertical_text 580215056u // "vertical-text"
#define SQZ_viewBox 1582737754u // "viewBox"
#define SQZ_viewbox 816983354u // "viewbox"
#define SQZ_visibility 4182119682u // "visibility"
#define SQZ_visible 2712656970u // "visible"
#define SQZ_w_resize 505786646u // "w-resize"
#define SQZ_white 518020662u // "white"
#define SQZ_white_space 2063040106u // "white-space"
#define SQZ_width 3799171678u // "width"
#define SQZ_winding 2304820652u // "winding"
#define SQZ_word_spacing 2779764612u // "word-spacing"
#define SQZ_wrapLeft 3331521568u // "wrapLeft"
#define SQZ_wrapRight 1810250152u // "wrapRight"
#define SQZ_x 241u // "x"
#define SQZ_x1 12785u // "x1"
#define SQZ_x2 13041u // "x2"
#define SQZ_xor 7499761u // "xor"
#define SQZ_y 243u // "y"
#define SQZ_y1 12787u // "y1"
#define SQZ_y2 13043u // "y2"
#define SQZ_yellow 490403164u // "yellow"
#define SQZ_yen 7235059u // "yen"
#define SQZ_yes 7562739u // "yes"
#define SQZ_z_index 448175280u // "z-index"
#define SQZ_zoom_in 2458604508u // "zoom-in"
#define SQZ_zoom_out 3603903986u // "zoom-out"
#endif

#ifndef __CTX_LIBC_H
#define __CTX_LIBC_H

#if !__COSMOPOLITAN__
#include <stddef.h>
#endif

/* Copy the NUL-terminated string src, terminator included, into dst.
 * No bounds checking is done; dst must have room for the whole string.
 */
static inline void ctx_strcpy (char *dst, const char *src)
{
  while (*src)
    { *dst++ = *src++; }
  *dst = 0;
}

/* Return a pointer to the first occurrence of needle in haystack, or NULL
 * when it does not occur.  Searching for the NUL character yields a pointer
 * to the string terminator.
 */
static inline char *_ctx_strchr (const char *haystack, char needle)
{
  const char *cursor;
  for (cursor = haystack; *cursor; cursor++)
    {
      if (*cursor == needle)
        { return (char *) cursor; }
    }
  /* at the terminator: it only counts as a hit when NUL was asked for */
  if (needle == 0)
    { return (char *) cursor; }
  return NULL;
}
/* Find needle in haystack; forwards to _ctx_strchr and returns its result
 * unchanged.
 */
static inline char *ctx_strchr (const char *haystack, char needle)
{
  char *found = _ctx_strchr (haystack, needle);
  return found;
}

/* Equality test for two NUL-terminated strings.
 *
 * Returns 0 when a and b are identical and 1 otherwise; unlike the libc
 * strcmp the result carries no ordering information.
 */
static inline int ctx_strcmp (const char *a, const char *b)
{
  while (*a && *b)
    {
      if (*a != *b)
        { return 1; }
      a++;
      b++;
    }
  /* equal only when both strings ended at the same position */
  return (*a || *b) ? 1 : 0;
}

/* Compare at most the first n characters of a and b.
 *
 * Returns 0 when the strings agree over those n characters (or both end
 * earlier at the same position), and 1 otherwise.  Like ctx_strcmp the
 * non-zero result carries no ordering information.  n == 0 compares
 * nothing and therefore returns 0, matching the libc strncmp.
 */
static inline int ctx_strncmp (const char *a, const char *b, size_t n)
{
  size_t i;
  for (i = 0; i < n; i++)
    {
      if (a[i] != b[i])
        { return 1; }
      if (a[i] == 0) /* both strings ended together before n characters */
        { return 0; }
    }
  return 0;
}

/* Number of characters in s before its NUL terminator, returned as int. */
static inline int ctx_strlen (const char *s)
{
  const char *end = s;
  while (*end)
    { end++; }
  return (int) (end - s);
}

/* Return a pointer to the first occurrence of the string n inside h, or
 * NULL when n does not occur.  An empty needle matches at the start of h.
 *
 * The comparison is done in place so that a haystack which ends before
 * the needle has been fully matched is never reported as a hit.
 */
static inline char *ctx_strstr (const char *h, const char *n)
{
  if (n[0]==0)
    { return (char *) h; }
  for (; *h; h++)
    {
      size_t i = 0;
      /* h[i] == 0 with n[i] != 0 is a mismatch, so this never reads
       * past the haystack terminator */
      while (n[i] && h[i] == n[i])
        { i++; }
      if (n[i] == 0)
        { return (char *) h; }
    }
  return NULL;
}

/* Duplicate the NUL-terminated string str into memory obtained from
 * ctx_malloc.
 *
 * Returns the new copy, or NULL when the allocation fails.
 */
static inline char *ctx_strdup (const char *str)
{
  int len = ctx_strlen (str);
  char *ret = (char*)ctx_malloc (len + 1);
  if (!ret) /* do not write through a failed allocation */
    { return NULL; }
  memcpy (ret, str, len);
  ret[len]=0;
  return ret;
}

/* Minimal decimal string to int conversion.
 *
 * Accepts an optional leading '-' followed by digits and stops at the first
 * non-digit.  Leading whitespace and '+' are not skipped; a string without
 * digits yields 0.
 */
static inline int ctx_atoi (const char *str)
{
  const char *p = str;
  int negative = (*p == '-');
  int value = 0;
  if (negative)
    { p++; }
  for (; *p >= '0' && *p <= '9'; p++)
    { value = value * 10 + (*p - '0'); }
  return negative ? -value : value;
}

#endif
#ifndef CTX_AUDIO_H
#define CTX_AUDIO_H

#if !__COSMOPOLITAN__
#include <stdint.h>
#endif

/* Sample format selector used by the ctx_pcm_* functions.
 *
 * This enum should be kept in sync with the corresponding mmm enum; the
 * enumerators rely on their implicit values 0..3.
 *
 * NOTE(review): the names suggest F32 = 32-bit float and S16 = signed
 * 16-bit samples, with a trailing S marking stereo - confirm against the
 * mmm definition.
 */
typedef enum {
  CTX_F32,
  CTX_F32S,
  CTX_S16,
  CTX_S16S
} CtxPCM;

/* PCM audio interface of a Ctx context: accessors for the sample format and
 * sample rate, queries about queued audio, and a call for queueing samples.
 * Only the declarations are given here.
 *
 * NOTE(review): ctx_pcm_queue receives the samples as an int8_t pointer
 * together with a frame count; presumably the bytes are interpreted
 * according to the current CtxPCM format - confirm in the implementation.
 */
void   ctx_pcm_set_format        (Ctx *ctx, CtxPCM format);
CtxPCM ctx_pcm_get_format        (Ctx *ctx);
int    ctx_pcm_get_sample_rate   (Ctx *ctx);
void   ctx_pcm_set_sample_rate   (Ctx *ctx, int sample_rate);
int    ctx_pcm_get_frame_chunk   (Ctx *ctx);
int    ctx_pcm_get_queued        (Ctx *ctx);
float  ctx_pcm_get_queued_length (Ctx *ctx);
int    ctx_pcm_queue             (Ctx *ctx, const int8_t *data, int frames);

#endif
/* Copyright (c) 2021-2022 Øyvind Kolås <pippin@gimp.org>

  Fast cache-miss eliminating unicode strings for C.

  All features are optional:
    optimized 32bit 52bit 62bit and 64bit squoze encodings in UTF5+ and/or UTF-8
    string interning and APIS (for only getting the core squoze reference
                               implementation)
      both utf8, unichar and printf for core APIs
      embedding of strings (only for debug/profiling)
      reference counting
      embedded length

  License to be determined; the core implementation (the squoze64_utf8
  snippet on https://squoz.org/) is ISC licensed.



*/
#if 0
Minimal usage example:

#define SQUOZE_IMPLEMENTATION
#include "squoze.h"

int main (int argc, char **argv)
{
  char temp[10];
  Sqz *string = NULL;
  
  sqz_set_utf8 (&string, "hello");


}

#endif

#ifndef SQUOZE_H
#define SQUOZE_H

#include <stdint.h>
#include <stddef.h>

// Configuration of the internal squoze build. These values must be set
// before both the header and the implementation uses of squoze.h; they
// only impact the string interning implementation and not the low-level
// APIs.


#ifndef SQUOZE_INTERN_DIRECT_STRING     // when 1 the pointers returned are
#define SQUOZE_INTERN_DIRECT_STRING  1  // directly string pointers
                                        // when 0 the struct of the per entry
                                        // allocation is returned, for integration
                                        // with garbage collectors that scan
                                        // for pointers 0 is preferable.
#endif

#ifndef SQUOZE_ID_BITS         // number of bits to use for interning API
#define SQUOZE_ID_BITS 64      // 32 52 62 or 64
#endif

#ifndef SQUOZE_ID_UTF5         // use UTF5+ as the embed encoding
#define SQUOZE_ID_UTF5 0       // if not set then UTF8 is used
#endif

#ifndef SQUOZE_ID_MURMUR       // use murmurhash and no embedding
#define SQUOZE_ID_MURMUR 0     //
#endif

#ifndef SQUOZE_REF_COUNTING    // build the refcounting support, adds
#define SQUOZE_REF_COUNTING 0  // per-interned-string overhead
#endif

#ifndef SQUOZE_STORE_LENGTH    // store byte-lengths as part of
#define SQUOZE_STORE_LENGTH 1  // per-interned-string data
#endif

#ifndef SQUOZE_USE_INTERN      // enable interning hash-table
#define SQUOZE_USE_INTERN 1    // without this only a single
                               // core implementation can be built
                               //
// NOTE(review): this #ifndef is not closed until the last #endif of this
// configuration section, so when the includer predefines SQUOZE_USE_INTERN
// none of the defaults below are applied and any macro left undefined
// evaluates as 0 in later #if tests - confirm this nesting is intended.
/*  XXX - you should not need to tweak anything below here,
 *        though the tweaks are available for tinkering
 *        and debugging.
 */
#ifndef SQUOZE_REF_SANITY
#define SQUOZE_REF_SANITY      0 // report consistency errors and use more RAM
#endif

#ifndef SQUOZE_CLOBBER_ON_FREE
#define SQUOZE_CLOBBER_ON_FREE 0
  // clobber strings when freeing, not a full leak report
  // but better to always glitch than silently succeeding or failing
#endif

#ifndef SQUOZE_INITIAL_POOL_SIZE
#define SQUOZE_INITIAL_POOL_SIZE   (1<<8)  // initial hash-table capacity
#endif

#ifndef SQUOZE_USE_BUILTIN_CLZ
#define SQUOZE_USE_BUILTIN_CLZ  1 // use builtin for determining highest bit in unicode char
#endif

#ifndef SQUOZE_UTF8_MANUAL_UNROLL
#define SQUOZE_UTF8_MANUAL_UNROLL 1 // use manually unrolled UTF8 code
#endif

// When SQUOZE_LIMIT_IMPLEMENTATIONS is 0 every per-width implementation
// switch below defaults to enabled; when it is non-zero they default to
// disabled and must be enabled individually.
#ifndef SQUOZE_LIMIT_IMPLEMENTATIONS
#define SQUOZE_LIMIT_IMPLEMENTATIONS 0
#endif

#ifndef SQUOZE_IMPLEMENTATION_32_UTF8
#define SQUOZE_IMPLEMENTATION_32_UTF8 (!SQUOZE_LIMIT_IMPLEMENTATIONS)
#endif
#ifndef SQUOZE_IMPLEMENTATION_32_UTF5
#define SQUOZE_IMPLEMENTATION_32_UTF5 (!SQUOZE_LIMIT_IMPLEMENTATIONS)
#endif
#ifndef SQUOZE_IMPLEMENTATION_52_UTF5
#define SQUOZE_IMPLEMENTATION_52_UTF5 (!SQUOZE_LIMIT_IMPLEMENTATIONS)
#endif
#ifndef SQUOZE_IMPLEMENTATION_62_UTF5
#define SQUOZE_IMPLEMENTATION_62_UTF5 (!SQUOZE_LIMIT_IMPLEMENTATIONS)
#endif
#ifndef SQUOZE_IMPLEMENTATION_64_UTF8
#define SQUOZE_IMPLEMENTATION_64_UTF8 (!SQUOZE_LIMIT_IMPLEMENTATIONS)
#endif
#endif // closes the #ifndef SQUOZE_USE_INTERN opened above

#if SQUOZE_USE_INTERN

/* Integer type holding a squoze id: 32 bits wide when SQUOZE_ID_BITS is 32,
 * 64 bits wide for every other setting.
 */
#if SQUOZE_ID_BITS==32
typedef uint32_t sqz_id_t;
#else
typedef uint64_t sqz_id_t;
#endif


typedef struct _Sqz      Sqz;      /* handle representing a squozed string  */


/* presumably returns the squozed string handle for a UTF-8 C string.
 * NOTE(review): the comment that used to sit here described the
 * concatenation of a and b, which does not fit this single-argument
 * declaration - confirm the intended description.
 */
Sqz         *sqz_utf8             (const char *str);
//static const char  *sqz_decode           (Sqz *squozed, char *temp);


/* queries on an existing handle; none of them take a pointer-to-handle */
int          sqz_length           (Sqz *squozed);
sqz_id_t     sqz_id               (Sqz *squozed);
uint32_t     sqz_unichar_at       (Sqz *a, int pos);
int          sqz_strcmp           (Sqz *a, Sqz *b);
/* Two handles are equal exactly when they are the same pointer.
 * Declared static inline so every translation unit including this header
 * carries its own definition; a plain C `inline` definition provides no
 * external definition and can fail to link when the call is not inlined.
 */
static inline int sqz_equal       (Sqz *a, Sqz *b) { return a == b; }
/* Functions taking Sqz ** receive the address of the caller's handle;
 * presumably so the handle can be replaced in place - confirm against the
 * implementation.
 */
void         sqz_unset            (Sqz **a);


/* construct new handles from existing ones */
Sqz         *sqz_cat              (Sqz *a, Sqz *b);
Sqz         *sqz_substring        (Sqz *a, int pos, int length);

void         sqz_insert           (Sqz **a, int pos, Sqz *b);
void         sqz_set              (Sqz **a, Sqz *b);
void         sqz_erase            (Sqz **a, int pos, int length);

/* stdarg.h is needed for the va_list parameter of sqz_printf_va_list */
#include <stdarg.h>
Sqz         *sqz_printf           (const char *format, ...);
Sqz         *sqz_printf_va_list   (const char *format, va_list list);
Sqz         *sqz_unichar          (uint32_t unichar);
Sqz         *sqz_double           (double value);
Sqz         *sqz_int              (int value);

/* the following APIs are mostly implemented in terms of the above */

int          sqz_has_prefix       (Sqz *a, Sqz *prefix);
int          sqz_has_suffix       (Sqz *a, Sqz *suffix);

void         sqz_insert_double    (Sqz **a, int pos, double value);
void         sqz_insert_int       (Sqz **a, int pos, int value);


void         sqz_insert_unichar   (Sqz **a, int pos, uint32_t unichar);
void         sqz_replace_unichar  (Sqz **a, int pos, int length, uint32_t unichar);
void         sqz_append_unichar   (Sqz **a, uint32_t unichar);
void         sqz_append_utf8      (Sqz **a, const char *utf8);
int          sqz_has_prefix_utf8  (Sqz  *a, const char *utf8);
int          sqz_has_suffix_utf8  (Sqz  *a, const char *utf8);
void         sqz_insert_utf8      (Sqz **a, int pos, const char *utf8);
void         sqz_set_utf8         (Sqz **a, const char *utf8);
void         sqz_replace_utf8     (Sqz **a, int pos, int length, const char *utf8);
void         sqz_set_printf       (Sqz **a, const char *format, ...);
void         sqz_append_printf    (Sqz **a, const char *format, ...);
void         sqz_insert_printf    (Sqz **a, int pos, const char *format, ...);
void         sqz_replace_printf   (Sqz **a, int pos, int length, const char *format, ...);
/* increase reference count of string */
Sqz         *sqz_ref              (Sqz *squozed);
Sqz         *sqz_dup              (Sqz *squozed);
/* decrement reference count of string */
void         sqz_unref            (Sqz *squozed);
typedef struct _SqzPool  SqzPool;  /* a pool for grouping allocated strings */


/* create a new string pool, with fallback to another pool -
 * or NULL for fallback to default pool, takes a reference on fallback.
 */
SqzPool    *sqz_pool_new          (SqzPool *fallback);

/* increase reference count of pool
 */
void         sqz_pool_ref         (SqzPool *pool);

/* decrease reference count of pool; once the unref calls match the
 * _new() + _ref() calls the pool is destroyed.
 */
void         sqz_pool_unref       (SqzPool *pool);

/* add a string to a squoze pool
 */
Sqz         *sqz_pool_add         (SqzPool *pool, const char *str);

/* NOTE(review): sqz_cat is declared earlier with the same parameter and
 * return types - confirm whether both names are meant to exist.
 */
Sqz *sqz_concat (Sqz *a, Sqz *b);

/* Report stats on interned strings; the three size_t values are returned
 * through the pointer arguments.
 */
void sqz_pool_mem_stats (SqzPool *pool,
                         size_t     *size,
                         size_t     *slack,
                         size_t     *intern_alloc);

/* empty all pools
 */
void sqz_cleanup (void);

#endif


/* SQUOZE_USE_UTF5 is 1 when at least one of the UTF5 based implementations
 * (32, 52 or 62 bit) is enabled and 0 otherwise; it gates the shared UTF5
 * helper code further down.
 */
#if SQUOZE_IMPLEMENTATION_32_UTF5 || \
    SQUOZE_IMPLEMENTATION_52_UTF5 || \
    SQUOZE_IMPLEMENTATION_62_UTF5
#define SQUOZE_USE_UTF5 1
#else
#define SQUOZE_USE_UTF5 0
#endif


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Low-level encode/decode entry points, one pair per enabled
 * implementation.  The encoders take a UTF-8 buffer plus its byte length
 * and return the id; the decoders take an id and a caller supplied
 * destination buffer.
 */
#if SQUOZE_IMPLEMENTATION_32_UTF5
uint32_t     squoze32_utf5        (const char *utf8, size_t len);
const char  *squoze32_utf5_decode (uint32_t    id,   char *dest);
#endif

#if SQUOZE_IMPLEMENTATION_32_UTF8
/* the 32 bit UTF-8 variant is defined static in the implementation
 * section, hence no public prototypes here */
//static uint32_t     squoze32_utf8        (const char *utf8, size_t len);
//static const char  *squoze32_utf8_decode (uint32_t    id,   char *dest);
#endif

#if SQUOZE_IMPLEMENTATION_52_UTF5
uint64_t     squoze52_utf5        (const char *utf8, size_t len);
const char  *squoze52_utf5_decode (uint64_t    id,   char *dest);
#endif

#if SQUOZE_IMPLEMENTATION_62_UTF5
uint64_t     squoze62_utf5        (const char *utf8, size_t len);
const char  *squoze62_utf5_decode (uint64_t    id,   char *dest);
#endif

#if SQUOZE_IMPLEMENTATION_64_UTF8
uint64_t     squoze64_utf8        (const char *utf8, size_t len);
/* NOTE(review): the decoder paired with squoze64_utf8 is named
 * squoze62_utf8_decode - confirm whether 62 is intended or a typo for 64. */
const char  *squoze62_utf8_decode (uint64_t    id,   char *dest);
#endif

#endif

#ifdef SQUOZE_IMPLEMENTATION

/* One-at-a-time Murmur style 32 bit hash over the first len bytes of key.
 *
 * Starts from the seed 3323198485 and, per byte, xors the byte in,
 * multiplies by 0x5bd1e995 (modulo 2^32) and folds the upper bits down
 * with a 15 bit shift.  A len of zero or less returns the seed.
 */
static inline uint32_t MurmurOAAT32 (const char * key, int len)
{
  uint32_t hash = 3323198485u;
  int pos = 0;
  while (pos < len)
    {
      hash ^= (uint32_t) key[pos++];
      hash *= 0x5bd1e995u;
      hash ^= hash >> 15;
    }
  return hash;
}

/* 64 bit counterpart of MurmurOAAT32: seed 525201411107845655, per byte an
 * xor, a multiplication by 0x5bd1e9955bd1e995 (modulo 2^64) and a 47 bit
 * fold.  A len of zero or less returns the seed.
 */
static inline uint64_t MurmurOAAT64 ( const char * key, int len)
{
  uint64_t hash = 525201411107845655ull;
  int pos;
  for (pos = 0; pos < len; pos++)
    {
      hash = (hash ^ (uint64_t) key[pos]) * 0x5bd1e9955bd1e995ull;
      hash ^= hash >> 47;
    }
  return hash;
}

#if SQUOZE_USE_UTF5 // YYY

/* Forward declarations of the UTF5 helpers; squoze5_encode_int and
 * squoze_decode_utf5_bytes are called by the encode/decode functions
 * below, so they have to be visible before those definitions.
 */

// TODO:  UTF5+ should operate directly on bits instead of
//        going via bytes
static inline void squoze5_encode (const char *input, int inlen,
                                   char *output, int *r_outlen,
                                   int   permit_squeezed,
                                   int   escape_endzero);
static void squoze_decode_utf5_bytes (int is_utf5, 
                                      const unsigned char *input, int inlen,
                                      char *output, int *r_outlen);
static inline size_t squoze5_encode_int (const char *input, int inlen,
                                         int maxlen, int *overflow,
                                         int escape_endzero);

#endif


/* Generic reference encoder; it should behave the same as the bit width
 * and encoding specific implementations.
 *
 * squoze_dim  id width in bits
 * utf5        non-zero selects the UTF5 embedding (only when built in)
 * stf8, len   the UTF-8 bytes to encode and their count
 *
 * Strings that fit are embedded in an odd id, everything else is hashed
 * with MurmurOAAT32 and made even, the marker for interned strings.
 */
static inline uint64_t squoze_encode_id (int squoze_dim, int utf5, const char *stf8, size_t len)
{
  int length = len;
#if SQUOZE_USE_UTF5
  if (utf5)
  {
    uint64_t hashed;
    int max_quintets = squoze_dim / 5;
    if (length <= max_quintets)
    {
      int overflow = 0;
      uint64_t embedded = squoze5_encode_int (stf8, length, max_quintets, &overflow, 1);
      if (!overflow)
        return embedded;
    }
    hashed = MurmurOAAT32(stf8, length);
    hashed &= ~1;
    return hashed;
  }
#endif
  {
    const uint8_t *bytes = (const uint8_t*)stf8;
    /* anything wider than 32 bits is treated as a full 64 bit id */
    int bytes_dim = (squoze_dim > 32 ? 64 : squoze_dim) / 8;
    uint8_t first_byte = bytes[0];
    uint64_t id;
    int i;

    /* form 1: first byte stored as 2*byte+1 in the low byte; 11 is
     * excluded because 11*2+1 equals the form 2 marker 23 */
    if (first_byte < 128 && first_byte != 11 && length <= bytes_dim)
    {
      id = bytes[0] * 2 + 1;
      for (i = 1; i < length; i++)
        id += ((uint64_t)bytes[i] << (8*i));
      return id;
    }

    /* form 2: marker 23 in the low byte, string bytes in the bytes above */
    if (length <= bytes_dim - 1)
    {
      id = 23;
      for (i = 0; i < length; i++)
        id += ((uint64_t)bytes[i] << (8*(i+1)));
      return id;
    }

    id = MurmurOAAT32(stf8, len);
    id &= ~1;  // make even - intern marker
    return id;
  }
}

#ifdef __CTX_H__ // override with ctx variants if included from ctx
                 // NOTE(review): the include guard visible at the top of
                 // this file is CTX_H; confirm that __CTX_H__ is defined
                 // somewhere, otherwise these overrides are never active.
#define strdup ctx_strdup
#define strstr ctx_strstr
#endif


#if SQUOZE_IMPLEMENTATION_32_UTF5
/* 32 bit UTF5 id of the len byte string utf8: squoze_encode_id with a
 * dimension of 32 and UTF5 enabled, narrowed to uint32_t on return.
 */
uint32_t squoze32_utf5 (const char *utf8, size_t len)
{
  const int squoze_dim = 32;
  const int use_utf5   = 1;
  return squoze_encode_id (squoze_dim, use_utf5, utf8, len);
}
#endif

#if SQUOZE_IMPLEMENTATION_52_UTF5
/* 52 bit UTF5 id of the len byte string utf8: squoze_encode_id with a
 * dimension of 52 and UTF5 enabled.
 */
uint64_t squoze52_utf5 (const char *utf8, size_t len)
{
  const int squoze_dim = 52;
  const int use_utf5   = 1;
  return squoze_encode_id (squoze_dim, use_utf5, utf8, len);
}
#endif

#if SQUOZE_IMPLEMENTATION_62_UTF5
/* 62 bit UTF5 id of the len byte string utf8: squoze_encode_id with a
 * dimension of 62 and UTF5 enabled.
 */
uint64_t squoze62_utf5 (const char *utf8, size_t len)
{
  const int squoze_dim = 62;
  const int use_utf5   = 1;
  return squoze_encode_id (squoze_dim, use_utf5, utf8, len);
}
#endif

/* Compute the squoze id of the length byte UTF-8 string stf8 for an id
 * that is bytes_dim bytes wide.
 *
 * Form 1 - first byte below 128 and not 11, at most bytes_dim bytes:
 *          low byte holds first_byte*2+1, following string bytes occupy
 *          the following id bytes.
 * Form 2 - at most bytes_dim-1 bytes: low byte holds the marker 23 (the
 *          value form 1 would produce for a first byte of 11), the string
 *          bytes occupy the id bytes above it.
 * Otherwise the string is hashed with MurmurOAAT32 and the lowest bit is
 * cleared, marking the id as interned.
 *
 * Every byte is widened to uint64_t before shifting: string bytes of 0x80
 * and above shifted by 24 as a promoted int overflow and sign-extend into
 * the upper half of the id, which made the unrolled cases disagree with
 * the generic loop.
 */
static inline uint64_t squoze_utf8 (size_t bytes_dim, const char *stf8, size_t length)
{
  uint64_t id;
  const uint8_t *utf8 = (const uint8_t*)stf8;

  uint8_t first_byte = utf8[0];
  if (   first_byte < 128
      && first_byte != 11
      && (length <= bytes_dim))
  {
      switch (length)
      {
#if SQUOZE_UTF8_MANUAL_UNROLL
        case 0: id = 1;
                break;
        case 1: id = utf8[0] * 2 + 1;
                break;
        case 2: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1));
                break;
        case 3: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2));
                break;
        case 4: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2))
                                     + ((uint64_t)utf8[3] << (8*3));
                break;
        case 5: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2))
                                     + ((uint64_t)utf8[3] << (8*3))
                                     + ((uint64_t)utf8[4] << (8*4));
                break;
        case 6: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2))
                                     + ((uint64_t)utf8[3] << (8*3))
                                     + ((uint64_t)utf8[4] << (8*4))
                                     + ((uint64_t)utf8[5] << (8*5));
                break;
        case 7: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2))
                                     + ((uint64_t)utf8[3] << (8*3))
                                     + ((uint64_t)utf8[4] << (8*4))
                                     + ((uint64_t)utf8[5] << (8*5))
                                     + ((uint64_t)utf8[6] << (8*6));
                break;
        case 8: id = utf8[0] * 2 + 1 + ((uint64_t)utf8[1] << (8*1))
                                     + ((uint64_t)utf8[2] << (8*2))
                                     + ((uint64_t)utf8[3] << (8*3))
                                     + ((uint64_t)utf8[4] << (8*4))
                                     + ((uint64_t)utf8[5] << (8*5))
                                     + ((uint64_t)utf8[6] << (8*6))
                                     + ((uint64_t)utf8[7] << (8*7));
                break;
#endif
        default:
          id = utf8[0] * 2 + 1;
          for (unsigned int i = 1; i < length; i++)
            id += ((uint64_t)utf8[i]<<(8*(i)));
      }
    return id;
  }
  else if (length <= bytes_dim-1)
  {
      switch (length)
      {
#if SQUOZE_UTF8_MANUAL_UNROLL
        case 0: id = 23;
          break;
        case 1: id = 23 + ((uint64_t)utf8[0] << (8*1));
          break;
        case 2: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2));
          break;
        case 3: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2))
                        + ((uint64_t)utf8[2] << (8*3));
          break;
        case 4: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2))
                        + ((uint64_t)utf8[2] << (8*3))
                        + ((uint64_t)utf8[3] << (8*4));
          break;
        case 5: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2))
                        + ((uint64_t)utf8[2] << (8*3))
                        + ((uint64_t)utf8[3] << (8*4))
                        + ((uint64_t)utf8[4] << (8*5));
          break;
        case 6: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2))
                        + ((uint64_t)utf8[2] << (8*3))
                        + ((uint64_t)utf8[3] << (8*4))
                        + ((uint64_t)utf8[4] << (8*5))
                        + ((uint64_t)utf8[5] << (8*6));
          break;
        case 7: id = 23 + ((uint64_t)utf8[0] << (8*1))
                        + ((uint64_t)utf8[1] << (8*2))
                        + ((uint64_t)utf8[2] << (8*3))
                        + ((uint64_t)utf8[3] << (8*4))
                        + ((uint64_t)utf8[4] << (8*5))
                        + ((uint64_t)utf8[5] << (8*6))
                        + ((uint64_t)utf8[6] << (8*7));
          break;
#endif
        default:
          id = 23;
          for (unsigned int i = 0; i < length; i++)
            id += ((uint64_t)utf8[i]<<(8*(i+1)));
      }
    return id;
  }

  id = MurmurOAAT32(stf8, length);
  id &= ~1;  // make even - intern marker
  return id;
}

#if SQUOZE_IMPLEMENTATION_64_UTF8
/* 64 bit UTF-8 squoze id of the length byte string stf8: squoze_utf8 with
 * an id width of 8 bytes.
 */
uint64_t squoze64_utf8 (const char *stf8, size_t length)
{
  const size_t id_bytes = 8;
  return squoze_utf8 (id_bytes, stf8, length);
}
#endif

#if SQUOZE_IMPLEMENTATION_32_UTF8
/* 32 bit UTF-8 squoze id of the length byte string stf8.
 *
 * Form 1 - first byte below 128 and not 11, at most 4 bytes: low byte
 *          holds first_byte*2+1, following string bytes occupy the
 *          following id bytes.
 * Form 2 - at most 3 bytes: low byte holds the marker 23, the string
 *          bytes occupy the id bytes above it.
 * Otherwise the string is hashed with MurmurOAAT32 and the lowest bit is
 * cleared, marking the id as interned.
 *
 * Bytes are widened to uint32_t before shifting so that shifting a byte
 * of 0x80 or above by 24 is a well defined unsigned operation instead of
 * a signed int overflow; the resulting ids are bit-identical.
 */
static uint32_t squoze32_utf8 (const char *stf8, size_t length)
{
  uint32_t id;
  const uint8_t *utf8 = (const uint8_t*)stf8;
  size_t bytes_dim = 4;

  uint8_t first_byte = utf8[0];
  if (first_byte    < 128
      && first_byte != 11
      && (length <= bytes_dim))
  {
      switch (length)
      {
#if SQUOZE_UTF8_MANUAL_UNROLL
        case 0: id = 1;
                break;
        case 1: id = utf8[0] * 2 + 1;
                break;
        case 2: id = utf8[0] * 2 + 1 + ((uint32_t)utf8[1] << (8*1));
                break;
        case 3: id = utf8[0] * 2 + 1 + ((uint32_t)utf8[1] << (8*1))
                                     + ((uint32_t)utf8[2] << (8*2));
                break;
        case 4: id = utf8[0] * 2 + 1 + ((uint32_t)utf8[1] << (8*1))
                                     + ((uint32_t)utf8[2] << (8*2))
                                     + ((uint32_t)utf8[3] << (8*3));
                break;
#endif
        default:
          id = utf8[0] * 2 + 1;
          for (unsigned int i = 1; i < length; i++)
            id += ((uint32_t)utf8[i]<<(8*(i)));
      }
    return id;
  }
  else if (length <= bytes_dim-1)
  {
      switch (length)
      {
#if SQUOZE_UTF8_MANUAL_UNROLL
        case 0: id = 23;
          break;
        case 1: id = 23 + ((uint32_t)utf8[0] << (8*1));
          break;
        case 2: id = 23 + ((uint32_t)utf8[0] << (8*1))
                        + ((uint32_t)utf8[1] << (8*2));
          break;
        case 3: id = 23 + ((uint32_t)utf8[0] << (8*1))
                        + ((uint32_t)utf8[1] << (8*2))
                        + ((uint32_t)utf8[2] << (8*3));
          break;
#endif
        default:
          id = 23;
          for (unsigned int i = 0; i < length; i++)
            id += ((uint32_t)utf8[i]<<(8*(i+1)));
      }
    return id;
  }

  id = MurmurOAAT32(stf8, length);
  id &= ~1;  // make even - intern marker
  return id;
}
#endif


/* squoze_id_decode_r:
 * @squoze_dim: 32 selects the 4-byte UTF-8 layout, any other value the
 *              8-byte layout (only consulted on the UTF-8 path)
 * @hash:       embedded squoze id to decode
 * @ret:        destination buffer; the UTF-8 path writes up to 9 bytes
 *              including the terminator
 * @retlen:     size of @ret, handed on to squoze_decode_utf5_bytes ()
 * @is_utf5:    non-zero to decode @hash as UTF-5 quintets (only honoured
 *              when SQUOZE_USE_UTF5 is set)
 *
 * Returns @ret.  The UTF-8 path extracts bytes with shifts instead of
 * copying the in-memory representation of @hash, so the result does not
 * depend on host byte order; the encoders place byte i at bits 8*i.
 */
static const char *squoze_id_decode_r (int squoze_dim, uint64_t hash, char *ret, int retlen, int is_utf5)
{
#if SQUOZE_USE_UTF5
  if (is_utf5)
  {
    /* bit 1 of the id records whether decoding starts in UTF-5 mode, the
     * quintets follow from bit 2 upwards, least significant first */
    int      start_utf5 = (hash & 2)!=0;
    uint8_t  utf5[20]=""; // we never go really high since there isn't room
                          // in the integers
    uint64_t tmp = hash / 4;
    int      len = 0;
    while (tmp > 0)
    {
      utf5[len++] = tmp & 31;
      tmp /= 32;
    }
    utf5[len]=0;
    squoze_decode_utf5_bytes (start_utf5, utf5, len, ret, &retlen);
    return ret;
  }
#endif
  (void) retlen;
  (void) is_utf5;
  {
    unsigned char *out     = (unsigned char*)ret;
    int            n_bytes = (squoze_dim == 32) ? 4 : 8;
    int            shift   = 0;

    if ((hash & 0xff) == 23)
    {
      /* marker layout: the string starts in the second byte */
      shift    = 8;
      n_bytes -= 1;
    }

    for (int i = 0; i < n_bytes; i++)
      out[i] = (unsigned char)((hash >> (8 * i + shift)) & 0xff);

    if (shift == 0)
      out[0] /= 2;  /* first byte was stored as byte * 2 + 1 */

    out[n_bytes] = 0;
    return ret;
  }
}

static const char *squoze_id_decode (int squoze_dim, uint64_t id, int is_utf5, char *dest);
/* Decode the embedded id @id into @dest and return @dest.
 *
 * Even ids (including 0) and the id 3 are not decoded: @dest is set to
 * the empty string and NULL is returned.  Otherwise the work is done by
 * squoze_id_decode_r () which is told that @dest holds 16 bytes.
 */
static const char *squoze_id_decode (int squoze_dim, uint64_t id, int is_utf5, char *dest)
{
  int decodable = ((id & 1) != 0) && (id != 3);

  if (!decodable)
  {
    dest[0] = 0;
    return NULL;
  }

  squoze_id_decode_r (squoze_dim, id, dest, 16, is_utf5);
  return dest;
}

#if SQUOZE_IMPLEMENTATION_32_UTF5
/* Decode a 32-bit UTF-5 id into @dest via squoze_id_decode (). */
const char *squoze32_utf5_decode (uint32_t id, char *dest)
{
  const char *decoded = squoze_id_decode (32, id, 1, dest);
  return decoded;
}
#endif

#if SQUOZE_IMPLEMENTATION_52_UTF5
/* Decode a 52-bit UTF-5 id into @dest via squoze_id_decode (). */
const char *squoze52_utf5_decode (uint64_t id, char *dest)
{
  const char *decoded = squoze_id_decode (52, id, 1, dest);
  return decoded;
}
#endif

#if SQUOZE_IMPLEMENTATION_62_UTF5
/* Decode a 62-bit UTF-5 id into @dest via squoze_id_decode (). */
const char *squoze62_utf5_decode (uint64_t id, char *dest)
{
  const char *decoded = squoze_id_decode (62, id, 1, dest);
  return decoded;
}
#endif

#if SQUOZE_IMPLEMENTATION_64_UTF8
/* Decode a 64-bit UTF-8 id into @dest via squoze_id_decode (). */
static const char *squoze64_utf8_decode (uint64_t id, char *dest)
{
  const char *decoded = squoze_id_decode (64, id, 0, dest);
  return decoded;
}
#endif

#if SQUOZE_IMPLEMENTATION_32_UTF8
/* Decode a 32-bit UTF-8 id into @dest via squoze_id_decode (). */
static const char *squoze32_utf8_decode (uint32_t id, char *dest)
{
  const char *decoded = squoze_id_decode (32, id, 0, dest);
  return decoded;
}
#endif

/* Decode the UTF-8 sequence starting at @input into a code point.
 *
 * The lead byte selects the sequence length (1 to 6 bytes); continuation
 * bytes contribute their low six bits and are not validated.  A lead byte
 * that matches none of the patterns yields 0.
 */
static inline uint32_t
squoze_utf8_to_unichar (const char *input)
{
  const uint8_t *bytes = (const uint8_t *) input;
  uint8_t  lead = bytes[0];
  uint32_t unichar;
  int      trailing;

  if ((lead & 0x80) == 0)
    return lead;

  if ((lead & 0xE0) == 0xC0)      { unichar = lead & 0x1F; trailing = 1; }
  else if ((lead & 0xF0) == 0xE0) { unichar = lead & 0xF;  trailing = 2; }
  else if ((lead & 0xF8) == 0xF0) { unichar = lead & 0x7;  trailing = 3; }
  else if ((lead & 0xFC) == 0xF8) { unichar = lead & 0x3;  trailing = 4; }
  else if ((lead & 0xFE) == 0xFC) { unichar = lead & 0x1;  trailing = 5; }
  else
    return 0;

  for (int i = 1; i <= trailing; i++)
    unichar = (unichar << 6) | (uint32_t)(bytes[i] & 0x3F);

  return unichar;
}
/* Encode the code point @ch as UTF-8 into @dest (no terminator is
 * written) and return the number of bytes produced: 1 to 4, or 0 with
 * @dest untouched when @ch is 0x110000 or above.
 *
 * Derived from the public domain UTF-8 routines by Jeff Bezanson,
 * http://www.cprogramming.com/tutorial/utf8.c
 */
static inline int
squoze_unichar_to_utf8 (uint32_t  ch,
                      uint8_t  *dest)
{
  int     n_bytes;
  uint8_t lead_marker;

  if (ch < 0x80)
  {
    dest[0] = (uint8_t) ch;
    return 1;
  }
  else if (ch < 0x800)    { n_bytes = 2; lead_marker = 0xC0; }
  else if (ch < 0x10000)  { n_bytes = 3; lead_marker = 0xE0; }
  else if (ch < 0x110000) { n_bytes = 4; lead_marker = 0xF0; }
  else
    return 0;

  /* continuation bytes are filled back to front, six bits at a time */
  for (int i = n_bytes - 1; i > 0; i--)
  {
    dest[i] = (uint8_t) ((ch & 0x3F) | 0x80);
    ch >>= 6;
  }
  dest[0] = (uint8_t) (ch | lead_marker);
  return n_bytes;
}

/* Count the code points of the NUL-terminated UTF-8 string @s, that is
 * every byte that is not a continuation byte (10xxxxxx).  NULL counts as
 * an empty string.
 */
static inline int squoze_utf8_strlen (const char *s)
{
  int n_chars = 0;

  if (s == NULL)
    return 0;

  while (*s)
  {
    if ((*s & 0xC0) != 0x80)
      n_chars++;
    s++;
  }
  return n_chars;
}

/* Byte length of the UTF-8 sequence introduced by @first_byte: 1 to 4.
 * Bytes that are not a valid 1-4 byte lead (continuation bytes, 0xF8 and
 * above) are reported as length 1.
 */
static inline int
squoze_utf8_len (const unsigned char first_byte)
{
  switch (first_byte >> 4)
  {
    case 0xC:
    case 0xD:
      return 2;
    case 0xE:
      return 3;
    case 0xF:
      return (first_byte & 0x08) ? 1 : 4;
    default:
      return 1;
  }
}



/*  data structures and implementation for string interning, potentially
 *  with both ref-counting and pools of strings.
 */
#if SQUOZE_USE_INTERN

/* Heap record for one interned string; allocated by sqz_pool_encode ()
 * with room for the string bytes after the fixed fields.
 */
struct _Sqz {
#if SQUOZE_REF_COUNTING
    int32_t       ref_count; // set to magic value for ROM strings?
                             // and store pointer in string data?
                             // starts at 0 (calloc); sqz_unref () treats
                             // a value <= 0 as the last reference
#endif
#if SQUOZE_STORE_LENGTH
    int32_t       length;    // byte length passed to sqz_pool_encode ()
#endif
    sqz_id_t      hash;      // even id computed by sqz_pool_encode ()
    char          string[];  // NUL-terminated copy of the string
};


/* forward declaration, _sqz_pool_add () uses it before the definition */
static inline uint64_t sqz_pool_encode     (SqzPool *pool, const char *utf8, size_t len, Sqz **interned_ref);

/* A pool of interned strings stored in an open-addressing hash table
 * that is probed linearly.
 */
struct _SqzPool
{
  int32_t     ref_count; // 0 means a single owner, see sqz_pool_unref ()
  SqzPool    *fallback;  // consulted by sqz_lookup () when a string is not found here
  Sqz       **hashtable; // slots, NULL marks an empty slot
  int         count;     // incremented by sqz_pool_add_entry ()
  int         size;      // number of slots; positions are masked with size-1,
                         // which assumes a power of two
  SqzPool    *next;      // next pool in the sqz_pools list
};

/* pool used whenever a NULL pool is passed in */
static SqzPool global_pool = {0, NULL, NULL, 0, 0, NULL};

/* head of the list of pools created with sqz_pool_new () */
static SqzPool *sqz_pools = NULL;

/* Find the slot of @pool holding the string @bytes with id @hash.
 *
 * Probing starts at (hash/2) masked to the table size and walks forward,
 * wrapping around, until a match or an empty slot is met.  @bytes must be
 * NUL-terminated since candidates are compared with strcmp (); @length is
 * only compared when SQUOZE_STORE_LENGTH is set.
 *
 * Returns the slot index, or -1 when the string is not in this pool.
 */
static int sqz_pool_find (SqzPool *pool, uint64_t hash, int length, const uint8_t *bytes)
{
  if (pool->size == 0)
    return -1;

  int slot = (hash/2) & (pool->size-1);

  for (;;)
  {
    Sqz *candidate = pool->hashtable[slot];
    if (!candidate)
      return -1;

    if (candidate->hash == hash
#if SQUOZE_STORE_LENGTH
        && candidate->length == length
#endif
        && !strcmp (candidate->string, (char*)bytes))
      return slot;

    slot++;
    slot &= (pool->size-1);
  }
}
/* Insert @str into the hash table of @pool and return the slot used.
 *
 * The table is grown (SQUOZE_INITIAL_POOL_SIZE at first, then doubled)
 * before it becomes half full, and existing entries are re-inserted.
 * pool->count is reset before re-inserting so that the recursive calls
 * do not count every entry a second time.
 *
 * If growing fails the old table is kept for as long as one slot stays
 * empty, which the probing loops need in order to terminate; otherwise
 * -1 is returned and @str is not inserted.
 */
static int sqz_pool_add_entry (SqzPool *pool, Sqz *str)
{
  if (pool->count + 1 >= pool->size / 2)
  {
     Sqz **old      = pool->hashtable;
     int   old_size = pool->size;
     int   new_size = old_size ? old_size * 2 : SQUOZE_INITIAL_POOL_SIZE;
     Sqz **grown    = (Sqz**)calloc (new_size, sizeof (Sqz*));

     if (grown)
     {
       pool->hashtable = grown;
       pool->size      = new_size;
       pool->count     = 0;
       if (old)
       {
         for (int i = 0; i < old_size; i++)
           if (old[i])
             sqz_pool_add_entry (pool, old[i]);
         free (old);
       }
     }
     else if (old_size == 0 || pool->count + 1 >= old_size)
     {
       return -1;
     }
  }
  pool->count++;

  int pos = (str->hash/2) & (pool->size-1);
  while (pool->hashtable[pos])
  {
    pos++;
    pos &= (pool->size-1);
  }
  pool->hashtable[pos]=str;
  return pos;
}

/* also built for plain ref-counting, sqz_unref () calls it when
 * SQUOZE_REF_SANITY is off */
#if SQUOZE_REF_SANITY || SQUOZE_REF_COUNTING
/* Remove @squozed from @pool, freeing it when @do_free is set.
 *
 * Returns 1 when the entry was removed and 0 when @pool does not hold
 * this exact record; an equal string owned by another pool is left
 * alone.
 *
 * Removing an entry from a linearly probed table can cut off entries
 * stored after it in the same run of occupied slots, so the rest of the
 * run is taken out and re-inserted from each entry's home slot
 * (hash/2 masked to the table size, as in sqz_pool_add_entry ()).
 */
static int sqz_pool_remove (SqzPool *pool, Sqz *squozed, int do_free)
{
  int no = sqz_pool_find (pool, squozed->hash, strlen (squozed->string), (uint8_t*)squozed->string);
  if (no < 0)
    return 0;
  if (pool->hashtable[no] != squozed)
    return 0;

  int mask = pool->size - 1;
  pool->hashtable[no] = NULL;
  if (pool->count > 0)
    pool->count--;

  for (int i = (no + 1) & mask; pool->hashtable[i]; i = (i + 1) & mask)
  {
    Sqz *moved = pool->hashtable[i];
    int  pos   = (moved->hash/2) & mask;

    pool->hashtable[i] = NULL;
    while (pool->hashtable[pos])
      pos = (pos + 1) & mask;
    pool->hashtable[pos] = moved;
  }

  if (do_free)
    free (squozed);
  return 1;
}
#endif

/* Look up the interned string @bytes with id @id in @pool and then, in
 * turn, in each pool of its fallback chain.  Returns the record or NULL.
 */
static Sqz *sqz_lookup (SqzPool *pool, sqz_id_t id, int length, const uint8_t *bytes)
{
  SqzPool *current = pool;

  do
  {
    int pos = sqz_pool_find (current, id, length, bytes);
    if (pos >= 0)
      return current->hashtable[pos];
    current = current->fallback;
  }
  while (current);

  return NULL;
}

/* Report memory figures for @pool (the global pool when NULL).  Each
 * output pointer may be NULL to skip that figure.
 *
 * @size:         bytes of the pool struct plus its slot array
 * @slack:        bytes of slots beyond pool->count
 * @intern_alloc: sum over stored records of string length + 1 + sizeof (Sqz)
 */
void sqz_pool_mem_stats (SqzPool *pool,
                         size_t     *size,
                            size_t     *slack,
                            size_t     *intern_alloc)
{
  SqzPool *target = pool ? pool : &global_pool;

  if (size)
    *size = sizeof (SqzPool) + target->size * sizeof (void*);

  if (slack)
    *slack = (target->size - target->count) * sizeof (void*);

  if (!intern_alloc)
    return;

  size_t total = 0;
  for (int slot = 0; slot < target->size; slot++)
  {
    Sqz *entry = target->hashtable[slot];
    if (!entry)
      continue;
    total += strlen (entry->string) + 1 + sizeof (Sqz);
  }
  *intern_alloc = total;
}

 // we do 32bit also for 64bit - we want the same predetermined hashes to match
/* Turn the NUL-terminated string @str into a Sqz handle using @pool (the
 * global pool when NULL).
 *
 * Strings that sqz_pool_encode () interns are returned as the pointer it
 * hands back in its out parameter; that pointer already includes the
 * SQUOZE_INTERN_DIRECT_STRING offset, so it must not be offset again
 * here.  Everything else is returned as the id cast to a pointer.
 * A NULL @str yields NULL.
 */
static inline Sqz *_sqz_pool_add (SqzPool *pool, const char *str)
{
  if (!str) return NULL;
  if (!pool) pool = &global_pool;
  Sqz *interned = NULL;
  uint64_t hash = sqz_pool_encode (pool, str, strlen (str), &interned);

  if (interned)
  {
#ifdef assert
    /* interned handles must be even, odd values denote embedded ids */
    assert ((((size_t)interned)&0x1)==0);
#endif
    return interned;
  }
  return (Sqz*)((size_t)hash);
}

/* Public entry point: handle for @str using @pool, see _sqz_pool_add (). */
Sqz *sqz_pool_add (SqzPool *pool, const char *str)
{
  Sqz *squozed = _sqz_pool_add (pool, str);
  return squozed;
}

/* Handle for the UTF-8 string @str using the global pool. */
Sqz *sqz_utf8(const char *str)
{
  Sqz *squozed = _sqz_pool_add (NULL, str);
  return squozed;
}

/* sqz_pool_encode:
 * @pool:         pool to intern into, NULL selects the global pool
 * @utf8:         string bytes
 * @len:          number of bytes of @utf8 to encode
 * @interned_ref: when NULL only the id is computed; otherwise strings
 *                whose id is even (they did not fit in the id) are
 *                interned and the handle is stored here
 *
 * Encodes @utf8 to a squoze id of SQUOZE_ID_BITS bits and returns it.
 * An existing record gets its reference count bumped, a new record is
 * created otherwise.  Exactly @len bytes are copied into the new record
 * so that a longer source string cannot overrun the allocation.
 * Returns 0 without touching @interned_ref if the record cannot be
 * allocated.
 */
static uint64_t sqz_pool_encode (SqzPool *pool, const char *utf8, size_t len, Sqz **interned_ref)
{
#if   SQUOZE_ID_BITS==32 && SQUOZE_ID_MURMUR
   uint64_t hash = MurmurOAAT32(utf8, len) & ~1;
#elif  SQUOZE_ID_BITS==32 && SQUOZE_ID_UTF5
   uint64_t hash = squoze32_utf5 (utf8, len);
#elif SQUOZE_ID_BITS==32 && SQUOZE_ID_UTF8
   uint64_t hash = squoze32_utf8 (utf8, len);
#elif SQUOZE_ID_BITS==62 && SQUOZE_ID_UTF5
   uint64_t hash = squoze62_utf5 (utf8, len);
#elif SQUOZE_ID_BITS==52 && SQUOZE_ID_UTF5
   uint64_t hash = squoze52_utf5 (utf8, len);
#elif SQUOZE_ID_BITS==64 && SQUOZE_ID_UTF8
   uint64_t hash = squoze64_utf8 (utf8, len);
#else
   uint64_t hash = squoze_encode_id (SQUOZE_ID_BITS, SQUOZE_ID_UTF5, utf8, len);
#endif

  if (!interned_ref)
    return hash;
  if ((hash & 1) != 0)
    return hash; /* embedded in the id, nothing to intern */
  if (pool == NULL) pool = &global_pool;

  Sqz *str = sqz_lookup (pool, hash, len, (const uint8_t*)utf8);
  if (str)
  {
#if SQUOZE_REF_COUNTING
    str->ref_count++;
#endif
    *interned_ref = str + SQUOZE_INTERN_DIRECT_STRING;
    return hash;
  }

  Sqz *entry = (Sqz*)calloc (1, sizeof (Sqz) + len + 1);
  if (!entry)
    return 0;
  entry->hash = hash;
#if SQUOZE_STORE_LENGTH
  entry->length = len;
#endif
  memcpy (entry->string, utf8, len);
  entry->string[len] = 0;
  *interned_ref = entry + SQUOZE_INTERN_DIRECT_STRING;
  sqz_pool_add_entry (pool, entry);
  return hash;
}

/* A handle whose lowest bit is clear is treated as a pointer to an
 * interned record; note that this includes NULL. */
static inline int sqz_is_interned (Sqz *squozed)
{
  size_t bits = (size_t) squozed;
  return (bits & 1) == 0;
}

/* A handle with its lowest bit set carries the string inside the id. */
static inline int sqz_is_embedded (Sqz *squozed)
{
  return sqz_is_interned (squozed) ? 0 : 1;
}

/* Return the string of @squozed.
 *
 * For interned handles this is the stored string itself; for embedded
 * handles the id is decoded into @temp by the decoder matching
 * SQUOZE_ID_BITS and the encoding in use, and that decoder's result is
 * returned (it can be NULL).  A NULL @squozed yields NULL.
 */
static const char *sqz_decode (Sqz *squozed, char *temp)
{
  if (!squozed) return NULL;

  if (!sqz_is_embedded (squozed))
  {
#if SQUOZE_INTERN_DIRECT_STRING
    return (char*)squozed;
#else
    return squozed->string;
#endif
  }

  size_t id = (size_t)squozed;
#if   SQUOZE_ID_BITS==32 && SQUOZE_ID_UTF5
  return squoze32_utf5_decode (id, temp);
#elif SQUOZE_ID_BITS==32 && SQUOZE_ID_UTF8
  return squoze32_utf8_decode (id, temp);
#elif SQUOZE_ID_BITS==52 && SQUOZE_ID_UTF5
  return squoze52_utf5_decode (id, temp);
#elif SQUOZE_ID_BITS==62 && SQUOZE_ID_UTF5
  return squoze62_utf5_decode (id, temp);
#elif SQUOZE_ID_BITS==64 && SQUOZE_ID_UTF8
  return squoze64_utf8_decode (id, temp);
#else
  return squoze_id_decode (SQUOZE_ID_BITS,
                           id,
                           SQUOZE_ID_UTF5,
                           temp);
#endif
}

/* Take another reference on @squozed and return it.  Only interned
 * handles carry a reference count; embedded ids and NULL are returned
 * unchanged (NULL would otherwise be classified as interned and
 * dereferenced).
 */
Sqz *sqz_ref (Sqz *squozed)
{
#if SQUOZE_REF_COUNTING
  if (squozed && sqz_is_interned (squozed))
  {
     (squozed-SQUOZE_INTERN_DIRECT_STRING)->ref_count ++;
  }
#endif
  return squozed;
}
/* Same as sqz_ref (): handles are shared, not copied. */
Sqz *sqz_dup (Sqz *squozed)
{
  Sqz *same = sqz_ref (squozed);
  return same;
}

/* Drop one reference on @squozed.  Embedded ids and NULL are ignored
 * (NULL would otherwise be classified as interned and dereferenced).
 *
 * A count above 0 is simply decremented.  On the last reference, with
 * SQUOZE_REF_SANITY the record is kept and further unrefs are reported
 * as double unrefs; without it the record is removed from, and freed by,
 * the first pool that holds it (global pool first, then the sqz_pools
 * list).
 */
void sqz_unref (Sqz *squozed)
{
#if SQUOZE_REF_COUNTING
  if (!squozed || !sqz_is_interned (squozed))
    return;

#if SQUOZE_INTERN_DIRECT_STRING
  squozed--;
#endif
  if (squozed->ref_count > 0)
  {
    squozed->ref_count--;
    return;
  }

#if SQUOZE_CLOBBER_ON_FREE
  squozed->string[-squozed->ref_count]='#';
#endif
#if SQUOZE_REF_SANITY
  if (squozed->ref_count < 0)
    fprintf (stderr, "double unref for \"%s\"\n", squozed->string);
  squozed->ref_count--;
#else
  if (sqz_pool_remove (&global_pool, squozed, 1))
    return;
  for (SqzPool *pool = sqz_pools; pool; pool = pool->next)
  {
    if (sqz_pool_remove (pool, squozed, 1))
      return;
  }
#endif
#else
  (void) squozed;
#endif
}

/* Returns non-zero when the string of @a starts with the string of
 * @prefix.  sqz_decode () can return NULL (for a NULL handle and for ids
 * that squoze_id_decode () refuses); that is treated as the empty string.
 */
int sqz_has_prefix (Sqz *a, Sqz *prefix)
{
  char tmp_a[16];
  char tmp_prefix[16];
  const char *a_str = sqz_decode (a, tmp_a);
  const char *prefix_str = sqz_decode (prefix, tmp_prefix);
  if (!a_str) a_str = "";
  if (!prefix_str) prefix_str = "";
  return !strncmp (a_str, prefix_str, strlen (prefix_str));
}

/* sqz_has_prefix () taking the prefix as a UTF-8 C string. */
int sqz_has_prefix_utf8 (Sqz *a, const char *utf8)
{
  Sqz *prefix = sqz_utf8 (utf8);
  int  matches = sqz_has_prefix (a, prefix);

  sqz_unref (prefix);
  return matches;
}

/* sqz_has_suffix () taking the suffix as a UTF-8 C string. */
int sqz_has_suffix_utf8 (Sqz *a, const char *utf8)
{
  Sqz *suffix = sqz_utf8 (utf8);
  int  result = sqz_has_suffix (a, suffix);

  sqz_unref (suffix);
  return result;
}

/* Returns non-zero when the string of @a ends with the string of
 * @suffix, mirroring sqz_has_prefix ().  The comparison result is negated
 * since strcmp () returns 0 on a match.  A NULL result from sqz_decode ()
 * is treated as the empty string.
 */
int sqz_has_suffix (Sqz *a, Sqz *suffix)
{
  char        tmp_a[16];
  char        tmp_suffix[16];
  const char *a_str      = sqz_decode (a, tmp_a);
  const char *suffix_str = sqz_decode (suffix, tmp_suffix);

  if (!a_str) a_str = "";
  if (!suffix_str) suffix_str = "";

  size_t a_len      = strlen (a_str);
  size_t suffix_len = strlen (suffix_str);

  if (a_len < suffix_len)
    return 0;
  return !strcmp (a_str + a_len - suffix_len, suffix_str);
}


/* Replace *@squoze with @head followed by the old *@squoze, dropping
 * the reference to the old value.  Does nothing when @squoze is NULL. */
static void _sqz_prepend (Sqz **squoze, Sqz *head)
{
  if (squoze)
  {
    Sqz *previous = *squoze;
    Sqz *joined   = sqz_cat (head, previous);

    sqz_unref (previous);
    *squoze = joined;
  }
}

/* Replace *@squoze with the old *@squoze followed by @tail, dropping
 * the reference to the old value.  Does nothing when @squoze is NULL. */
static void _sqz_append (Sqz **squoze, Sqz *tail)
{
  if (squoze)
  {
    Sqz *previous = *squoze;
    Sqz *joined   = sqz_cat (previous, tail);

    sqz_unref (previous);
    *squoze = joined;
  }
}

/* sqz_substring:
 * @a:      source string
 * @pos:    index of the first code point; negative values count from the
 *          end, with -1 denoting the position after the last code point
 * @length: maximum number of code points to take
 *
 * Returns a new handle for the selected range.  A @pos beyond the end
 * yields the empty string.  Only the selected bytes are copied, a NULL
 * result from sqz_decode () is treated as the empty string, and stepping
 * never moves past the terminator even when the string ends in a
 * truncated multi-byte sequence.  Returns NULL if a temporary buffer
 * cannot be allocated.
 */
Sqz *sqz_substring (Sqz *a, int pos, int length)
{
  int src_length = sqz_length (a);
  if (pos > src_length)
    return sqz_utf8 ("");
  if (pos < 0)
    pos = src_length + pos + 1;

  char tmp[16];
  const char *src = sqz_decode (a, tmp);
  if (!src)
    src = "";

  const char *start = src;
  for (int i = 0; i < pos && *start; i++)
    for (int n = squoze_utf8_len (*start); n > 0 && *start; n--)
      start++;

  const char *end = start;
  for (int i = 0; i < length && *end; i++)
    for (int n = squoze_utf8_len (*end); n > 0 && *end; n--)
      end++;

  size_t n_bytes = (size_t)(end - start);
  char   stack_copy[256];
  char  *copy = stack_copy;
  if (n_bytes >= sizeof (stack_copy))
  {
    copy = (char*)malloc (n_bytes + 1);
    if (!copy)
      return NULL;
  }
  memcpy (copy, start, n_bytes);
  copy[n_bytes] = 0;

  Sqz *ret = sqz_utf8 (copy);
  if (copy != stack_copy)
    free (copy);
  return ret;
}

/* Remove @length code points starting at @pos from *@a.  A negative
 * @pos counts from the end.  Nothing happens when @a or *@a is NULL or
 * @length is below 1.
 *
 * The tail is taken with the full string length as its limit, so
 * strings of any length keep their tail.
 */
void sqz_erase (Sqz **a, int pos, int length)
{
  if (!a) return;
  if (!*a) return;

  if (length < 1)
    return;

  int total = sqz_length (*a);
  if (pos < 0)
  {
    pos = total + pos;
  }

  Sqz *pre  = sqz_substring (*a, 0, pos);
  Sqz *post = sqz_substring (*a, pos+length, total);
  sqz_unref (*a);
  *a = sqz_cat (pre, post);
  sqz_unref (pre);
  sqz_unref (post);
}

/* Insert @b into *@a before code point @pos.  @pos 0 prepends and -1
 * appends; other negative values count from the end.  Apart from those
 * two cases nothing happens when @a or *@a is NULL.
 *
 * The tail is taken with the full string length as its limit, so
 * strings of any length keep their tail.
 */
void sqz_insert (Sqz **a, int pos, Sqz *b)
{
  if (pos == 0)
  {
    _sqz_prepend (a, b);
    return;
  }
  if (pos == -1)
  {
    _sqz_append (a, b);
    return;
  }
  if (!a) return;
  if (!*a) return;

  int total = sqz_length (*a);
  if (pos < 0)
  {
    pos = total + pos + 1;
  }
  Sqz *pre  = sqz_substring (*a, 0, pos);
  Sqz *post = sqz_substring (*a, pos, total);
  sqz_unref (*a);

  *a = sqz_cat (pre, b);
  _sqz_append (a, post);
  sqz_unref (pre);
  sqz_unref (post);
}

/* sqz_insert () taking the inserted text as a UTF-8 C string. */
void sqz_insert_utf8 (Sqz **a, int pos, const char *utf8)
{
  Sqz *text = sqz_utf8 (utf8);

  sqz_insert (a, pos, text);
  sqz_unref (text);
}

/* Handle for the one-code-point string @unichar; code points that
 * squoze_unichar_to_utf8 () rejects give the empty string. */
Sqz *sqz_unichar (uint32_t unichar)
{
  char temp[5];
  int  n_bytes = squoze_unichar_to_utf8 (unichar, (uint8_t*)temp);

  temp[n_bytes] = 0;
  return sqz_utf8 (temp);
}

/* Handle for the decimal text of @value ("%i"); the first ',' in the
 * formatted text, if any, is replaced with '.'. */
Sqz *sqz_int (int value)
{
  char  temp[40];
  char *comma;

  sprintf (temp, "%i", value);
  comma = strchr (temp, ',');
  if (comma)
    *comma = '.';
  return sqz_utf8 (temp);
}

/* Handle for the "%f" text of @value; the first ',' in the formatted
 * text (a locale decimal separator), if any, is replaced with '.'.
 *
 * "%f" prints every integer digit, which is over 300 characters for the
 * largest doubles, so the buffer is sized for that and the write is
 * bounded with snprintf ().
 */
Sqz *sqz_double (double value)
{
  char  temp[384];
  char *comma;

  snprintf (temp, sizeof (temp), "%f", value);
  comma = strchr (temp, ',');
  if (comma)
    *comma = '.';
  return sqz_utf8 (temp);
}

/* sqz_insert () taking the inserted text as a single code point. */
void sqz_insert_unichar (Sqz **a, int pos, uint32_t unichar)
{
  Sqz *text = sqz_unichar (unichar);

  sqz_insert (a, pos, text);
  sqz_unref (text);
}

/* sqz_insert () of the text sqz_double () produces for @value. */
void sqz_insert_double (Sqz **a, int pos, double value)
{
  Sqz *text = sqz_double (value);

  sqz_insert (a, pos, text);
  sqz_unref (text);
}

/* sqz_insert () of the text sqz_int () produces for @value. */
void sqz_insert_int (Sqz **a, int pos, int value)
{
  Sqz *text = sqz_int (value);

  sqz_insert (a, pos, text);
  sqz_unref (text);
}

/* Returns the code point at index @pos of @a; a negative @pos counts
 * from the end.  Returns 0 when @a has no decodable string or @pos lies
 * at or beyond the end; stepping never moves past the terminator.
 */
uint32_t sqz_unichar_at (Sqz *a, int pos)
{
  char tmp[16];
  const char *p = sqz_decode (a, tmp);

  if (!p)
    return 0;
  if (pos < 0)
  {
    pos = sqz_length (a) + pos;
  }
  for (int i = 0; i < pos && *p; i++)
    for (int n = squoze_utf8_len (*p); n > 0 && *p; n--)
      p++;
  return squoze_utf8_to_unichar (p);
}

/* Replace @length code points of *@a starting at @pos with @b: the
 * range is erased first, then @b is inserted at the same @pos. */
void sqz_replace (Sqz **a, int pos, int length, Sqz *b)
{
  sqz_erase (a, pos, length);
  sqz_insert (a, pos, b);
}

/* sqz_replace () taking the replacement as a single code point. */
void sqz_replace_unichar  (Sqz **a, int pos, int length, uint32_t unichar)
{
  Sqz *replacement = sqz_unichar (unichar);

  sqz_replace (a, pos, length, replacement);
  sqz_unref (replacement);
}

/* sqz_replace () taking the replacement as a UTF-8 C string. */
void sqz_replace_utf8  (Sqz **a, int pos, int length, const char *utf8)
{
  Sqz *replacement;

  sqz_erase (a, pos, length);
  replacement = sqz_utf8 (utf8);
  sqz_insert (a, pos, replacement);
  sqz_unref (replacement);
}

/* Append the UTF-8 C string @utf8 to *@a (insert at position -1). */
void sqz_append_utf8 (Sqz **a, const char *utf8)
{
  Sqz *tail = sqz_utf8 (utf8);

  sqz_insert (a, -1, tail);
  sqz_unref (tail);
}

/* Append the code point @unichar to *@a (insert at position -1). */
void sqz_append_unichar (Sqz **a, uint32_t unichar)
{
  Sqz *tail = sqz_unichar (unichar);

  sqz_insert (a, -1, tail);
  sqz_unref (tail);
}

/* Body fragment for the printf style entry points below: expects a
 * `format` parameter followed by varargs and leaves the formatted text
 * as the handle `b`.
 *
 * The text is measured with a first vsnprintf () pass, then formatted
 * into a stack buffer (below 256 bytes) or a heap buffer.  A formatting
 * error or a failed allocation yields the empty string rather than
 * interning an uninitialised or NULL buffer.
 */
#define SQZ_EXPAND_PRINTF \
  va_list ap; \
  int formatted; \
  size_t needed; \
  char *buffer; \
  va_start (ap, format); \
  formatted = vsnprintf (NULL, 0, format, ap); \
  va_end (ap);\
  needed = (formatted < 0) ? 1 : (size_t)formatted + 1; \
  if (needed < 256) \
    buffer = alloca (needed);\
  else\
    buffer = malloc (needed);\
  if (buffer)\
  {\
    buffer[0] = 0;\
    if (formatted >= 0)\
    {\
      va_start (ap, format);\
      vsnprintf (buffer, needed, format, ap);\
      va_end (ap);\
    }\
  }\
  Sqz *b = sqz_utf8 (buffer ? buffer : "");\
  if (needed >= 256)\
    free (buffer);

/* Returns a handle for the printf style formatted text. */
Sqz      *sqz_printf (const char *format, ...)
{
  SQZ_EXPAND_PRINTF; /* declares and fills the local handle `b` */
  return b;
}

/* Insert printf style formatted text into *@a at @pos. */
void sqz_insert_printf (Sqz **a, int pos, const char *format, ...)
{
  SQZ_EXPAND_PRINTF; /* declares and fills the local handle `b` */
  sqz_insert (a, pos, b);
  sqz_unref (b);
}

/* Replace @length code points of *@a at @pos with printf style
 * formatted text. */
void sqz_replace_printf (Sqz **a, int pos, int length, const char *format, ...)
{
  SQZ_EXPAND_PRINTF; /* declares and fills the local handle `b` */
  sqz_replace (a, pos, length, b);
  sqz_unref (b);
}

/* Append printf style formatted text to *@a (insert at position -1). */
void sqz_append_printf (Sqz **a, const char *format, ...)
{
  SQZ_EXPAND_PRINTF; /* declares and fills the local handle `b` */
  sqz_insert (a, -1, b);
  sqz_unref (b);
}

/* strcmp () over the strings of @a and @b; identical handles compare
 * equal without decoding.  A NULL result from sqz_decode () is treated
 * as the empty string.
 */
int sqz_strcmp (Sqz *a, Sqz *b)
{
  if (a == b) return 0;
  char tmp_a[16];
  char tmp_b[16];
  const char *str_a = sqz_decode (a, tmp_a);
  const char *str_b = sqz_decode (b, tmp_b);
  if (!str_a) str_a = "";
  if (!str_b) str_b = "";
  return strcmp (str_a, str_b);
}

/* Store @b in *@a, taking over the caller's reference to @b; the
 * previous value, if any, is unreffed. */
static void _sqz_steal (Sqz **a, Sqz *b)
{
  Sqz *previous = *a;

  if (previous)
    sqz_unref (previous);
  *a = b;
}

/* Store a new reference to @b in *@a and drop the reference to the
 * previous value.
 *
 * The new reference is taken before the old one is dropped, so that
 * setting a variable to the value it already holds cannot release the
 * string in between.  A NULL @b stores NULL.
 */
void sqz_set (Sqz **a, Sqz *b)
{
  Sqz *previous = *a;

  *a = b ? sqz_ref (b) : NULL;
  if (previous)
    sqz_unref (previous);
}

/* Set *@a to the UTF-8 C string @str, releasing the previous value. */
void sqz_set_utf8 (Sqz **a, const char *str)
{
  Sqz *fresh = sqz_utf8 (str);

  _sqz_steal (a, fresh);
}

/* Set *@a to printf style formatted text, releasing the previous value. */
void sqz_set_printf (Sqz **a, const char *format, ...)
{
  SQZ_EXPAND_PRINTF; /* declares and fills the local handle `b` */
  _sqz_steal (a, b);
}

/* Release the value held in *@a, if any, and leave NULL behind. */
void sqz_unset (Sqz **a)
{
  if (*a != NULL)
  {
    sqz_unref (*a);
    *a = NULL;
  }
}

/* Returns the squoze id of @squozed: 0 for NULL, the handle value
 * itself for embedded strings and the stored hash for interned ones. */
sqz_id_t sqz_id (Sqz *squozed)
{
  if (!squozed) return 0;

  if (!sqz_is_embedded (squozed))
  {
#if SQUOZE_INTERN_DIRECT_STRING
    squozed--;
#endif
    return squozed->hash;
  }

  return ((size_t)(squozed));
}

/* Returns the number of code points in @squozed, 0 for NULL.
 *
 * The scratch buffer is 16 bytes, the size squoze_id_decode () reports
 * to the decoder and the size every other caller of sqz_decode () here
 * provides.
 */
int sqz_length (Sqz *squozed)
{
  char buf[16];
  if (!squozed) return 0;
  return squoze_utf8_strlen(sqz_decode (squozed, buf));
}

// XXX : not used - remove it, and be unicode native?
/* Returns the length of @squozed in bytes, 0 for NULL.  Embedded
 * strings are decoded into a 16 byte scratch buffer (the size
 * squoze_id_decode () reports to the decoder); interned strings use the
 * stored length when SQUOZE_STORE_LENGTH is set and strlen () otherwise.
 */
int sqz_byte_length (Sqz *squozed)
{
  char buf[16];
  if (!squozed) return 0;

  if (sqz_is_embedded (squozed))
  {
    sqz_decode (squozed, buf);
    return strlen (buf);
  }

#if SQUOZE_INTERN_DIRECT_STRING
  squozed--;
#endif
#if SQUOZE_STORE_LENGTH
  return squozed->length;
#else
  return strlen (squozed->string);
#endif
}

/* Returns a new handle for the string of @a followed by the string of
 * @b.  A NULL result from sqz_decode () is treated as the empty string,
 * before any length is taken.  Results shorter than 128 bytes are
 * assembled on the stack, longer ones in a temporary heap buffer; NULL
 * is returned if that buffer cannot be allocated.
 */
Sqz *sqz_cat (Sqz *a, Sqz *b)
{
  char buf_a[16];
  char buf_b[16];
  const char *str_a = sqz_decode (a, buf_a);
  const char *str_b = sqz_decode (b, buf_b);
  if (!str_a) str_a = "";
  if (!str_b) str_b = "";

  size_t len_a = strlen (str_a);
  size_t len_b = strlen (str_b);
  char   stack_buf[128];
  char  *joined = stack_buf;

  if (len_a + len_b >= sizeof (stack_buf))
  {
    joined = (char*)malloc (len_a + len_b + 1);
    if (!joined)
      return NULL;
  }
  memcpy (joined, str_a, len_a);
  memcpy (joined + len_a, str_b, len_b);
  joined[len_a + len_b] = 0;

  Sqz *ret = sqz_utf8 (joined);
  if (joined != stack_buf)
    free (joined);
  return ret;
}


/* Create an empty pool and link it at the head of the sqz_pools list.
 * @fallback, when given, is consulted for strings not found in the new
 * pool and gains a reference.  Returns NULL if allocation fails.
 */
SqzPool *sqz_pool_new     (SqzPool *fallback)
{
  SqzPool *pool = (SqzPool*)calloc (1, sizeof (SqzPool));
  if (!pool)
    return NULL;
  pool->fallback = fallback;
  pool->next = sqz_pools;
  sqz_pools = pool;
  if (fallback)
    sqz_pool_ref (fallback);
  return pool;
}

/* Take an additional reference on @pool; NULL is ignored.
 *
 * sqz_pool_unref () destroys the pool when the count is 0 and decrements
 * it otherwise, so taking a reference has to increment the count.
 */
void sqz_pool_ref (SqzPool *pool)
{
  if (!pool) return;
  pool->ref_count++;
}

/* Release everything @pool owns: the stored records, the slot array and
 * its reference on the fallback pool, and unlink it from the sqz_pools
 * list.  The pool struct itself is not freed.
 *
 * The list is only modified when @pool is actually found in it; the
 * global pool, for instance, is never linked.
 */
static void sqz_pool_destroy (SqzPool *pool)
{
    for (int i = 0; i < pool->size; i++)
    {
      free (pool->hashtable[i]);
      pool->hashtable[i] = NULL;
    }
    if (pool->fallback)
      sqz_pool_unref (pool->fallback);

    for (SqzPool **link = &sqz_pools; *link; link = &(*link)->next)
    {
      if (*link == pool)
      {
        *link = pool->next;
        break;
      }
    }

    pool->size = 0;
    pool->count = 0;
    free (pool->hashtable);
    pool->hashtable = NULL;

    // XXX report non unreffed items based on config
}

/* Drop a reference on @pool; NULL is ignored.  When the count is
 * already 0 the pool is destroyed and freed, otherwise the count is
 * decremented. */
void sqz_pool_unref (SqzPool *pool)
{
  if (!pool) return;

  if (pool->ref_count != 0)
  {
    pool->ref_count--;
    return;
  }

  sqz_pool_destroy (pool);
  free (pool);
}

void
sqz_cleanup (void)
{
  sqz_pool_destroy (&global_pool);
  // also destory other known pools
  // XXX : when debugging report leaked pools
}
#endif

// UTF5 implementation

#if SQUOZE_USE_UTF5

// extra value meaning in UTF5 mode
// (emitted by the encoders when switching from UTF5 to squeeze mode)
#define SQUOZE_ENTER_SQUEEZE    16

// value meanings in squeeze mode
// the encoders emit DEC/INC_OFFSET_B for a jump of -1/+1 strides and
// DEC/INC_OFFSET_A for -2/+2 strides; longer jumps are sent as two
// quintets taken from the range starting at SQUOZE_DEC_OFFSET_A
#define SQUOZE_SPACE            0
#define SQUOZE_DEC_OFFSET_A     27
#define SQUOZE_INC_OFFSET_A     28
#define SQUOZE_DEC_OFFSET_B     29
#define SQUOZE_INC_OFFSET_B     30
#define SQUOZE_ENTER_UTF5       31


/* forward declarations of the UTF-8 helpers used by the UTF5 code */
static inline uint32_t squoze_utf8_to_unichar (const char *input);
static inline int      squoze_unichar_to_utf8 (uint32_t  ch, uint8_t  *dest);
static inline int      squoze_utf8_len        (const unsigned char first_byte);
static inline int      squoze_utf8_strlen     (const char *s);


/* Returns the base offset of the segment that @unichar falls in.
 *
 * Segments are SQUOZE_JUMP_STRIDE (26) code points long and shifted by
 * SQUOZE_JUMP_OFFSET so that 'a'-'z' form exactly one segment.
 */
#define SQUOZE_JUMP_STRIDE      26
#define SQUOZE_JUMP_OFFSET      19
static inline int squoze_new_offset (uint32_t unichar)
{
  uint32_t base = (unichar / SQUOZE_JUMP_STRIDE) * SQUOZE_JUMP_STRIDE
                  + SQUOZE_JUMP_OFFSET;

  if (base > unichar)
    base -= SQUOZE_JUMP_STRIDE;
  return base;
}

/* Returns how many segments of SQUOZE_JUMP_STRIDE code points the
 * offset @off must move for @unicha to fall in the current segment:
 * negative to move down, positive to move up, 0 when it is already in
 * range.  Space (32) always reports 0.
 */
static inline int squoze_needed_jump (uint32_t off, uint32_t unicha)
{
  int unichar    = unicha;
  int offset     = off;
  int steps_down = 0;

  if (unichar == 32) // space is always in range
    return 0;

  /* TODO: replace this with direct computation of values instead of loop */
  for (; unichar < offset; offset -= SQUOZE_JUMP_STRIDE)
    steps_down++;

  if (steps_down)
    return -steps_down;

  return (unichar - offset) / SQUOZE_JUMP_STRIDE;
}


/* Length estimate, in quintets, used by the cost heuristics of the
 * encoders for a code point sent in UTF5 mode; 0 reports 1.
 *
 * NOTE(review): the two preprocessor branches do not compute the same
 * value.  For unichar 1 the builtin branch gives 31/4+1 = 8 while the
 * loop gives 2; for 0x10FFFF they give 3 and 7.  The result feeds the
 * mode decisions of the encoders, so ids may depend on
 * SQUOZE_USE_BUILTIN_CLZ - confirm which variant is intended before
 * changing either.
 */
static inline int
squoze_utf5_length (uint32_t unichar)
{
  if (unichar == 0)
    return 1;
#if SQUOZE_USE_BUILTIN_CLZ
  return __builtin_clz(unichar)/4+1;
#else
  /* one more than the number of hex digits of unichar */
  int nibbles = 1;
  while (unichar)
  {
    nibbles ++;
    unichar /= 16;
  }
  return nibbles;
#endif
}

/* Encoder state record.  NOTE(review): no use of this type is visible in
 * this part of the file; the field notes below are inferred from the
 * same-named locals of the encoders - confirm against its users.
 */
typedef struct EncodeUtf5 {
  int      is_utf5;    /* presumably non-zero while in UTF5 mode */
  int      offset;     /* presumably the current segment base */
  int      length;
  void    *write_data;
  uint32_t current;
} EncodeUtf5;

/* Cost, in quintets, of sending the current code point in UTF5 mode:
 * @utf5_length, plus @next_utf5_length when a next code point exists
 * (@next_val is non-zero).  @offset and @val are not consulted.
 */
static inline int squoze_compute_cost_utf5 (int offset, int val, int utf5_length, int next_val, int next_utf5_length)
{
  int total = utf5_length;

  (void) offset;
  (void) val;

  if (next_val != 0)
    total += next_utf5_length;
  return total;
}

/* Cost, in quintets, of sending the current code point in squeeze mode.
 *
 * The current code point costs 1 without a jump, 2 for a jump of up to
 * two segments, 3 for up to ten and 100 beyond that, which makes the
 * other mode win.  When a next code point exists (@next_val non-zero)
 * the cheaper of staying in squeeze mode for it and switching to UTF5
 * for it is added.  @val and @next_utf5_length are not consulted.
 */
static inline int squoze_compute_cost_squeezed (int offset, int val, int needed_jump, int next_val, int next_utf5_length)
{
  int cost;

  (void) val;
  (void) next_utf5_length;

  if (needed_jump == 0)
  {
    cost = 1;
  }
  else if (-2 <= needed_jump && needed_jump <= 2)
  {
    cost = 2;
    offset += SQUOZE_JUMP_STRIDE * needed_jump;
  }
  else if (-10 <= needed_jump && needed_jump <= 10)
  {
    cost = 3;
    offset += SQUOZE_JUMP_STRIDE * needed_jump;
  }
  else
  {
    cost = 100; // very expensive, makes the other choice win
  }

  if (!next_val)
    return cost;

  int switch_cost = 1 + squoze_utf5_length (next_val);
  int next_jump   = squoze_needed_jump (offset, next_val);
  int stay_cost;

  if (next_jump == 0)
    stay_cost = 1;
  else if (-2 <= next_jump && next_jump <= 2)
    stay_cost = 2;
  else if (-10 <= next_jump && next_jump <= 10)
    stay_cost = 3;
  else
    stay_cost = switch_cost;

  return cost + ((switch_cost < stay_cost) ? switch_cost : stay_cost);
}

/* squoze5_encode:
 * @input:           UTF-8 input
 * @inlen:           length of @input in bytes
 * @output:          receives one quintet value (0..31) per byte followed
 *                   by a 0 byte; no bound is checked
 * @r_outlen:        when non-NULL receives the number of quintets written
 * @permit_squeezed: when 0 the encoder never enters squeeze mode
 * @escape_endzero:  when set and the last quintet is 0, a mode change
 *                   quintet is appended
 *
 * Encodes @input as quintets, switching between UTF5 mode and squeeze
 * mode according to the cost functions above.
 *
 * NOTE(review): unlike squoze5_encode_int () there is no reset of
 * next_val when the last code point is reached, so the costs for the
 * last code point are computed with next_val still holding that same
 * code point - confirm whether this is intended.
 */
static inline void squoze5_encode (const char *input, int inlen,
                                   char *output, int *r_outlen,
                                   int   permit_squeezed,
                                   int   escape_endzero)
{
  int offset  = 97;//squoze_new_offset('a');
  int is_utf5 = 1;
  int len     = 0;

  int first_len;
  int next_val = squoze_utf8_to_unichar (&input[0]);
  int next_utf5_length = squoze_utf5_length (next_val);
  for (int i = 0; i < inlen; i+= first_len)
  {
    int val = next_val;
    int utf5_length = next_utf5_length;
    int needed_jump = squoze_needed_jump (offset, val);
    first_len = squoze_utf8_len (input[i]);
    /* look one code point ahead for the cost computations */
    if (i + first_len < inlen)
    {
      next_val = squoze_utf8_to_unichar (&input[i+first_len]);
      next_utf5_length = squoze_utf5_length (next_val);
    }

    /* decide whether to change mode before this code point */
    if (is_utf5)
    {
      int change_cost    = squoze_compute_cost_squeezed (offset, val, needed_jump, next_val, next_utf5_length);
      int no_change_cost = squoze_compute_cost_utf5 (offset, val, utf5_length, next_val, next_utf5_length);
  
      if (i != 0)          /* ignore cost of initial 'G' */
        change_cost += 1;

      if (permit_squeezed && change_cost <= no_change_cost)
      {
        output[len++] = SQUOZE_ENTER_SQUEEZE;
        is_utf5 = 0;
      }
    }
    else
    {
      int change_cost    = 1 + squoze_compute_cost_utf5 (offset, val, utf5_length, next_val, next_utf5_length);
      int no_change_cost = squoze_compute_cost_squeezed (offset, val, needed_jump, next_val, next_utf5_length);

      if (change_cost < no_change_cost)
      {
        output[len++] = SQUOZE_ENTER_UTF5;
        is_utf5 = 1;
      }
    }

    /* in squeeze mode, first move the offset so that val is in range */
    if (!is_utf5)
    {
      if (needed_jump)
      {
        if (needed_jump >= -2 && needed_jump <= 2)
        {
          switch (needed_jump)
          {
            case -1: output[len++] = SQUOZE_DEC_OFFSET_B; break;
            case  1: output[len++] = SQUOZE_INC_OFFSET_B; break;
            case -2: output[len++] = SQUOZE_DEC_OFFSET_A; break;
            case  2: output[len++] = SQUOZE_INC_OFFSET_A; break;
          }
          offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else if (needed_jump >= -10 && needed_jump <= 10) {
              /* jumps of 3..10 map to 0..7, jumps of -3..-10 to 8..15,
               * sent as two base-4 digits */
              int encoded_val;
              if (needed_jump < -2)
                encoded_val = 5 - needed_jump;
              else
                encoded_val = needed_jump - 3;

              output[len++] = (encoded_val / 4) + SQUOZE_DEC_OFFSET_A;
              output[len++] = (encoded_val % 4) + SQUOZE_DEC_OFFSET_A;

              offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else
        {
#ifdef assert
          assert(0); // should not be reached
#endif
          output[len++] = SQUOZE_ENTER_UTF5;
          is_utf5 = 1;
        }
      }
    }

    if (is_utf5)
    {
      offset = squoze_new_offset (val);
      int quintet_no = 0;
      uint8_t temp[12]={0,};

      /* split val into hex digits, lowest first; the highest digit gets
       * 16 added and is written out first by the reversing loop below */
      while (val)
      {
        int oval = val % 16;
        int hi = 16;
        if (val / 16)
          hi = 0;
        temp[quintet_no++] = oval + hi;
        val /= 16;
      }
      for (int i = 0; i < quintet_no; i++)
        output[len++] = temp[quintet_no-1-i];
    }
    else 
    {
      /* position within the current segment, 0 is reserved for space */
      output[len++] = (val == ' ')?SQUOZE_SPACE:val-offset+1;
    }
  }

  if (escape_endzero && len && output[len-1]==0)
  {
    if (is_utf5)
      output[len++] = 16;
    else
      output[len++] = SQUOZE_ENTER_UTF5;
  }
  output[len]=0;
  if (r_outlen)
    *r_outlen = len;
}

/* squoze5_encode_int:
 * @input:          UTF-8 input data
 * @inlen:          length of @input in bytes
 * @maxlen:         maximum number of quintets to encode
 * @overflow:       pointer to int that gets set to 1 if we overflow
 * @escape_endzero: when set and the last quintet is 0, a mode change
 *                  quintet is appended so that the 0 is not taken as the
 *                  end of the quintets
 *
 * Packs the quintet encoding of @input into an integer, the first
 * quintet in the lowest bits.  The result is shifted left by two: bit 0
 * is always set and bit 1 tells whether decoding starts in UTF5 mode.
 * Returns 0 with *@overflow set when the quintets do not fit.
 */
static inline size_t squoze5_encode_int (const char *input, int inlen,
                                         int maxlen, int *overflow,
                                         int escape_endzero)
{
  size_t ret  = 0;
  int offset  = 97;//squoze_new_offset('a');
  int is_utf5 = 1;
  int len     = 0;

  int start_utf5 = 1;
  int gotzero = 0;

  /* assumes that every remaining input byte needs at least one quintet */
#define ADD_QUINTET(q) \
  do { \
    if (len + inlen-i > maxlen) {\
      *overflow = 1;\
      return 0;\
    }\
    ret |= ((size_t)(q))<<(5*len++); gotzero = (q==0);\
  } while (0)

  int first_len;
  int next_val = squoze_utf8_to_unichar (&input[0]);
  int next_utf5_length = squoze_utf5_length (next_val);
  /* NOTE(review): the loop below declares its own i, so the ADD_QUINTET
   * after the loop sees this outer i, which is always 0. */
  int i = 0;
  for (int i = 0; i < inlen; i+= first_len)
  {
    int val         = next_val;
    int utf5_length = squoze_utf5_length (val);
    int needed_jump = squoze_needed_jump (offset, val);
    first_len = squoze_utf8_len (input[i]);
    if (i + first_len < inlen)
    {
      next_val         = squoze_utf8_to_unichar (&input[i+first_len]);
      next_utf5_length = squoze_utf5_length (next_val);
    }
    else
    {
      next_val = 0;
      next_utf5_length = 0;
    }

    if (is_utf5)
    {
      int change_cost    = squoze_compute_cost_squeezed (offset, val, needed_jump, next_val, next_utf5_length);
      int no_change_cost = squoze_compute_cost_utf5 (offset, val, utf5_length, next_val, next_utf5_length);
  
      if (i != 0)          /* ignore cost of initial 'G' */
        change_cost += 1;

      if (change_cost <= no_change_cost)
      {
        if (i != 0)
        { 
          ADD_QUINTET(SQUOZE_ENTER_SQUEEZE);
        }
        else
          start_utf5 = 0;

        is_utf5 = 0;
      }
    }
    else
    {
      int change_cost    = 1 + squoze_compute_cost_utf5 (offset, val, utf5_length, next_val, next_utf5_length);
      int no_change_cost = squoze_compute_cost_squeezed (offset, val, needed_jump, next_val, next_utf5_length);

      if (change_cost < no_change_cost)
      {
        ADD_QUINTET(SQUOZE_ENTER_UTF5);
        is_utf5 = 1;
      }
    }

    if (!is_utf5)
    {
      if (needed_jump)
      {
        if (needed_jump >= -2 && needed_jump <= 2)
        {
          switch (needed_jump)
          {
            case -1: ADD_QUINTET(SQUOZE_DEC_OFFSET_B); break;
            case  1: ADD_QUINTET(SQUOZE_INC_OFFSET_B); break;
            case -2: ADD_QUINTET(SQUOZE_DEC_OFFSET_A); break;
            case  2: ADD_QUINTET(SQUOZE_INC_OFFSET_A); break;
          }
          offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else if (needed_jump >= -10 && needed_jump <= 10) {
              int encoded_val;
              if (needed_jump < -2)
                encoded_val = 5 - needed_jump;
              else
                encoded_val = needed_jump - 3;

              ADD_QUINTET ((encoded_val/4) + SQUOZE_DEC_OFFSET_A);
              ADD_QUINTET ((encoded_val%4) + SQUOZE_DEC_OFFSET_A);

              offset += SQUOZE_JUMP_STRIDE * needed_jump;
        }
        else
        {
#ifdef assert
          assert(0); // should not be reached
#endif
          ADD_QUINTET (SQUOZE_ENTER_UTF5);
          is_utf5 = 1;
        }
      }
    }

    if (is_utf5)
    {
      offset = squoze_new_offset (val);
      int quintet_no = 0;
      uint8_t temp[12]={0,};

      while (val)
      {
        /* hex digit, plus 16 on the most significant digit; the inner
         * parentheses are required since ?: binds weaker than + */
        temp[quintet_no++] = (val&0xf) + ((val/16)?0:16);
        val /= 16;
      }
      for (int j = 0; j < quintet_no; j++)
        ADD_QUINTET(temp[quintet_no-1-j]);
    }
    else 
    {
      ADD_QUINTET((val == ' ')?SQUOZE_SPACE:val-offset+1);
    }
  }

#if 1
  if (escape_endzero && len && gotzero)
  {
    // do a mode-change after 0 to avoid 0 being interpreted
    // as end of quintets
    ADD_QUINTET(is_utf5?16:SQUOZE_ENTER_UTF5);
  }
#endif

#undef ADD_QUINTET

  return (ret<<2) | ((start_utf5*2)|1);
}

/* State of the incremental squoze quintet decoder; one quintet at a time is
 * fed through squoze_decode_utf5().
 *
 * NOTE: squoze_decode_utf5_bytes() fills this struct with a positional
 * initializer, so the field order must not be changed.
 */
typedef struct SquozeUtf5Dec {
  /* non-zero while quintets are read as UTF-5; cleared by
   * SQUOZE_ENTER_SQUEEZE and set by SQUOZE_ENTER_UTF5 */
  int       is_utf5;
  /* base added to squeezed quintets (unichar = offset + quintet - 1); moved
   * in SQUOZE_JUMP_STRIDE steps by jump quintets and recomputed with
   * squoze_new_offset() whenever a UTF-5 code point is emitted */
  int       offset;
  /* opaque pointer handed back to append_unichar on every call */
  void     *write_data;
  /* UTF-5 code point accumulated so far, 4 bits per quintet; 0 is treated
   * as "nothing pending" */
  uint32_t  current;
  /* callback receiving every decoded code point */
  void    (*append_unichar) (uint32_t unichar, void *write_data);
  /* stride count (+-1 or +-2) applied by the immediately preceding jump
   * quintet, 0 when the previous quintet was not a jump */
  int       jumped_amount;
  /* the jump quintet that set jumped_amount; squoze_decode_jump() uses it as
   * the first half of a two-quintet jump */
  int       jump_mode;
} SquozeUtf5Dec;

/* write_data used together with squoze_decode_utf5_append_unichar_as_utf8():
 * a destination byte buffer plus the number of bytes written so far.
 * No capacity is stored here and the append callback does no bounds check,
 * so the buffer owner has to provide enough room (including the terminating
 * zero byte the callback writes at buf[length]).
 */
typedef struct SquozeUtf5DecDefaultData {
  uint8_t *buf;     /* start of the output buffer */
  int      length;  /* bytes written so far, not counting the terminator */
} SquozeUtf5DecDefaultData;

/* append_unichar callback that appends to a SquozeUtf5DecDefaultData buffer.
 *
 * @unichar:    code point to append
 * @write_data: pointer to the SquozeUtf5DecDefaultData being filled
 *
 * The bytes produced by squoze_unichar_to_utf8() are written at the current
 * end of the buffer, the stored length grows by the returned byte count and a
 * zero byte is placed right after the new end.  No bounds checking is done.
 */
static void squoze_decode_utf5_append_unichar_as_utf8 (uint32_t unichar, void *write_data)
{
  SquozeUtf5DecDefaultData *sink = (SquozeUtf5DecDefaultData*)write_data;
  uint8_t *tail = &sink->buf[sink->length];
  int written = squoze_unichar_to_utf8 (unichar, tail);

  sink->length += written;
  sink->buf[sink->length] = 0; /* keep the buffer usable as a C string */
}

/* Finish a two-quintet offset jump.
 *
 * Called by squoze_decode_utf5() when a jump quintet directly follows another
 * jump quintet.  The first quintet already moved dec->offset by
 * dec->jumped_amount strides; that provisional move is taken back and
 * replaced by the jump encoded by the pair:
 *
 *   code = (first - SQUOZE_DEC_OFFSET_A) * 4 + (second - SQUOZE_DEC_OFFSET_A)
 *
 * Codes above 7 mean a jump of (5 - code) strides, all others a jump of
 * (code + 3) strides - the inverse of the encoded_val computation done by
 * the encoder for jumps in the ranges -10..-3 and 3..10.
 *
 * @dec: decoder state; offset is updated and jumped_amount reset to 0
 *       (jump_mode is left untouched)
 * @in:  the second jump quintet
 */
static void squoze_decode_jump (SquozeUtf5Dec *dec, uint8_t in)
{
  int base  = dec->offset - SQUOZE_JUMP_STRIDE * dec->jumped_amount;
  int code  = (dec->jump_mode - SQUOZE_DEC_OFFSET_A) * 4 +
              (in - SQUOZE_DEC_OFFSET_A);
  int steps = (code > 7) ? (5 - code) : (code + 3);

  dec->offset = base + steps * SQUOZE_JUMP_STRIDE;
  dec->jumped_amount = 0;
}

/* Feed one quintet to the decoder.
 *
 * @dec: decoder state, updated in place
 * @in:  the quintet to decode
 *
 * UTF-5 mode (dec->is_utf5 set):
 *   - a quintet >= 16, or SQUOZE_ENTER_SQUEEZE, first emits the pending code
 *     point in dec->current (if non-zero) and re-bases dec->offset on it
 *   - SQUOZE_ENTER_SQUEEZE then switches to squeezed mode, any other quintet
 *     contributes its low four bits to dec->current
 *
 * Squeezed mode:
 *   - a jump quintet moves dec->offset by +-1 / +-2 strides and is remembered;
 *     if the quintet right before it was a jump as well, the pair is resolved
 *     by squoze_decode_jump() instead
 *   - SQUOZE_ENTER_UTF5 and SQUOZE_SPACE are only special when the previous
 *     quintet was not a jump; they switch mode / emit ' '
 *   - everything else emits the code point dec->offset + (in - 1)
 */
static void squoze_decode_utf5 (SquozeUtf5Dec *dec, uint8_t in)
{
  if (dec->is_utf5)
  {
    int leaves_utf5 = (in == SQUOZE_ENTER_SQUEEZE);

    if ((in >= 16 || leaves_utf5) && dec->current)
    {
      dec->offset = squoze_new_offset (dec->current);
      dec->append_unichar (dec->current, dec->write_data);
      dec->current = 0;
    }

    if (leaves_utf5)
      dec->is_utf5 = 0;
    else
      dec->current = dec->current * 16 + (in % 16);
    return;
  }

  /* number of strides a single jump quintet stands for, 0 for non-jumps */
  int steps;
  switch (in)
  {
    case SQUOZE_DEC_OFFSET_A: steps = -2; break;
    case SQUOZE_INC_OFFSET_A: steps =  2; break;
    case SQUOZE_DEC_OFFSET_B: steps = -1; break;
    case SQUOZE_INC_OFFSET_B: steps =  1; break;
    default:                  steps =  0; break;
  }

  if (steps)
  {
    if (dec->jumped_amount)
    {
      /* second jump quintet in a row: resolve the long jump */
      squoze_decode_jump (dec, in);
    }
    else
    {
      dec->jumped_amount = steps;
      dec->jump_mode = in;
      dec->offset += dec->jumped_amount * SQUOZE_JUMP_STRIDE;
    }
    return;
  }

  if (!dec->jumped_amount && in == SQUOZE_ENTER_UTF5)
    dec->is_utf5 = 1;
  else if (!dec->jumped_amount && in == SQUOZE_SPACE)
    dec->append_unichar (' ', dec->write_data);
  else
    dec->append_unichar (dec->offset + (in - 1), dec->write_data);

  dec->jumped_amount = 0;
  dec->jump_mode = 0;
}

static void squoze_decode_utf5_bytes (int is_utf5, 
                                      const unsigned char *input, int inlen,
                                      char *output, int *r_outlen)
{
  SquozeUtf5DecDefaultData append_data = {(unsigned char*)output, 0};
  SquozeUtf5Dec dec = {is_utf5,
                     97,//squoze_new_offset('a'),
                     &append_data,
                     0,
                     squoze_decode_utf5_append_unichar_as_utf8,
                     0, 0
                    };
  for (int i = 0; i < inlen; i++)
    squoze_decode_utf5 (&dec, input[i]);
  if (dec.current)
    dec.append_unichar (dec.current, dec.write_data);
  if (r_outlen)
    *r_outlen = append_data.length;
}
#endif

#endif

#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD


#ifndef MINIZ_EXPORT
#define MINIZ_EXPORT
#endif
/* miniz.c 3.0.0 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing
   See "unlicense" statement at the end of this file.
   Rich Geldreich <richgel99@gmail.com>, last updated Oct. 13, 2013
   Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt

   Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define
   MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros).

   * Low-level Deflate/Inflate implementation notes:

     Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or
     greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses
     approximately as well as zlib.

     Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function
     coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory
     block large enough to hold the entire file.

     The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation.

   * zlib-style API notes:

     miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in
     zlib replacement in many apps:
        The z_stream struct, optional memory allocation callbacks
        deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound
        inflateInit/inflateInit2/inflate/inflateReset/inflateEnd
        compress, compress2, compressBound, uncompress
        CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines.
        Supports raw deflate streams or standard zlib streams with adler-32 checking.

     Limitations:
      The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries.
      I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but
      there are no guarantees that miniz.c pulls this off perfectly.

   * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by
     Alex Evans. Supports 1-4 bytes/pixel images.

   * ZIP archive API notes:

     The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to
     get the job done with minimal fuss. There are simple API's to retrieve file information, read files from
     existing archives, create new archives, append new files to existing archives, or clone archive data from
     one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h),
     or you can specify custom file read/write callbacks.

     - Archive reading: Just call this function to read a single file from a disk archive:

      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name,
        size_t *pSize, mz_uint zip_flags);

     For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central
     directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files.

     - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file:

     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);

     The locate operation can optionally check file comments too, which (as one example) can be used to identify
     multiple versions of the same file in an archive. This function uses a simple linear search through the central
     directory, so it's not very fast.

     Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and
     retrieve detailed info on each file by calling mz_zip_reader_file_stat().

     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data
     to disk and builds an exact image of the central directory in memory. The central directory image is written
     all at once at the end of the archive file when the archive is finalized.

     The archive writer can optionally align each file's local header and file data to any power of 2 alignment,
     which can be useful when the archive will be read from optical media. Also, the writer supports placing
     arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still
     readable by any ZIP tool.

     - Archive appending: The simple way to add a single file to an archive is to call this function:

      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name,
        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);

     The archive will be created if it doesn't already exist, otherwise it'll be appended to.
     Note the appending is done in-place and is not an atomic operation, so if something goes wrong
     during the operation it's possible the archive could be left without a central directory (although the local
     file headers and file data will be fine, so the archive will be recoverable).

     For more complex archive modification scenarios:
     1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to
     preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which compiles the
     compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and
     you're done. This is safe but requires a bunch of temporary disk space or heap memory.

     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(),
     append new files as needed, then finalize the archive which will write an updated central directory to the
     original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a
     possibility that the archive's central directory could be lost with this method if anything goes wrong, though.

     - ZIP archive support limitations:
     No spanning support. Extraction functions can only handle unencrypted, stored or deflated files.
     Requires streams capable of seeking.

   * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the
     below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it.

   * Important: For best perf. be sure to customize the below macros for your target platform:
     #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
     #define MINIZ_LITTLE_ENDIAN 1
     #define MINIZ_HAS_64BIT_REGISTERS 1

   * On platforms using glibc, Be sure to "#define _LARGEFILE64_SOURCE 1" before including miniz.c to ensure miniz
     uses the 64-bit variants: fopen64(), stat64(), etc. Otherwise you won't be able to process large files
     (i.e. 32-bit stat() fails for me on files > 0x7FFFFFFF bytes).
*/
#pragma once



/* Defines to completely disable specific portions of miniz.c: 
   If all macros here are defined the only functionality remaining will be CRC-32 and adler-32. */

/* Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. */
/*#define MINIZ_NO_STDIO */

/* If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or */
/* get/set file times, and the C run-time funcs that get/set times won't be called. */
/* The current downside is the times written to your archives will be from 1979. */
/*#define MINIZ_NO_TIME */

/* Define MINIZ_NO_DEFLATE_APIS to disable all compression API's. */
/*#define MINIZ_NO_DEFLATE_APIS */

/* Define MINIZ_NO_INFLATE_APIS to disable all decompression API's. */
/*#define MINIZ_NO_INFLATE_APIS */

/* Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. */
/*#define MINIZ_NO_ARCHIVE_APIS */

/* Define MINIZ_NO_ARCHIVE_WRITING_APIS to disable all writing related ZIP archive API's. */
/*#define MINIZ_NO_ARCHIVE_WRITING_APIS */

/* Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. */
/*#define MINIZ_NO_ZLIB_APIS */

/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib names, to prevent conflicts against stock zlib. */
/*#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES */

/* Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. 
   Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc
   callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user
   functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work. */
/*#define MINIZ_NO_MALLOC */

#ifdef MINIZ_NO_INFLATE_APIS
#define MINIZ_NO_ARCHIVE_APIS
#endif

#ifdef MINIZ_NO_DEFLATE_APIS
#define MINIZ_NO_ARCHIVE_WRITING_APIS
#endif

#if defined(__TINYC__) && (defined(__linux) || defined(__linux__))
/* TODO: Work around "error: include file 'sys\utime.h' when compiling with tcc on Linux */
#define MINIZ_NO_TIME
#endif

#include <stddef.h>

#if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS)
#include <time.h>
#endif

#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
/* MINIZ_X86_OR_X64_CPU is only used to help set the below macros. */
#define MINIZ_X86_OR_X64_CPU 1
#else
#define MINIZ_X86_OR_X64_CPU 0
#endif

/* Set MINIZ_LITTLE_ENDIAN only if not set */
#if !defined(MINIZ_LITTLE_ENDIAN)
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)

#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
/* Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian. */
#define MINIZ_LITTLE_ENDIAN 1
#else
#define MINIZ_LITTLE_ENDIAN 0
#endif

#else

#if MINIZ_X86_OR_X64_CPU
#define MINIZ_LITTLE_ENDIAN 1
#else
#define MINIZ_LITTLE_ENDIAN 0
#endif

#endif
#endif

/* Using unaligned loads and stores causes errors when using UBSan */
#if defined(__has_feature)
#if __has_feature(undefined_behavior_sanitizer)
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0
#endif
#endif

/* Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES only if not set */
#if !defined(MINIZ_USE_UNALIGNED_LOADS_AND_STORES)
#if MINIZ_X86_OR_X64_CPU
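/* MINIZ_USE_UNALIGNED_LOADS_AND_STORES may be set to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses; the default chosen here stays 0 even on x86/x64, and MINIZ_UNALIGNED_USE_MEMCPY is defined instead. */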
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0
#define MINIZ_UNALIGNED_USE_MEMCPY
#else
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 0
#endif
#endif

#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
/* Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions). */
#define MINIZ_HAS_64BIT_REGISTERS 1
#else
#define MINIZ_HAS_64BIT_REGISTERS 0
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- zlib-style API Definitions. */

/* For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits! */
typedef unsigned long mz_ulong;

/* mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap. */
MINIZ_EXPORT void mz_free(void *p);

#define MZ_ADLER32_INIT (1)
/* mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL. */
MINIZ_EXPORT mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len);

#define MZ_CRC32_INIT (0)
/* mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. */
MINIZ_EXPORT mz_ulong mz_crc32(mz_ulong crc, const unsigned char *ptr, size_t buf_len);

/* Compression strategies. */
enum
{
    MZ_DEFAULT_STRATEGY = 0,
    MZ_FILTERED = 1,
    MZ_HUFFMAN_ONLY = 2,
    MZ_RLE = 3,
    MZ_FIXED = 4
};

/* Method */
#define MZ_DEFLATED 8

/* Heap allocation callbacks.
Note that mz_alloc_func parameter types purposely differ from zlib's: items/size is size_t, not unsigned long. */
typedef void *(*mz_alloc_func)(void *opaque, size_t items, size_t size);
typedef void (*mz_free_func)(void *opaque, void *address);
typedef void *(*mz_realloc_func)(void *opaque, void *address, size_t items, size_t size);

/* Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. */
enum
{
    MZ_NO_COMPRESSION = 0,
    MZ_BEST_SPEED = 1,
    MZ_BEST_COMPRESSION = 9,
    MZ_UBER_COMPRESSION = 10,
    MZ_DEFAULT_LEVEL = 6,
    MZ_DEFAULT_COMPRESSION = -1
};

#define MZ_VERSION "11.0.0"
#define MZ_VERNUM 0xB000
#define MZ_VER_MAJOR 11
#define MZ_VER_MINOR 0
#define MZ_VER_REVISION 0
#define MZ_VER_SUBREVISION 0

#ifndef MINIZ_NO_ZLIB_APIS

/* Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). */
enum
{
    MZ_NO_FLUSH = 0,
    MZ_PARTIAL_FLUSH = 1,
    MZ_SYNC_FLUSH = 2,
    MZ_FULL_FLUSH = 3,
    MZ_FINISH = 4,
    MZ_BLOCK = 5
};

/* Return status codes. MZ_PARAM_ERROR is non-standard. */
enum
{
    MZ_OK = 0,
    MZ_STREAM_END = 1,
    MZ_NEED_DICT = 2,
    MZ_ERRNO = -1,
    MZ_STREAM_ERROR = -2,
    MZ_DATA_ERROR = -3,
    MZ_MEM_ERROR = -4,
    MZ_BUF_ERROR = -5,
    MZ_VERSION_ERROR = -6,
    MZ_PARAM_ERROR = -10000
};

/* Window bits */
#define MZ_DEFAULT_WINDOW_BITS 15

struct mz_internal_state;

/* Compression/decompression stream struct.
   Handed (through the mz_streamp pointer typedef) to the mz_deflate*() and mz_inflate*() functions declared in this header.
   As stated in the notes on mz_deflate() and mz_inflate(), the caller must initialize/update next_in, avail_in, next_out and avail_out.
   Note that the avail_* counters are unsigned int while the running totals are mz_ulong. */
typedef struct mz_stream_s
{
    const unsigned char *next_in; /* pointer to next byte to read */
    unsigned int avail_in;        /* number of bytes available at next_in */
    mz_ulong total_in;            /* total number of bytes consumed so far */

    unsigned char *next_out; /* pointer to next byte to write */
    unsigned int avail_out;  /* number of bytes that can be written to next_out */
    mz_ulong total_out;      /* total number of bytes produced so far */

    char *msg;                       /* error msg (unused) */
    struct mz_internal_state *state; /* internal state, allocated by zalloc/zfree */

    mz_alloc_func zalloc; /* optional heap allocation function (defaults to malloc) */
    mz_free_func zfree;   /* optional heap free function (defaults to free) */
    void *opaque;         /* heap alloc function user pointer */

    int data_type;     /* data_type (unused) */
    mz_ulong adler;    /* adler32 of the source or uncompressed data */
    mz_ulong reserved; /* not used */
} mz_stream;

typedef mz_stream *mz_streamp;

/* Returns the version string of miniz.c. */
MINIZ_EXPORT const char *mz_version(void);

#ifndef MINIZ_NO_DEFLATE_APIS

/* mz_deflateInit() initializes a compressor with default options: */
/* Parameters: */
/*  pStream must point to an initialized mz_stream struct. */
/*  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION]. */
/*  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio. */
/*  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.) */
/* Return values: */
/*  MZ_OK on success. */
/*  MZ_STREAM_ERROR if the stream is bogus. */
/*  MZ_PARAM_ERROR if the input parameters are bogus. */
/*  MZ_MEM_ERROR on out of memory. */
MINIZ_EXPORT int mz_deflateInit(mz_streamp pStream, int level);

/* mz_deflateInit2() is like mz_deflateInit(), except with more control: */
/* Additional parameters: */
/*   method must be MZ_DEFLATED */
/*   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer) */
/*   mem_level must be between [1, 9] (it's checked but ignored by miniz.c) */
MINIZ_EXPORT int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);

/* Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2(). */
MINIZ_EXPORT int mz_deflateReset(mz_streamp pStream);

/* mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible. */
/* Parameters: */
/*   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/*   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH. */
/* Return values: */
/*   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full). */
/*   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore. */
/*   MZ_STREAM_ERROR if the stream is bogus. */
/*   MZ_PARAM_ERROR if one of the parameters is invalid. */
/*   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.) */
MINIZ_EXPORT int mz_deflate(mz_streamp pStream, int flush);

/* mz_deflateEnd() deinitializes a compressor: */
/* Return values: */
/*  MZ_OK on success. */
/*  MZ_STREAM_ERROR if the stream is bogus. */
MINIZ_EXPORT int mz_deflateEnd(mz_streamp pStream);

/* mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH. */
MINIZ_EXPORT mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);

/* Single-call compression functions mz_compress() and mz_compress2(): */
/* Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure. */
MINIZ_EXPORT int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
MINIZ_EXPORT int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level);

/* mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress(). */
MINIZ_EXPORT mz_ulong mz_compressBound(mz_ulong source_len);

#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/

#ifndef MINIZ_NO_INFLATE_APIS

/* Initializes a decompressor. */
MINIZ_EXPORT int mz_inflateInit(mz_streamp pStream);

/* mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer: */
/* window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate). */
MINIZ_EXPORT int mz_inflateInit2(mz_streamp pStream, int window_bits);

/* Quickly resets a decompressor without having to reallocate anything. Same as calling mz_inflateEnd() followed by mz_inflateInit()/mz_inflateInit2(). */
MINIZ_EXPORT int mz_inflateReset(mz_streamp pStream);

/* Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible. */
/* Parameters: */
/*   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members. */
/*   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH. */
/*   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster). */
/*   MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. */
/* Return values: */
/*   MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. */
/*   MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. */
/*   MZ_STREAM_ERROR if the stream is bogus. */
/*   MZ_DATA_ERROR if the deflate stream is invalid. */
/*   MZ_PARAM_ERROR if one of the parameters is invalid. */
/*   MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again */
/*   with more input data, or with more room in the output buffer (except when using single call decompression, described above). */
MINIZ_EXPORT int mz_inflate(mz_streamp pStream, int flush);

/* Deinitializes a decompressor. */
MINIZ_EXPORT int mz_inflateEnd(mz_streamp pStream);

/* Single-call decompression. */
/* Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. */
MINIZ_EXPORT int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len);
MINIZ_EXPORT int mz_uncompress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong *pSource_len);
#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/

/* Returns a string description of the specified error code, or NULL if the error code is invalid. */
MINIZ_EXPORT const char *mz_error(int err);

/* Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. */
/* Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. */
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
typedef unsigned char Byte;
typedef unsigned int uInt;
typedef mz_ulong uLong;
typedef Byte Bytef;
typedef uInt uIntf;
typedef char charf;
typedef int intf;
typedef void *voidpf;
typedef uLong uLongf;
typedef void *voidp;
typedef void *const voidpc;
#define Z_NULL 0
#define Z_NO_FLUSH MZ_NO_FLUSH
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
#define Z_FULL_FLUSH MZ_FULL_FLUSH
#define Z_FINISH MZ_FINISH
#define Z_BLOCK MZ_BLOCK
#define Z_OK MZ_OK
#define Z_STREAM_END MZ_STREAM_END
#define Z_NEED_DICT MZ_NEED_DICT
#define Z_ERRNO MZ_ERRNO
#define Z_STREAM_ERROR MZ_STREAM_ERROR
#define Z_DATA_ERROR MZ_DATA_ERROR
#define Z_MEM_ERROR MZ_MEM_ERROR
#define Z_BUF_ERROR MZ_BUF_ERROR
#define Z_VERSION_ERROR MZ_VERSION_ERROR
#define Z_PARAM_ERROR MZ_PARAM_ERROR
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
#define Z_BEST_SPEED MZ_BEST_SPEED
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
#define Z_FILTERED MZ_FILTERED
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
#define Z_RLE MZ_RLE
#define Z_FIXED MZ_FIXED
#define Z_DEFLATED MZ_DEFLATED
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
#define alloc_func mz_alloc_func
#define free_func mz_free_func
#define internal_state mz_internal_state
#define z_stream mz_stream

#ifndef MINIZ_NO_DEFLATE_APIS
#define deflateInit mz_deflateInit
#define deflateInit2 mz_deflateInit2
#define deflateReset mz_deflateReset
#define deflate mz_deflate
#define deflateEnd mz_deflateEnd
#define deflateBound mz_deflateBound
#define compress mz_compress
#define compress2 mz_compress2
#define compressBound mz_compressBound
#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/

#ifndef MINIZ_NO_INFLATE_APIS
#define inflateInit mz_inflateInit
#define inflateInit2 mz_inflateInit2
#define inflateReset mz_inflateReset
#define inflate mz_inflate
#define inflateEnd mz_inflateEnd
#define uncompress mz_uncompress
#define uncompress2 mz_uncompress2
#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/

#define crc32 mz_crc32
#define adler32 mz_adler32
#define MAX_WBITS 15
#define MAX_MEM_LEVEL 9
#define zError mz_error
#define ZLIB_VERSION MZ_VERSION
#define ZLIB_VERNUM MZ_VERNUM
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
#define ZLIB_VER_MINOR MZ_VER_MINOR
#define ZLIB_VER_REVISION MZ_VER_REVISION
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
#define zlibVersion mz_version
#define zlib_version mz_version()
#endif /* #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES */

#endif /* MINIZ_NO_ZLIB_APIS */

#ifdef __cplusplus
}
#endif





#pragma once
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>



/* ------------------- Types and macros */
typedef unsigned char mz_uint8;
typedef signed short mz_int16;
typedef unsigned short mz_uint16;
typedef unsigned int mz_uint32;
typedef unsigned int mz_uint;
typedef int64_t mz_int64;
typedef uint64_t mz_uint64;
typedef int mz_bool;

#define MZ_FALSE (0)
#define MZ_TRUE (1)

/* Works around MSVC's spammy "warning C4127: conditional expression is constant" message. */
#ifdef _MSC_VER
#define MZ_MACRO_END while (0, 0)
#else
#define MZ_MACRO_END while (0)
#endif

#ifdef MINIZ_NO_STDIO
#define MZ_FILE void *
#else
#include <stdio.h>
#define MZ_FILE FILE
#endif /* #ifdef MINIZ_NO_STDIO */

#ifdef MINIZ_NO_TIME
typedef struct mz_dummy_time_t_tag
{
    mz_uint32 m_dummy1;
    mz_uint32 m_dummy2;
} mz_dummy_time_t;
#define MZ_TIME_T mz_dummy_time_t
#else
#define MZ_TIME_T time_t
#endif

#define MZ_ASSERT(x) assert(x)

#ifdef MINIZ_NO_MALLOC
#define MZ_MALLOC(x) NULL
#define MZ_FREE(x) (void)x, ((void)0)
#define MZ_REALLOC(p, x) NULL
#else
#define MZ_MALLOC(x) malloc(x)
#define MZ_FREE(x) free(x)
#define MZ_REALLOC(p, x) realloc(p, x)
#endif

/* Generic helpers. All are function-like macros, so arguments with side effects */
/* may be evaluated more than once (MZ_MAX/MZ_MIN) - pass plain expressions. */
/* MZ_MAX/MZ_MIN: larger/smaller of two values. */
#define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define MZ_MIN(a, b) (((a) < (b)) ? (a) : (b))
/* MZ_CLEAR_OBJ: zero an object given by name (takes its address). */
#define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj))
/* MZ_CLEAR_ARR: zero a whole array; obj must be a real array, not a pointer. */
#define MZ_CLEAR_ARR(obj) memset((obj), 0, sizeof(obj))
/* MZ_CLEAR_PTR: zero the single object a pointer refers to. The argument is */
/* parenthesized inside sizeof so that pointer expressions such as p + 1 clear */
/* sizeof(*(p + 1)) bytes instead of sizeof(*p + 1). */
#define MZ_CLEAR_PTR(obj) memset((obj), 0, sizeof(*(obj)))

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
#define MZ_READ_LE16(p) *((const mz_uint16 *)(p))
#define MZ_READ_LE32(p) *((const mz_uint32 *)(p))
#else
#define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U))
#define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8 *)(p))[0]) | ((mz_uint32)(((const mz_uint8 *)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8 *)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8 *)(p))[3]) << 24U))
#endif

#define MZ_READ_LE64(p) (((mz_uint64)MZ_READ_LE32(p)) | (((mz_uint64)MZ_READ_LE32((const mz_uint8 *)(p) + sizeof(mz_uint32))) << 32U))

#ifdef _MSC_VER
#define MZ_FORCEINLINE __forceinline
#elif defined(__GNUC__)
#define MZ_FORCEINLINE __inline__ __attribute__((__always_inline__))
#else
#define MZ_FORCEINLINE inline
#endif

#ifdef __cplusplus
extern "C" {
#endif

/* Default allocator callbacks. Each takes a user-supplied `opaque` pointer first; the alloc and realloc variants take an item count and a per-item size. */
/* NOTE(review): the definitions are outside this section - presumably thin wrappers over MZ_MALLOC/MZ_FREE/MZ_REALLOC; confirm. */
extern MINIZ_EXPORT void *miniz_def_alloc_func(void *opaque, size_t items, size_t size);
extern MINIZ_EXPORT void miniz_def_free_func(void *opaque, void *address);
extern MINIZ_EXPORT void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size);

/* Largest values representable in 16 and 32 unsigned bits. */
#define MZ_UINT16_MAX (0xFFFFU)
#define MZ_UINT32_MAX (0xFFFFFFFFU)

#ifdef __cplusplus
}
#endif
 #pragma once


#ifndef MINIZ_NO_DEFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif
/* ------------------- Low-level Compression API Definitions */

/* Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). */
/* The value is tested with #if further down to choose the LZ code buffer size and hash width (TDEFL_LZ_CODE_BUF_SIZE, TDEFL_LZ_HASH_BITS). */
#define TDEFL_LESS_MEMORY 0

/* tdefl_init() takes its compression flags logically OR'd together; the low 12 bits of that word carry the max. number of probes per dictionary search. */
/* TDEFL_HUFFMAN_ONLY: 0 probes, i.e. Huffman coding only. */
/* TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). */
/* TDEFL_MAX_PROBES_MASK: selects the 12 probe-count bits. */
enum
{
    TDEFL_HUFFMAN_ONLY = 0,
    TDEFL_DEFAULT_MAX_PROBES = 1 << 7,
    TDEFL_MAX_PROBES_MASK = (1 << 12) - 1
};

/* Compressor option bits. The low 12 bits are reserved to control the max # of hash probes per dictionary lookup (see TDEFL_MAX_PROBES_MASK), so the options occupy bits 12 through 19. */
/* TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. */
/* TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). */
/* TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. */
/* TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). */
/* TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) */
/* TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. */
/* TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. */
/* TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. */
enum
{
    TDEFL_WRITE_ZLIB_HEADER = 1 << 12,
    TDEFL_COMPUTE_ADLER32 = 1 << 13,
    TDEFL_GREEDY_PARSING_FLAG = 1 << 14,
    TDEFL_NONDETERMINISTIC_PARSING_FLAG = 1 << 15,
    TDEFL_RLE_MATCHES = 1 << 16,
    TDEFL_FILTER_MATCHES = 1 << 17,
    TDEFL_FORCE_ALL_STATIC_BLOCKS = 1 << 18,
    TDEFL_FORCE_ALL_RAW_BLOCKS = 1 << 19
};

/* High level compression functions: */
/* tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). */
/* On entry: */
/*  pSrc_buf, src_buf_len: Pointer and size of source block to compress. */
/*  flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. */
/* On return: */
/*  Function returns a pointer to the compressed data, or NULL on failure. */
/*  *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. */
/*  The caller must free() the returned block when it's no longer needed. */
MINIZ_EXPORT void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

/* tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. */
/*  pOut_buf, out_buf_len: Caller-provided destination buffer and its capacity in bytes. */
/*  flags: Same meaning as for tdefl_compress_mem_to_heap(). */
/* Returns 0 on failure. */
/* NOTE(review): the non-zero return is presumably the number of compressed bytes written to pOut_buf - confirm against the implementation. */
MINIZ_EXPORT size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

/* Compresses an image to a compressed PNG file in memory. */
/* On entry: */
/*  pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. */
/*  The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. */
/*  level may range from [0,10], use MZ_NO_COMPRESSION, MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc. or a decent default is MZ_DEFAULT_LEVEL */
/*  If flip is true, the image will be flipped on the Y axis (useful for OpenGL apps). */
/* On return: */
/*  Function returns a pointer to the compressed data, or NULL on failure. */
/*  *pLen_out will be set to the size of the PNG image file. */
/*  The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. */
MINIZ_EXPORT void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip);
/* Variant without the level and flip parameters. */
/* NOTE(review): presumably forwards to the _ex function with a default level and no flip - confirm against the implementation. */
MINIZ_EXPORT void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out);

/* Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called with TDEFL_OUT_BUF_SIZE bytes at a time. */
/*  pBuf, len: The chunk of compressed data being handed over. pUser: The caller's context pointer, passed through untouched. */
/*  NOTE(review): a MZ_FALSE return presumably aborts compression (compare TDEFL_STATUS_PUT_BUF_FAILED) - confirm. */
typedef mz_bool (*tdefl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);

/* tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. */
/*  pBuf, buf_len: Source block. pPut_buf_func, pPut_buf_user: Output callback and its context pointer. flags: See the tdefl flag enums above. */
MINIZ_EXPORT mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

/* Fixed dimensions of the compressor: number of Huffman tables, symbol count of each table, LZ dictionary size with its wrap-around mask, and the shortest/longest match length. */
enum
{
    TDEFL_MAX_HUFF_TABLES = 3,
    TDEFL_MAX_HUFF_SYMBOLS_0 = 288,
    TDEFL_MAX_HUFF_SYMBOLS_1 = 32,
    TDEFL_MAX_HUFF_SYMBOLS_2 = 19,
    TDEFL_LZ_DICT_SIZE = 1 << 15,
    TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1,
    TDEFL_MIN_MATCH_LEN = 3,
    TDEFL_MAX_MATCH_LEN = 258
};

/* TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). */
/* Only the LZ code buffer size and the hash width depend on TDEFL_LESS_MEMORY; every other constant is either shared or derived from those two. */
enum
{
#if TDEFL_LESS_MEMORY
    TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
    TDEFL_LZ_HASH_BITS = 12,
#else
    TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
    TDEFL_LZ_HASH_BITS = 15,
#endif
    /* Output buffer is 1.3x the LZ code buffer (integer arithmetic). */
    TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
    TDEFL_MAX_HUFF_SYMBOLS = 288,
    TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
    TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
    TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};

/* The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions. */
/* Status codes of the low-level tdefl functions. The two error codes are negative; OKAY is 0 and DONE is 1. */
typedef enum {
    TDEFL_STATUS_BAD_PARAM = -2,
    TDEFL_STATUS_PUT_BUF_FAILED = -1,
    TDEFL_STATUS_OKAY = 0,
    TDEFL_STATUS_DONE = 1
} tdefl_status;

/* Flush modes accepted by tdefl_compress() and tdefl_compress_buffer(). */
/* Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums - which is why the value 1 is deliberately left unused here. */
typedef enum {
    TDEFL_NO_FLUSH = 0,
    TDEFL_SYNC_FLUSH = 2,
    TDEFL_FULL_FLUSH = 3,
    TDEFL_FINISH = 4
} tdefl_flush;

/* tdefl's compression state structure. */
/* All working buffers are embedded as fixed-size arrays, so the object is large (well over 100KB with the default sizes) and no member owns separately allocated memory. */
typedef struct
{
    /* Output callback and the user pointer that is handed to it. */
    tdefl_put_buf_func_ptr m_pPut_buf_func;
    void *m_pPut_buf_user;

    /* Configuration. */
    mz_uint m_flags;
    mz_uint m_max_probes[2];
    int m_greedy_parsing;

    /* Running checksum and dictionary/lookahead bookkeeping. */
    mz_uint m_adler32;
    mz_uint m_lookahead_pos;
    mz_uint m_lookahead_size;
    mz_uint m_dict_size;

    /* Cursors into the LZ code buffer and the output buffer. */
    mz_uint8 *m_pLZ_code_buf;
    mz_uint8 *m_pLZ_flags;
    mz_uint8 *m_pOutput_buf;
    mz_uint8 *m_pOutput_buf_end;

    mz_uint m_num_flags_left;
    mz_uint m_total_lz_bytes;
    mz_uint m_lz_code_buf_dict_pos;
    mz_uint m_bits_in;
    mz_uint m_bit_buffer;

    mz_uint m_saved_match_dist;
    mz_uint m_saved_match_len;
    mz_uint m_saved_lit;
    mz_uint m_output_flush_ofs;
    mz_uint m_output_flush_remaining;
    mz_uint m_finished;
    mz_uint m_block_index;
    mz_uint m_wants_to_finish;

    /* Arguments and result of the compress call being serviced. */
    tdefl_status m_prev_return_status;
    const void *m_pIn_buf;
    void *m_pOut_buf;
    size_t *m_pIn_buf_size;
    size_t *m_pOut_buf_size;
    tdefl_flush m_flush;
    const mz_uint8 *m_pSrc;
    size_t m_src_buf_left;
    size_t m_out_buf_ofs;

    /* Embedded working storage. */
    mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1];
    mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS];
    mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE];
    mz_uint16 m_next[TDEFL_LZ_DICT_SIZE];
    mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE];
    mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE];
} tdefl_compressor;

/* Initializes the compressor. */
/* There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. */
/* pPut_buf_func: If non-NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. */
/* If pPut_buf_func is NULL the user should always call the tdefl_compress() API. */
/* pPut_buf_user: Context pointer handed to pPut_buf_func on every call. */
/* flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) */
MINIZ_EXPORT tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

/* Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. */
/* pIn_buf_size and pOut_buf_size are passed by pointer. NOTE(review): presumably in/out - buffer sizes on entry, bytes consumed/produced on return; confirm against the implementation. */
MINIZ_EXPORT tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush);

/* tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. */
/* tdefl_compress_buffer() always consumes the entire input buffer. */
MINIZ_EXPORT tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush);

/* Accessors for compressor state. NOTE(review): presumably they return d->m_prev_return_status and d->m_adler32 respectively - confirm. */
MINIZ_EXPORT tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d);
MINIZ_EXPORT mz_uint32 tdefl_get_adler32(tdefl_compressor *d);

/* Create tdefl_compress() flags given zlib-style compression parameters. */
/* level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) */
/* window_bits may be -15 (raw deflate) or 15 (zlib) */
/* strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED */
/* The result is a flag word in the format described for tdefl_init(): probe count in the low 12 bits, TDEFL_* option bits above. */
MINIZ_EXPORT mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy);

#ifndef MINIZ_NO_MALLOC
/* Allocate the tdefl_compressor structure in C so that */
/* non-C language bindings to tdefl_ API don't need to worry about */
/* structure size and allocation mechanism. */
/* Only declared when MINIZ_NO_MALLOC is not defined. Release the object with tdefl_compressor_free(). */
MINIZ_EXPORT tdefl_compressor *tdefl_compressor_alloc(void);
MINIZ_EXPORT void tdefl_compressor_free(tdefl_compressor *pComp);
#endif

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/
 #pragma once

/* ------------------- Low-level Decompression API Definitions */

#ifndef MINIZ_NO_INFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif
/* Decompression flags used by tinfl_decompress(); each one is a separate bit, so they can be OR'd together. */
/* TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream. */
/* TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input. */
/* TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB). */
/* TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes. */
enum
{
    TINFL_FLAG_PARSE_ZLIB_HEADER = 1 << 0,
    TINFL_FLAG_HAS_MORE_INPUT = 1 << 1,
    TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 1 << 2,
    TINFL_FLAG_COMPUTE_ADLER32 = 1 << 3
};

/* High level decompression functions: */
/* tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc(). */
/* On entry: */
/*  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress. */
/*  flags: TINFL_FLAG_* bits (see the enum above). */
/* On return: */
/*  Function returns a pointer to the decompressed data, or NULL on failure. */
/*  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data. */
/*  The caller must call mz_free() on the returned block when it's no longer needed. */
MINIZ_EXPORT void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags);

/* tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory. */
/* Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success. */
/* The failure value is (size_t)(-1), i.e. the largest size_t, so a legitimate result of 0 bytes is distinguishable from an error. */
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
MINIZ_EXPORT size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags);

/* tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer. */
/* Returns 1 on success or 0 on failure. */
/* The callback receives the chunk (pBuf, len) and the caller's pPut_buf_user pointer. */
/* pIn_buf_size is passed by pointer. NOTE(review): presumably updated to the number of input bytes consumed - confirm. */
typedef int (*tinfl_put_buf_func_ptr)(const void *pBuf, int len, void *pUser);
MINIZ_EXPORT int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags);

/* Forward declaration so the prototypes below can name the type; the struct itself is defined further down, after the sizing constants it depends on. */
struct tinfl_decompressor_tag;
typedef struct tinfl_decompressor_tag tinfl_decompressor;

#ifndef MINIZ_NO_MALLOC
/* Allocate the tinfl_decompressor structure in C so that */
/* non-C language bindings to tinfl_ API don't need to worry about */
/* structure size and allocation mechanism. */
/* Only declared when MINIZ_NO_MALLOC is not defined. Release the object with tinfl_decompressor_free(). */
MINIZ_EXPORT tinfl_decompressor *tinfl_decompressor_alloc(void);
MINIZ_EXPORT void tinfl_decompressor_free(tinfl_decompressor *pDecomp);
#endif

/* Max size of LZ dictionary, in bytes (32KB). */
#define TINFL_LZ_DICT_SIZE 32768

/* Return status of tinfl_decompress(). Negative values are failures, TINFL_STATUS_DONE (0) is success, positive values mean the caller must supply input or drain output and call again. */
typedef enum {
    /* This flag indicates the inflator needs 1 or more input bytes to make forward progress, but the caller is indicating that no more are available. The compressed data */
    /* is probably corrupted. If you call the inflator again with more bytes it'll try to continue processing the input but this is a BAD sign (either the data is corrupted or you called it incorrectly). */
    /* If you call it again with no input you'll just get TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS again. */
    TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS = -4,

    /* This flag indicates that one or more of the input parameters was obviously bogus. (You can try calling it again, but if you get this error the calling code is wrong.) */
    TINFL_STATUS_BAD_PARAM = -3,

    /* This flag indicates the inflator is finished but the adler32 check of the uncompressed data didn't match. If you call it again it'll return TINFL_STATUS_DONE. */
    TINFL_STATUS_ADLER32_MISMATCH = -2,

    /* This flag indicates the inflator has somehow failed (bad code, corrupted input, etc.). If you call it again without resetting via tinfl_init() it'll just keep on returning the same status failure code. */
    TINFL_STATUS_FAILED = -1,

    /* Any status code less than TINFL_STATUS_DONE must indicate a failure. */

    /* This flag indicates the inflator has returned every byte of uncompressed data that it can, has consumed every byte that it needed, has successfully reached the end of the deflate stream, and */
    /* if zlib headers and adler32 checking are enabled that it has successfully checked the uncompressed data's adler32. If you call it again you'll just get TINFL_STATUS_DONE over and over again. */
    TINFL_STATUS_DONE = 0,

    /* This flag indicates the inflator MUST have more input data (even 1 byte) before it can make any more forward progress, or you need to clear the TINFL_FLAG_HAS_MORE_INPUT */
    /* flag on the next call if you don't have any more source data. If the source data was somehow corrupted it's also possible (but unlikely) for the inflator to keep on demanding input to */
    /* proceed, so be sure to properly set the TINFL_FLAG_HAS_MORE_INPUT flag. */
    TINFL_STATUS_NEEDS_MORE_INPUT = 1,

    /* This flag indicates the inflator definitely has 1 or more bytes of uncompressed data available, but it cannot write this data into the output buffer. */
    /* Note if the source compressed data was corrupted it's possible for the inflator to return a lot of uncompressed data to the caller. I've been assuming you know how much uncompressed data to expect */
    /* (either exact or worst case) and will stop calling the inflator and fail after receiving too much. In pure streaming scenarios where you have no idea how many bytes to expect this may not be possible */
    /* so I may need to add some code to address this. */
    TINFL_STATUS_HAS_MORE_OUTPUT = 2
} tinfl_status;

/* Initializes the decompressor to its initial state. */
/* Only m_state is written (set to 0); no other member of *r is touched by this macro. r is a pointer to a tinfl_decompressor. */
#define tinfl_init(r)     \
    do                    \
    {                     \
        (r)->m_state = 0; \
    }                     \
    MZ_MACRO_END
/* Reads the m_check_adler32 member of the decompressor pointed to by r. */
#define tinfl_get_adler32(r) (r)->m_check_adler32

/* Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. */
/* This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. */
/*  r: Decompressor state, reset with tinfl_init() before the first call. */
/*  pIn_buf_next, pIn_buf_size: Next input bytes and a pointer to their count. */
/*  pOut_buf_start, pOut_buf_next, pOut_buf_size: Start of the output buffer, the position to write next, and a pointer to the space available. */
/*  NOTE(review): both size pointers are presumably in/out (available on entry, consumed/produced on return) - confirm against the implementation. */
/*  decomp_flags: TINFL_FLAG_* bits. Returns a tinfl_status. */
MINIZ_EXPORT tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags);

/* Internal/private bits follow. */
/* Sizing constants for tinfl_decompressor_tag: number of Huffman tables, symbol count of each table, and the fast lookup table width (2^10 = 1024 entries per table). */
enum
{
    TINFL_MAX_HUFF_TABLES = 3,
    TINFL_MAX_HUFF_SYMBOLS_0 = 288,
    TINFL_MAX_HUFF_SYMBOLS_1 = 32,
    TINFL_MAX_HUFF_SYMBOLS_2 = 19,
    TINFL_FAST_LOOKUP_BITS = 10,
    TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS
};

/* Bit-buffer configuration for the inflator: a 64-bit accumulator when MINIZ_HAS_64BIT_REGISTERS is non-zero, a 32-bit one otherwise. */
/* TINFL_USE_64BIT_BITBUF records the choice, tinfl_bit_buf_t is the accumulator type and TINFL_BITBUF_SIZE its width in bits. */
#if MINIZ_HAS_64BIT_REGISTERS
#define TINFL_USE_64BIT_BITBUF 1
typedef mz_uint64 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (64)
#else
#define TINFL_USE_64BIT_BITBUF 0
typedef mz_uint32 tinfl_bit_buf_t;
#define TINFL_BITBUF_SIZE (32)
#endif

/* Inflator state. tinfl_init() only resets m_state; everything else is working storage of tinfl_decompress(). */
/* All tables are embedded fixed-size arrays, so the object holds no pointers to separately allocated memory. */
struct tinfl_decompressor_tag
{
    mz_uint32 m_state;
    mz_uint32 m_num_bits;
    mz_uint32 m_zhdr0;
    mz_uint32 m_zhdr1;
    mz_uint32 m_z_adler32;
    mz_uint32 m_final;
    mz_uint32 m_type;
    mz_uint32 m_check_adler32; /* read by tinfl_get_adler32() */
    mz_uint32 m_dist;
    mz_uint32 m_counter;
    mz_uint32 m_num_extra;
    mz_uint32 m_table_sizes[TINFL_MAX_HUFF_TABLES];

    tinfl_bit_buf_t m_bit_buf;
    size_t m_dist_from_out_buf_start;

    /* Per-table fast lookup, followed by the tree and code-size arrays of each of the three Huffman tables. */
    mz_int16 m_look_up[TINFL_MAX_HUFF_TABLES][TINFL_FAST_LOOKUP_SIZE];
    mz_int16 m_tree_0[TINFL_MAX_HUFF_SYMBOLS_0 * 2];
    mz_int16 m_tree_1[TINFL_MAX_HUFF_SYMBOLS_1 * 2];
    mz_int16 m_tree_2[TINFL_MAX_HUFF_SYMBOLS_2 * 2];
    mz_uint8 m_code_size_0[TINFL_MAX_HUFF_SYMBOLS_0];
    mz_uint8 m_code_size_1[TINFL_MAX_HUFF_SYMBOLS_1];
    mz_uint8 m_code_size_2[TINFL_MAX_HUFF_SYMBOLS_2];

    mz_uint8 m_raw_header[4];
    mz_uint8 m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137];
};

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/
 
#pragma once


/* ------------------- ZIP archive reading/writing */

#ifndef MINIZ_NO_ARCHIVE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* Size limits of the ZIP code: the I/O buffer (64KB) and the fixed filename/comment buffers in mz_zip_archive_file_stat (512 bytes each, terminator included). */
enum
{
    /* Note: These enums can be reduced as needed to save memory or stack space - they are pretty conservative. */
    MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
    MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 512,
    MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 512
};

/* Description of one archive entry, filled in by mz_zip_reader_file_stat(). */
typedef struct
{
    /* Central directory file index. */
    mz_uint32 m_file_index;

    /* Byte offset of this entry in the archive's central directory. Note we currently only support up to UINT_MAX or less bytes in the central dir. */
    mz_uint64 m_central_dir_ofs;

    /* These fields are copied directly from the zip's central dir. */
    mz_uint16 m_version_made_by;
    mz_uint16 m_version_needed;
    mz_uint16 m_bit_flag;
    mz_uint16 m_method;

    /* CRC-32 of uncompressed data. */
    mz_uint32 m_crc32;

    /* File's compressed size. */
    mz_uint64 m_comp_size;

    /* File's uncompressed size. Note, I've seen some old archives where directory entries had 512 bytes for their uncompressed sizes, but when you try to unpack them you actually get 0 bytes. */
    mz_uint64 m_uncomp_size;

    /* Zip internal and external file attributes. */
    mz_uint16 m_internal_attr;
    mz_uint32 m_external_attr;

    /* Entry's local header file offset in bytes. */
    mz_uint64 m_local_header_ofs;

    /* Size of comment in bytes. */
    mz_uint32 m_comment_size;

    /* MZ_TRUE if the entry appears to be a directory. */
    mz_bool m_is_directory;

    /* MZ_TRUE if the entry uses encryption/strong encryption (which miniz_zip doesn't support) */
    mz_bool m_is_encrypted;

    /* MZ_TRUE if the file is not encrypted, not a patch file, and if it uses a compression method we support. */
    mz_bool m_is_supported;

    /* Filename. If string ends in '/' it's a subdirectory entry. */
    /* Guaranteed to be zero terminated, may be truncated to fit. */
    char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];

    /* Comment field. */
    /* Guaranteed to be zero terminated, may be truncated to fit. */
    char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];

    /* Timestamp slot. With MINIZ_NO_TIME the member is kept, under the name m_padding, with the placeholder type that MZ_TIME_T maps to in that configuration. */
#ifdef MINIZ_NO_TIME
    MZ_TIME_T m_padding;
#else
    MZ_TIME_T m_time;
#endif
} mz_zip_archive_file_stat;

/* User I/O callbacks stored in mz_zip_archive (m_pRead, m_pWrite, m_pNeeds_keepalive). */
/* Read/write take the archive's m_pIO_opaque-style context pointer, an absolute file offset, a buffer and a byte count n, and return a size_t. */
/* NOTE(review): the return value is presumably the number of bytes actually transferred, with anything other than n treated as failure - confirm. */
typedef size_t (*mz_file_read_func)(void *pOpaque, mz_uint64 file_ofs, void *pBuf, size_t n);
typedef size_t (*mz_file_write_func)(void *pOpaque, mz_uint64 file_ofs, const void *pBuf, size_t n);
typedef mz_bool (*mz_file_needs_keepalive)(void *pOpaque);

/* Opaque internal state; only the tag is declared here, the definition is not part of this public section. */
struct mz_zip_internal_state_tag;
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;

/* What an mz_zip_archive is currently being used for. A zeroed struct is in MZ_ZIP_MODE_INVALID, since that is value 0. */
typedef enum {
    MZ_ZIP_MODE_INVALID = 0,
    MZ_ZIP_MODE_READING = 1,
    MZ_ZIP_MODE_WRITING = 2,
    MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
} mz_zip_mode;

/* Flag bits for the `flags` parameters of the mz_zip_* functions. Every value is a distinct single bit, starting at 0x0100, so flags can be OR'd together. */
typedef enum {
    MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
    MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
    MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
    MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800,
    MZ_ZIP_FLAG_VALIDATE_LOCATE_FILE_FLAG = 0x1000, /* if enabled, mz_zip_reader_locate_file() will be called on each file as it's validated to ensure the func finds the file in the central dir (intended for testing) */
    MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY = 0x2000,     /* validate the local headers, but don't decompress the entire file and check the crc32 */
    MZ_ZIP_FLAG_WRITE_ZIP64 = 0x4000,               /* always use the zip64 file format, instead of the original zip file format with automatic switch to zip64. Use as flags parameter with mz_zip_writer_init*_v2 */
    MZ_ZIP_FLAG_WRITE_ALLOW_READING = 0x8000,
    MZ_ZIP_FLAG_ASCII_FILENAME = 0x10000,
    /* After adding a compressed file, seek back
    to the local file header and set the correct sizes. */
    MZ_ZIP_FLAG_WRITE_HEADER_SET_SIZE = 0x20000
} mz_zip_flags;

/* Kind of backing store behind an mz_zip_archive. Values are consecutive from 0, so MZ_ZIP_TOTAL_TYPES equals the number of entries before it (including INVALID). */
typedef enum {
    MZ_ZIP_TYPE_INVALID = 0,
    MZ_ZIP_TYPE_USER,
    MZ_ZIP_TYPE_MEMORY,
    MZ_ZIP_TYPE_HEAP,
    MZ_ZIP_TYPE_FILE,
    MZ_ZIP_TYPE_CFILE,
    MZ_ZIP_TOTAL_TYPES
} mz_zip_type;

/* miniz error codes. Be sure to update mz_zip_get_error_string() if you add or modify this enum. */
/* Values are consecutive from MZ_ZIP_NO_ERROR = 0; MZ_ZIP_TOTAL_ERRORS is a count sentinel, not an error, and must stay last. */
typedef enum {
    MZ_ZIP_NO_ERROR = 0,
    MZ_ZIP_UNDEFINED_ERROR,
    MZ_ZIP_TOO_MANY_FILES,
    MZ_ZIP_FILE_TOO_LARGE,
    MZ_ZIP_UNSUPPORTED_METHOD,
    MZ_ZIP_UNSUPPORTED_ENCRYPTION,
    MZ_ZIP_UNSUPPORTED_FEATURE,
    MZ_ZIP_FAILED_FINDING_CENTRAL_DIR,
    MZ_ZIP_NOT_AN_ARCHIVE,
    MZ_ZIP_INVALID_HEADER_OR_CORRUPTED,
    MZ_ZIP_UNSUPPORTED_MULTIDISK,
    MZ_ZIP_DECOMPRESSION_FAILED,
    MZ_ZIP_COMPRESSION_FAILED,
    MZ_ZIP_UNEXPECTED_DECOMPRESSED_SIZE,
    MZ_ZIP_CRC_CHECK_FAILED,
    MZ_ZIP_UNSUPPORTED_CDIR_SIZE,
    MZ_ZIP_ALLOC_FAILED,
    MZ_ZIP_FILE_OPEN_FAILED,
    MZ_ZIP_FILE_CREATE_FAILED,
    MZ_ZIP_FILE_WRITE_FAILED,
    MZ_ZIP_FILE_READ_FAILED,
    MZ_ZIP_FILE_CLOSE_FAILED,
    MZ_ZIP_FILE_SEEK_FAILED,
    MZ_ZIP_FILE_STAT_FAILED,
    MZ_ZIP_INVALID_PARAMETER,
    MZ_ZIP_INVALID_FILENAME,
    MZ_ZIP_BUF_TOO_SMALL,
    MZ_ZIP_INTERNAL_ERROR,
    MZ_ZIP_FILE_NOT_FOUND,
    MZ_ZIP_ARCHIVE_TOO_LARGE,
    MZ_ZIP_VALIDATION_FAILED,
    MZ_ZIP_WRITE_CALLBACK_FAILED,
    MZ_ZIP_TOTAL_ERRORS
} mz_zip_error;

/* Handle for one ZIP archive, shared by the reading and writing APIs. Clear it with mz_zip_zero_struct() before passing it to any mz_zip function. */
typedef struct
{
    mz_uint64 m_archive_size;
    mz_uint64 m_central_directory_file_ofs;

    /* We only support up to UINT32_MAX files in zip64 mode. */
    mz_uint32 m_total_files;
    mz_zip_mode m_zip_mode;
    mz_zip_type m_zip_type;
    /* Read and modified through mz_zip_set_last_error() / mz_zip_get_last_error() and friends; not thread safe. */
    mz_zip_error m_last_error;

    mz_uint64 m_file_offset_alignment;

    /* Allocator callbacks plus the opaque pointer handed to them. NOTE(review): presumably defaulted to the miniz_def_* functions when left NULL - confirm. */
    mz_alloc_func m_pAlloc;
    mz_free_func m_pFree;
    mz_realloc_func m_pRealloc;
    void *m_pAlloc_opaque;

    /* I/O callbacks plus the opaque pointer handed to them. */
    mz_file_read_func m_pRead;
    mz_file_write_func m_pWrite;
    mz_file_needs_keepalive m_pNeeds_keepalive;
    void *m_pIO_opaque;

    /* Private implementation state (opaque type). */
    mz_zip_internal_state *m_pState;

} mz_zip_archive;

/* State of one iterative extraction, created by mz_zip_reader_extract_iter_new() / mz_zip_reader_extract_file_iter_new() and released by mz_zip_reader_extract_iter_free(). */
/* The inflator is embedded by value, so each iterator carries a complete tinfl_decompressor. */
typedef struct
{
    mz_zip_archive *pZip;
    mz_uint flags;

    int status;

    mz_uint64 read_buf_size, read_buf_ofs, read_buf_avail, comp_remaining, out_buf_ofs, cur_file_ofs;
    mz_zip_archive_file_stat file_stat;
    void *pRead_buf;
    void *pWrite_buf;

    size_t out_blk_remain;

    tinfl_decompressor inflator;

    /* The slot is kept in both configurations (as `padding` when CRC checks are disabled), so the struct has the same members count either way. */
#ifdef MINIZ_DISABLE_ZIP_READER_CRC32_CHECKS
    mz_uint padding;
#else
    mz_uint file_crc32;
#endif

} mz_zip_reader_extract_iter_state;

/* -------- ZIP reading */

/* Inits a ZIP archive reader. */
/* These functions read and validate the archive's central directory. */
/* mz_zip_reader_init() takes only the archive size. NOTE(review): presumably the data is fetched through the m_pRead callback the caller installed in *pZip beforehand - confirm. */
MINIZ_EXPORT mz_bool mz_zip_reader_init(mz_zip_archive *pZip, mz_uint64 size, mz_uint flags);

/* Same, for an archive held entirely in memory: pMem points at the archive and size is its length in bytes. */
MINIZ_EXPORT mz_bool mz_zip_reader_init_mem(mz_zip_archive *pZip, const void *pMem, size_t size, mz_uint flags);

#ifndef MINIZ_NO_STDIO
/* Read an archive from a disk file. */
/* The following two parameters exist on the _v2 variant only: */
/*  file_start_ofs is the file offset where the archive actually begins, or 0. */
/*  archive_size is the true total size of the archive, which may be smaller than the file's actual size on disk. If zero the entire file is treated as the archive. */
MINIZ_EXPORT mz_bool mz_zip_reader_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint32 flags);
MINIZ_EXPORT mz_bool mz_zip_reader_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags, mz_uint64 file_start_ofs, mz_uint64 archive_size);

/* Read an archive from an already opened FILE, beginning at the current file position. */
/* The archive is assumed to be archive_size bytes long. If archive_size is 0, then the entire rest of the file is assumed to contain the archive. */
/* The FILE will NOT be closed when mz_zip_reader_end() is called. */
MINIZ_EXPORT mz_bool mz_zip_reader_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint64 archive_size, mz_uint flags);
#endif

/* Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used. */
/* A FILE supplied through mz_zip_reader_init_cfile() is left open (see that function's comment). */
MINIZ_EXPORT mz_bool mz_zip_reader_end(mz_zip_archive *pZip);

/* -------- ZIP reading or writing */

/* Clears a mz_zip_archive struct to all zeros. */
/* Important: This must be done before passing the struct to any mz_zip functions. */
MINIZ_EXPORT void mz_zip_zero_struct(mz_zip_archive *pZip);

/* Report the archive's current mode and backing-store type (see the mz_zip_mode and mz_zip_type enums). */
MINIZ_EXPORT mz_zip_mode mz_zip_get_mode(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_type mz_zip_get_type(mz_zip_archive *pZip);

/* Returns the total number of files in the archive. */
MINIZ_EXPORT mz_uint mz_zip_reader_get_num_files(mz_zip_archive *pZip);

/* Archive size, the offset at which the archive starts inside its file, and the underlying stream. */
/* NOTE(review): mz_zip_get_cfile() presumably returns NULL unless the archive is backed by a FILE - confirm. Its prototype uses MZ_FILE, which is `void *` when MINIZ_NO_STDIO is defined. */
MINIZ_EXPORT mz_uint64 mz_zip_get_archive_size(mz_zip_archive *pZip);
MINIZ_EXPORT mz_uint64 mz_zip_get_archive_file_start_offset(mz_zip_archive *pZip);
MINIZ_EXPORT MZ_FILE *mz_zip_get_cfile(mz_zip_archive *pZip);

/* Reads n bytes of raw archive data, starting at file offset file_ofs, to pBuf. */
/* NOTE(review): the size_t result is presumably the number of bytes actually read - confirm. */
MINIZ_EXPORT size_t mz_zip_read_archive_data(mz_zip_archive *pZip, mz_uint64 file_ofs, void *pBuf, size_t n);

/* All mz_zip funcs set the m_last_error field in the mz_zip_archive struct. These functions retrieve/manipulate this field. */
/* Note that the m_last_error functionality is not thread safe. */
/* NOTE(review): by their names, peek reads the field without changing it while get and clear also reset it; set presumably returns the previous value - confirm against the implementation. */
MINIZ_EXPORT mz_zip_error mz_zip_set_last_error(mz_zip_archive *pZip, mz_zip_error err_num);
MINIZ_EXPORT mz_zip_error mz_zip_peek_last_error(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_error mz_zip_clear_last_error(mz_zip_archive *pZip);
MINIZ_EXPORT mz_zip_error mz_zip_get_last_error(mz_zip_archive *pZip);
/* Maps an mz_zip_error code to a human-readable string. */
MINIZ_EXPORT const char *mz_zip_get_error_string(mz_zip_error mz_err);

/* Per-entry predicates; file_index is the entry's central directory index. */

/* MZ_TRUE if the archive file entry is a directory entry. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive *pZip, mz_uint file_index);

/* MZ_TRUE if the file is encrypted/strong encrypted. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive *pZip, mz_uint file_index);

/* MZ_TRUE if the compression method is supported, and the file is not encrypted, and the file is not a compressed patch file. */
MINIZ_EXPORT mz_bool mz_zip_reader_is_file_supported(mz_zip_archive *pZip, mz_uint file_index);

/* Retrieves the filename of an archive file entry. */
/* Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename. */
/* Typical use is therefore two calls: one with filename_buf_size == 0 to learn the size, one with a buffer of that size. */
MINIZ_EXPORT mz_uint mz_zip_reader_get_filename(mz_zip_archive *pZip, mz_uint file_index, char *pFilename, mz_uint filename_buf_size);

/* Attempts to locate a file in the archive's central directory. */
/* Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH */
/* mz_zip_reader_locate_file() returns the file index, or -1 if the file cannot be found. */
/* The _v2 variant returns an mz_bool and stores the index through file_index, so indices that do not fit in an int remain representable. */
MINIZ_EXPORT int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_locate_file_v2(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags, mz_uint32 *file_index);

/* Returns detailed information about an archive file entry. */
/* The information is written to *pStat (see mz_zip_archive_file_stat); the mz_bool result reports success. */
MINIZ_EXPORT mz_bool mz_zip_reader_file_stat(mz_zip_archive *pZip, mz_uint file_index, mz_zip_archive_file_stat *pStat);

/* MZ_TRUE if the archive is in zip64 format. */
/* An archive is considered zip64 if it contained a zip64 end of central directory marker, or if it contained any zip64 extended file information fields in the central directory. */
MINIZ_EXPORT mz_bool mz_zip_is_zip64(mz_zip_archive *pZip);

/* Returns the total central directory size in bytes. */
/* The current max supported size is <= MZ_UINT32_MAX. */
MINIZ_EXPORT size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip);

/* Each extractor below comes in two forms: one addressing the entry by central directory index (file_index), one by name (pFilename). */

/* Extracts an archive file to a memory buffer using no memory allocation. */
/* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */
/* pUser_read_buf / user_read_buf_size: caller-supplied scratch buffer for reading. NOTE(review): presumably may be NULL/0 for memory-backed archives - confirm. */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size);

/* Extracts an archive file to a memory buffer. */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive *pZip, const char *pFilename, void *pBuf, size_t buf_size, mz_uint flags);

/* Extracts an archive file to a dynamically allocated heap buffer. */
/* The memory will be allocated via the mz_zip_archive's alloc/realloc functions. */
/* Returns NULL and sets the last error on failure. */
/* On success the size of the returned block is stored through pSize. */
MINIZ_EXPORT void *mz_zip_reader_extract_to_heap(mz_zip_archive *pZip, mz_uint file_index, size_t *pSize, mz_uint flags);
MINIZ_EXPORT void *mz_zip_reader_extract_file_to_heap(mz_zip_archive *pZip, const char *pFilename, size_t *pSize, mz_uint flags);

/* Extracts an archive file using a callback function to output the file's data. */
/* pCallback has the mz_file_write_func signature and is handed pOpaque as its first argument. */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive *pZip, mz_uint file_index, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive *pZip, const char *pFilename, mz_file_write_func pCallback, void *pOpaque, mz_uint flags);

/* Extract a file iteratively: */
/*  1. create an iterator with one of the two _new functions (by index or by filename), */
/*  2. call mz_zip_reader_extract_iter_read() repeatedly, each call filling up to buf_size bytes of pvBuf, */
/*  3. release the iterator with mz_zip_reader_extract_iter_free(). */
/* NOTE(review): _new presumably returns NULL on failure and _read the number of bytes produced - confirm against the implementation. */
MINIZ_EXPORT mz_zip_reader_extract_iter_state* mz_zip_reader_extract_iter_new(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags);
MINIZ_EXPORT mz_zip_reader_extract_iter_state* mz_zip_reader_extract_file_iter_new(mz_zip_archive *pZip, const char *pFilename, mz_uint flags);
MINIZ_EXPORT size_t mz_zip_reader_extract_iter_read(mz_zip_reader_extract_iter_state* pState, void* pvBuf, size_t buf_size);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_iter_free(mz_zip_reader_extract_iter_state* pState);

#ifndef MINIZ_NO_STDIO
/* Extracts an archive file to a disk file and sets its last accessed and modified times. */
/* This function only extracts files, not archive directory records. */
/* The source entry is chosen by file_index, or by pArchive_filename in the _file_ variant; pDst_filename is the output path. */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_file(mz_zip_archive *pZip, mz_uint file_index, const char *pDst_filename, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive *pZip, const char *pArchive_filename, const char *pDst_filename, mz_uint flags);

/* Extracts an archive file starting at the current position in the destination FILE stream. */
MINIZ_EXPORT mz_bool mz_zip_reader_extract_to_cfile(mz_zip_archive *pZip, mz_uint file_index, MZ_FILE *File, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_reader_extract_file_to_cfile(mz_zip_archive *pZip, const char *pArchive_filename, MZ_FILE *pFile, mz_uint flags);
#endif

#if 0
/* TODO: sketch of a seekable streaming-extraction API. */
/* Everything in this block is compiled out by the surrounding #if 0; none of these functions is declared to callers. */
	typedef void *mz_zip_streaming_extract_state_ptr;
	mz_zip_streaming_extract_state_ptr mz_zip_streaming_extract_begin(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags);
	mz_uint64 mz_zip_streaming_extract_get_size(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState);
	mz_uint64 mz_zip_streaming_extract_get_cur_ofs(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState);
	mz_bool mz_zip_streaming_extract_seek(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, mz_uint64 new_ofs);
	size_t mz_zip_streaming_extract_read(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState, void *pBuf, size_t buf_size);
	mz_bool mz_zip_streaming_extract_end(mz_zip_archive *pZip, mz_zip_streaming_extract_state_ptr pState);
#endif

/* Validates a single archive entry, selected by file_index. */
/* This function compares the archive's local headers, the optional local zip64 extended information block, and the optional descriptor following the compressed data vs. the data in the central directory. */
/* It also validates that each file can be successfully uncompressed unless the MZ_ZIP_FLAG_VALIDATE_HEADERS_ONLY is specified. */
MINIZ_EXPORT mz_bool mz_zip_validate_file(mz_zip_archive *pZip, mz_uint file_index, mz_uint flags);

/* Validates an entire archive by calling mz_zip_validate_file() on each file. */
MINIZ_EXPORT mz_bool mz_zip_validate_archive(mz_zip_archive *pZip, mz_uint flags);

/* Misc utils/helpers, valid for ZIP reading or writing */
/* These take the archive as a memory block (pMem, size) or a file name rather than an open mz_zip_archive. */
/* NOTE(review): pErr presumably receives the error code on failure -- confirm whether it may be NULL. */
MINIZ_EXPORT mz_bool mz_zip_validate_mem_archive(const void *pMem, size_t size, mz_uint flags, mz_zip_error *pErr);
#ifndef MINIZ_NO_STDIO
MINIZ_EXPORT mz_bool mz_zip_validate_file_archive(const char *pFilename, mz_uint flags, mz_zip_error *pErr);
#endif

/* Universal end function - calls either mz_zip_reader_end() or mz_zip_writer_end(). */
MINIZ_EXPORT mz_bool mz_zip_end(mz_zip_archive *pZip);

/* -------- ZIP writing */

#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS

/* Inits a ZIP archive writer. */
/* Set pZip->m_pWrite (and pZip->m_pIO_opaque) before calling mz_zip_writer_init or mz_zip_writer_init_v2. */
/* The output is streamable, i.e. file_ofs in mz_file_write_func always increases only by n. */
MINIZ_EXPORT mz_bool mz_zip_writer_init(mz_zip_archive *pZip, mz_uint64 existing_size);
MINIZ_EXPORT mz_bool mz_zip_writer_init_v2(mz_zip_archive *pZip, mz_uint64 existing_size, mz_uint flags);

/* Heap-backed writer variants; the _v2 form additionally takes flags. */
MINIZ_EXPORT mz_bool mz_zip_writer_init_heap(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);
MINIZ_EXPORT mz_bool mz_zip_writer_init_heap_v2(mz_zip_archive *pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size, mz_uint flags);

#ifndef MINIZ_NO_STDIO
/* File-backed writer variants: by file name (pFilename) or on an already open MZ_FILE stream (pFile). */
MINIZ_EXPORT mz_bool mz_zip_writer_init_file(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning);
MINIZ_EXPORT mz_bool mz_zip_writer_init_file_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint64 size_to_reserve_at_beginning, mz_uint flags);
MINIZ_EXPORT mz_bool mz_zip_writer_init_cfile(mz_zip_archive *pZip, MZ_FILE *pFile, mz_uint flags);
#endif

/* Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive. */
/* For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called. */
/* For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it). */
/* Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL. */
/* Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before */
/* the archive is finalized the file's central directory will be hosed. */
/* The _v2 variant additionally takes flags. */
MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader(mz_zip_archive *pZip, const char *pFilename);
MINIZ_EXPORT mz_bool mz_zip_writer_init_from_reader_v2(mz_zip_archive *pZip, const char *pFilename, mz_uint flags);

/* Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive. */
/* To add a directory entry, call this method with an archive name ending in a forward slash and an empty buffer. */
/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_mem(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, mz_uint level_and_flags);

/* Like mz_zip_writer_add_mem(), except you can specify a file comment field, and optionally supply the function with already compressed data. */
/* uncomp_size/uncomp_crc32 are only used if the MZ_ZIP_FLAG_COMPRESSED_DATA flag is specified. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags,
                                              mz_uint64 uncomp_size, mz_uint32 uncomp_crc32);

/* Extended form of mz_zip_writer_add_mem_ex() that also takes a last-modified time and user extra-data blobs for the local and central headers. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_mem_ex_v2(mz_zip_archive *pZip, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags,
                                                 mz_uint64 uncomp_size, mz_uint32 uncomp_crc32, MZ_TIME_T *last_modified, const char *user_extra_data_local, mz_uint user_extra_data_local_len,
                                                 const char *user_extra_data_central, mz_uint user_extra_data_central_len);

/* Adds the contents of a file to an archive. This function also records the disk file's modified time into the archive. */
/* File data is supplied via a read callback function. Use mz_zip_writer_add_(c)file to add a file directly. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_read_buf_callback(mz_zip_archive *pZip, const char *pArchive_name, mz_file_read_func read_callback, void* callback_opaque, mz_uint64 max_size,
	const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len,
	const char *user_extra_data_central, mz_uint user_extra_data_central_len);


#ifndef MINIZ_NO_STDIO
/* Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. */
/* pArchive_name is the name stored in the archive; pSrc_filename is the disk file that is read. */
/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_file(mz_zip_archive *pZip, const char *pArchive_name, const char *pSrc_filename, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);

/* Like mz_zip_writer_add_file(), except the file data is read from the specified FILE stream. */
/* The parameter list otherwise parallels mz_zip_writer_add_read_buf_callback(). */
MINIZ_EXPORT mz_bool mz_zip_writer_add_cfile(mz_zip_archive *pZip, const char *pArchive_name, MZ_FILE *pSrc_file, mz_uint64 max_size,
                                const MZ_TIME_T *pFile_time, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, const char *user_extra_data_local, mz_uint user_extra_data_local_len,
                                const char *user_extra_data_central, mz_uint user_extra_data_central_len);
#endif

/* Adds a file to an archive by fully cloning the data from another archive. */
/* This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data (it may add or modify the zip64 local header extra data field), and the optional descriptor following the compressed data. */
/* pZip is the destination writer; pSource_zip / src_file_index identify the entry to clone. */
MINIZ_EXPORT mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive *pZip, mz_zip_archive *pSource_zip, mz_uint src_file_index);

/* Finalizes the archive by writing the central directory records followed by the end of central directory record. */
/* After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). */
/* An archive must be manually finalized by calling this function for it to be valid. */
MINIZ_EXPORT mz_bool mz_zip_writer_finalize_archive(mz_zip_archive *pZip);

/* Finalizes a heap archive, returning a pointer to the heap block and its size. */
/* The heap block will be allocated using the mz_zip_archive's alloc/realloc callbacks. */
/* The block and its size are returned through the ppBuf and pSize out-parameters. */
MINIZ_EXPORT mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive *pZip, void **ppBuf, size_t *pSize);

/* Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used. */
/* Note for the archive to be valid, it *must* have been finalized before ending (this function will not do it for you). */
MINIZ_EXPORT mz_bool mz_zip_writer_end(mz_zip_archive *pZip);

/* -------- Misc. high-level helper functions: */

/* mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive. */
/* Note this is NOT a fully safe operation. If it crashes or dies in some way your archive can be left in a screwed up state (without a central directory). */
/* level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. */
/* TODO: Perhaps add an option to leave the existing central dir in place in case the add dies? We could then truncate the file (so the old central dir would be at the end) if something goes wrong. */
/* The _v2 variant additionally takes an mz_zip_error *pErr out-parameter. */
MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);
MINIZ_EXPORT mz_bool mz_zip_add_mem_to_archive_file_in_place_v2(const char *pZip_filename, const char *pArchive_name, const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_zip_error *pErr);

#ifndef MINIZ_NO_STDIO
/* Reads a single file from an archive into a heap block. */
/* If pComment is not NULL, only the file with the specified comment will be extracted (only the _v2 variant has a pComment parameter). */
/* Returns NULL on failure. */
MINIZ_EXPORT void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name, size_t *pSize, mz_uint flags);
MINIZ_EXPORT void *mz_zip_extract_archive_file_to_heap_v2(const char *pZip_filename, const char *pArchive_name, const char *pComment, size_t *pSize, mz_uint flags, mz_zip_error *pErr);
#endif

#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS */

#ifdef __cplusplus
}
#endif

#endif /* MINIZ_NO_ARCHIVE_APIS */
#ifndef __CTX_CLIENTS_H
#define __CTX_CLIENTS_H



/* Per-client record: title, geometry, flags, revision counters and the
 * start routine / finalizer associated with one CtxClient.
 *
 * Members whose presence depends on configuration (CTX_THREADS,
 * CTX_VT_DRAWLIST) are grouped at the end where possible so that the
 * offsets of the common members stay the same across configurations.
 */
struct _CtxClient {
  VT    *vt;        // or NULL when thread

  long       rev;   // compare with drawn_rev below; NOTE(review): presumably a content revision counter - confirm

  CtxList *events;  // we could use this queue also for vt

  Ctx     *ctx;
  char    *title;
  // current geometry
  int      x;
  int      y;
  int      width;
  int      height;
  float    opacity;
  CtxClientFlags flags;
  // the individual state flags below are compiled out; `flags` above is the
  // live member
#if 0
  int      shaded;
  int      iconified;
  int      maximized;
  int      resizable;
#endif
  // geometry fields with an unmaximized_ prefix; presumably what is restored
  // when leaving the maximized state - confirm against the implementation
  int      unmaximized_x;
  int      unmaximized_y;
  int      unmaximized_width;
  int      unmaximized_height;
  int      do_quit;
  long     drawn_rev;
  int      id;
  int      internal; // render a settings window rather than a vt

#if CTX_THREADS
  thrd_t tid;     // and only split code path in processing?
                    // -- why?
#endif
  // entry point receiving a Ctx and user_data
  void (*start_routine)(Ctx *ctx, void *user_data);
  void    *user_data;
  CtxClientFinalize finalize;
  Ctx     *sub_ctx;
  CtxList *ctx_events;


  /* we want to keep variation at the end */
#if CTX_THREADS
  mtx_t    mtx;
#endif
#if CTX_VT_DRAWLIST
  Ctx     *recording;
#endif
};


/* Lock / unlock a client.  NOTE(review): presumably these operate on
 * client->mtx, which only exists when CTX_THREADS is enabled - confirm
 * against the implementation.
 */
void ctx_client_lock (CtxClient *client);
void ctx_client_unlock (CtxClient *client);
/* Registers focus_cb together with user_data on ctx; the callback is invoked
 * with the Ctx, an integer id and the registered user_data.
 */
void ctx_set_focus_cb (Ctx *ctx, void(*focus_cb)(Ctx *ctx, int id, void *user_data), void *user_data);

#endif

#if CTX_IMPLEMENTATION|CTX_COMPOSITE

#ifndef __CTX_INTERNAL_H
#define __CTX_INTERNAL_H

#if !__COSMOPOLITAN__
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <math.h>
#endif


/* Branch-prediction annotations.
 *
 * Without CTX_BRANCH_HINTS both macros expand to their (parenthesised)
 * argument unchanged; with it they wrap the argument's truth value in
 * __builtin_expect, marking the condition as expected-true (CTX_LIKELY) or
 * expected-false (CTX_UNLIKELY).
 */
#if !CTX_BRANCH_HINTS
#define CTX_LIKELY(x)      (x)
#define CTX_UNLIKELY(x)    (x)
#else
#define CTX_LIKELY(x)      __builtin_expect(!!(x), 1)
#define CTX_UNLIKELY(x)    __builtin_expect(!!(x), 0)
#endif


/* NOTE(review): presumably the highest vertical anti-aliasing level (the
 * rasterizer's scan_aa annotation lists 3, 5 and 15) - confirm.
 */
#define CTX_FULL_AA 15

/* Forward declarations for types defined further down in the internal
 * header. */
typedef struct _CtxRasterizer CtxRasterizer;
typedef struct _CtxGState     CtxGState;
//typedef struct _CtxState      CtxState;

typedef struct _CtxSource CtxSource;

/* Codes 0..3 for rasterizer edge records.  NOTE(review): the exact meaning
 * of each code is defined by the rasterizer implementation, which is not
 * part of this section - confirm there before relying on the names.
 */
typedef enum
{
  CTX_EDGE             = 0, 
  CTX_EDGE_FLIPPED     = 1,
  CTX_NEW_EDGE         = 2,
  CTX_CLOSE_EDGE       = 3
} CtxRasterizerCode;


/* Bit flags naming the color representations a CtxColor can hold; they
 * presumably populate CtxColor.valid and CtxColor.original, both of which
 * are described as bitmasks.  Some flags only exist when the corresponding
 * feature (CTX_ENABLE_CM, CTX_ENABLE_CMYK) is compiled in.
 */
#define CTX_VALID_RGBA_U8     (1<<0)
#define CTX_VALID_RGBA_DEVICE (1<<1)
#if CTX_ENABLE_CM
#define CTX_VALID_RGBA        (1<<2)
#endif
#if CTX_ENABLE_CMYK
#define CTX_VALID_CMYKA       (1<<3)
#define CTX_VALID_DCMYKA      (1<<4)
#endif
#define CTX_VALID_GRAYA       (1<<5)
#define CTX_VALID_GRAYA_U8    (1<<6)
/* LABA carries its own bit (1<<7) and also sets the GRAYA bit. */
#define CTX_VALID_LABA        ((1<<7) | CTX_VALID_GRAYA)

/* A color kept in several representations side by side.  `original` records
 * which representation the color was set in, and `valid` which members
 * currently hold valid values; further representations are filled in as
 * they are requested.  Which members exist depends on CTX_ENABLE_LAB,
 * CTX_ENABLE_CMYK, CTX_ENABLE_CM and CTX_BABL.
 */
struct _CtxColor
{
  uint8_t magic; // for colors used in keydb, set to a non valid start of
                 // string value.
  uint8_t rgba[4];
  uint8_t l_u8;
  uint8_t original; // the bitmask of the originally set color
  uint8_t valid;    // bitmask of which members contain valid
  // values, gets denser populated as more
  // formats are requested from a set color.
  float   device_red;
  float   device_green;
  float   device_blue;
  float   alpha;
  float   l;        // luminance and gray
#if CTX_ENABLE_LAB  // NYI (not yet implemented)
  float   a;
  float   b;
#endif
#if CTX_ENABLE_CMYK
  // device_* and plain CMYK components are stored separately
  float   device_cyan;
  float   device_magenta;
  float   device_yellow;
  float   device_key;
  float   cyan;
  float   magenta;
  float   yellow;
  float   key;
#endif

#if CTX_ENABLE_CM
  // non-device RGB components, only present with color management
  float   red;
  float   green;
  float   blue;
#if CTX_BABL
  const Babl *space; // gets copied from state when color is declared
#else
  void   *space; // gets copied from state when color is declared
#endif
#endif
};

typedef struct _CtxGradientStop CtxGradientStop;

/* One gradient stop: a color together with its position `pos`.
 * NOTE(review): the range of pos is presumably 0..1 - confirm.
 */
struct _CtxGradientStop
{
  CtxColor color;
  float   pos;
};


/* Kinds of paint source.  CTX_SOURCE_COLOR is implicitly 0 and
 * CTX_SOURCE_NONE explicitly 1; the remaining enumerators continue from 2.
 */
enum _CtxSourceType
{
  CTX_SOURCE_COLOR,
  CTX_SOURCE_NONE = 1,
  CTX_SOURCE_TEXTURE,
  CTX_SOURCE_LINEAR_GRADIENT,
  CTX_SOURCE_RADIAL_GRADIENT,
  CTX_SOURCE_CONIC_GRADIENT,
  CTX_SOURCE_INHERIT_FILL
};

typedef enum _CtxSourceType CtxSourceType;

typedef struct _CtxPixelFormatInfo CtxPixelFormatInfo;

/* A pixel buffer: data pointer, dimensions, stride and pixel-format
 * description, plus an optional free function for the pixel data.
 */
struct _CtxBuffer
{
  void               *data;
  int                 width;
  int                 height;
  int                 stride;
  int                 frame;      // last frame used in, everything > 3 can be removed,
                                  // as clients won't rely on it.
  char               *eid;        // might be NULL, when not - should be unique for pixel contents
  const CtxPixelFormatInfo *format;
  // takes the pixel pointer and user_data; NOTE(review): presumably called
  // when the buffer is released - confirm
  void (*freefunc) (void *pixels, void *user_data);
  void               *user_data;

#if CTX_ENABLE_CM
#if CTX_BABL
  const Babl *space;
#else
  void       *space; 
#endif
#endif
#if CTX_ENABLE_CM
  CtxBuffer          *color_managed; /* only valid for one render target, cache
                                        for a specific space
                                        */
#endif
};



typedef struct _CtxGradient CtxGradient;
/* A gradient as a fixed-capacity array of stops (CTX_MAX_GRADIENT_STOPS)
 * plus n_stops; presumably n_stops is the number of entries in use -
 * confirm.
 */
struct _CtxGradient
{
  CtxGradientStop stops[CTX_MAX_GRADIENT_STOPS];
  int n_stops;
};

/* Description of a paint source.  `type` is stored as a plain int
 * (presumably a CtxSourceType value - confirm) and the anonymous union
 * holds the per-kind parameters: a color, a texture, or the geometry of a
 * linear, conic or radial gradient.
 */
struct _CtxSource
{
  int type;
  CtxMatrix  set_transform;
  CtxMatrix  transform;
  uint32_t   pad;
  union
  {
    CtxColor color;
    struct
    {
      uint8_t rgba[4]; // shares data with set color
      // NOTE(review): CtxColor starts with a one-byte `magic` member before
      // its rgba[4], so these four bytes overlay color.magic and
      // color.rgba[0..2] rather than color.rgba[0..3] - confirm this is
      // intended.
      uint8_t pad;
      CtxBuffer *buffer;
    } texture;
    struct
    {
      float x0;
      float y0;
      float x1;
      float y1;
      // the members below look like values derived from the end points -
      // TODO confirm where they are computed
      float length;
      float dx_scaled;
      float dy_scaled;
      float start_scaled;
    } linear_gradient;
    struct
    {
      float x;
      float y;
      float start_angle;
      float cycles;
    } conic_gradient;
    struct
    {
      float x0;
      float y0;
      float r0;
      float x1;
      float y1;
      float r1;
      float rdelta;
    } radial_gradient;
  };
};


/* 3x3 integer matrix; the name suggests 16.16 fixed-point entries
 * (TODO confirm against the code that fills it).  With CTX_32BIT_SEGMENTS
 * the entries widen from 32 to 64 bits.
 */
typedef struct _Ctx16f16Matrix     Ctx16f16Matrix;
struct
  _Ctx16f16Matrix
{
#if CTX_32BIT_SEGMENTS
  int64_t m[3][3];  // forcing higher precision easily, the extra
                    // memory cost is minuscule
#else
  int32_t m[3][3];
#endif
};


/* Graphics state.  CtxState embeds one instance as `gstate` and keeps an
 * array of them as `gstate_stack`, so the size of this struct multiplies
 * by CTX_MAX_STATES; small members are therefore bitfield-packed.
 */
struct _CtxGState
{
  // positions into the keydb and string pool; 16 bits wide unless
  // CTX_32BIT_SEGMENTS is set
#if CTX_32BIT_SEGMENTS
  uint32_t      keydb_pos;
  uint32_t      stringpool_pos;
#else
  uint16_t      keydb_pos;      // this limits these
  uint16_t      stringpool_pos; // 
#endif

  CtxMatrix     transform;
  Ctx16f16Matrix  prepped_transform;
  CtxSource     source_stroke;
  CtxSource     source_fill;
  float         global_alpha_f;

  float         line_width;
  float         line_dash_offset;
  float         stroke_pos;
  float         feather;
  float         miter_limit;
  float         font_size;
#if CTX_ENABLE_SHADOW_BLUR
  float         shadow_blur;
  float         shadow_offset_x;
  float         shadow_offset_y;
#endif
  unsigned int  transform_type:3;
  unsigned int        clipped:1;
  CtxColorModel    color_model:8;
  /* bitfield-pack small state-parts */
  CtxLineCap          line_cap:2;
  CtxLineJoin        line_join:2;
  CtxFillRule        fill_rule:1;
  unsigned int image_smoothing:1;
  unsigned int            font:6;   // 6 bits wide, so values 0..63
  unsigned int            bold:1;
  unsigned int          italic:1;

  uint8_t       global_alpha_u8;
  // clip rectangle stored as 16-bit signed coordinates
  int16_t       clip_min_x;
  int16_t       clip_min_y;
  int16_t       clip_max_x;
  int16_t       clip_max_y;
  int           n_dashes;

#if CTX_ENABLE_CM
#if CTX_BABL
  const Babl   *device_space;
  const Babl   *texture_space;
  const Babl   *rgb_space;       
  const Babl   *cmyk_space;

  const Babl   *fish_rgbaf_user_to_device;
  const Babl   *fish_rgbaf_texture_to_device;
  const Babl   *fish_rgbaf_device_to_user;

#else
  // same member names with void * types when babl is not available
  void         *device_space;
  void         *texture_space;
  void         *rgb_space;       
  void         *cmyk_space;
  void         *fish_rgbaf_user_to_device; // dummy padding
  void         *fish_rgbaf_texture_to_device; // dummy padding
  void         *fish_rgbaf_device_to_user; // dummy padding
#endif
#endif
  CtxCompositingMode  compositing_mode; // bitfield refs lead to
  CtxBlend                  blend_mode; // non-vectorization
  CtxExtend                 extend;
  long  tolerance_fixed;
  float tolerance;
  float dashes[CTX_MAX_DASHES]; // XXX moving dashes
                                //  to state storage will
                                //  allow it to be larger,
                                //  free up memory, and
                                //  make save/restore faster
};

/* Transformation flags with distinct power-of-two values, so they can be
 * OR-ed together (Ctx stores them in an int member named `transformation`).
 * CTX_TRANSFORMATION_BITPACK only exists when CTX_BITPACK is enabled.
 */
typedef enum
{
  CTX_TRANSFORMATION_NONE         = 0,
  CTX_TRANSFORMATION_SCREEN_SPACE = 1,
  CTX_TRANSFORMATION_RELATIVE     = 2,
#if CTX_BITPACK
  CTX_TRANSFORMATION_BITPACK      = 4,
#endif
  CTX_TRANSFORMATION_STORE_CLEAR  = 16,
} CtxTransformation;

/* Single-bit flag values; presumably stored in CtxDrawlist.flags (confirm
 * against the drawlist code). */
#define CTX_DRAWLIST_DOESNT_OWN_ENTRIES   64
#define CTX_DRAWLIST_EDGE_LIST            128
#define CTX_DRAWLIST_CURRENT_PATH         512
// BITPACK

/* An array of CtxEntry records.  NOTE(review): count is presumably the
 * number of entries in use and size the allocated capacity - confirm; note
 * that count is unsigned while size is a signed int.
 */
struct _CtxDrawlist
{
  CtxEntry     *entries;
  unsigned int  count;
  int           size;
  uint32_t      flags;
};

// the keydb consists of keys set to floating point values,
// that might also be interpreted as integers for enums.
//
// each entry pairs a 32-bit key with a float value; the key is presumably
// a hash of the key name (the original note here broke off after
// "the hash") - confirm
typedef struct _CtxKeyDbEntry CtxKeyDbEntry;
struct _CtxKeyDbEntry
{
  uint32_t key;
  float value;
  //union { float f[1]; uint8_t u8[4]; }value;
};

/* Drawing state: the current position, ink bounds, the active graphics
 * state, a stack of saved graphics states, the keydb and a string pool.
 */
struct _CtxState
{
  int  has_moved;
  unsigned int  has_clipped:1;
  int8_t        source; // used for the single-shifting to stroking
                // 0  = fill
                // 1  = start_stroke
                // 2  = in_stroke
                //
                //   if we're at in_stroke at start of a source definition
                //   we do filling
  int16_t       gstate_no;   // NOTE(review): presumably the current index into gstate_stack - confirm

  float         x;
  float         y;
  float         first_x;
  float         first_y;
  int           ink_min_x;
  int           ink_min_y;
  int           ink_max_x;
  int           ink_max_y;
#if CTX_GSTATE_PROTECT
  int           gstate_waterlevel;
#endif
  CtxGState     gstate;
#if CTX_GRADIENTS
  CtxGradient   gradient; /* we keep only one gradient,
                             this goes icky with multiple
                             restores - it should really be part of
                             graphics state..
                             XXX, with the stringpool gradients
                             can be stored there.
                           */
#endif
  CtxKeyDbEntry keydb[CTX_MAX_KEYDB];
  CtxGState     gstate_stack[CTX_MAX_STATES];//at end, so can be made dynamic
  char         *stringpool;
  int           stringpool_size;
};


typedef struct _CtxFont       CtxFont;
typedef struct _CtxFontEngine CtxFontEngine;

/* Table of callbacks implemented by a font engine; every callback receives
 * the CtxFont it operates on.
 */
struct _CtxFontEngine
{
  // glyph-level callbacks; `stroke` is an int flag passed to glyph() -
  // presumably selecting stroking over filling, confirm
  int   (*glyph)       (CtxFont *font, Ctx *ctx, int glyphid, int stroke);
  float (*glyph_width) (CtxFont *font, Ctx *ctx, int glyphid);
  // note the asymmetric parameters: a glyph id followed by a unicode
  // character
  float (*glyph_kern)  (CtxFont *font, Ctx *ctx, uint32_t glyphA, uint32_t unicharB);

  // return -1 for not found or 0 or positive number for found glyph
  int   (*glyph_lookup)  (CtxFont *font, Ctx *ctx, uint32_t unichar);
  void  (*unload) (CtxFont *font);
  const char *(*get_name)    (CtxFont *font);
  // writes the vertical metrics through the three out-parameters
  void (*get_vmetrics) (CtxFont *font, float *ascent, float *descent, float *linegap);
};

#if CTX_FONT_ENGINE_HARFBUZZ
#include <hb.h>
#include <hb-ot.h>
#endif

/* A loaded font.  The struct is declared under #pragma pack(push,1), i.e.
 * without padding between members, so members following a narrower one may
 * sit at unaligned offsets.  The anonymous union holds engine-specific
 * data; every variant starts with a `name` pointer.
 */
#pragma pack(push,1)
struct _CtxFont
{
#if CTX_ONE_FONT_ENGINE==0
  CtxFontEngine *engine;
#endif
  union
  {
    struct
    {
      const char *name;
      CtxEntry *data;
      int free_data;   // NOTE(review): presumably non-zero when `data` must be freed on unload - confirm
    } ctx;
#if CTX_FONT_ENGINE_CTX_FS
    struct
    {
      const char *name;
      char *path;
    } ctx_fs;
#endif
#if CTX_FONT_ENGINE_HARFBUZZ
    struct
    {
      const char *name;
      char *path;
      hb_blob_t *blob;
      hb_face_t *face;
      hb_font_t *font;
      hb_draw_funcs_t *draw_funcs;
      // paint_funcs is only declared for HarfBuzz major version 7 and up
#if HB_VERSION_MAJOR >= 7
      hb_paint_funcs_t *paint_funcs;
#endif
      double scale;
    } hb;
#endif

#if 0
    struct { int start; int end; int gw; int gh; const uint8_t *data;} monobitmap;
#endif
  };
#if CTX_ONE_FONT_ENGINE==0
  int     font_no;
  uint8_t type:4; // 0 ctx    1 stb    2 monobitmap 3 fs 4 hb
  char   *path;
  uint8_t monospaced:1;
#endif
  uint8_t has_fligs:1;
};
#pragma pack(pop)

/* Flags for CtxIterator.  CTX_ITERATOR_DEFAULTS is an alias for
 * CTX_ITERATOR_EXPAND_BITPACK, so bitpack expansion is the default.
 */
enum _CtxIteratorFlag
{
  CTX_ITERATOR_FLAT           = 0,
  CTX_ITERATOR_EXPAND_BITPACK = 2,
  CTX_ITERATOR_DEFAULTS       = CTX_ITERATOR_EXPAND_BITPACK
};
typedef enum _CtxIteratorFlag CtxIteratorFlag;


/* Cursor over a CtxDrawlist, with scratch space for a command that has to
 * be unpacked before being returned to the caller.
 */
struct _CtxIterator
{
  int              pos;
  int              first_run;
  CtxDrawlist *drawlist;
  int              end_pos;
  int              flags;              // stored as int; presumably CtxIteratorFlag values - confirm

  int              bitpack_pos;
  int              bitpack_length;     // if non 0 bitpack is active
  CtxEntry         bitpack_command[6]; // the command returned to the
  // user if unpacking is needed.
};

#if CTX_EVENTS 

// include list implementation - since it already is a header+inline online
// implementation?

/* One callback registration: the event types it is registered for, the
 * callback with its two data pointers, and an optional finalizer that
 * receives both data pointers plus finalize_data.
 */
typedef struct CtxItemCb {
  CtxEventType types;
  CtxCb        cb;
  void*        data1;
  void*        data2;

  void (*finalize) (void *data1, void *data2, void *finalize_data);
  void  *finalize_data;

} CtxItemCb;



/* An item that events can be delivered to: its bounding box, the inverse
 * matrix for event coordinate transforms, and up to CTX_MAX_CBS callback
 * registrations.
 */
typedef struct CtxItem {
  CtxMatrix inv_matrix;  /* for event coordinate transforms */

  /* bounding box */
  float          x0;
  float          y0;
  float          x1;
  float          y1;

  void *path;
  double          path_hash;

  CtxCursor       cursor; /* if 0 then UNSET and no cursor change is requested
                           */

  CtxEventType   types;   /* all cb's ored together */
  CtxItemCb cb[CTX_MAX_CBS];
  int       cb_count;     /* NOTE(review): presumably the number of used slots in cb[] - confirm */
  int       ref_count;
} CtxItem;


/* One key binding: nick, command and label strings, the callback with its
 * data, and a destroy notifier with its own data.
 * NOTE(review): ownership of the three strings is not visible here -
 * confirm who frees them.
 */
typedef struct CtxBinding {
  char *nick;
  char *command;
  char *label;
  CtxCb cb;
  void *cb_data;
  CtxDestroyNotify destroy_notify;
  void  *destroy_data;
} CtxBinding;

/**
 * ctx_get_bindings:
 * @ctx: a ctx context
 *
 * Returns a pointer to CtxBinding records for @ctx.
 *
 * NOTE(review): how the end of the returned array is marked is an open
 * question carried over from the original comment ("what is terminating
 * ... ?"); CtxEvents keeps an n_bindings count next to its bindings array,
 * but this API does not expose it - confirm against the implementation.
 */
CtxBinding *ctx_get_bindings (Ctx *ctx);

typedef struct _CtxEvents CtxEvents;
/* Event-handling state embedded in Ctx when CTX_EVENTS is enabled: grabs,
 * idle callbacks, queued events, key bindings, per-device pointer state,
 * registered items and tap timing parameters.
 */
struct _CtxEvents
{
  int             frozen;
  int             fullscreen;
  CtxList        *grabs; /* could split the grabs per device in the same way,
                            to make dispatch overhead smaller,. probably
                            not much to win though. */
  CtxEvent         drag_event[CTX_MAX_DEVICES];
  CtxList         *idles;
  CtxList         *idles_to_remove;
  CtxList         *idles_to_add;

  CtxList         *events; // for ctx_get_event
  CtxBinding       bindings[CTX_MAX_KEYBINDINGS]; /*< better as list, uses no mem if unused */
  int              n_bindings;
  // per-device arrays, all sized CTX_MAX_DEVICES
  CtxItem         *prev[CTX_MAX_DEVICES];
  float            pointer_x[CTX_MAX_DEVICES];
  float            pointer_y[CTX_MAX_DEVICES];
  unsigned char    pointer_down[CTX_MAX_DEVICES];
  int              event_depth; // dispatch-level depth - for detecting synthetic events
  uint64_t         last_key_time;
  unsigned int     in_idle_dispatch:1;
  unsigned int     ctx_get_event_enabled:1;
  CtxModifierState modifier_state;
  int              idle_id;
  CtxList         *items;
  CtxItem         *last_item;
  float            tap_hysteresis;
#if CTX_VT
  CtxList         *clients;
  CtxClient *active;
  CtxClient *active_tab;
#endif
  // tap timing parameters; units are not visible here - TODO confirm
  int              tap_delay_min;
  int              tap_delay_max;
  int              tap_delay_hold;
  // same callback signature as accepted by ctx_set_focus_cb()
  void (*focus_cb)(Ctx *ctx, int id, void *user_data);
  void            *focus_cb_user_data;
};
#endif

/* Record pairing an eid string with a frame number and pixel dimensions.
 * NOTE(review): presumably the element type of Ctx.eid_db - confirm.
 */
typedef struct _CtxEidInfo
{
  char *eid;
  int   frame;
  int   width;
  int   height;
} CtxEidInfo;


/* Glyph lookup record: a unicode character, a 16-bit offset and the font
 * it belongs to.  Ctx keeps an array of these as glyph_index_cache when
 * CTX_GLYPH_CACHE is enabled.
 */
struct _CtxGlyphEntry
{
  uint32_t  unichar;
  uint16_t  offset;
  CtxFont  *font;
};
typedef struct _CtxGlyphEntry CtxGlyphEntry;

/* The ctx context: backend, command-processing callback, drawing state,
 * drawlist, texture slots and - depending on configuration - event state,
 * current path and glyph cache.
 */
struct _Ctx
{
  CtxBackend       *backend;
  // command dispatch hook; ctx_process() calls through this pointer
  void  (*process)  (Ctx *ctx, const CtxCommand *entry);
  CtxState          state;        /**/
  CtxDrawlist       drawlist;
  int               transformation;
  int               width;
  int               height;
  int               dirty;
  Ctx              *texture_cache;
  CtxList          *deferred;
  CtxList          *eid_db;
  int               frame; /* used for texture lifetime */
  uint32_t          bail;
  CtxBackend       *backend_pushed;
  CtxBuffer         texture[CTX_MAX_TEXTURES];
  int               exit;
  CtxCursor         cursor;
  CtxGlyph          glyphs[CTX_SHAPE_GLYPHS];
  int               n_glyphs;
#if CTX_EVENTS 
  CtxEvents         events;
  int               mouse_fd;
  int               mouse_x;
  int               mouse_y;
#endif
#if CTX_CURRENT_PATH
  CtxDrawlist       current_path; // possibly transformed coordinates !
  CtxIterator       current_path_iterator;
#endif
#if CTX_GLYPH_CACHE
  CtxGlyphEntry     glyph_index_cache[CTX_GLYPH_CACHE_SIZE];
#endif
  CtxFont *fonts; // a copy to keep it alive with mp's
                  // garbage collector, the fonts themselves
                  // are static and shared beyond ctx contexts
 
  int frontend_text;
};

#if 0
#define ctx_process(ctx,entry)  ctx->process (ctx, (CtxCommand *)(entry));
#else
/**
 * ctx_process:
 * @ctx: context whose process callback is invoked
 * @entry: entry to dispatch, reinterpreted as a CtxCommand
 *
 * Forwards @entry to ctx->process.  The process callback takes a
 * const CtxCommand *, so the cast keeps the const qualifier instead of
 * dropping it and relying on the implicit conversion to add it back.
 */
static inline void
ctx_process (Ctx *ctx, const CtxEntry *entry)
{
  ctx->process (ctx, (const CtxCommand *) entry);
}
#endif

/* Creates a CtxBuffer with the given dimensions and pixel format, and
 * destroys one.  NOTE(review): presumably every buffer returned by
 * ctx_buffer_new() is to be released with ctx_buffer_destroy() - confirm
 * ownership rules against the implementation.
 */
CtxBuffer *ctx_buffer_new (int width, int height,
                           CtxPixelFormat pixel_format);
void ctx_buffer_destroy (CtxBuffer *buffer);

/* Forward declaration of a file-local (static) helper defined later. */
static void
ctx_state_gradient_clear_stops (CtxState *state);

/* State interpreters: each takes the CtxState, one drawlist entry and an
 * opaque data pointer.  The style/transform variants take the entry as
 * const while the two pos variants take it non-const (presumably because
 * they may rewrite the entry - confirm).
 */
void ctx_interpret_style         (CtxState *state, const CtxEntry *entry, void *data);
void ctx_interpret_transforms    (CtxState *state, const CtxEntry *entry, void *data);
void ctx_interpret_pos           (CtxState *state, CtxEntry *entry, void *data);
void ctx_interpret_pos_transform (CtxState *state, CtxEntry *entry, void *data);

/* One entry of an internal file table: a path plus a data pointer and its
 * length.  NOTE(review): length is presumably in bytes - confirm.
 */
struct _CtxInternalFsEntry
{
  char *path;
  int   length;
  char *data;
};


/* Signature of a coverage-application routine: `count` pixels, destination
 * and source pointers (declared non-aliasing via __restrict__), a coverage
 * array, the rasterizer and a starting x coordinate.
 */
typedef void (*ctx_apply_coverage_fun) (unsigned int count, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, uint8_t *coverage, CtxRasterizer *r, int x);

/* Describes a pixel format: its layout figures and the callbacks used to
 * convert to and from the format it is composited in (composite_format).
 */
struct _CtxPixelFormatInfo
{
  CtxPixelFormat pixel_format:8;
  uint8_t        components; /* number of components */
  uint8_t        bpp; /* bits  per pixel - for doing offset computations
                         along with rowstride found elsewhere, if 0 it indicates
                         1/8  */
  uint8_t        ebpp; /*effective bytes per pixel - for doing offset
                         computations, for formats that get converted, the
                         ebpp of the working space applied */
  uint8_t        dither_red_blue;
  uint8_t        dither_green;
  CtxPixelFormat composite_format:8;

  /* conversion of `count` pixels starting at x: native -> compositing
     representation (to_comp) and back (from_comp) */
  void         (*to_comp) (CtxRasterizer *r,
                           int x, const void * __restrict__ src, uint8_t * __restrict__ comp, int count);
  void         (*from_comp) (CtxRasterizer *r,
                             int x, const uint8_t * __restrict__ comp, void *__restrict__ dst, int count);
  ctx_apply_coverage_fun apply_coverage;
  void         (*setup) (CtxRasterizer *r);
};


/* Coordinate helpers operating in place on *x / *y using the given state.
 * NOTE(review): presumably they apply state->gstate.transform, the
 * _distance variant without translation - confirm against the
 * implementation.
 */
void
_ctx_user_to_device (CtxState *state, float *x, float *y);
void
_ctx_user_to_device_distance (CtxState *state, float *x, float *y);
/* Initializes a CtxState. */
void ctx_state_init (CtxState *state);
/* Same parameter shape as the ctx_interpret_* functions, taking the entry
 * as const. */
void
ctx_interpret_pos_bare (CtxState *state, const CtxEntry *entry, void *data);
/* Deinitializes a drawlist.  NOTE(review): presumably frees
 * drawlist->entries when the drawlist owns them - confirm.
 */
void
ctx_drawlist_deinit (CtxDrawlist *drawlist);

//extern CtxPixelFormatInfo *(*ctx_pixel_format_info) (CtxPixelFormat format);
/* Looks up the CtxPixelFormatInfo describing `format`; the result is const
 * and must not be modified by the caller. */
const CtxPixelFormatInfo *ctx_pixel_format_info (CtxPixelFormat format);



/* Rasterizer/compositor entry points exposed as function pointers; they
 * are only declared here (extern) and defined elsewhere.
 * NOTE(review): presumably pointers rather than functions so that an
 * implementation can be selected at runtime - confirm.
 */

/* Strokes the rectangle (x0,y0)-(x1,y1) with the given line width. */
extern void (*ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width);

extern void (*ctx_composite_setup) (CtxRasterizer *rasterizer);


extern void (*ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule);

/* Fills the rectangle (x0,y0)-(x1,y1); cov is an 8-bit coverage value. */
extern void (*ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                           float        x0,
                           float        y0,
                           float        x1,
                           float        y1,
                           uint8_t      cov);


/* UTF-8 helpers.  The descriptions below are inferred from the names and
 * signatures only - TODO confirm against the implementations:
 *   ctx_utf8_skip       - pointer into s after skipping utf8_length characters
 *   ctx_utf8_strlen     - length of s counted in characters rather than bytes
 *   ctx_unichar_to_utf8 - encodes ch into dest, returning an int (presumably
 *                         the number of bytes written)
 *   ctx_utf8_to_unichar - decodes the character at the start of input
 */
const char *ctx_utf8_skip (const char *s, int utf8_length);
int ctx_utf8_strlen (const char *s);
int
ctx_unichar_to_utf8 (uint32_t  ch,
                     uint8_t  *dest);

uint32_t
ctx_utf8_to_unichar (const char *input);


typedef struct _CtxHasher CtxHasher;

/* Signature of a fragment routine: writes `count` results to `out`,
 * starting from (x, y, z) and taking per-step deltas (dx, dy, dz).
 * NOTE(review): the output pixel layout depends on the active format and is
 * not visible here - confirm before relying on it.
 */
typedef void (*CtxFragment) (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz);

/* Upper bound on the dimension of a gaussian kernel. */
#define CTX_MAX_GAUSSIAN_KERNEL_DIM    512

/* Identifies a coverage-application path; stored in CtxRasterizer.comp.
 * CTX_COV_PATH_FALLBACK is 0, so a zero-initialized rasterizer selects it.
 * NOTE(review): the names suggest one entry per pixel format and
 * compositing mode - confirm where the value is chosen and consumed.
 */
typedef enum {
   CTX_COV_PATH_FALLBACK =0,
   CTX_COV_PATH_RGBA8_OVER,
   CTX_COV_PATH_RGBA8_COPY,
   CTX_COV_PATH_RGBA8_COPY_FRAGMENT,
   CTX_COV_PATH_RGBA8_OVER_FRAGMENT,
   CTX_COV_PATH_GRAYA8_COPY,
   CTX_COV_PATH_GRAY1_COPY,
   CTX_COV_PATH_GRAY2_COPY,
   CTX_COV_PATH_GRAY4_COPY,
   CTX_COV_PATH_RGB565_COPY,
   CTX_COV_PATH_RGB332_COPY,
   CTX_COV_PATH_GRAY8_COPY,
   CTX_COV_PATH_RGBAF_COPY,
   CTX_COV_PATH_RGB8_COPY,
   CTX_COV_PATH_CMYK8_COPY,
   CTX_COV_PATH_CMYKA8_COPY,
   CTX_COV_PATH_CMYKAF_COPY,
   CTX_COV_PATH_GRAYAF_COPY
} CtxCovPath;

/* Rasterizer state: the backend header, compositing hooks, scanline and
 * edge bookkeeping, the description of the target buffer, and optional
 * feature blocks selected at compile time.
 *
 * The layout depends on the build configuration: several members exist only
 * under #if guards and several arrays are sized by CTX_MAX_* macros.
 * CtxHasher embeds this struct as its first member.
 */
struct _CtxRasterizer
{
  CtxBackend backend;
  /* These should be initialized and used as the bounds for rendering into
     the buffer as well. XXX: not yet in use, and when in use will only be
     correct for axis-aligned clips - proper rasterization of a clipping path
     would be yet another refinement on top.
     NOTE(review): no members follow this comment directly; it may refer to
     fields that were moved or removed.
   */


/* Parameter list shared by every compositing function (see comp_op). */
#define CTX_COMPOSITE_ARGUMENTS unsigned int count, uint8_t * __restrict__ dst, uint8_t * __restrict__ src, uint8_t * __restrict__ coverage, CtxRasterizer *rasterizer, int x0
  void (*comp_op)(CTX_COMPOSITE_ARGUMENTS);
  CtxFragment fragment;
  //Ctx       *ctx;
  CtxState  *state;
  CtxCovPath  comp;
  unsigned int  swap_red_green;
  ctx_apply_coverage_fun apply_coverage;

  unsigned int active_edges;
  unsigned int edge_pos;         // where we're at in iterating all edges
  unsigned int pending_edges;
  unsigned int horizontal_edges;
  unsigned int ending_edges;

  unsigned int aa;          // level of vertical aa
  int  convex;
  unsigned int  scan_aa[4]; // 0=none, 1 = 3, 2 = 5, 3 = 15

  int        scanline;
  int        scan_min;
  int        scan_max;
  int        col_min;
  int        col_max;

  int        inner_x;
  int        inner_y;

  float      x;
  float      y;

  int        first_edge;

  /* Target region. Note the mixed widths: the origin is 16-bit unsigned,
     the size is 32-bit signed and the stride is 32-bit unsigned. */
  uint16_t    blit_x;
  uint16_t    blit_y;
  int32_t    blit_width;
  int32_t    blit_height;
  uint32_t    blit_stride;


  unsigned int  unused; // kept for layout
  unsigned int  clip_rectangle;
  int           has_prev;
  void      *buf;
#if CTX_ENABLE_SHADOW_BLUR
  unsigned int  in_shadow:1;
  float         feather_x;
  float         feather_y;
  float         feather;
#endif

  const CtxPixelFormatInfo *format;
  Ctx       *texture_source; /* normally same as ctx */
  uint8_t    color[8*5];   // in compositing format - placed right after a pointer to get good alignment
  uint16_t   color_nativeB[8];

  uint16_t   color_native;  //

                           //
  int edges[CTX_MAX_EDGES]; // integer position in edge array
  CtxDrawlist edge_list;
                           
  unsigned int  preserve;
  unsigned int  in_text;


  /* NOTE(review): this guard is spelled "static_OPAQUE", unlike the
     CTX_-prefixed guards around it - confirm it matches the name used by
     the configuration section and by the code that accesses opaque[]. */
#if static_OPAQUE
  uint8_t opaque[CTX_MAX_SCANLINE_LENGTH];
#endif
#if CTX_ENABLE_CLIP
  CtxBuffer *clip_buffer;
#endif

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
  int gradient_cache_valid;
  uint8_t gradient_cache_u8[CTX_GRADIENT_CACHE_ELEMENTS][4];
  int gradient_cache_elements;
#endif
#endif


#if CTX_BRAILLE_TEXT
  unsigned int  term_glyphs:1; // store appropriate glyphs for redisplay
#endif
#if CTX_BRAILLE_TEXT
  CtxList   *glyphs;
#endif

#if CTX_COMPOSITING_GROUPS
  void      *saved_buf; // when group redirected
  CtxBuffer *group[CTX_GROUP_MAX];
#endif
#if CTX_ENABLE_SHADOW_BLUR
  float      kernel[CTX_MAX_GAUSSIAN_KERNEL_DIM];
#endif
  /* These shadow_* members are present regardless of
     CTX_ENABLE_SHADOW_BLUR. */
  unsigned int shadow_active_edges;
  unsigned int shadow_edge_pos;
  int shadow_edges[CTX_MAX_EDGES*2];

#if CTX_SCANBIN
  uint32_t scan_bins[CTX_MAX_SCANLINES][CTX_MAX_EDGES];
  /* The per-scanline counter is only widened when CTX_MAX_EDGES does not
     fit in 8 bits. */
#if CTX_MAX_EDGES>255
  uint32_t scan_bin_count[CTX_MAX_SCANLINES];
#else
  uint8_t scan_bin_count[CTX_MAX_SCANLINES];
#endif
#endif


};

/* State for an incremental SHA-1 computation (see the ctx_sha1_* prototypes).
 * NOTE(review): presumably length counts processed input, state[] holds the
 * five chaining words and buf/curlen buffer a partial 64-byte block -
 * confirm against the implementation.
 */
struct _CtxSHA1 {
    uint64_t length;
    uint32_t state[5], curlen;
    unsigned char buf[64];
};
typedef struct _CtxMurmur CtxMurmur;
/* Two 32-bit words of hash state; used per graphics state by CtxHasher. */
struct _CtxMurmur {
    uint32_t state[2];
};


/* Packed to 1-byte alignment so that no padding is inserted between the
 * 16-bit pos and the 32-bit active field; the previous packing is restored
 * right after the definition.
 */
#pragma pack(push,1)
typedef struct CtxCommandState
{
  uint16_t pos;
  uint32_t active;
} CtxCommandState;
#pragma pack(pop)

/* Hasher backend state. The embedded CtxRasterizer is the first member;
 * ctx_hasher_init (declared later in this file) accordingly takes and
 * returns a CtxRasterizer pointer.
 * hashes[] holds one 32-bit value per cell of a CTX_HASH_COLS x
 * CTX_HASH_ROWS grid; cols and rows presumably give the portion in use -
 * confirm in ctx_hasher_init.
 */
struct _CtxHasher
{
  CtxRasterizer rasterizer;
  int           cols;
  int           rows;
  uint32_t      hashes[CTX_HASH_COLS*CTX_HASH_ROWS];
  /* one fill and one stroke hash state per saved graphics state */
  CtxMurmur     murmur_fill[CTX_MAX_STATES]; 
  CtxMurmur     murmur_stroke[CTX_MAX_STATES];
  int           source_level;
  int           pos; 

  int           prev_command;

  CtxDrawlist  *drawlist;

};

#if CTX_RASTERIZER
/* Rasterizer teardown. ctx_rasterizer_destroy takes void* - presumably so
 * it can be installed as a generic backend destroy callback; confirm. */
void ctx_rasterizer_deinit (CtxRasterizer *rasterizer);
void ctx_rasterizer_destroy (void *rasterizer);
#endif

/* Mouse reporting modes passed to _ctx_mouse(); each level adds the events
 * named in its comment to those of the previous level. */
enum {
  NC_MOUSE_NONE  = 0,
  NC_MOUSE_PRESS = 1,  /* "mouse-pressed", "mouse-released" */
  NC_MOUSE_DRAG  = 2,  /* + "mouse-drag"   (motion with pressed button) */
  NC_MOUSE_ALL   = 3   /* + "mouse-motion" (also delivered for release) */
};
void _ctx_mouse (Ctx *term, int mode);
void nc_at_exit (void);

/* Terminal size queries for the terminal attached to in_fd/out_fd.
 * NOTE(review): width/height are presumably in pixels and cols/rows in
 * character cells - confirm. */
int ctx_terminal_width  (int in_fd, int out_fd);
int ctx_terminal_height (int in_fd, int out_fd);
int ctx_terminal_cols   (int in_fd, int out_fd);
int ctx_terminal_rows   (int in_fd, int out_fd);
extern int ctx_frame_ack;


/* Backend state created by ctx_new_ctx (prototype below): the backend
 * header followed by flags and the size of the output, both as
 * width/height and as cols/rows.
 * NOTE(review): was_down presumably tracks the previous pointer button
 * state - confirm where it is written.
 */
typedef struct _CtxCtx CtxCtx;
struct _CtxCtx
{
   CtxBackend backend;
   int  flags;
   int  width;
   int  height;
   int  cols;
   int  rows;
   int  was_down;
};

/* Global configuration variables, defined elsewhere. */
extern int _ctx_max_threads;
extern int _ctx_enable_hash_cache;
/* String-valued state access. Note the asymmetry: ctx_set takes a hashed
 * key plus an explicit length, ctx_get takes the key as a string. */
void
ctx_set (Ctx *ctx, uint32_t key_hash, const char *string, int len);
const char *
ctx_get (Ctx *ctx, const char *key);

/* Constructors for the individual backends. */
Ctx *ctx_new_ctx (int width, int height, int flags);
Ctx *ctx_new_fb (int width, int height);
Ctx *ctx_new_kms (int width, int height);
Ctx *ctx_new_sdl (int width, int height);
Ctx *ctx_new_term (int width, int height);

int ctx_resolve_font (const char *name);

/* ctx_u8_to_float: convert an 8-bit channel value (0..255) to a float.
 *
 * With CTX_U8_TO_FLOAT_LUT the value is looked up in ctx_u8_float[] after
 * truncating the argument to uint8_t; otherwise the argument is divided by
 * 255.0f. The argument is parenthesized in both variants so that a compound
 * expression such as a+b is converted as a whole.
 */
#if CTX_U8_TO_FLOAT_LUT
extern float ctx_u8_float[256];
#define ctx_u8_to_float(val_u8) ctx_u8_float[((uint8_t)(val_u8))]
#else
#define ctx_u8_to_float(val_u8) ((val_u8)/255.0f)
#endif

/* Convert a float in 0.0..1.0 to an 8-bit value.
 *
 * The active path adds a bias of 32768.0f and returns the low byte of the
 * resulting bit pattern. NOTE(review): this relies on IEEE-754 single
 * precision, where floats in [32768, 65536) are spaced 1/256 apart, so the
 * low mantissa byte holds val_f * 255 rounded by the FPU.
 * Inputs outside 0.0..1.0 are not clamped on this path; only the disabled
 * #else variant clamps.
 */
static inline uint8_t ctx_float_to_u8 (float val_f)
{
#if 1
  const float biased = 32768.0f + val_f * (255.0f / 256.0f);
  uint32_t bits;
  memcpy (&bits, &biased, sizeof (bits));
  return (uint8_t) bits;
#else
  return val_f < 0.0f ? 0 : val_f > 1.0f ? 0xff : 0xff * val_f +  0.5f;
#endif
}


/* Weights used when reducing RGB to a single luminance value. */
#define CTX_CSS_LUMINANCE_RED   0.3f
#define CTX_CSS_LUMINANCE_GREEN 0.59f
#define CTX_CSS_LUMINANCE_BLUE  0.11f

/* Weighted sum of rgb[0..2]; works on both float and uint8_t arrays.
 * The result is a float in the same scale as the input components.
 * The argument is parenthesized before indexing so that pointer
 * expressions such as (pixel + 1) can be passed; it is evaluated three
 * times, so it must not have side effects.
 */
#define CTX_CSS_RGB_TO_LUMINANCE(rgb)  (\
  ((rgb)[0]) * CTX_CSS_LUMINANCE_RED + \
  ((rgb)[1]) * CTX_CSS_LUMINANCE_GREEN +\
  ((rgb)[2]) * CTX_CSS_LUMINANCE_BLUE)

/* Event polling; timeoutms is a timeout in milliseconds. */
const char *ctx_nct_get_event (Ctx *n, int timeoutms, int *x, int *y);
const char *ctx_native_get_event (Ctx *n, int timeoutms);

/* Colour accessors and conversions (prototypes only). The getters write
 * into out; the number of components written is not visible here, so size
 * out according to the definition. */
void
ctx_color_get_rgba8 (CtxState *state, CtxColor *color, uint8_t *out);
void ctx_color_get_graya_u8 (CtxState *state, CtxColor *color, uint8_t *out);
float ctx_float_color_rgb_to_gray (CtxState *state, const float *rgb);
void ctx_color_get_graya (CtxState *state, CtxColor *color, float *out);
void ctx_rgb_to_cmyk (float r, float g, float b,
              float *c_out, float *m_out, float *y_out, float *k_out);
uint8_t ctx_u8_color_rgb_to_gray (CtxState *state, const uint8_t *rgb);
#if CTX_ENABLE_CMYK
void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out);
#endif
void ctx_color_set_RGBA8 (CtxState *state, CtxColor *color, uint8_t r, uint8_t g, uint8_t b, uint8_t a);
void ctx_color_set_rgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
void ctx_color_set_drgba (CtxState *state, CtxColor *color, float r, float g, float b, float a);
/* NOTE(review): ctx_color_get_cmyka is also declared above under
 * CTX_ENABLE_CMYK; this second declaration is unconditional. */
void ctx_color_get_cmyka (CtxState *state, CtxColor *color, float *out);
void ctx_color_set_cmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a);
void ctx_color_set_dcmyka (CtxState *state, CtxColor *color, float c, float m, float y, float k, float a);
void ctx_color_set_graya (CtxState *state, CtxColor *color, float gray, float alpha);

int ctx_color_model_get_components (CtxColorModel model);

void ctx_state_set (CtxState *state, uint32_t key, float value);

/* Forward declaration; the static definition appears further down. */
static void
ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f, float g, float h, float i);


void ctx_font_setup (Ctx *ctx);
float ctx_state_get (CtxState *state, uint32_t hash);

#if CTX_RASTERIZER

/* Path construction and drawing entry points of the rasterizer backend
 * (prototypes only). The rel_ variants mirror the absolute ones with the
 * same parameter lists - presumably taking offsets from the current point;
 * confirm against the definitions.
 */
void
ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y);
void
ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y);

void
ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y);
void
ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y);
void
ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
                         float x0, float y0,
                         float x1, float y1,
                         float x2, float y2);
void
ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
                         float x0, float y0,
                         float x1, float y1,
                         float x2, float y2);

void
ctx_rasterizer_reset (CtxRasterizer *rasterizer);
void
ctx_rasterizer_arc (CtxRasterizer *rasterizer,
                    float        x,
                    float        y,
                    float        radius,
                    float        start_angle,
                    float        end_angle,
                    int          anticlockwise);

void
ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y);

void
ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y);

void
ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
                          float x,
                          float y,
                          float width,
                          float height);

void ctx_rasterizer_close_path (CtxRasterizer *rasterizer);
void ctx_rasterizer_clip (CtxRasterizer *rasterizer);
void
ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name);

/* rgba points at the stop colour; presumably four floats - confirm. */
void
ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba);
/* Pixel coordinates are 16-bit unsigned, colour components 8-bit. */
void
ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
                          uint16_t x,
                          uint16_t y,
                          uint8_t r,
                          uint8_t g,
                          uint8_t b,
                          uint8_t a);
void
ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius);

#endif

/* Colour-management setters, only declared when CTX_ENABLE_CM is set.
 * NOTE(review): three of the four names carry a "set_" prefix while
 * ctx_rgb_space does not - confirm the naming matches the definitions.
 */
#if CTX_ENABLE_CM // XXX to be moved to ctx.h
void
ctx_set_drgb_space (Ctx *ctx, int device_space);
void
ctx_set_dcmyk_space (Ctx *ctx, int device_space);
void
ctx_rgb_space (Ctx *ctx, int device_space);
void
ctx_set_cmyk_space (Ctx *ctx, int device_space);
#endif

#endif

/* Initializes a rasterizer for a caller-supplied pixel buffer.
 * data is the buffer, (x, y, width, height) the target region, stride and
 * pixel_format describe the buffer layout. NOTE(review): stride is
 * presumably in bytes per row, and the return value presumably the
 * rasterizer that was passed in - confirm against the definition.
 */
CtxRasterizer *
ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias);

/* Interpolate between two 8-bit values.
 *
 * dx is the weight of v1 in 1/256 steps: the sum is formed in 8.8 fixed
 * point and shifted back down, so dx == 0 yields v0 and dx == 255 yields a
 * value just short of v1 (the divisor is 256, not 255).
 */
CTX_INLINE static uint8_t ctx_lerp_u8 (uint8_t v0, uint8_t v1, uint8_t dx)
{
  const int base  = v0 << 8;
  const int delta = v1 - v0;
  return (base + dx * delta) >> 8;
}

/* Interpolate two packed 8-bit-per-channel pixels, two channels at a time.
 *
 * The pixel is split into the bytes under 0x00ff00ff and those under
 * 0xff00ff00; each pair is interpolated in one 32-bit operation with dx as
 * the weight of v1, then the pairs are masked and recombined.
 * Arithmetic is unsigned and wraps; the masks discard the wrapped bits.
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8 (const uint32_t v0, const uint32_t v1, const uint8_t dx)
{
  const uint32_t weight   = dx;
  const uint32_t dst_rb   = v0 & 0x00ff00ff;
  const uint32_t dst_ga   = v0 & 0xff00ff00;
  const uint32_t delta_rb = (v1 & 0x00ff00ff) - dst_rb;
  const uint32_t delta_ga = ((v1 & 0xff00ff00) >> 8) - (dst_ga >> 8);
  const uint32_t out_rb   = (dst_rb + ((0xff00ff + delta_rb * weight) >> 8)) & 0x00ff00ff;
  const uint32_t out_ga   = (dst_ga + (0xff00ff + delta_ga * weight)) & 0xff00ff00;

  return out_rb | out_ga;
}

/* Same interpolation as ctx_lerp_RGBA8, but the two channel pairs are
 * returned separately instead of being OR-ed together.
 *
 * @dest_rb receives the bytes under mask 0x00ff00ff,
 * @dest_ga receives the bytes under mask 0xff00ff00 (left in place, not
 * shifted down).
 */
CTX_INLINE static void ctx_lerp_RGBA8_split (const uint32_t v0, const uint32_t v1, const uint8_t dx,
                                             uint32_t *dest_ga, uint32_t *dest_rb)
{
  const uint32_t weight   = dx;
  const uint32_t dst_rb   = v0 & 0x00ff00ff;
  const uint32_t dst_ga   = v0 & 0xff00ff00;
  const uint32_t delta_rb = (v1 & 0x00ff00ff) - dst_rb;
  const uint32_t delta_ga = ((v1 & 0xff00ff00) >> 8) - (dst_ga >> 8);

  *dest_rb = (dst_rb + ((0xff00ff + delta_rb * weight) >> 8)) & 0x00ff00ff;
  *dest_ga = (dst_ga + (0xff00ff + delta_ga * weight)) & 0xff00ff00;
}

/* Interpolate two pixels that are already split into channel pairs and
 * return the recombined packed pixel.
 *
 * @di_ga, @si_ga: destination/source bytes under mask 0xff00ff00, in place
 *                 (the form written by ctx_lerp_RGBA8_split).
 * @di_rb, @si_rb: destination/source bytes under mask 0x00ff00ff.
 * @dx:            weight of the source, 0..255.
 *
 * The 0xff00ff00 mask is applied after di_ga has been added, as in
 * ctx_lerp_RGBA8. Masking only the increment lets a carry out of bits 8-15
 * land in bit 16 and corrupt the low bit of the neighbouring channel.
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8_merge (uint32_t di_ga, uint32_t di_rb, uint32_t si_ga, uint32_t si_rb, const uint8_t dx)
{
  const uint32_t cov = dx;
  const uint32_t d_rb = si_rb - di_rb;
  const uint32_t d_ga = (si_ga >> 8) - (di_ga >> 8);
  return
     (((di_rb + ((0xff00ff + d_rb * cov)>>8)) & 0x00ff00ff))  |
     (((di_ga + (0xff00ff + d_ga * cov))      & 0xff00ff00));
}

/* Interpolate a packed destination pixel towards a source that is already
 * split into channel pairs.
 *
 * @si_rb holds the source bytes under mask 0x00ff00ff. @si_ga is used
 * without shifting, so it must already be shifted down by 8 - unlike the
 * si_ga argument of ctx_lerp_RGBA8_merge, which is shifted here.
 */
CTX_INLINE static uint32_t ctx_lerp_RGBA8_2 (const uint32_t v0, uint32_t si_ga, uint32_t si_rb, const uint8_t dx)
{
  const uint32_t weight   = dx;
  const uint32_t dst_rb   = v0 & 0x00ff00ff;
  const uint32_t dst_ga   = v0 & 0xff00ff00;
  const uint32_t delta_rb = si_rb - dst_rb;
  const uint32_t delta_ga = si_ga - (dst_ga >> 8);
  const uint32_t out_rb   = (dst_rb + ((0xff00ff + delta_rb * weight) >> 8)) & 0x00ff00ff;
  const uint32_t out_ga   = (dst_ga + (0xff00ff + delta_ga * weight)) & 0xff00ff00;

  return out_rb | out_ga;
}

/* Linear interpolation: returns v0 for dx == 0 and v0 + (v1 - v0) for
 * dx == 1. dx is not clamped, so values outside 0..1 extrapolate. */
CTX_INLINE static float
ctx_lerpf (float v0, float v1, float dx)
{
  const float span = v1 - v0;
  return v0 + span * dx;
}

/* Evaluate a cubic through four samples at parameter t.
 * The constant term is v1, so t == 0 returns v1; the cubic is written out
 * term by term (not in Horner form).
 */
CTX_INLINE static float
ctx_catmull_rom (float v0, float v1, float v2, float v3, float t)
{
   const float cubic     = 0.5f * (-v0 + 3 * v1 - 3 * v2 + v3);
   const float quadratic = 0.5f * (2 * v0 - 5 * v1 + 4 * v2 - v3);
   const float linear    = 0.5f * (-v0 + v2);
   const float constant  = v1;

   return cubic * t * t * t +
          quadratic * t * t +
          linear * t +
          constant;
}

/* Quadratic variant for when only three samples are available.
 * The constant term is v0, so t == 0 returns v0.
 */
CTX_INLINE static float
ctx_catmull_rom_left (float v0, float v1, float v2, float t)
{
   const float quadratic = 0.5f * (v0 - 2 * v1 + v2);
   const float linear    = 0.5f * (-3 * v0 + 4 * v1 - v2);
   const float constant  = v0;

   return quadratic * t * t +
          linear * t +
          constant;
}

/* Quadratic variant for when only three samples are available.
 * The constant term is v1, so t == 0 returns v1.
 */
CTX_INLINE static float
ctx_catmull_rom_right (float v0, float v1, float v2, float t)
{
   const float quadratic = 0.5f * (v0 - 2 * v1 + v2);
   const float linear    = 0.5f * (-v0 + v2);
   const float constant  = v1;

   return quadratic * t * t +
          linear * t +
          constant;
}


/* Minimum / maximum of two values. Both are plain macros: each argument is
 * evaluated twice, so do not pass expressions with side effects. The
 * #ifndef guards let a definition made earlier take precedence.
 */
#ifndef CTX_MIN
#define CTX_MIN(a,b)  (((a)<(b))?(a):(b))
#endif
#ifndef CTX_MAX
#define CTX_MAX(a,b)  (((a)>(b))?(a):(b))
#endif

/* NOTE: the parameter order here is (size, count), the reverse of the
 * standard calloc (count, size). */
static inline void *ctx_calloc (size_t size, size_t count);

void ctx_screenshot (Ctx *ctx, const char *output_path);


/* Incremental SHA-1 interface operating on struct _CtxSHA1.
 * NOTE(review): the size required for out in ctx_sha1_done and the meaning
 * of the int return values are not visible here - check the definitions. */
CtxSHA1 *ctx_sha1_new (void);
void ctx_sha1_free (CtxSHA1 *sha1);
int ctx_sha1_process(CtxSHA1 *sha1, const unsigned char * msg, unsigned long len);
int ctx_sha1_done(CtxSHA1 * sha1, unsigned char *out);

/* Texture bookkeeping helpers. */
void _ctx_texture_lock (void);
void _ctx_texture_unlock (void);
uint8_t *ctx_define_texture_pixel_data (const CtxEntry *entry);
uint32_t ctx_define_texture_pixel_data_length (const CtxEntry *entry);
/* Has the (pixels, user data) shape of the freefunc parameter accepted by
 * ctx_texture_init. */
void ctx_buffer_pixels_free (void *pixels, void *userdata);

/* ctx_texture_init:
 *
 * Low-level texture registration - not meant to be called directly, use
 * define_texture instead.
 *
 * @eid: identifier for the texture, or NULL to have one generated by
 *       hashing the pixels and width/height.
 * @freefunc, @user_data: callback and argument for releasing @pixels.
 *       NOTE(review): when the callback is invoked is not visible here.
 *
 * Returns the eid: the one passed in, or the generated one.
 */
const char *ctx_texture_init (
                      Ctx        *ctx,
                      const char *eid,
                      int         width,
                      int         height,
                      int         stride,
                      CtxPixelFormat format,
                      void       *space,
                      uint8_t    *pixels,
                      void (*freefunc) (void *pixels, void *user_data),
                      void *user_data);

/* An event source: a small table of callbacks plus private storage.
 * The ctx_evsource_* macros later in this file dispatch through it; they
 * treat destroy, set_coord and get_fd as optional (NULL-checked) and call
 * has_event and get_event unconditionally.
 */
typedef struct _EvSource EvSource;
struct _EvSource
{
  void   *priv; /* private storage  */

  /* returns non-zero if there are events waiting */
  int   (*has_event) (EvSource *ev_source);

  /* get an event, the returned event should be freed by the caller  */
  char *(*get_event) (EvSource *ev_source);

  /* destroy/unref this instance */
  void  (*destroy)   (EvSource *ev_source);

  /* get the underlying fd, useful for using select on  */
  int   (*get_fd)    (EvSource *ev_source);


  void  (*set_coord) (EvSource *ev_source, double x, double y);
  /* set_coord is needed to warp relative cursors into normalized range,
   * like normal mice/trackpads/nipples - to obey edges and more.
   */

  /* if this returns non-0 select can be used for non-blocking..
   * NOTE(review): no member follows this comment; it may describe a
   * callback that was removed or never added. */
};

/* One render job of the callback backend: a rectangle (x0,y0)-(x1,y1) plus
 * bookkeeping. NOTE(review): whether the rectangle is in pixels or in
 * hasher cells, and whether x1/y1 are inclusive, is not visible here.
 */
typedef struct CtxCbJob
{
  int      x0;
  int      y0;
  int      x1;
  int      y1;
  uint32_t bitmask;
  int      renderer;      // 0 - no render
  int      flags;
} CtxCbJob;

/* Capacity of CtxCbBackend.jobs[]. */
#define CTX_CB_MAX_JOBS 8
/* NOTE(review): presumably a marker value for CtxCbJob.renderer meaning
 * "not yet picked up" - confirm where it is stored and compared. */
#define CTX_JOB_PENDING (-1)


/* State of the callback backend: the backend header, two render contexts
 * with their temporary buffers, the dirty region expressed in hasher
 * columns/rows, a fixed-size job queue, up to four event sources, and an
 * embedded CtxHasher.
 */
typedef struct CtxCbBackend
{
  CtxBackend     backend;

  Ctx           *drawlist_copy;
  Ctx           *rctx[2];
  uint8_t       *temp[2];
  int            temp_len[2];

  int            rendering;
  int            frame_no;

  CtxCbConfig    config;
  int            min_col; // hasher cols and rows
  int            min_row; // hasher cols and rows
  int            max_col; // hasher cols and rows
  int            max_row; // hasher cols and rows
  uint16_t      *scratch;
  int            allocated_fb;
  Ctx           *ctx;

  int n_jobs;
  CtxCbJob  jobs[CTX_CB_MAX_JOBS];
  int jobs_done;

   EvSource    *evsource[4];
   int          evsource_count;

  /* one 32-bit hash per hasher cell; same dimensions as
     CtxHasher.hashes[] */
  uint32_t hashes[CTX_HASH_ROWS * CTX_HASH_COLS];

  CtxHasher     hasher;
  uint8_t res[CTX_HASH_ROWS * CTX_HASH_COLS];

  // when non-0 we have non-full res rendered
                                           
  mtx_t mtx;
} CtxCbBackend;

/* Returns the Ctx a backend belongs to, or NULL when backend is NULL.
 * backend must point at a structure that starts with a CtxBackend. */
static inline Ctx *ctx_backend_get_ctx (void *backend)
{
  CtxBackend *base = (CtxBackend*)backend;
  return base ? base->ctx : NULL;
}

/* Prototypes only; see the definitions for behaviour. */
void
_ctx_texture_prepare_color_management (CtxState  *state,
                                       CtxBuffer *buffer);

/* Takes the key as a 32-bit hash, like ctx_get_int / ctx_get_is_set. */
int ctx_is_set (Ctx *ctx, uint32_t hash);

/* Transform the point (*x, *y) by the 3x3 matrix m, in place.
 *
 * The point is treated as (x, y, 1); the third row produces w and both
 * results are multiplied by 1/w. There is no check for w == 0.
 */
static inline void
_ctx_matrix_apply_transform (const CtxMatrix *m, float *x, float *y)
{
  const float px = *x;
  const float py = *y;
  const float w  = (px * m->m[2][0]) + (py * m->m[2][1]) + m->m[2][2];
  const float inv_w = 1.0f/w;

  *x = ( (px * m->m[0][0]) + (py * m->m[0][1]) + m->m[0][2]) * inv_w;
  *y = ( (px * m->m[1][0]) + (py * m->m[1][1]) + m->m[1][2]) * inv_w;
}

/* result = t * s for 3x3 matrices (row of t times column of s).
 *
 * The product is built in a temporary and copied out at the end, so result
 * may point at the same matrix as t or s.
 */
static inline void
_ctx_matrix_multiply (CtxMatrix       *result,
                      const CtxMatrix *t,
                      const CtxMatrix *s)
{
  CtxMatrix product;

  for (unsigned int row = 0; row < 3; row++)
    for (unsigned int col = 0; col < 3; col++)
      product.m[row][col] = t->m[row][0] * s->m[0][col]
                          + t->m[row][1] * s->m[1][col]
                          + t->m[row][2] * s->m[2][col];

  *result = product;
}

/* Overwrite matrix with the 3x3 identity: ones on the diagonal, zeros
 * everywhere else. */
static inline void
_ctx_matrix_identity (CtxMatrix *matrix)
{
  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      matrix->m[row][col] = (row == col) ? 1.0f : 0.0f;
}

/* Variants of user-to-device conversion that write integer results to
 * out-parameters. NOTE(review): "prepped" presumably refers to
 * gstate.prepped_transform, which _ctx_transform_prime fills with values
 * scaled by TRANSFORM_SCALE - confirm in the definitions.
 */
void
_ctx_user_to_device_prepped (CtxState *state, float x, float y, int *out_x, int *out_y);
void
_ctx_user_to_device_prepped_fixed (CtxState *state, int x, int y, int *x_out, int *y_out);

int ctx_float_to_string_index (float val);

void
ctx_render_ctx_masked (Ctx *ctx, Ctx *d_ctx, uint32_t mask);

/* Binary (blob) state values, addressed by hashed key. */
void ctx_state_set_blob (CtxState *state, uint32_t key, const void*data, int len);
void *ctx_state_get_blob (CtxState *state, uint32_t key);

/* Forward declaration; the definition appears further down. */
static inline void
_ctx_transform_prime (CtxState *state);

void ctx_push_backend (Ctx *ctx,
                       void *backend);
void ctx_pop_backend (Ctx *ctx);

/* Fractional part of |val|: the sign is dropped first, so the result is
 * never negative. The integer part is removed with a cast to int, so the
 * magnitude must fit in an int. */
static CTX_INLINE float ctx_fmod1f (float val)
{
  const float magnitude = ctx_fabsf (val);
  return magnitude - (int) magnitude;
}

/* Remainder of val / modulus, computed as the fractional part of the
 * quotient scaled back by modulus. Because ctx_fmod1f works on the
 * absolute value of the quotient, this is not a drop-in fmodf() for
 * negative operands. */
static CTX_INLINE float ctx_fmodf (float val, float modulus)
{
  const float quotient = val/modulus;
  return ctx_fmod1f (quotient) * modulus;
}

/* Returns non-zero when val lies strictly inside (-0.001, 0.001), zero
 * otherwise (including for NaN).
 * Both bounds are combined with a bitwise AND, so the test is branch-free.
 */
static CTX_INLINE int ctx_nearly_zero(float val)
{
  return (val < 0.001f) & (val > -0.001f);
}

/* CTX_EXPORT marks functions that must stay visible in Emscripten builds;
 * it expands to nothing everywhere else. */
#if EMSCRIPTEN
#define CTX_EXPORT EMSCRIPTEN_KEEPALIVE
#else
#define CTX_EXPORT
#endif

/* Getter and setter for the feather value of a context. */
float ctx_get_feather (Ctx *ctx);
void  ctx_feather     (Ctx *ctx, float x);

CtxColor   *ctx_color_new      (void);
/* State lookups by hashed key. */
int         ctx_get_int        (Ctx *ctx, uint32_t hash);
int         ctx_get_is_set     (Ctx *ctx, uint32_t hash);
Ctx        *ctx_new_for_buffer (CtxBuffer *buffer);

/**
 * ctx_pixel_format_components:
 *
 * Returns the number of components for a given pixel format.
 */
int ctx_pixel_format_components     (CtxPixelFormat format);

void ctx_init (int *argc, char ***argv); // is a no-op but could launch
                                         // terminal

/* Arc with SVG-style parameters: radii, rotation, large-arc and sweep
 * flags and the end point (x1, y1). */
void ctx_svg_arc_to (Ctx *ctx, float rx, float ry, 
                     float rotation,  int large, int sweep,
                     float x1, float y1);

/**
 * ctx_clear_bindings:
 * @ctx: a context
 *
 * Clears registered key-bindings.
 */
void  ctx_clear_bindings     (Ctx *ctx);
/* Constructors for contexts that talk over a network connection or a pair
 * of file descriptors. */
Ctx *ctx_new_net (int width, int height, int flags, const char *hostip, int port);
Ctx *ctx_new_fds (int width, int height, int in_fd, int out_fd, int flags);

#endif
/* Miscellaneous internal prototypes. */
void
ctx_drawlist_process (Ctx *ctx, const CtxCommand *command);
CtxBackend *ctx_drawlist_backend_new (void);

void ctx_events_deinit (Ctx *ctx);
/* Writes the extents of path to the four out-parameters. */
void
ctx_path_extents_path (Ctx *ctx, float *ex1, float *ey1, float *ex2, float *ey2, CtxDrawlist *path);
int ctx_in_fill_path (Ctx *ctx, float x, float y, CtxDrawlist *path);
void ctx_buffer_deinit (CtxBuffer *buffer);
void ctx_wait_frame (Ctx *ctx, VT *vt);
extern int _ctx_depth;
/* Terminal raw-mode switching for the given file descriptor. */
int ctx_term_raw (int fd);
void ctx_term_noraw (int fd);
void ctx_color_raw (Ctx *ctx, CtxColorModel model, float *components, int stroke);
/* NOTE(review): the required size of dst relative to count and the meaning
 * of the return value are not visible here - check the definitions. */
int ctx_ydec (const char *tmp_src, char *dst, int count);
int ctx_yenc (const char *src, char *dst, int count);
/* NOTE(review): ctx_font_setup is also declared earlier in this file. */
void ctx_font_setup (Ctx *ctx);

/* A registered idle/timeout callback.
 * cb is called with idle_data; destroy_notify, when set, is given
 * destroy_data. NOTE(review): ticks_full/ticks_remaining presumably hold
 * the interval and the countdown, and the return value of cb presumably
 * decides whether the callback stays registered - confirm in the event
 * loop.
 */
typedef struct CtxIdleCb {
  int (*cb) (Ctx *ctx, void *idle_data);
  void *idle_data;

  void (*destroy_notify)(void *destroy_data);
  void *destroy_data;

  int   ticks_full;
  int   ticks_remaining;
  int   is_idle;
  int   id;
} CtxIdleCb;

/* Fixed-point scale used for gstate.prepped_transform: matrix coefficients
 * are multiplied by TRANSFORM_SCALE (1 << 10 = 1024) and truncated to int. */
#define TRANSFORM_SHIFT (10)
#define TRANSFORM_SCALE (1<<TRANSFORM_SHIFT)

/* Classify a 3x3 matrix by how general it is:
 *   1 - identity
 *   2 - only scale and/or translation differ from identity
 *   3 - anything else: off-diagonal terms in the upper 2x2, or a bottom
 *       row other than (0, 0, 1)
 * XXX : does not set 4 - which is perspective
 */
static inline int
_ctx_determine_transform_type (const CtxMatrix *m)
{
  const int bottom_row_differs = (m->m[2][0] != 0.0f) |
                                 (m->m[2][1] != 0.0f) |
                                 (m->m[2][2] != 1.0f);
  const int has_off_diagonal   = (m->m[0][1] != 0.0f) |
                                 (m->m[1][0] != 0.0f);
  const int scales_or_moves    = (m->m[0][2] != 0.0f) |
                                 (m->m[1][2] != 0.0f) |
                                 (m->m[0][0] != 1.0f) |
                                 (m->m[1][1] != 1.0f);

  if (bottom_row_differs | has_off_diagonal)
    return 3;
  return scales_or_moves ? 2 : 1;
}
/* Refresh the values derived from state->gstate.transform:
 *  - transform_type, via _ctx_determine_transform_type
 *  - prepped_transform, each coefficient scaled by TRANSFORM_SCALE and
 *    truncated to int
 *  - tolerance = (0.25 / scale)^2, and tolerance_fixed, the same value
 *    multiplied by CTX_FIX_SCALE twice
 * The scale comes from ctx_matrix_get_scale; its absolute value is
 * clamped to at least 0.01 so the division cannot blow up.
 */
static inline void
_ctx_transform_prime (CtxState *state)
{
   state->gstate.transform_type =
     _ctx_determine_transform_type (&state->gstate.transform);

   for (int row = 0; row < 3; row++)
     for (int col = 0; col < 3; col++)
       state->gstate.prepped_transform.m[row][col] =
               (int)(state->gstate.transform.m[row][col] * TRANSFORM_SCALE);

   float scale = ctx_fabsf (ctx_matrix_get_scale (&state->gstate.transform));
   if (scale <= 0.01f)
      scale = 0.01f;

   state->gstate.tolerance = 0.25f/scale;
   state->gstate.tolerance *= state->gstate.tolerance;
   state->gstate.tolerance_fixed =
     (state->gstate.tolerance * CTX_FIX_SCALE * CTX_FIX_SCALE);
}

/* Fill count consecutive 32-bit pixels starting at dst_pix with val.
 * A count of zero or less writes nothing. */
static inline void ctx_span_set_color (uint32_t *dst_pix, uint32_t val, int count)
{
  for (int i = 0; i < count; i++)
    dst_pix[i] = val;
}
/* Fill count groups of four 32-bit pixels: each group receives val[0..3]
 * in order, so 4 * count pixels are written in total.
 * A count of zero or less writes nothing. */
static inline void ctx_span_set_color_x4 (uint32_t *dst_pix, uint32_t *val, int count)
{
  for (int group = 0; group < count; group++, dst_pix += 4)
  {
    dst_pix[0] = val[0];
    dst_pix[1] = val[1];
    dst_pix[2] = val[2];
    dst_pix[3] = val[3];
  }
}

/* Composite a pre-split source over a packed destination pixel.
 *
 * @dst:        packed 8-bit-per-channel destination.
 * @si_ga_full, @si_rb_full: source contributions for the two channel
 *              pairs. They are added to dst * (255 - si_a) before the
 *              0xff00ff00 mask is applied, so they must carry their value
 *              in the upper byte of each 16-bit lane.
 * @si_a:       source alpha; the destination weight is si_a ^ 255.
 */
static inline uint32_t
ctx_over_RGBA8_full_2 (uint32_t dst, uint32_t si_ga_full, uint32_t si_rb_full, uint32_t si_a)
{
  const uint32_t inv_alpha = si_a ^ 255;
  const uint32_t dst_rb    = dst & 0x00ff00ff;
  const uint32_t dst_ga    = (dst & 0xff00ff00) >> 8;
  const uint32_t out_rb    = ((si_rb_full + dst_rb * inv_alpha) & 0xff00ff00) >> 8;
  const uint32_t out_ga    =  (si_ga_full + dst_ga * inv_alpha) & 0xff00ff00;

  return out_rb | out_ga;
}

/* Compute the starting source coordinate and the per-pixel step for a
 * horizontal span that starts at device position (x0, y0).
 *
 * The fill source transform is applied to (x0, y0, 1): the three rows give
 * *u0, *v0 and *w0. The step for moving one pixel in x is the first column
 * of the matrix, written to *ud, *vd and *wd.
 */
static inline void
ctx_init_uv (CtxRasterizer *rasterizer,
             int x0,
             int y0,
             float *u0, float *v0, float *w0, float *ud, float *vd, float *wd)
{
  CtxMatrix *source_matrix = &rasterizer->state->gstate.source_fill.transform;
  const float px = (float) x0;
  const float py = (float) y0;

  *u0 = source_matrix->m[0][0] * px +
        source_matrix->m[0][1] * py +
        source_matrix->m[0][2];
  *v0 = source_matrix->m[1][0] * px +
        source_matrix->m[1][1] * py +
        source_matrix->m[1][2];
  *w0 = source_matrix->m[2][0] * px +
        source_matrix->m[2][1] * py +
        source_matrix->m[2][2];

  *ud = source_matrix->m[0][0];
  *vd = source_matrix->m[1][0];
  *wd = source_matrix->m[2][0];
}


/* Prototypes for the rectangle compositing functions, with each name
 * passed through CTX_SIMD_SUFFIX(). NOTE(review): the macro presumably
 * appends a per-build suffix so that several variants can coexist and be
 * assigned to the function pointers of the same base name declared earlier
 * - confirm.
 */
void
CTX_SIMD_SUFFIX (ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                          float          x0,
                          float          y0,
                          float          x1,
                          float          y1,
                          uint8_t        cov);
void
CTX_SIMD_SUFFIX(ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width);

/* Pack 8-bit red, green and blue into a 16-bit 5-6-5 pixel.
 *
 * The low bits of each channel are truncated (no rounding): red keeps its
 * top 5 bits in bits 11-15, green its top 6 bits in bits 5-10, blue its top
 * 5 bits in bits 0-4. When byteswap is non-zero the two bytes of the result
 * are exchanged.
 */
static inline uint16_t
ctx_565_pack (uint8_t  red,
              uint8_t  green,
              uint8_t  blue,
              const int      byteswap)
{
  const uint32_t packed = ((uint32_t) (red   >> 3) << 11) |
                          ((uint32_t) (green >> 2) << 5)  |
                           (uint32_t) (blue  >> 3);

  if (!byteswap)
    return (uint16_t) packed;
  return (uint16_t) ((packed >> 8) | (packed << 8)); /* swap bytes */
}
/* Expand a 16-bit 5-6-5 pixel into a packed 32-bit pixel with red in bits
 * 0-7, green in bits 8-15, blue in bits 16-23 and alpha fixed at 0xff.
 *
 * When byteswap is non-zero the two bytes of pixel are exchanged first.
 * Each channel is shifted up to 8 bits with zero fill, so full intensity
 * unpacks to 248 (red, blue) or 252 (green), not 255.
 */
static inline uint32_t
ctx_565_unpack_32 (const uint16_t pixel,
                   const int byteswap)
{
  const uint16_t native = byteswap ? (uint16_t) ((pixel >> 8) | (pixel << 8))
                                   : pixel;
  const uint32_t red   = (uint32_t) ((native >> 11) & 31) << 3;
  const uint32_t green = (uint32_t) ((native >> 5)  & 63) << 2;
  const uint32_t blue  = (uint32_t)  (native        & 31) << 3;

  return red | (green << 8) | (blue << 16) | 0xff000000u;
}



/* Prototypes for further entry points that are named through
 * CTX_SIMD_SUFFIX(). */
void
CTX_SIMD_SUFFIX (ctx_composite_setup) (CtxRasterizer *rasterizer);
void
CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule);

/* Takes the common compositing argument list plus a scanline count. */
void
CTX_SIMD_SUFFIX(ctx_RGBA8_source_over_normal_full_cov_fragment) (CTX_COMPOSITE_ARGUMENTS, int scanlines);

/* Table of pixel format descriptions; its length is not declared here. */
extern const CtxPixelFormatInfo CTX_SIMD_SUFFIX(ctx_pixel_formats)[];
/* NOTE(review): steal_data presumably transfers ownership of data to the
 * texture - confirm in the definition before freeing data in a caller. */
void
ctx_rasterizer_define_texture (CtxRasterizer *rasterizer,
                               const char    *eid,
                               int            width,
                               int            height,
                               int            format,
                               char unsigned *data,
                               int            steal_data);
void ctx_cb_destroy (void *data);
void ctx_hasher_process (Ctx *ctx, const CtxCommand *command);

/* Set all nine coefficients of a 3x3 matrix. The arguments are given in
 * row-major order: a b c fill row 0, d e f row 1, g h i row 2. */
static void
ctx_matrix_set (CtxMatrix *matrix, float a, float b, float c, float d, float e, float f, float g, float h, float i)
{
  const float rows[3][3] = {
    { a, b, c },
    { d, e, f },
    { g, h, i }
  };

  for (int row = 0; row < 3; row++)
    for (int col = 0; col < 3; col++)
      matrix->m[row][col] = rows[row][col];
}

/* Internal text and glyph entry points (prototypes only). stroke selects
 * stroking instead of filling - NOTE(review): inferred from the parameter
 * name; the meaning of visible and of the int returned by _ctx_glyph is
 * not shown here.
 */
void
_ctx_text (Ctx        *ctx,
           const char *string,
           int         stroke,
           int         visible);

int
_ctx_glyph (Ctx *ctx, int glyph_id, int stroke);

#if CTX_ENABLE_RGB565

/* Convert count RGBA8 pixels to byte-swapped 5-6-5 pixels.
 *
 * rgba is read four bytes per pixel and buf is written as one uint16_t per
 * pixel. rasterizer and x are unused. With CTX_RGB565_ALPHA, a pixel whose
 * alpha is 0 is stored as the key colour (255, 0, 255) instead of its own
 * colour; otherwise alpha is ignored.
 */
static inline void
ctx_RGBA8_to_RGB565_BS (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint16_t *out = (uint16_t *) buf;
  for (; count--; out++, rgba += 4)
    {
#if CTX_RGB565_ALPHA
      if (rgba[3] == 0)
        {
          *out = ctx_565_pack (255, 0, 255, 1);
          continue;
        }
#endif
      *out = ctx_565_pack (rgba[0], rgba[1], rgba[2], 1);
    }
}

/* Convert count byte-swapped 5-6-5 pixels to RGBA8.
 *
 * buf is read as one uint16_t per pixel and rgba is written four bytes per
 * pixel. rasterizer and x are unused. With CTX_RGB565_ALPHA, a pixel that
 * unpacks to red == 255, green == 0, blue == 255 gets alpha 0 and every
 * other pixel alpha 255; without it the alpha is whatever
 * ctx_565_unpack_32 produced.
 *
 * The unpacked pixel is stored with memcpy rather than through a
 * uint32_t pointer, so rgba needs no particular alignment and no
 * incompatible-type access takes place.
 */
static inline void
ctx_RGB565_BS_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint16_t *pixel = (const uint16_t *) buf;
  while (count--)
    {
      const uint32_t unpacked = ctx_565_unpack_32 (*pixel, 1);
      memcpy (rgba, &unpacked, 4);
#if CTX_RGB565_ALPHA
      if ((rgba[0]==255) & (rgba[2] == 255) & (rgba[1]==0))
        { rgba[3] = 0; }
      else
        { rgba[3] = 255; }
#endif
      pixel+=1;
      rgba +=4;
    }
}

#endif

/* Dispatch helpers for EvSource.
 * has_event and get_event are called unconditionally, so those two
 * callbacks must be set. destroy and set_coord are skipped when NULL, and
 * ctx_evsource_get_fd evaluates to 0 when get_fd is NULL.
 * Each macro evaluates es more than once; pass a side-effect free
 * expression.
 */
#define ctx_evsource_has_event(es)   (es)->has_event((es))
#define ctx_evsource_get_event(es)   (es)->get_event((es))
#define ctx_evsource_destroy(es)     do{if((es)->destroy)(es)->destroy((es));}while(0)
#define ctx_evsource_set_coord(es,x,y) do{if((es)->set_coord)(es)->set_coord((es),(x),(y));}while(0)
#define ctx_evsource_get_fd(es)      ((es)->get_fd?(es)->get_fd((es)):0)

#if CTX_EVENTS
extern int _ctx_mice_fd;
#endif

void ctx_drain_fd (int infd);
/* NOTE(review): ctx_rasterizer_clip is also declared earlier in this file,
 * there under CTX_RASTERIZER. */
void ctx_rasterizer_clip (CtxRasterizer *rasterizer);
typedef struct _VtLine   VtLine;
#if CTX_EVENTS
/* Event handling and event-source constructors, only available with
 * CTX_EVENTS. */
int ctx_clients_handle_events (Ctx *ctx);
void ctx_consume_events (Ctx *ctx);
void _ctx_bindings_key_press (CtxEvent *event, void *data1, void *data2);
Ctx *ctx_new_ui (int width, int height, const char *backend);
EvSource *ctx_evsource_mice_new (void);
EvSource *ctx_evsource_kb_term_new (void);
EvSource *ctx_evsource_kb_raw_new (void);
EvSource *ctx_evsource_linux_ts_new (void);
void ctx_nct_consume_events (Ctx *ctx);

typedef struct _CtxTermGlyph CtxTermGlyph;

/* One character cell for terminal output: the code point, its cell
 * position, and background/foreground colours as four 8-bit components. */
struct _CtxTermGlyph
{
  uint32_t unichar;
  int      col;
  int      row;
  uint8_t  rgba_bg[4];
  uint8_t  rgba_fg[4];
};

#include <sys/select.h>
#endif

void ctx_update_current_path (Ctx *ctx, const CtxEntry *entry);
void ctx_rasterizer_process (Ctx *ctx, const CtxCommand *command);
/* Bit positions of the four channels inside a packed 32-bit RGBA8 pixel. */
#define CTX_RGBA8_R_SHIFT  0
#define CTX_RGBA8_G_SHIFT  8
#define CTX_RGBA8_B_SHIFT  16
#define CTX_RGBA8_A_SHIFT  24

/* Channel masks. The constants are unsigned so that shifting 0xff into
 * bits 24-31 does not overflow a signed int. */
#define CTX_RGBA8_R_MASK   (0xffu << CTX_RGBA8_R_SHIFT)
#define CTX_RGBA8_G_MASK   (0xffu << CTX_RGBA8_G_SHIFT)
#define CTX_RGBA8_B_MASK   (0xffu << CTX_RGBA8_B_SHIFT)
#define CTX_RGBA8_A_MASK   (0xffu << CTX_RGBA8_A_SHIFT)

#define CTX_RGBA8_RB_MASK  (CTX_RGBA8_R_MASK | CTX_RGBA8_B_MASK)
#define CTX_RGBA8_GA_MASK  (CTX_RGBA8_G_MASK | CTX_RGBA8_A_MASK)

/* Premultiply the colour channels of one RGBA8 pixel by its alpha, in
 * place.
 *
 * @u8: four bytes, alpha in u8[3]. No alignment is required: the pixel is
 *      loaded and stored with memcpy.
 *
 * Each channel becomes (channel * alpha) >> 8; red and blue are scaled
 * together in one multiplication. Alpha itself is unchanged.
 * NOTE(review): alpha is read from byte 3 but written back at bit 24 of the
 * natively loaded word, which matches only on little-endian targets -
 * confirm that is the only supported byte order.
 */
static inline void
ctx_RGBA8_associate_alpha (uint8_t *u8)
{
  uint32_t val;
  memcpy (&val, u8, 4);
  uint32_t a = u8[3];
  uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK;
  uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK;
  uint32_t res = g|rb|(a << CTX_RGBA8_A_SHIFT);
  memcpy(u8, &res, 4);
}

/* Opaque backend types and their destructors. */
typedef struct _CtxTerm CtxTerm;
void ctx_term_destroy (CtxTerm *term);
typedef struct _CtxNet CtxNet;
void ctx_net_destroy (CtxNet *net);
extern CtxList *registered_contents;
/* Feeds a single byte to the parser. */
void ctx_parser_feed_byte (CtxParser *parser, char byte);
/* Expand a 3-3-2 pixel into 8-bit red, green and blue.
 *
 * Red is bits 5-7, green bits 2-4 and blue bits 0-1 of pixel. Blue is
 * widened to three bits by appending the lowest green bit (bit 2), and all
 * three channels are then scaled from 0..7 to 0..255.
 * The outputs are written in the order green, red, blue.
 */
inline static void
ctx_332_unpack (uint8_t pixel,
                uint8_t *red,
                uint8_t *green,
                uint8_t *blue)
{
  const int red3   = (pixel >> 5) & 7;
  const int green3 = (pixel >> 2) & 7;
  const int blue3  = ((pixel & 3) << 1) | ((pixel >> 2) & 1);

  *green = (green3 * 255) / 7;
  *red   = (red3   * 255) / 7;
  *blue  = (blue3  * 255) / 7;
}
/* Initializes a hasher. It takes and returns a CtxRasterizer pointer;
 * CtxHasher starts with an embedded CtxRasterizer. cols and rows give the
 * hash grid size. */
CtxRasterizer *
ctx_hasher_init (CtxRasterizer *rasterizer, Ctx *ctx, CtxState *state, int width, int height, int cols, int rows, CtxDrawlist *drawlist);

CtxFont *ctx_font_get_available (void);
int ctx_glyph_unichar (Ctx *ctx, uint32_t unichar, int stroke);

/* unichar is passed by pointer, so it can be rewritten; the two following
 * code points are passed by value. NOTE(review): the meaning of the int
 * return value is not visible here. */
int
ctx_text_substitute_ligatures (Ctx *ctx, CtxFont *font,
                                uint32_t *unichar, uint32_t next_unichar, uint32_t next_next_unichar);
CtxGlyph *_ctx_glyph_target (Ctx *ctx, int len);
void ctx_reset_caches (Ctx *ctx);

/* Identifies the kind of a font. The numeric values are explicit and the
 * value 2 is not assigned to any enumerator.
 * NOTE(review): what each kind stands for (FS, HB, ...) is not visible in
 * this part of the file - confirm before documenting them further. */
typedef enum {
  CTX_FONT_TYPE_CTX  = 0,
  CTX_FONT_TYPE_NONE = 1,
  CTX_FONT_TYPE_FS   = 3,
  CTX_FONT_TYPE_HB   = 4
} CtxFontType;
void ctx_cmyk_to_rgb (float c, float m, float y, float k, float *r, float *g, float *b);

#ifndef CTX_DRAWLIST_H
#define CTX_DRAWLIST_H

/* Returns how many entries follow @entry as part of the same command.
 *
 * Most codes have a fixed number of following entries. CTX_DATA stores the
 * number in its own second u32. The string carrying codes read it from the
 * second u32 of the entry after the command, and CTX_DEFINE_TEXTURE sums
 * the lengths of two such blobs.
 * Codes not listed here have no following entries.
 */
static inline int
ctx_conts_for_entry (const CtxEntry *entry)
{
    switch (entry->code)
    {
      /* fixed size commands, grouped by number of following entries */
      case CTX_LINEAR_GRADIENT:
      case CTX_CONIC_GRADIENT:
      case CTX_QUAD_TO:
      case CTX_REL_QUAD_TO:
      case CTX_RECTANGLE:
      case CTX_FILL_RECT:
      case CTX_STROKE_RECT:
      case CTX_VIEW_BOX:
        return 1;

      case CTX_CURVE_TO:
      case CTX_REL_CURVE_TO:
      case CTX_ARC:
      case CTX_ROUND_RECTANGLE:
      case CTX_RADIAL_GRADIENT:
      case CTX_COLOR:
      case CTX_SHADOW_COLOR:
        return 2;

      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
        return 3;

      case CTX_APPLY_TRANSFORM:
      case CTX_SOURCE_TRANSFORM:
        return 4;

      /* variable size commands */
      case CTX_DATA:
        return entry->data.u32[1];

      case CTX_TEXT:
      case CTX_FONT:
      case CTX_TEXTURE:
      case CTX_LINE_DASH:
      case CTX_COLOR_SPACE:
        {
          /* one header entry plus the entries of the blob itself */
          int blob_conts = entry[1].data.u32[1];
          return blob_conts + 1;
        }

      case CTX_DEFINE_TEXTURE:
        {
          /* the header of the second blob follows the entries of the first */
          int first_conts  = entry[2].data.u32[1];
          int second_conts = entry[first_conts + 3].data.u32[1];
          return first_conts + second_conts + 3;
        }

      default:
        return 0;
    }
}

void
ctx_iterator_init (CtxIterator      *iterator,
                   CtxDrawlist  *drawlist,
                   int               start_pos,
                   int               flags);

int ctx_iterator_pos (CtxIterator *iterator);

void
ctx_drawlist_resize (CtxDrawlist *drawlist, int desired_size);
int
ctx_drawlist_add_single (CtxDrawlist *drawlist, const CtxEntry *entry);
int ctx_drawlist_add_entry (CtxDrawlist *drawlist, const CtxEntry *entry);
int
ctx_drawlist_insert_entry (CtxDrawlist *drawlist, int pos, CtxEntry *entry);
int
ctx_add_data (Ctx *ctx, void *data, int length);

int ctx_drawlist_add_u32 (CtxDrawlist *drawlist, CtxCode code, uint32_t u32[2]);
int ctx_drawlist_add_data (CtxDrawlist *drawlist, const void *data, int length);

static CtxEntry
ctx_void (CtxCode code);
/* Builds an entry with the given @code and the two floats @x and @y as its
 * payload.
 */
static inline CtxEntry
ctx_f (CtxCode code, float x, float y)
{
  CtxEntry entry;
  entry.data.f[1] = y;
  entry.data.f[0] = x;
  entry.code = code;
  return entry;
}

/* Builds an entry that carries only @code.
 *
 * The payload is cleared instead of being left indeterminate, so that no
 * stale stack bytes travel along with the entry; this matches what the
 * CTX_PROCESS_VOID macro does for its entry.
 */
static inline CtxEntry
ctx_void (CtxCode code)
{
  CtxEntry command;
  memset (&command, 0, sizeof (command));
  command.code = code;
  return command;
}

static CtxEntry
ctx_u32 (CtxCode code, uint32_t x, uint32_t y);
#if 0
static CtxEntry
ctx_s32 (CtxCode code, int32_t x, int32_t y);
#endif

static inline CtxEntry
ctx_s16 (CtxCode code, int x0, int y0, int x1, int y1);
static CtxEntry
ctx_u8 (CtxCode code,
        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
        uint8_t e, uint8_t f, uint8_t g, uint8_t h);

/* Statement macros that build one command entry on the stack and pass it
 * to ctx_process(); all of them expect a variable named `ctx` to be in
 * scope at the point of use.
 *
 * None of the definitions ends in a line continuation, so they no longer
 * depend on being followed by an empty line.
 */

/* command without arguments, payload zeroed */
#define CTX_PROCESS_VOID(cmd) do {\
  CtxEntry commands[1] = {{cmd,{{0}}}};\
  ctx_process (ctx, &commands[0]);}while(0)

/* command with two float arguments */
#define CTX_PROCESS_F(cmd,x,y) do {\
  CtxEntry commands[1] = {ctx_f(cmd,x,y),};\
  ctx_process (ctx, &commands[0]);}while(0)

/* command with one float argument, the second float is 0 */
#define CTX_PROCESS_F1(cmd,x) do {\
  CtxEntry commands[1] = {ctx_f(cmd,x,0),};\
  ctx_process (ctx, &commands[0]);}while(0)

/* command with two uint32_t arguments */
#define CTX_PROCESS_U32(cmd, x, y) do {\
  CtxEntry commands[1] = {ctx_u32(cmd, x, y)};\
  ctx_process (ctx, &commands[0]);}while(0)

/* command with one byte argument; the remaining payload bytes are 0 and
 * the array holds three further zero initialized entries */
#define CTX_PROCESS_U8(cmd, x) do {\
  CtxEntry commands[4] = {ctx_u8(cmd, x,0,0,0,0,0,0,0)};\
  ctx_process (ctx, &commands[0]);}while(0)


#if CTX_BITPACK_PACKER
static unsigned int
ctx_last_history (CtxDrawlist *drawlist);
#endif

#if CTX_BITPACK_PACKER
static void
ctx_drawlist_remove_tiny_curves (CtxDrawlist *drawlist, int start_pos);

static void
ctx_drawlist_bitpack (CtxDrawlist *drawlist, unsigned int start_pos);
#endif

void
ctx_process_cmd_str (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1);
static void
ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1);
static void
ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len);

/* One edge list element. The union is declared with 1 byte packing and
 * offers two anonymous struct views of the same bytes plus a raw u32[2]
 * view.
 *
 * With CTX_32BIT_SEGMENTS unset, `val` occupies the same bytes as `x0`,
 * `delta` the same bytes as `x1`, and y0_/y1_ the same bytes as y0/y1.
 *
 * NOTE(review): with CTX_32BIT_SEGMENTS set, the second view places y0_
 * over x0 and y1_ over y0, while val and delta get storage of their own in
 * the first view - confirm that this is intended.
 */
#pragma pack(push,1)
typedef union 
CtxSegment {
#if CTX_32BIT_SEGMENTS
   /* wide variant: 16 bit code/aa, 32 bit coordinates */
   struct {
     int16_t code;
     int16_t aa;
     int32_t x0;
     int32_t y0;
     int32_t y1;
     int32_t x1;
     int32_t val;
     int32_t delta;
   };
   struct {
     int16_t code__;
     int16_t aa__;
     int32_t y0_;
     int32_t y1_;
   };
#else
   /* compact variant: 8 bit code/aa, 32 bit x and 16 bit y coordinates */
   struct {
     int8_t code;
     int8_t aa;
     int32_t x0;
     int16_t y0;
     int16_t y1;
     int32_t x1;
   };
   /* same bytes, with the x0/x1 slots named val/delta */
   struct {
     int8_t code_;
     int8_t aa_;
     int32_t val;
     int16_t y0_;
     int16_t y1_;
     int32_t delta;
   };
#endif
   uint32_t u32[2];  /* raw access to the first eight bytes */
  } CtxSegment;
#pragma pack(pop)

/* Builds a segment with the given @code going from (@x0, @y0) to
 * (@x1, @y1). Members other than code and the four coordinates are not
 * written.
 */
static inline CtxSegment
ctx_segment_s16 (CtxRasterizerCode code, int x0, int y0, int x1, int y1)
{
  CtxSegment seg;
  seg.code = code;
  seg.y1 = y1;
  seg.x1 = x1;
  seg.y0 = y0;
  seg.x0 = x0;
  return seg;
}

/* Makes the entries array of an edge list hold @desired_size segments,
 * clamped to CTX_MIN_EDGE_LIST_SIZE .. CTX_MAX_EDGE_LIST_SIZE.
 *
 * With CTX_DRAWLIST_STATIC the list is pointed at one static buffer of
 * CTX_MAX_EDGE_LIST_SIZE segments and @desired_size is ignored.
 *
 * Otherwise a new array is allocated and the old contents are copied over.
 * Nothing is changed when the list already has the maximum size, when the
 * clamped size equals the current size, or when the allocation fails - in
 * the last case the old array stays valid. Only as many segments as fit in
 * the new array are copied, so a smaller target size cannot overrun it.
 * drawlist->count is never modified here.
 */
static inline void
ctx_edgelist_resize (CtxDrawlist *drawlist, int desired_size)
{
#if CTX_DRAWLIST_STATIC
    {
      static CtxSegment sbuf[CTX_MAX_EDGE_LIST_SIZE];
      (void) desired_size;
      drawlist->entries = (CtxEntry*)&sbuf[0];
      drawlist->size = CTX_MAX_EDGE_LIST_SIZE;
      drawlist->flags = CTX_DRAWLIST_DOESNT_OWN_ENTRIES;
    }
#else
  const int min_size = CTX_MIN_EDGE_LIST_SIZE;
  const int max_size = CTX_MAX_EDGE_LIST_SIZE;

  if (CTX_UNLIKELY(drawlist->size == max_size))
    { return; }

  int new_size = ctx_maxi (desired_size, min_size);
  new_size = ctx_mini (new_size, max_size);
  if (new_size == drawlist->size)
    { return; }

  const size_t item_size = sizeof (CtxSegment);
  CtxEntry *ne = (CtxEntry *) ctx_malloc (item_size * new_size);
  if (!ne)
    { return; } /* out of memory: keep the list as it was */

  if (drawlist->entries)
    {
      /* never copy more than the smaller of the two arrays can hold */
      int keep = ctx_mini (drawlist->size, new_size);
      memcpy (ne, drawlist->entries, keep * item_size);
      ctx_free (drawlist->entries);
    }
  drawlist->entries = ne;
  drawlist->size = new_size;
#endif
}

/* Appends one segment to an edge list and returns the index it was stored
 * at. @entry is read as a CtxSegment.
 *
 * The list is grown when fewer than three free slots remain. Returns 0
 * without storing anything when the list is within 20 slots of
 * CTX_MAX_EDGE_LIST_SIZE, or when growing did not leave the list with a
 * usable slot. Note that 0 is also the index of the first segment.
 */
static CTX_INLINE int
ctx_edgelist_add_single (CtxDrawlist *drawlist, CtxEntry *entry)
{
  int ret = drawlist->count;

  if (CTX_UNLIKELY(ret + 2 >= drawlist->size))
    {
      if (CTX_UNLIKELY(ret+2 >= CTX_MAX_EDGE_LIST_SIZE- 20))
        return 0;
      int new_ = ctx_maxi (drawlist->size * 2, ret + 1024);
      new_ = ctx_mini (CTX_MAX_EDGE_LIST_SIZE, new_);
      ctx_edgelist_resize (drawlist, new_);
      /* do not write through a missing array or past its end if the
       * resize could not provide room */
      if (CTX_UNLIKELY(!drawlist->entries || ret >= drawlist->size))
        return 0;
    }

  ((CtxSegment*)(drawlist->entries))[ret] = *(CtxSegment*)entry;
  drawlist->count++;
  return ret;
}

// special return values - controlling argument behavior for some codes
//
// ctx_arguments_for_code() returns these instead of a plain argument count
// for codes whose argument list is not a fixed number of values; all of
// them are larger than any fixed count that function returns.
#define CTX_ARG_COLLECT_NUMBERS             50
#define CTX_ARG_STRING_OR_NUMBER            100
#define CTX_ARG_NUMBER_OF_COMPONENTS        200
#define CTX_ARG_NUMBER_OF_COMPONENTS_PLUS_1 201

/* Returns the number of arguments @code takes, or one of the CTX_ARG_*
 * values for codes with a variable argument list. Codes that are not
 * listed yield 0.
 */
static inline int ctx_arguments_for_code (CtxCode code)
{
  int n_args = 0;

  switch (code)
    {
      /* single argument */
      case CTX_GLOBAL_ALPHA:
      case CTX_COMPOSITING_MODE:
      case CTX_BLEND_MODE:
      case CTX_EXTEND:
      case CTX_FONT_SIZE:
      case CTX_LINE_JOIN:
      case CTX_LINE_CAP:
      case CTX_LINE_WIDTH:
      case CTX_LINE_DASH_OFFSET:
      case CTX_STROKE_POS:
      case CTX_FEATHER:
      case CTX_LINE_HEIGHT:
      case CTX_WRAP_LEFT:
      case CTX_WRAP_RIGHT:
      case CTX_IMAGE_SMOOTHING:
      case CTX_SHADOW_BLUR:
      case CTX_SHADOW_OFFSET_X:
      case CTX_SHADOW_OFFSET_Y:
      case CTX_FILL_RULE:
      case CTX_TEXT_ALIGN:
      case CTX_TEXT_BASELINE:
      case CTX_TEXT_DIRECTION:
      case CTX_MITER_LIMIT:
      case CTX_REL_VER_LINE_TO:
      case CTX_REL_HOR_LINE_TO:
      case CTX_HOR_LINE_TO:
      case CTX_VER_LINE_TO:
      case CTX_ROTATE:
      case CTX_GLYPH:
        n_args = 1;
        break;

      /* two arguments */
      case CTX_TRANSLATE:
      case CTX_SCALE:
      case CTX_MOVE_TO:
      case CTX_REL_MOVE_TO:
      case CTX_LINE_TO:
      case CTX_REL_LINE_TO:
      case CTX_SMOOTHQ_TO:
      case CTX_REL_SMOOTHQ_TO:
        n_args = 2;
        break;

      /* four arguments */
      case CTX_LINEAR_GRADIENT:
      case CTX_CONIC_GRADIENT:
      case CTX_QUAD_TO:
      case CTX_REL_QUAD_TO:
      case CTX_SMOOTH_TO:
      case CTX_REL_SMOOTH_TO:
      case CTX_RECTANGLE:
      case CTX_FILL_RECT:
      case CTX_STROKE_RECT:
      case CTX_VIEW_BOX:
        n_args = 4;
        break;

      case CTX_ROUND_RECTANGLE:
        n_args = 5;
        break;

      /* six arguments */
      case CTX_ARC:
      case CTX_CURVE_TO:
      case CTX_REL_CURVE_TO:
      case CTX_RADIAL_GRADIENT:
        n_args = 6;
        break;

      case CTX_ARC_TO:
      case CTX_REL_ARC_TO:
        n_args = 7;
        break;

      case CTX_APPLY_TRANSFORM:
      case CTX_SOURCE_TRANSFORM:
        n_args = 9;
        break;

      /* variable argument lists */
      case CTX_TEXT:
      case CTX_FONT:
      case CTX_COLOR_SPACE:
      case CTX_DEFINE_GLYPH:
      case CTX_KERNING_PAIR:
      case CTX_TEXTURE:
      case CTX_DEFINE_TEXTURE:
        n_args = CTX_ARG_STRING_OR_NUMBER;
        break;

      case CTX_LINE_DASH: /* append to current dashes for each argument encountered */
        n_args = CTX_ARG_COLLECT_NUMBERS;
        break;

      //case CTX_SET_KEY:
      case CTX_COLOR:
      case CTX_SHADOW_COLOR:
        n_args = CTX_ARG_NUMBER_OF_COMPONENTS;
        break;

      case CTX_GRADIENT_STOP:
        n_args = CTX_ARG_NUMBER_OF_COMPONENTS_PLUS_1;
        break;

      /* no arguments */
      case CTX_SAVE:
      case CTX_RESTORE:
      case CTX_START_GROUP:
      case CTX_END_GROUP:
      case CTX_IDENTITY:
      case CTX_CLOSE_PATH:
      case CTX_RESET_PATH:
      case CTX_START_FRAME:
      case CTX_END_FRAME:
      case CTX_STROKE:
      case CTX_FILL:
      case CTX_PAINT:
      case CTX_DEFINE_FONT:
      case CTX_NEW_PAGE:
      case CTX_CLIP:
      /* the following codes are listed next to the default on purpose,
       * they get the same result as any unlisted code */
      case CTX_SET_RGBA_U8:
      case CTX_NOP:
      case CTX_CONT:
      case CTX_DATA:
      case CTX_DATA_REV:
      case CTX_SET_PIXEL:
      case CTX_REL_LINE_TO_X4:
      case CTX_REL_LINE_TO_REL_CURVE_TO:
      case CTX_REL_CURVE_TO_REL_LINE_TO:
      case CTX_REL_CURVE_TO_REL_MOVE_TO:
      case CTX_REL_LINE_TO_X2:
      case CTX_MOVE_TO_REL_LINE_TO:
      case CTX_REL_LINE_TO_REL_MOVE_TO:
      case CTX_FILL_MOVE_TO:
      case CTX_REL_QUAD_TO_REL_QUAD_TO:
      case CTX_REL_QUAD_TO_S16:
      case CTX_STROKE_SOURCE:
      default:
        n_args = 0;
        break;
    }

  return n_args;
}

/* Builds an entry with the given @code and the two 32 bit values @x and @y
 * as its payload.
 */
static CtxEntry
ctx_u32 (CtxCode code, uint32_t x, uint32_t y)
{
  CtxEntry entry = ctx_void (code);
  entry.data.u32[1] = y;
  entry.data.u32[0] = x;
  return entry;
}

/* Builds an entry with the given @code and the eight bytes @a .. @h, in
 * that order, as its payload.
 */
static CtxEntry
ctx_u8 (CtxCode code,
        uint8_t a, uint8_t b, uint8_t c, uint8_t d,
        uint8_t e, uint8_t f, uint8_t g, uint8_t h)
{
  const uint8_t payload[8] = {a, b, c, d, e, f, g, h};
  CtxEntry entry;

  entry.code = code;
  for (int i = 0; i < 8; i++)
    entry.data.u8[i] = payload[i];
  return entry;
}

/* Processes the command @code with the two 32 bit arguments @arg0/@arg1
 * followed by @len bytes of @string as a CTX_DATA blob.
 *
 * The command entry is followed by a CTX_DATA header (byte length and
 * number of following entries) and by the entries holding the bytes, which
 * are always 0 terminated. The entries live in a variable length array on
 * the stack.
 *
 * A negative @len or a NULL @string is treated as an empty string, rather
 * than producing an invalid array size or a copy from NULL.
 */
static void
ctx_process_cmd_str_with_len (Ctx *ctx, CtxCode code, const char *string, uint32_t arg0, uint32_t arg1, int len)
{
  if (len < 0 || string == NULL)
    len = 0;

  /* entries needed for len bytes plus terminator, the data starts one
   * byte into the first of them */
  int data_conts = (len+1+1)/9 + 1;
  CtxEntry commands[1 + 1 + data_conts];
  memset (commands, 0, sizeof (commands) );
  commands[0] = ctx_u32 (code, arg0, arg1);
  commands[1].code = CTX_DATA;
  commands[1].data.u32[0] = len;
  commands[1].data.u32[1] = data_conts;
  if (len > 0)
    memcpy( (char *) &commands[2].data.u8[0], string, len);
  ( (char *) (&commands[2].data.u8[0]) ) [len]=0;
  ctx_process (ctx, commands);
}

/* Same as ctx_process_cmd_str_with_len() for a 0 terminated @string and
 * two float arguments; the bit patterns of the floats are passed on
 * unchanged in the two 32 bit argument slots.
 */
static void
ctx_process_cmd_str_float (Ctx *ctx, CtxCode code, const char *string, float arg0, float arg1)
{
  uint32_t bits[2];

  memcpy (&bits[0], &arg0, sizeof (bits[0]));
  memcpy (&bits[1], &arg1, sizeof (bits[1]));
  ctx_process_cmd_str_with_len (ctx, code, string, bits[0], bits[1],
                                ctx_strlen (string));
}

void
ctx_drawlist_compact (CtxDrawlist *drawlist);

#endif


#ifndef __clang__
#if CTX_COMPOSITE_O3
#pragma GCC push_options
#pragma GCC optimize("O3")
#endif
#if CTX_COMPOSITE_O2
#pragma GCC push_options
#pragma GCC optimize("O2")
#endif
#endif

#if CTX_COMPOSITE

#define CTX_REFERENCE 0


/* Scales the alpha byte of the pixel at @u8 by @global_alpha and
 * premultiplies the three color bytes by the resulting alpha, in place.
 *
 * The pixel is moved in and out of a uint32_t with memcpy, so @u8 does not
 * have to be 4-byte aligned and no uint8_t storage is accessed through a
 * uint32_t lvalue.
 */
inline static void
ctx_RGBA8_associate_global_alpha (uint8_t *u8, uint8_t global_alpha)
{
  uint32_t val;
  memcpy (&val, u8, sizeof (val));
  uint32_t a = (u8[3] * global_alpha + 255) >> 8;
  /* green on its own, red and blue together */
  uint32_t g = (((val & CTX_RGBA8_G_MASK) * a) >> 8) & CTX_RGBA8_G_MASK;
  uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * a) >> 8) & CTX_RGBA8_RB_MASK;
  uint32_t res = g|rb|(a << CTX_RGBA8_A_SHIFT);
  memcpy (u8, &res, sizeof (res));
}

/* Value variant of ctx_RGBA8_associate_global_alpha(): takes the alpha
 * from the top byte of @val, scales it by @global_alpha, and returns the
 * pixel with its color components multiplied by that scaled alpha.
 */
inline static uint32_t
ctx_RGBA8_associate_global_alpha_u32 (uint32_t val, uint8_t global_alpha)
{
  const uint32_t alpha = ((val>>24) * global_alpha + 255) >> 8;
  const uint32_t red_blue =
    (((val & CTX_RGBA8_RB_MASK) * alpha) >> 8) & CTX_RGBA8_RB_MASK;
  const uint32_t green =
    (((val & CTX_RGBA8_G_MASK) * alpha) >> 8) & CTX_RGBA8_G_MASK;

  return green | red_blue | (alpha << CTX_RGBA8_A_SHIFT);
}

// mixes global alpha in with existing global alpha
//
// every component of val, alpha included, is scaled by global_alpha; the
// color components are not multiplied by the alpha of val itself
inline static uint32_t
ctx_RGBA8_mul_alpha_u32(uint32_t val, uint8_t global_alpha)
{
  const uint32_t alpha = ((val>>24) * global_alpha + 255) >> 8;
  const uint32_t red_blue =
    (((val & CTX_RGBA8_RB_MASK) * global_alpha) >> 8) & CTX_RGBA8_RB_MASK;
  const uint32_t green =
    (((val & CTX_RGBA8_G_MASK) * global_alpha) >> 8) & CTX_RGBA8_G_MASK;

  return green | red_blue | (alpha << CTX_RGBA8_A_SHIFT);
}

/* Bilinear mix of four RGBA8 pixels: the pairs (isrc00, isrc01) and
 * (isrc10, isrc11) are each interpolated with @dx, the two results are
 * then interpolated with @dy. Returns 0 right away when all four pixels
 * have zero alpha.
 */
CTX_INLINE static uint32_t ctx_bi_RGBA8_alpha (uint32_t isrc00, uint32_t isrc01, uint32_t isrc10, uint32_t isrc11, uint8_t dx, uint8_t dy)
{
  const uint32_t combined = isrc00 | isrc01 | isrc10 | isrc11;
  uint32_t row0_ga, row0_rb;
  uint32_t row1_ga, row1_rb;

  if ((combined & CTX_RGBA8_A_MASK) == 0)
    return 0;

  ctx_lerp_RGBA8_split (isrc00, isrc01, dx, &row0_ga, &row0_rb);
  ctx_lerp_RGBA8_split (isrc10, isrc11, dx, &row1_ga, &row1_rb);
  return ctx_lerp_RGBA8_merge (row0_ga, row0_rb, row1_ga, row1_rb, dy);
}

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE

/* Maps the gradient position @v to an index into the gradient cache of
 * @rasterizer: @v is scaled by the index of the last element, rounded, and
 * clamped to 0 .. last.
 */
inline static int ctx_grad_index (CtxRasterizer *rasterizer, float v)
{
  int idx = (int)(v * (rasterizer->gradient_cache_elements - 1) + 0.5f);

  if (!(idx > 0))
    idx = 0;
  return ctx_mini (rasterizer->gradient_cache_elements-1, idx);
}

/* Integer variant of ctx_grad_index(): @v is shifted right by 8 bits and
 * the result is clamped to 0 .. index of the last cache element.
 */
CTX_INLINE static int ctx_grad_index_i (CtxRasterizer *rasterizer, int v)
{
  int idx = v >> 8;

  if (!(idx > 0))
    idx = 0;
  return ctx_mini (rasterizer->gradient_cache_elements-1, idx);
}

//static void
//ctx_gradient_cache_reset (void)
//{
//  ctx_gradient_cache_valid = 0;
//}
#endif


/* Computes the premultiplied RGBA8 color of the current gradient at
 * position @x and stores it in @rgba (4 bytes). @y is not used.
 *
 * @x is clamped to 0 .. 1. Without any stops the result is an opaque gray
 * ramp. Otherwise the pair of neighbouring stops enclosing the position is
 * interpolated; a position that no pair encloses gets the color of the
 * first stop when it lies before that stop, and the color of the last stop
 * in all other cases. Global alpha is applied, red and blue are exchanged
 * when rasterizer->swap_red_green is set, and the result is premultiplied.
 *
 * Pixels are moved between byte arrays and uint32_t with memcpy, so
 * neither @rgba nor the local byte arrays need 4-byte alignment.
 */
CTX_INLINE static void
_ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
  float v = x;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  CtxState *state = rasterizer->state;
  CtxGradient *g = &state->gradient;
  v *= (v>0);
  if (v > 1) { v = 1; }

  if (g->n_stops == 0)
    {
      rgba[0] = rgba[1] = rgba[2] = (int)(v * 255);
      rgba[3] = 255;
      return;
    }

  /* find the pair of stops with stop->pos <= v < next_stop->pos */
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = NULL;
  for (int s = 0; s + 1 < g->n_stops; s++)
    {
      if (v >= g->stops[s].pos && v < g->stops[s+1].pos)
        {
          stop      = &g->stops[s];
          next_stop = &g->stops[s+1];
          break;
        }
    }

  if (stop)
    {
      uint8_t stop_rgba[4];
      uint8_t next_rgba[4];
      uint32_t stop_u32, next_u32, mixed;
      ctx_color_get_rgba8 (state, & (stop->color), stop_rgba);
      ctx_color_get_rgba8 (state, & (next_stop->color), next_rgba);
      /* the divisor is > 0 here, the search guarantees pos < next pos */
      int dx = (int)((v - stop->pos) * 255 / (next_stop->pos - stop->pos));
      memcpy (&stop_u32, stop_rgba, sizeof (stop_u32));
      memcpy (&next_u32, next_rgba, sizeof (next_u32));
      mixed = ctx_lerp_RGBA8 (stop_u32, next_u32, dx);
      memcpy (rgba, &mixed, sizeof (mixed));
      rgba[3]=(rgba[3]*global_alpha_u8+255)>>8;
      if (rasterizer->swap_red_green)
      {
         uint8_t tmp = rgba[0];
         rgba[0] = rgba[2];
         rgba[2] = tmp;
      }
      ctx_RGBA8_associate_alpha (rgba);
      return;
    }

  /* outside of all stop pairs: extend the nearest end of the gradient */
  CtxColor *color;
  if (v < g->stops[0].pos)
    {
      color = & (g->stops[0].color);
    }
  else
    {
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_rgba8 (state, color, rgba);
  if (rasterizer->swap_red_green)
  {
    uint8_t tmp = rgba[0];
    rgba[0] = rgba[2];
    rgba[2] = tmp;
  }
  rgba[3]=(rgba[3]*global_alpha_u8+255)>>8;
  ctx_RGBA8_associate_alpha (rgba);
}

#if CTX_GRADIENT_CACHE
static void
ctx_gradient_cache_prime (CtxRasterizer *rasterizer);
#endif

/* Stores the premultiplied RGBA8 gradient color for position @x in @rgba.
 *
 * With CTX_GRADIENT_CACHE the four bytes are copied from the gradient
 * cache entry selected by ctx_grad_index(); the copy is done with memcpy so
 * @rgba does not have to be 4-byte aligned. Without the cache the color is
 * computed by _ctx_fragment_gradient_1d_RGBA8().
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_RGBA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
#if CTX_GRADIENT_CACHE
  memcpy (rgba, &rasterizer->gradient_cache_u8[ctx_grad_index(rasterizer, x)][0], 4);
#else
 _ctx_fragment_gradient_1d_RGBA8 (rasterizer, x, y, rgba);
#endif
}
#endif

/* Premultiplies, in place, the first @components - 1 bytes of @u8 by the
 * last component, which is taken as alpha; the products are rounded up
 * with + 255 before the >> 8.
 */
CTX_INLINE static void
ctx_u8_associate_alpha (int components, uint8_t *u8)
{
  const int alpha_idx = components - 1;
  int c = 0;

  while (c < alpha_idx)
    {
      u8[c] = (u8[c] * u8[alpha_idx] + 255)>>8;
      c++;
    }
}

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
/* Fills the gradient cache of @rasterizer unless it is already marked
 * valid.
 *
 * The number of cache elements is derived from a length estimate of the
 * fill source (linear gradient length, larger radius of a radial gradient,
 * or CTX_GRADIENT_CACHE_ELEMENTS for conic gradients; 100 otherwise) run
 * through the current transform, limited to 4 .. CTX_GRADIENT_CACHE_ELEMENTS.
 * The limiting happens on the float value, before the conversion to int,
 * so that very large or NaN lengths cannot overflow the conversion.
 */
static void
ctx_gradient_cache_prime (CtxRasterizer *rasterizer)
{
  // XXX : todo  make the number of element dynamic depending on length of gradient
  // in device coordinates.

  if (rasterizer->gradient_cache_valid)
    return;

  {
    CtxSource *source = &rasterizer->state->gstate.source_fill;
    float length = 100;
    if (source->type == CTX_SOURCE_LINEAR_GRADIENT)
       length = source->linear_gradient.length;
    else if (source->type == CTX_SOURCE_RADIAL_GRADIENT)
       length = ctx_maxf (source->radial_gradient.r1, source->radial_gradient.r0);
    else if (source->type == CTX_SOURCE_CONIC_GRADIENT)
      length = CTX_GRADIENT_CACHE_ELEMENTS;
    {
       float u = length; float v = length;
       const CtxMatrix *m = &rasterizer->state->gstate.transform;
       //CtxMatrix *transform = &source->transform;
       //
       //  combine with above source transform?
       _ctx_matrix_apply_transform (m, &u, &v);
       length = ctx_maxf (u, v);
    }
    /* written as a negated >= so that NaN ends up at the minimum too */
    if (!(length >= 4)) length = 4;
    if (length > CTX_GRADIENT_CACHE_ELEMENTS) length = CTX_GRADIENT_CACHE_ELEMENTS;

    rasterizer->gradient_cache_elements = (int)length;
  }

  for (int u = 0; u < rasterizer->gradient_cache_elements; u++)
  {
    /* positions are spread evenly over 0 .. 1, both ends included */
    float v = u / (rasterizer->gradient_cache_elements - 1.0f);
    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 0.0f, &rasterizer->gradient_cache_u8[u][0]);
  }
  rasterizer->gradient_cache_valid = 1;
}
#endif

/* Computes the gray + alpha color of the current gradient at position @x
 * and stores it in the two bytes at @rgba. @y is not used.
 *
 * @x is clamped to 0 .. 1. Without any stops the result is an opaque gray
 * ramp. Otherwise the pair of neighbouring stops enclosing the position is
 * interpolated; a position that no pair encloses gets the color of the
 * first stop when it lies before that stop, and the color of the last stop
 * in all other cases. Global alpha is not applied and the result is not
 * premultiplied by this function.
 *
 * Only rgba[0] and rgba[1] are written.
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, uint8_t *rgba)
{
  float v = x;
  CtxState *state = rasterizer->state;
  CtxGradient *g = &state->gradient;
  if (v < 0) { v = 0; }
  if (v > 1) { v = 1; }
  if (g->n_stops == 0)
    {
      rgba[0] = (int)(v * 255);
      rgba[1] = 255;
      return;
    }

  /* find the pair of stops with stop->pos <= v < next_stop->pos */
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = NULL;
  for (int s = 0; s + 1 < g->n_stops; s++)
    {
      if (v >= g->stops[s].pos && v < g->stops[s+1].pos)
        {
          stop      = &g->stops[s];
          next_stop = &g->stops[s+1];
          break;
        }
    }

  if (stop)
    {
      uint8_t stop_rgba[4];
      uint8_t next_rgba[4];
      ctx_color_get_graya_u8 (state, & (stop->color), stop_rgba);
      ctx_color_get_graya_u8 (state, & (next_stop->color), next_rgba);
      /* the divisor is > 0 here, the search guarantees pos < next pos */
      int dx = (int)((v - stop->pos) * 255 / (next_stop->pos - stop->pos));
      for (int c = 0; c < 2; c++)
        { rgba[c] = ctx_lerp_u8 (stop_rgba[c], next_rgba[c], dx); }
      return;
    }

  /* outside of all stop pairs: extend the nearest end of the gradient */
  CtxColor *color;
  if (v < g->stops[0].pos)
    {
      color = & (g->stops[0].color);
    }
  else
    {
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_graya_u8 (state, color, rgba);
}

/* Computes the premultiplied float RGBA color of the current gradient at
 * position @v and stores it in the four floats at @rgba. @y is not used.
 *
 * @v is clamped to 0 .. 1. Without any stops the result is an opaque gray
 * ramp. Otherwise the pair of neighbouring stops enclosing the position is
 * interpolated; a position that no pair encloses gets the color of the
 * first stop when it lies before that stop, and the color of the last stop
 * in all other cases. Global alpha is multiplied into the alpha and the
 * color components are premultiplied by the result.
 */
CTX_INLINE static void
ctx_fragment_gradient_1d_RGBAF (CtxRasterizer *rasterizer, float v, float y, float *rgba)
{
  float global_alpha = rasterizer->state->gstate.global_alpha_f;
  CtxState *state = rasterizer->state;
  CtxGradient *g = &state->gradient;
  v *= (v>0);
  if (v > 1) { v = 1; }
  if (g->n_stops == 0)
    {
      rgba[0] = rgba[1] = rgba[2] = v;
      rgba[3] = 1.0;
      return;
    }

  /* find the pair of stops with stop->pos <= v < next_stop->pos */
  CtxGradientStop *stop      = NULL;
  CtxGradientStop *next_stop = NULL;
  for (int s = 0; s + 1 < g->n_stops; s++)
    {
      if (v >= g->stops[s].pos && v < g->stops[s+1].pos)
        {
          stop      = &g->stops[s];
          next_stop = &g->stops[s+1];
          break;
        }
    }

  if (stop)
    {
      float stop_rgba[4];
      float next_rgba[4];
      ctx_color_get_rgba (state, & (stop->color), stop_rgba);
      ctx_color_get_rgba (state, & (next_stop->color), next_rgba);
      /* the divisor is > 0 here, the search guarantees pos < next pos */
      float dx = (v - stop->pos) / (next_stop->pos - stop->pos);
      for (int c = 0; c < 4; c++)
        { rgba[c] = ctx_lerpf (stop_rgba[c], next_rgba[c], dx); }
      rgba[3] *= global_alpha;
      for (int c = 0; c < 3; c++)
        rgba[c] *= rgba[3];

      return;
    }

  /* outside of all stop pairs: extend the nearest end of the gradient */
  CtxColor *color;
  if (v < g->stops[0].pos)
    {
      color = & (g->stops[0].color);
    }
  else
    {
      color = & (g->stops[g->n_stops-1].color);
    }
  ctx_color_get_rgba (state, color, rgba);
  rgba[3] *= global_alpha;
  for (int c = 0; c < 3; c++)
    rgba[c] *= rgba[3];
}
#endif

/* Generic texture fragment: writes @count RGBA8 pixels to @out, sampling
 * the texture of the fill source at (@x, @y) and advancing the position by
 * (@dx, @dy) per pixel. @z and @dw are not used.
 *
 * Positions outside of the texture give 0 (all four bytes). Inside, the
 * texel is fetched either directly or, when image smoothing is enabled and
 * the texture is larger than one pixel in both directions, as a bilinear
 * mix with its right/lower neighbours (clamped at the far edges). Textures
 * with 1, 2, 3 and 4 bytes per pixel are handled: gray, gray + alpha, RGB
 * and RGBA. Global alpha is applied, red and blue are exchanged when
 * rasterizer->swap_red_green is set.
 *
 * Buffers in CTX_FORMAT_RGBA8 or CTX_FORMAT_BGRA8 are taken to hold
 * premultiplied data already; for all other formats the result is
 * premultiplied here.
 */
static void
ctx_fragment_image_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dw)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t is_assoc = (buffer->format->pixel_format == CTX_FORMAT_RGBA8 ||
                      buffer->format->pixel_format == CTX_FORMAT_BGRA8);

  int width = buffer->width;
  int height = buffer->height;
  int bpp = buffer->format->bpp/8; /* bytes per texel, same for all pixels */
  int image_smoothing = rasterizer->state->gstate.image_smoothing;
  if (width == 1 || height == 1)
    image_smoothing=0;
  for (int i = 0; i < count; i ++)
  {

  int u = (int)x;
  int v = (int)y;
  if ( (u < 0) | (v < 0) | (u >= width) | (v >= height))
      memset (rgba, 0, 4);
  else
    {
      if (image_smoothing)
      {
        /* the four texels around the position, neighbours beyond the
         * right/bottom edge fall back to the edge texel */
        uint8_t *src00 = (uint8_t *) buffer->data;
        src00 += v * buffer->stride + u * bpp;
        uint8_t *src01 = src00;
        if ( u + 1 < width)
        {
          src01 = src00 + bpp;
        }
        uint8_t *src11 = src01;
        uint8_t *src10 = src00;
        if ( v + 1 < height)
        {
          src10 = src00 + buffer->stride;
          src11 = src01 + buffer->stride;
        }
        /* fractional part of the position as 0..255 weights; named so
         * that they do not hide the dx/dy step parameters */
        float frac_x = (x-(int)(x)) * 255.9f;
        float frac_y = (y-(int)(y)) * 255.9f;
        uint8_t dxb = (uint8_t)frac_x;
        uint8_t dyb = (uint8_t)frac_y;
  
        switch (bpp)
        {
          case 1:
            rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dxb),
                                   ctx_lerp_u8 (src10[0], src11[0], dxb), dyb);
            rgba[3] = global_alpha_u8;
            break;
          case 2: // TODO : could be RGB565
            rgba[0] = rgba[1] = rgba[2] = ctx_lerp_u8 (ctx_lerp_u8 (src00[0], src01[0], dxb),
                                   ctx_lerp_u8 (src10[0], src11[0], dxb), dyb);
            rgba[3] = ctx_lerp_u8 (ctx_lerp_u8 (src00[1], src01[1], dxb),
                                   ctx_lerp_u8 (src10[1], src11[1], dxb), dyb);
            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            break;
          case 3:
            for (int c = 0; c < bpp; c++)
              { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dxb),
                                       ctx_lerp_u8 (src10[c], src11[c], dxb), dyb);
                      
              }
            rgba[3]=global_alpha_u8;
            break;
          case 4:
            if (is_assoc)
            {
              if (global_alpha_u8==255) {
                for (int c = 0; c < bpp; c++)
                  rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dxb),
                                          ctx_lerp_u8 (src10[c], src11[c], dxb), dyb);
              }
              else
                for (int c = 0; c < bpp; c++)
                  rgba[c] = (ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dxb),
                                          ctx_lerp_u8 (src10[c], src11[c], dxb), dyb) * global_alpha_u8) / 255;
            }
            else
            {
              for (int c = 0; c < bpp; c++)
              { rgba[c] = ctx_lerp_u8 (ctx_lerp_u8 (src00[c], src01[c], dxb),
                                       ctx_lerp_u8 (src10[c], src11[c], dxb), dyb);
                      
              }
              rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            }
            break;
        }
      }
      else
      {
      uint8_t *src = (uint8_t *) buffer->data;
      src += v * buffer->stride + u * bpp;
      switch (bpp)
        {
          case 1:
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[0]; }
            rgba[3] = global_alpha_u8;
            break;
          case 2: // todo could be RGB 565
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[0]; }
            rgba[3] = src[1];
            rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            break;
          case 3:
            for (int c = 0; c < 3; c++)
              { rgba[c] = src[c]; }
            rgba[3] = global_alpha_u8;
            break;
          case 4:
            if (is_assoc)
            {
              if (global_alpha_u8==255)
                for (int c = 0; c < 4; c++)
                  rgba[c] = src[c];
              else
                for (int c = 0; c < 4; c++)
                  rgba[c] = (src[c] * global_alpha_u8)/255;
            }
            else
            {
              for (int c = 0; c < 4; c++)
                { rgba[c] = src[c]; }
              rgba[3] = (rgba[3] * global_alpha_u8) / 255;
            }
            break;
        }

      }
      if (rasterizer->swap_red_green)
      {
        uint8_t tmp = rgba[0];
        rgba[0] = rgba[2];
        rgba[2] = tmp;
      }
    }
    if (!is_assoc)
      ctx_RGBA8_associate_alpha (rgba);
    rgba += 4;
    x += dx;
    y += dy;
  }
}

#if CTX_DITHER
/* Returns the dither offset for position (@x, @y) and channel @c: a
 * pattern value in -127 .. 128 divided by @divisor.
 */
static inline int ctx_dither_mask_a (int x, int y, int c, int divisor)
{
  /* https://pippin.gimp.org/a_dither/ */
  const int position = (x + c * 67) + y * 236;
  const int pattern  = (position * 119) & 255;

  return (pattern - 127) / divisor;
}

/* Adds a position dependent dither offset to the first three bytes of
 * @rgba and clamps them to 0 .. 255. Bytes 0 and 2 use @dither_red_blue as
 * divisor of the offset, byte 1 uses @dither_green; all three use channel
 * 0 of the dither mask. Does nothing when @dither_red_blue is 0.
 */
inline static void
ctx_dither_rgba_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
{
  if (dither_red_blue == 0)
    { return; }

  int val = rgba[0] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
  rgba[0] = CTX_CLAMP (val, 0, 255);

  val = rgba[1] + ctx_dither_mask_a (x, y, 0, dither_green);
  rgba[1] = CTX_CLAMP (val, 0, 255);

  val = rgba[2] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
  rgba[2] = CTX_CLAMP (val, 0, 255);
}

/* Adds a position dependent dither offset, with @dither_red_blue as its
 * divisor, to the first byte of @rgba and clamps it to 0 .. 255. Does
 * nothing when @dither_red_blue is 0; @dither_green is not used.
 */
inline static void
ctx_dither_graya_u8 (uint8_t *rgba, int x, int y, int dither_red_blue, int dither_green)
{
  if (dither_red_blue != 0)
    {
      int val = rgba[0] + ctx_dither_mask_a (x, y, 0, dither_red_blue);
      rgba[0] = CTX_CLAMP (val, 0, 255);
    }
}
#endif

#if 0
CTX_INLINE static void
ctx_RGBA8_deassociate_alpha (const uint8_t *in, uint8_t *out)
{
    uint32_t val = *((uint32_t*)(in));
    int a = val >> CTX_RGBA8_A_SHIFT;
    if (a)
    {
    if (a ==255)
    {
      *((uint32_t*)(out)) = val;
    } else
    {
      uint32_t g = (((val & CTX_RGBA8_G_MASK) * 255 / a) >> 8) & CTX_RGBA8_G_MASK;
      uint32_t rb =(((val & CTX_RGBA8_RB_MASK) * 255 / a) >> 8) & CTX_RGBA8_RB_MASK;
      *((uint32_t*)(out)) = g|rb|(a << CTX_RGBA8_A_SHIFT);
    }
    }
    else
    {
      *((uint32_t*)(out)) = 0;
    }
}
#endif

/* Converts a premultiplied pixel of @components bytes at @in to a
 * non-premultiplied one at @out; the last component is the alpha.
 *
 * Zero alpha gives an all zero pixel, an alpha of 255 is a plain copy, and
 * for any other alpha each color component becomes component*255/alpha.
 */
CTX_INLINE static void
ctx_u8_deassociate_alpha (int components, const uint8_t *in, uint8_t *out)
{
  const int alpha_idx = components - 1;

  if (!in[alpha_idx])
    {
      for (int c = 0; c < components; c++)
        out[c] = 0;
      return;
    }

  if (in[alpha_idx] == 255)
    {
      for (int c = 0; c < alpha_idx; c++)
        out[c] = in[c];
    }
  else
    {
      for (int c = 0; c < alpha_idx; c++)
        out[c] = (in[c] * 255) / in[alpha_idx];
    }
  out[alpha_idx] = in[alpha_idx];
}

/* Premultiplies, in place, the first @components - 1 floats of @rgba by
 * the last one, which is taken as alpha.
 */
CTX_INLINE static void
ctx_float_associate_alpha (int components, float *rgba)
{
  const int alpha_idx = components - 1;
  const float alpha = rgba[alpha_idx];
  int c = 0;

  while (c < alpha_idx)
    {
      rgba[c] *= alpha;
      c++;
    }
}

/* Converts the premultiplied pixel of @components floats at @rgba to a
 * non-premultiplied one at @dst; the last component is the alpha and is
 * copied as is. Color components are multiplied by 1/alpha, or by the
 * alpha itself when that is zero.
 */
CTX_INLINE static void
ctx_float_deassociate_alpha (int components, float *rgba, float *dst)
{
  const int alpha_idx = components - 1;
  const float alpha = rgba[alpha_idx];
  const float factor = (alpha != 0.0f) ? 1.0f/alpha : alpha;

  for (int c = 0; c < alpha_idx; c++)
    dst[c] = (rgba[c] * factor);
  dst[alpha_idx] = rgba[alpha_idx];
}

/* Premultiplies the four component float pixel at @rgba by its alpha, in
 * place; forwards to ctx_float_associate_alpha() with 4 components.
 */
CTX_INLINE static void
ctx_RGBAF_associate_alpha (float *rgba)
{
  ctx_float_associate_alpha (4, rgba);
}

/* Writes the non-premultiplied form of the four component float pixel at
 * @rgba to @dst; forwards to ctx_float_deassociate_alpha() with 4
 * components.
 */
CTX_INLINE static void
ctx_RGBAF_deassociate_alpha (float *rgba, float *dst)
{
  ctx_float_deassociate_alpha (4, rgba, dst);
}


/* Exchanges byte 0 and byte 2 of the pixel at @data; bytes 1 and 3 are
 * left alone. (Despite the name it is the first and third component that
 * trade places.)
 */
static inline void ctx_swap_red_green_u8 (void *data)
{
  uint8_t *pixel = (uint8_t*)data;
  const uint8_t first = pixel[0];
  const uint8_t third = pixel[2];

  pixel[0] = third;
  pixel[2] = first;
}

/**** rgb8 ***/

/* Defines a static fragment function named <frag>_swap_red_green with the
 * same parameters as @frag: it runs @frag and then passes the @count
 * output pixels to ctx_fragment_swap_red_green_u8().
 */
#define CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(frag) \
static void \
frag##_swap_red_green (CtxRasterizer *rasterizer,\
                       float x, float y, float z,\
                       void *out, int count, float dx, float dy, float dz)\
{\
  frag (rasterizer, x, y, z, out, count, dx, dy, dz);\
  ctx_fragment_swap_red_green_u8 (out, count);\
}



/* Premultiplies @count RGBA8 pixels at @buf in place. When the global
 * alpha of the graphics state is not 255 it is multiplied into each pixel
 * as part of the same step.
 */
static inline void
ctx_RGBA8_apply_global_alpha_and_associate (CtxRasterizer *rasterizer,
                                         uint8_t *buf, int count)
{
  const uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  const int has_global_alpha = (global_alpha_u8 != 255);

  for (int i = 0; i < count; i++)
    {
      uint8_t *pixel = buf + i * 4;

      if (has_global_alpha)
        ctx_RGBA8_associate_global_alpha (pixel, global_alpha_u8);
      else
        ctx_RGBA8_associate_alpha (pixel);
    }
}

#if CTX_FRAGMENT_SPECIALIZE

/* Applies ctx_swap_red_green_u8() to each of the @count four byte pixels
 * at @out.
 */
static void
ctx_fragment_swap_red_green_u8 (void *out, int count)
{
  uint8_t *pixels = (uint8_t*)out;

  for (int i = 0; i < count; i++)
    ctx_swap_red_green_u8 (pixels + i * 4);
}


/* Box filtered fragment for textures with 3 bytes per pixel: writes @count
 * RGBA8 pixels to @out, sampling at (@x, @y) and advancing by (@dx, @dy)
 * per pixel. @z and @dz are not used.
 *
 * Each output pixel is the average of the (2*dim+1) x (2*dim+1) texels
 * around the position, where dim is derived from the scale of the current
 * transform. The alpha is the global alpha and the result is premultiplied.
 * Rows of the texture are addressed as width * 3 bytes apart.
 *
 * The span is handled in three runs: leading pixels whose box is not fully
 * inside the texture are set to 0, then pixels are filtered for as long as
 * the box stays inside, and whatever is left is set to 0.
 */
static void
ctx_fragment_image_rgb8_RGBA8_box (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  int width = buffer->width;
  int height = buffer->height;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  int dim = (int)((1.0f / factor) / 3);
  const int bpp = 3;

  int i = 0;

  /* leading pixels outside of the texture; x is checked against the
   * width, the same test the filtering loop below uses */
  for (; i < count && (x - dim < 0 || y - dim < 0 || x + dim >= width || y + dim >= height); i++)
  {
    memset (rgba, 0, 4);
    rgba += 4;
    x += dx;
    y += dy;
  }

  for (; i < count && !(
       x - dim < 0 || y - dim < 0 ||
       x + dim >= width ||
       y + dim >= height); i++)
  {
    int u = (int)x;
    int v = (int)y;
    uint64_t sum[4]={0,0,0,0};
    int samples = 0; /* texels summed, not to be confused with count */

    rgba[3]=global_alpha_u8; // gets lost

    for (int ov = - dim; ov <= dim; ov++)
    {
      uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
      for (int ou = - dim; ou <= dim; ou++)
      {
        for (int c = 0; c < bpp; c++)
          sum[c] += src[c];
        samples ++;
        src += bpp;
      }
    }

    /* divide by the number of samples with a 16.16 reciprocal */
    int recip = 65536/samples;
    for (int c = 0; c < bpp; c++)
      rgba[c] = sum[c] * recip >> 16;
    ctx_RGBA8_associate_alpha (rgba);

    rgba += 4;
    x += dx;
    y += dy;
  }

  for (; i < count; i++)
  {
    memset (rgba, 0, 4);
    rgba += 4;
  }
}


/* Forward declaration; the definition follows further down. */
static void
ctx_fragment_image_rgb8_RGBA8_nearest (CtxRasterizer *rasterizer,
                                       float x, float y, float z,
                                       void *out, int scount,
                                       float dx, float dy, float dz);

/* Entry point used where a "bi" sampler for rgb8 textures is requested.
 * It performs no interpolation of its own: every argument is passed
 * through unchanged to the nearest-neighbour sampler.
 */
static inline void
ctx_fragment_image_rgb8_RGBA8_bi (CtxRasterizer *rasterizer,
                                  float x, float y, float z,
                                  void *out, int scount,
                                  float dx, float dy, float dz)
{
  ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, z, out, scount, dx, dy, dz);
}

/* Nearest-neighbour fragment source for 3-bytes-per-pixel textures with a
 * per-pixel perspective divide, writing 4-byte RGBA pixels to out.
 *
 * x, y, z   : homogeneous texture coordinate of the first pixel
 * dx, dy, dz: per-pixel increment of that coordinate
 * scount    : number of pixels to produce
 *
 * Coordinates are stepped as integers scaled by 65536; the sampled texel is
 * (x/z, y/z) truncated to int.  Pixels at either end of the span that fall
 * outside the texture are written as 0.  Sampled pixels get the global alpha
 * as alpha and are passed to ctx_RGBA8_associate_alpha.
 */
static void
ctx_fragment_image_rgb8_RGBA8_nearest (CtxRasterizer *rasterizer,
                                       float x, float y, float z,
                                       void *out, int scount,
                                       float dx, float dy, float dz)
{
  unsigned int count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint8_t *data = ((uint8_t*)buffer->data);

  /* start values and steps scaled by 65536 */
  int yi_delta = (int)(dy * 65536);
  int xi_delta = (int)(dx * 65536);
  int zi_delta = (int)(dz * 65536);
  int32_t yi = (int)(y * 65536);
  int32_t xi = (int)(x * 65536);
  int32_t zi = (int)(z * 65536);
  {
    /* Trim the tail: walk backwards from the last pixel, clearing it and
     * shrinking count for as long as the projected coordinate is outside
     * [0, bwidth-1) x [0, bheight-1).
     */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      /* the (z1!=0) factor forces the reciprocal to 0 when z1 is 0 */
      float z_recip = (z1!=0) * (1.0f/z1);
      if ((u1*z_recip) <0 ||
          (v1*z_recip) <0 ||
          (u1*z_recip) >= (bwidth) - 1 ||
          (v1*z_recip) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }
  }

  /* Trim the head: clear leading pixels until the first one whose texel
   * lies strictly inside the texture border.
   */
  for (i= 0; i < count; i ++)
  {
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    if ( u  <= 0 || v  <= 0 || u+1 >= bwidth-1 || v+1 >= bheight-1)
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
  }

  /* Sample the remaining pixels.  No bounds check is done here.
   * NOTE(review): this relies on every coordinate between the trimmed head
   * and tail being inside the texture - confirm for perspective spans.
   */
  while (i < count)
  {
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    /* copy the three colour bytes of texel (u, v) */
    for (unsigned int c = 0; c < 3; c++)
      rgba[c] = data[(bwidth *v +u)*3+c];
    rgba[3] = global_alpha_u8;
    ctx_RGBA8_associate_alpha (rgba);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
    i++;
  }
}



/* Instantiate the *_swap_red_green wrappers for the three rgb8 samplers;
 * they are selected by ctx_fragment_image_rgb8_RGBA8 when the rasterizer's
 * swap_red_green flag is set.
 */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_box)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_bi)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgb8_RGBA8_nearest)


/* Dispatcher for rgb8 textures: picks one of the box / bi / nearest
 * samplers (or its *_swap_red_green twin when rasterizer->swap_red_green is
 * set) and forwards all arguments to it.
 *
 * Selection rules:
 *   - smoothing disabled, or a texture that is 1 pixel wide or high
 *                                         -> nearest
 *   - transform scale factor <= 0.5       -> box
 *   - factor within (0.99, 1.01), only when
 *     CTX_ALWAYS_USE_NEAREST_FOR_SCALE1   -> nearest
 *   - anything else                       -> bi
 */
static inline void
ctx_fragment_image_rgb8_RGBA8 (CtxRasterizer *rasterizer,
                               float x,
                               float y,
                               float z,
                               void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  const int swap_channels = (rasterizer->swap_red_green != 0);
  const int smooth = rasterizer->state->gstate.image_smoothing &&
                     buffer->width != 1 && buffer->height != 1;

  if (!smooth)
  {
    if (swap_channels)
      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, z,
                                                            out, count, dx, dy, dz);
    else
      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, z,
                                             out, count, dx, dy, dz);
    return;
  }

  /* the scale factor is only needed (and only computed) when smoothing */
  const float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);

  if (factor <= 0.50f)
  {
    if (swap_channels)
      ctx_fragment_image_rgb8_RGBA8_box_swap_red_green (rasterizer, x, y, z,
                                                        out, count, dx, dy, dz);
    else
      ctx_fragment_image_rgb8_RGBA8_box (rasterizer, x, y, z,
                                         out, count, dx, dy, dz);
    return;
  }

#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
  if ((factor > 0.99f) & (factor < 1.01f))
  {
    if (swap_channels)
      ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green (rasterizer, x, y, z,
                                                            out, count, dx, dy, dz);
    else
      ctx_fragment_image_rgb8_RGBA8_nearest (rasterizer, x, y, z,
                                             out, count, dx, dy, dz);
    return;
  }
#endif

  if (swap_channels)
    ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green (rasterizer, x, y, z,
                                                     out, count, dx, dy, dz);
  else
    ctx_fragment_image_rgb8_RGBA8_bi (rasterizer, x, y, z,
                                      out, count, dx, dy, dz);
}


/************** rgba8 */

/* Box-filtered fragment source for 4-bytes-per-pixel textures.
 *
 * For each of the `count` output pixels all four channels of the texture
 * are averaged over a (2*dim+1) x (2*dim+1) window centred on the truncated
 * (x, y) coordinate, where dim is derived from the scale of the current
 * transform.  The averaged alpha is scaled by the global alpha and the
 * pixel is then passed to ctx_RGBA8_associate_alpha.
 *
 * The span is processed in three phases:
 *   1. leading pixels whose window does not fit inside the texture -> 0
 *   2. pixels whose window fits -> filtered
 *   3. everything after the first pixel that stops fitting -> 0
 *
 * x, y: texture coordinate of the first pixel; dx, dy: per-pixel step.
 * z and dz are accepted for signature compatibility and are not used.
 */
static void
ctx_fragment_image_rgba8_RGBA8_box (CtxRasterizer *rasterizer,
                                    float x, float y, float z,
                                    void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  int width = buffer->width;
  int height = buffer->height;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);
  int dim = (int)((1.0f / factor) / 3);

  int i = 0;

  /* Phase 1: skip (clear) leading pixels whose window leaves the texture.
   * The horizontal limit must be the texture width; testing against the
   * height here made this loop disagree with the phase 2 condition for
   * non-square textures.
   */
  for (; i < count && (x - dim < 0 || y - dim < 0 ||
                       x + dim >= width || y + dim >= height); i++)
  {
    *((uint32_t*)(rgba))=0;
    rgba += 4;
    x += dx;
    y += dy;
  }

  /* Phase 2: exact negation of the phase 1 condition */
  for (; i < count && !(
       x - dim < 0 || y - dim < 0 ||
       x + dim >= width ||
       y + dim >= height); i++)
  {
    const int bpp = 4;
    int u = (int)x;
    int v = (int)y;
    uint64_t sum[4]={0,0,0,0};
    int samples = 0;

    for (int ov = - dim; ov <= dim; ov++)
    {
      /* start of the window row (v+ov), leftmost column u-dim */
      uint8_t *src = (uint8_t *) buffer->data + bpp * ((v+ov) * width + (u - dim));
      for (int ou = - dim; ou <= dim; ou++)
      {
        for (int c = 0; c < bpp; c++)
          sum[c] += src[c];
        samples ++;
        src += bpp;
      }
    }

    /* divide by the sample count using a 16.16 fixed point reciprocal */
    int recip = 65536/samples;
    for (int c = 0; c < bpp; c++)
      rgba[c] = sum[c] * recip >> 16;
    rgba[3]=rgba[3]*global_alpha_u8/255; // gets lost
    ctx_RGBA8_associate_alpha (rgba);

    rgba += 4;
    x += dx;
    y += dy;
  }

  /* Phase 3: clear whatever remains of the span */
  for (; i < count; i++)
  {
    *((uint32_t*)(rgba))= 0;
    rgba += 4;
  }
#if CTX_DITHER
//ctx_dither_rgba_u8 (rgba, x, y, rasterizer->format->dither_red_blue,
//                    rasterizer->format->dither_green);
#endif
}


/* Straight row copy for the unscaled, untransformed case (one texel per
 * output pixel, extend none).
 *
 * Writes `scount` 32-bit pixels to out.  The texture row is (int)y and the
 * first column is (int)x; output pixels whose column or row lies outside
 * the texture are written as 0, the others are copied verbatim from the
 * texture.  dx, dy, z and dz are not consulted.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_copy (CtxRasterizer *rasterizer,
                                             float x, float y, float z,
                                             void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  //if (!buffer) // XXX : this should happen in setup
  //  return;
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = (int)x;
  int v = (int)y;

  /* row outside the texture: the whole span is transparent */
  if ((!((v >= 0) & (v < bheight))))
  {
    memset (dst, 0, count*4);
    return;
  }
  uint32_t *src = ((uint32_t*)buffer->data) + bwidth * v + u;
#if defined(__GNUC__) && !defined(__clang__)
  /* pixels left of the texture */
  int pre = ctx_mini(ctx_maxi(-u,0), count);
  for (int i = 0; i < pre;i++)
  { *dst++ = 0; }
  count-=pre;
  src+=pre;
  u+=pre;

  /* Pixels inside the texture.  bwidth - u is negative when the span
   * starts to the right of the texture; clamp to 0 so that the
   * subtraction below cannot enlarge count and overrun out.
   */
  int limit = ctx_maxi (ctx_mini (count, bwidth - u), 0);
  for (int i = 0; i < limit;i++)
   { *dst++ = *src++; }

  /* pixels right of the texture */
  count-=limit;
  for (unsigned int i = 0; i < count; i++)
    *dst++ = 0;
#else
  int i = 0;
  /* pixels left of the texture */
  for (; (u<0) & ((unsigned)i < count); i++,u++,src++)
    *dst++ = 0;
  /* pixels inside the texture */
  for (; (u<bwidth) & ((unsigned)i<count); i++, u++)
    *dst++ = *src++;
  /* pixels right of the texture */
  for (; ((unsigned)i<count); i++)
    *dst++ = 0;
#endif
}

#if 0
/* Disabled (compiled out) variant: performs the plain nearest copy and then
 * runs ctx_RGBA8_apply_global_alpha_and_associate over the copied pixels.
 */
static void
ctx_fragment_image_rgba8sepA_RGBA8_nearest_copy (CtxRasterizer *rasterizer,
                                                 float x, float y, float z,
                                                 void *out, int scount, float dx, float dy, float dz)
{
  ctx_fragment_image_rgba8_RGBA8_nearest_copy (rasterizer, x, y, z, out, scount, dx, dy, dz);
  ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, scount);
}
#endif

/* Row copy with horizontal and vertical wrap-around (repeat).
 *
 * Writes `count` 32-bit pixels to out, taken from texture row (int)y and
 * starting at column (int)x, both wrapped into the texture.  When the end of
 * the row is reached copying continues from column 0 of the same row.
 * dx, dy, z and dz are not consulted.  A count of zero or less writes
 * nothing.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat (CtxRasterizer *rasterizer,
                                                    float x, float y, float z,
                                                    void *out, int count, float dx, float dy, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = (int)x;
  int v = (int)y;

  /* Wrap into [0, size).  C's % keeps the sign of the dividend, so a
   * negative remainder is shifted up by one period.  This works for any
   * negative coordinate and cannot overflow, unlike adding size * 8192.
   */
  v %= bheight;
  if (v < 0) v += bheight;
  u %= bwidth;
  if (u < 0) u += bwidth;

  uint32_t *src = ((uint32_t*)buffer->data) + bwidth * v;

  while (count > 0)
  {
     /* copy up to the end of the row, then restart at column 0 */
     int chunk = ctx_mini (bwidth - u, count);
     memcpy (dst, src + u, chunk * 4);
     dst += chunk;
     count -= chunk;
     u = (u + chunk) % bwidth;
  }
}

/* Maps integer texture coordinates into the texture according to the
 * extend mode.  Either of u / v may be NULL, in which case that axis is
 * skipped.  The coordinates are updated in place.
 *
 *   CTX_EXTEND_REPEAT : wrapped into [0, size)
 *   CTX_EXTEND_REFLECT: mirrored with period 2*size into [0, size)
 *   CTX_EXTEND_PAD    : clamped to [0, size-1]
 *   CTX_EXTEND_NONE   : clamped to [0, size]; note that `size` itself (one
 *                       past the last texel) is a possible result, which
 *                       callers in this file compare against to detect
 *                       "beyond the last row"
 */
static CTX_INLINE void 
_ctx_coords_restrict (CtxExtend extend,
                      int *u, int *v,
                      int bwidth, int bheight)
{
  switch (extend)
  {
    case CTX_EXTEND_REPEAT:
      /* true modulo: % keeps the dividend's sign, so fix up negatives */
      if (u)
      {
        *u %= bwidth;
        if (*u < 0) *u += bwidth;
      }
      if (v)
      {
        *v %= bheight;
        if (*v < 0) *v += bheight;
      }
      break;
    case CTX_EXTEND_REFLECT:
      if (u)
      {
        const int period = bwidth * 2;
        int t = *u % period;
        if (t < 0) t += period;
        /* second half of the period runs backwards: size -> size-1, ...,
         * 2*size-1 -> 0; the -1 keeps the result inside [0, size) */
        *u = (t >= bwidth) ? (period - 1 - t) : t;
      }
      if (v)
      {
        const int period = bheight * 2;
        int t = *v % period;
        if (t < 0) t += period;
        *v = (t >= bheight) ? (period - 1 - t) : t;
      }
      break;
    case CTX_EXTEND_PAD:
      if (u)*u = ctx_mini (ctx_maxi (*u, 0), bwidth-1);
      if (v)*v = ctx_mini (ctx_maxi (*v, 0), bheight-1);
      break;
    case CTX_EXTEND_NONE:
      {
      /* branch-free clamp to [0, size] */
      if (u) { int val=*u;  val *= (val>0); val= (val>=bwidth)*bwidth + val * (val<bwidth);  *u = val;}
      if (v) { int val=*v;  val *= (val>0); val= (val>=bheight)*bheight + val * (val<bheight); *v = val;}
      }
      break;
  }
}

/* Nearest-neighbour fragment source for 32-bit textures under an affine
 * mapping (no perspective divide; z and dz are not consulted).
 *
 * x, y  : texture coordinate of the first pixel
 * dx, dy: per-pixel step
 * scount: number of 32-bit pixels written to out
 *
 * Coordinates are stepped in 16.16 fixed point (value * 65536, integer part
 * obtained with >> 16).  Texels are copied as whole 32-bit words; when the
 * global alpha is not 255 each word goes through ctx_RGBA8_mul_alpha_u32.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_affine (CtxRasterizer *rasterizer,
                                               float x, float y, float z,
                                               void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  int yi_delta = (int)(dy * 65536);
  int xi_delta = (int)(dx * 65536);
  int32_t yi = (int)(y * 65536);
  int32_t xi = (int)(x * 65536);
  switch (extend){
          case CTX_EXTEND_NONE:
                  {

    /* Trim the tail: starting at the last pixel, clear pixels and shrink
     * count while the coordinate is outside the texture. */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      if (((u1>>16) <0) |
          ((v1>>16) <0) |
          ((u1>>16) >= (bwidth) - 1) |
          ((v1>>16) >= (bheight) - 1))
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
      }
      else break;
    }

  /* Trim the head: clear leading pixels until the first texel that lies
   * strictly inside the texture border. */
  for (i= 0; i < count; i ++)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    if ((u  <= 0) | (v  <= 0) | (u+1 >= bwidth-1) | (v+1 >= bheight-1))
    {
      *((uint32_t*)(rgba))= 0;
    }
    else break;
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
  }

  /* Remaining pixels are sampled without further bounds checks; the two
   * loops differ only in whether the global alpha is applied. */
  if (global_alpha_u8 == 255)
  while (i < count)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
    i++;
  }
  else
  while (i < count)
  {
    int u = xi >> 16;
    int v = yi >> 16;
    ((uint32_t*)(&rgba[0]))[0] =
      ctx_RGBA8_mul_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
    xi += xi_delta;
    yi += yi_delta;
    rgba += 4;
    i++;
  }
                  }
  break;
          default:
  /* Any other extend mode: no trimming, every coordinate is mapped into
   * the texture by _ctx_coords_restrict before the lookup. */
  if (global_alpha_u8 == 255)
    while (i < count)
    {
      int u = xi >> 16;
      int v = yi >> 16;
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
      xi += xi_delta;
      yi += yi_delta;
      rgba += 4;
      i++;
    }
   else
    while (i < count)
    {
      int u = xi >> 16;
      int v = yi >> 16;
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      ((uint32_t*)(&rgba[0]))[0] =
        ctx_RGBA8_mul_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
      xi += xi_delta;
      yi += yi_delta;
      rgba += 4;
      i++;
    }
   break;
  }
}


/* Nearest-neighbour fragment source for 32-bit textures when only the
 * horizontal coordinate changes along the span (a single texture row is
 * sampled; dy, z and dz are not consulted).
 *
 * x, y  : texture coordinate of the first pixel
 * dx    : horizontal per-pixel step
 * scount: number of 32-bit pixels written to out
 *
 * Coordinates are stepped in 16.16 fixed point.  With CTX_EXTEND_NONE,
 * pixels outside the texture at either end of the span are written as 0;
 * with any other extend mode the column is mapped into the texture with
 * _ctx_coords_restrict for every pixel.  When the global alpha is not 255
 * each texel goes through ctx_RGBA8_mul_alpha_u32.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_scale (CtxRasterizer *rasterizer,
                                              float x, float y, float z,
                                              void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  CtxExtend  extend = rasterizer->state->gstate.extend;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  int ideltax = (int)(dx * 65536);
  uint32_t *dst = (uint32_t*)out;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int bbheight = bheight << 16;   /* texture extent in 16.16 fixed point */
  int bbwidth  = bwidth << 16;
  uint32_t *src = (uint32_t*)buffer->data;

  unsigned int i = 0;
  int32_t ix = (int)(x * 65536);
  int32_t iy = (int)(y * 65536);

  if (extend == CTX_EXTEND_NONE)
  {
    /* Trim the tail: clear pixels from the end of the span while their
     * coordinate is outside the texture. */
    int32_t u1 = ix + ideltax * (count-1);
    int32_t v1 = iy;
    uint32_t *edst = ((uint32_t*)out)+count - 1;
    for (; i < count; )
    {
      if ((u1 <0) | (v1 < 0) | (u1 >= bbwidth) | (v1 >= bbheight))
      {
        *edst-- = 0;
        count --;
        u1 -= ideltax;
      }
      else break;
    }

    /* Trim the head */
    for (i = 0; i < count; i ++)
    {
      if ((ix < 0) | (iy < 0) | (ix >= bbwidth)  | (iy >= bbheight))
      {
        *dst++ = 0;
        ix += ideltax;
      }
      else break;
    }

    int v = iy >> 16;
    int o = (v)*bwidth;   /* offset of the sampled row */
    if (global_alpha_u8==255)
      for (; i < count; i ++)
      {
        int u = ix >> 16;
        *dst++ = src[o + (u)];
        ix += ideltax;
      }
    else
      for (; i < count; i ++)
      {
        int u = ix >> 16;
        *dst++ = ctx_RGBA8_mul_alpha_u32 (src[o + (u)], global_alpha_u8);
        ix += ideltax;
      }
  }
  else
  {
    int v = iy >> 16;
    int u = ix >> 16;
    _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
    int o = (v)*bwidth;   /* row is fixed for the whole span */
    if (global_alpha_u8==255)
      for (; i < count; i ++)
      {
        u = ix >> 16;
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
        *dst++ = src[o + (u)];
        ix += ideltax;
      }
    else
      /* this branch needs the same per-pixel loop as the opaque one;
       * without it only a single pixel of the span was written */
      for (; i < count; i ++)
      {
        u = ix >> 16;
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
        *dst++ = ctx_RGBA8_mul_alpha_u32 (src[o + (u)], global_alpha_u8);
        ix += ideltax;
      }
  }
}

/* Nearest-neighbour fragment source for 32-bit textures with a per-pixel
 * perspective divide.
 *
 * x, y, z   : homogeneous texture coordinate of the first pixel
 * dx, dy, dz: per-pixel increment of that coordinate
 * scount    : number of 32-bit pixels written to out
 *
 * Coordinates are stepped as integers scaled by 65536; the sampled texel is
 * (x/z, y/z) truncated to int.  Texels are copied as whole 32-bit words;
 * when the global alpha is not 255 each word goes through
 * ctx_RGBA8_mul_alpha_u32.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest_generic (CtxRasterizer *rasterizer,
                                                float x, float y, float z,
                                                void *out, int scount, float dx, float dy, float dz)
{
  unsigned int count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  int yi_delta = (int)(dy * 65536);
  int xi_delta = (int)(dx * 65536);
  int zi_delta = (int)(dz * 65536);
  int32_t yi = (int)(y * 65536);
  int32_t xi = (int)(x * 65536);
  int32_t zi = (int)(z * 65536);
  switch (extend){
          case CTX_EXTEND_NONE:
                  {

    /* Trim the tail: starting at the last pixel, clear pixels and shrink
     * count while the projected coordinate is outside the texture. */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      /* the (z1!=0) factor forces the reciprocal to 0 when z1 is 0 */
      float z_recip = (z1!=0) * (1.0f/z1);

      if (((u1*z_recip) <0) |
          ((v1*z_recip) <0) |
          ((u1*z_recip) >= (bwidth) - 1) |
          ((v1*z_recip) >= (bheight) - 1))
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }

  /* Trim the head: clear leading pixels until the first texel that lies
   * strictly inside the texture border. */
  for (i= 0; i < count; i ++)
  {
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    if ( (u <= 0) | (v  <= 0) | (u+1 >= bwidth-1) | (v+1 >= bheight-1))
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
  }

  /* Remaining pixels are sampled without further bounds checks.
   * NOTE(review): relies on all coordinates between the trimmed head and
   * tail being inside the texture - confirm for perspective spans. */
  if (global_alpha_u8!=255)
  while (i < count)
  {
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    ((uint32_t*)(&rgba[0]))[0] =
      ctx_RGBA8_mul_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
    i++;
  }
  else
  while (i < count)
  {
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
    i++;
  }
                  }
  break;
  default:
    /* Any other extend mode: no trimming, every projected coordinate is
     * mapped into the texture by _ctx_coords_restrict before the lookup. */
    if (global_alpha_u8!=255)
    while (i < count)
    {
      float z_recip = (zi!=0) * (1.0f/zi);
      int u = (int)(xi * z_recip);
      int v = (int)(yi * z_recip);
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      ((uint32_t*)(&rgba[0]))[0] =
        ctx_RGBA8_mul_alpha_u32 (data[bwidth *v +u], global_alpha_u8);
      xi += xi_delta;
      yi += yi_delta;
      zi += zi_delta;
      rgba += 4;
      i++;
    }
    else
    while (i < count)
    {
      float z_recip = (zi!=0) * (1.0f/zi);
      int u = (int)(xi * z_recip);
      int v = (int)(yi * z_recip);
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      ((uint32_t*)(&rgba[0]))[0] = data[bwidth *v +u];
      xi += xi_delta;
      yi += yi_delta;
      zi += zi_delta;
      rgba += 4;
      i++;
    }
    break;
  }
}

/* Dispatcher for nearest-neighbour sampling of 32-bit textures.
 *
 *   z == 1 and dz == 0:
 *     dx == 1, dy == 0 and extend none -> plain row copy
 *     otherwise                        -> affine sampler
 *   anything else                      -> generic (perspective) sampler
 *
 * All arguments are forwarded unchanged to the chosen sampler.
 */
static void
ctx_fragment_image_rgba8_RGBA8_nearest (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int icount, float dx, float dy, float dz)
{
  unsigned int count = icount;
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int unit_z = (z == 1.0f) && (dz == 0.0f);

  if (!unit_z)
  {
    ctx_fragment_image_rgba8_RGBA8_nearest_generic (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }

  /* one texel per output pixel along a single row, nothing to extend */
  if ((dx == 1.0f) && (dy == 0.0f) && (extend == CTX_EXTEND_NONE))
  {
    ctx_fragment_image_rgba8_RGBA8_nearest_copy (rasterizer, x, y, z, out, count, dx, dy, dz);
    return;
  }

  ctx_fragment_image_rgba8_RGBA8_nearest_affine (rasterizer, x, y, z, out, count, dx, dy, dz);
}



/* Interpolating fragment source for 32-bit textures when only the
 * horizontal coordinate changes along the span; the result of every pixel
 * is additionally passed through ctx_RGBA8_mul_alpha_u32 with the global
 * alpha.
 *
 * x, y  : texture coordinate of the first pixel (shifted by -0.5 on entry)
 * dx    : horizontal per-pixel step
 * scount: number of 32-bit pixels written to out
 * dy, z and dz are not consulted.
 *
 * Two adjacent texture rows (data / ndata) are combined with the fractional
 * row position dv via ctx_lerp_RGBA8_split, and two adjacent columns are
 * then combined via ctx_lerp_RGBA8_merge.
 * NOTE(review): the exact arithmetic of the split/merge helpers is defined
 * elsewhere - presumably a vertical lerp followed by a horizontal lerp.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_scale_with_alpha (CtxRasterizer *rasterizer,
                                                    float x, float y, float z,
                                                    void *out, int scount, float dx, float dy, float dz)
{
    uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
    uint32_t count = scount;
    x -= 0.5f;
    y -= 0.5f;
    uint8_t *rgba = (uint8_t *) out;
    CtxSource *g = &rasterizer->state->gstate.source_fill;
    CtxExtend  extend = rasterizer->state->gstate.extend;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
    const int bwidth = buffer->width;
    const int bheight = buffer->height;
    unsigned int i = 0;

    /* extend none and row outside the texture: whole span is cleared */
    if (!extend)
    {
    if (!((y >= 0) & (y < bheight)))
    {
      uint32_t *dst = (uint32_t*)rgba;
      for (i = 0 ; i < count; i++)
        *dst++ = 0;
      return;
    }
    }

    //x+=1; // XXX off by one somewhere? ,, needed for alignment with nearest

    /* 16.16 fixed point coordinates */
    int32_t yi = (int)(y * 65536);
    int32_t xi = (int)(x * 65536);

    int xi_delta = (int)(dx * 65536);

    if (!extend)
    {
    /* Trim the tail: clear pixels from the end of the span while the
     * column, or the column to its right, is outside the texture. */
    int32_t u1 = xi + xi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      if ((u1 <0) | (u1 +65536 >= (bwidth<<16)))
    {
      *edst-- = 0;
      count --;
      u1 -= xi_delta;
    }
    else break;
  }
    /* Trim the head */
    for (i= 0; i < count; i ++)
    {
      int u = xi >> 16;
      if ((u < 0) | (u >= bwidth-1))
      {
        *((uint32_t*)(rgba))= 0;
        xi += xi_delta;
        rgba += 4;
      }
      else
        break;
    }
    }

 
  int v = yi >> 16;


  /* fractional row position, 8 bits */
  int dv = (yi >> 8) & 0xff;

  int u = xi >> 16;

  int v1 = v+1;

  _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
  _ctx_coords_restrict (extend, NULL, &v1, bwidth, bheight);

  /* data: row v.  ndata: the following row in memory, except with extend
   * none when v1 is past the last row, in which case ndata == data. */
  uint32_t *data = ((uint32_t*)buffer->data) + bwidth * v;
  uint32_t *ndata = data + bwidth * !((!extend) & (v1 > bheight-1));

  if (extend)
  {
    if (xi_delta == 65536)
    {
      /* step of exactly one texel: the right-hand column of one pixel is
       * reused as the left-hand column of the next, du is constant */
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;

      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb; 
        /* NOTE(review): u is restricted here, but src0/src1 advance
         * independently of u - verify wrap-around behaviour. */
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_mul_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du), global_alpha_u8);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else
    {
      /* arbitrary step: column pairs are only recomputed when u changes */
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (prev_u != u)
        {
          if (prev_u == u-1)
          {
            /* advanced by one column: reuse the previous right column */
            s0_ga = s1_ga;
            s0_rb = s1_rb;
            ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          }
          else
          {
            ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
            ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          }
          prev_u = u;
        }
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_mul_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8)), global_alpha_u8);
        rgba += 4;
        u = (xi+=xi_delta) >> 16;
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
      }
    }
  }
  else
  {
    /* extend none: same two strategies, without coordinate restriction */
    if (xi_delta == 65536)
    {
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;
  
      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb;
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_mul_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du), global_alpha_u8);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else
    {
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (prev_u != u)
        {
          if (prev_u == u-1)
          {
            s0_ga = s1_ga;
            s0_rb = s1_rb;
            ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          }
          else
          {
            ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
            ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          }
          prev_u = u;
        }
        ((uint32_t*)(&rgba[0]))[0] = 
          ctx_RGBA8_mul_alpha_u32 (
                  ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8)), global_alpha_u8);
        rgba += 4;
        u = (xi+=xi_delta) >> 16;
      }
    }
  }
}

/* Interpolating fragment source for 32-bit textures when only the
 * horizontal coordinate changes along the span.  Unlike the _with_alpha
 * variant, no global alpha is read or applied here.
 *
 * x, y  : texture coordinate of the first pixel (shifted by -0.5 on entry)
 * dx    : horizontal per-pixel step
 * scount: number of 32-bit pixels written to out
 * dy, z and dz are not consulted.
 *
 * Two adjacent texture rows (data / ndata) are combined with the fractional
 * row position dv via ctx_lerp_RGBA8_split, and two adjacent columns are
 * then combined via ctx_lerp_RGBA8_merge.
 * NOTE(review): the exact arithmetic of the split/merge helpers is defined
 * elsewhere - presumably a vertical lerp followed by a horizontal lerp.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_scale (CtxRasterizer *rasterizer,
                                         float x, float y, float z,
                                         void *out, int scount, float dx, float dy, float dz)
{
    uint32_t count = scount;
    x -= 0.5f;
    y -= 0.5f;
    uint8_t *rgba = (uint8_t *) out;
    CtxSource *g = &rasterizer->state->gstate.source_fill;
    CtxExtend  extend = rasterizer->state->gstate.extend;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
    const int bwidth = buffer->width;
    const int bheight = buffer->height;
    unsigned int i = 0;

    /* extend none and row outside the texture: whole span is cleared */
    if (!extend)
    {
    if (!((y >= 0) & (y < bheight)))
    {
      uint32_t *dst = (uint32_t*)rgba;
      for (i = 0 ; i < count; i++)
        *dst++ = 0;
      return;
    }
    }

    //x+=1; // XXX off by one somewhere? ,, needed for alignment with nearest

    /* 16.16 fixed point coordinates */
    int32_t yi = (int)(y * 65536);
    int32_t xi = (int)(x * 65536);

    int xi_delta = (int)(dx * 65536);

    if (!extend)
    {
    /* Trim the tail: clear pixels from the end of the span while the
     * column, or the column to its right, is outside the texture. */
    int32_t u1 = xi + xi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    for (; i < count; )
    {
      if ((u1 <0) | (u1 +65536 >= (bwidth<<16)))
    {
      *edst-- = 0;
      count --;
      u1 -= xi_delta;
    }
    else break;
  }
    /* Trim the head */
    for (i= 0; i < count; i ++)
    {
      int u = xi >> 16;
      if ((u < 0) | (u >= bwidth-1))
      {
        *((uint32_t*)(rgba))= 0;
        xi += xi_delta;
        rgba += 4;
      }
      else
        break;
    }
    }

 
  int v = yi >> 16;
  /* fractional row position, 8 bits */
  int dv = (yi >> 8) & 0xff;
  int u = xi >> 16;

  int v1 = v+1;

  _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
  _ctx_coords_restrict (extend, NULL, &v1, bwidth, bheight);

  /* data: row v.  ndata: the following row in memory, except with extend
   * none when v1 is past the last row, in which case ndata == data. */
  uint32_t *data = ((uint32_t*)buffer->data) + bwidth * v;
  uint32_t *ndata = data + bwidth * !((!extend) & (v1 > bheight-1));

  if (extend)
  {
    if (xi_delta == 65536)
    {
      /* step of exactly one texel: the right-hand column of one pixel is
       * reused as the left-hand column of the next, du is constant */
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;

      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb; 
        /* NOTE(review): u is restricted here, but src0/src1 advance
         * independently of u - verify wrap-around behaviour. */
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else
    {
      /* arbitrary step: both column pairs are recomputed whenever u
       * differs from the previous pixel's column */
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (prev_u != u)
        {
          ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u = u;
        }
        ((uint32_t*)(&rgba[0]))[0] = ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8));
        rgba += 4;
        u = (xi+=xi_delta) >> 16;
        _ctx_coords_restrict (extend, &u, NULL, bwidth, bheight);
      }
    }
  }
  else // no extend
  {
    if (xi_delta == 65536)
    {
      uint32_t *src0 = data, *src1 = ndata;
      uint32_t s1_ga = 0, s1_rb = 0;
      int du = (xi >> 8) & 0xff;
  
      src0 = data + u;
      src1 = ndata + u;
      ctx_lerp_RGBA8_split (src0[0],src1[0], dv, &s1_ga, &s1_rb);
  
      for (; i < count; i ++)
      {
        uint32_t s0_ga = s1_ga;
        uint32_t s0_rb = s1_rb;
        ctx_lerp_RGBA8_split (src0[1],src1[1], dv, &s1_ga, &s1_rb);
        ((uint32_t*)(&rgba[0]))[0] = ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, du);
        rgba += 4;
        u++;
        src0 ++;
        src1 ++;
      }
    }
    else 
    {
      uint32_t s0_ga = 0, s1_ga = 0, s0_rb = 0, s1_rb = 0;
      int prev_u = -1000;
      for (; (i < count); i++)
      {
        if (prev_u != u)
        {
          ctx_lerp_RGBA8_split (data[u],ndata[u], dv, &s0_ga, &s0_rb);
          ctx_lerp_RGBA8_split (data[u+1],ndata[u+1], dv, &s1_ga, &s1_rb);
          prev_u = u;
        }
        ((uint32_t*)(&rgba[0]))[0] = ctx_lerp_RGBA8_merge (s0_ga, s0_rb, s1_ga, s1_rb, (xi>>8));
        rgba += 4;
        u = (xi+=xi_delta) >> 16;
      }
    }
  }
}

/* Bilinear RGBA8 image fragment for affine transforms; every sampled
 * pixel is additionally multiplied by the graphics state's global alpha.
 *
 *   x, y    source position of the first pixel; 0.5 is subtracted from
 *           both before they are converted to 16.16 fixed point
 *   dx, dy  per-pixel increments of the source position
 *   out     destination, scount 32-bit pixels
 *   z, dz   not used by this variant
 *
 * With CTX_EXTEND_NONE, pixels at either end of the span whose source
 * position falls outside the texture are written as 0 and only the run in
 * between is sampled.  The sampling loop itself does no bounds handling.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_affine_with_alpha (CtxRasterizer *rasterizer,
                                          float x, float y, float z,
                                          void *out, int scount,
                                          float dx, float dy, float dz)
{
  CtxState *state = rasterizer->state;
  const uint8_t alpha = state->gstate.global_alpha_u8;
  CtxBuffer *tex = state->gstate.source_fill.texture.buffer;
#if CTX_ENABLE_CM
  if (tex->color_managed)
    tex = tex->color_managed;
#endif
  const int tex_w = tex->width;
  const int tex_h = tex->height;
  uint32_t *texels = (uint32_t *) tex->data;
  uint32_t *dst = (uint32_t *) out;
  uint32_t count = scount;
  unsigned int i = 0;

  x -= 0.5f;
  y -= 0.5f;

  /* 16.16 fixed point position and step */
  const int step_u = (int)(dx * 65536);
  const int step_v = (int)(dy * 65536);
  int32_t pos_u = (int)(x * 65536);
  int32_t pos_v = (int)(y * 65536);

  if (state->gstate.extend == CTX_EXTEND_NONE)
  {
    /* walk backwards from the last pixel, blanking while outside */
    int32_t tail_u = pos_u + step_u * (count - 1);
    int32_t tail_v = pos_v + step_v * (count - 1);
    uint32_t *tail = ((uint32_t *) out) + (count - 1);
    while (count != 0)
    {
      const int u = tail_u >> 16;
      const int v = tail_v >> 16;
      if ((u >= 0) & (v >= 0) & (u < tex_w - 1) & (v < tex_h - 1))
        break;
      *tail-- = 0;
      count--;
      tail_u -= step_u;
      tail_v -= step_v;
    }

    /* walk forwards from the first pixel, blanking while outside */
    while (i < count)
    {
      const int u = pos_u >> 16;
      const int v = pos_v >> 16;
      if ((u > 0) & (v > 0) & (u + 1 < tex_w - 1) & (v + 1 < tex_h - 1))
        break;
      *dst++ = 0;
      pos_u += step_u;
      pos_v += step_v;
      i++;
    }
  }

  for (; i < count; i++)
  {
    /* 24.8 fixed point: the low byte is handed on as the blend weight */
    const int du = pos_u >> 8;
    const int dv = pos_v >> 8;
    const uint32_t *row0 = texels + tex_w * (dv >> 8) + (du >> 8);
    const uint32_t *row1 = row0 + tex_w;

    *dst++ = ctx_RGBA8_mul_alpha_u32 (
               ctx_bi_RGBA8_alpha (row0[0], row0[1], row1[0], row1[1], du, dv),
               alpha);
    pos_u += step_u;
    pos_v += step_v;
  }
}

/* Bilinear RGBA8 image fragment for affine transforms (no global alpha
 * applied).
 *
 *   x, y    source position of the first pixel; 0.5 is subtracted from
 *           both before they are converted to 16.16 fixed point
 *   dx, dy  per-pixel increments of the source position
 *   out     destination, scount 32-bit pixels
 *   z, dz   not used by this variant
 *
 * With CTX_EXTEND_NONE, pixels at either end of the span whose source
 * position falls outside the texture are written as 0 and only the run in
 * between is sampled.  The sampling loop itself does no bounds handling.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_affine (CtxRasterizer *rasterizer,
                                          float x, float y, float z,
                                          void *out, int scount,
                                          float dx, float dy, float dz)
{
  CtxState *state = rasterizer->state;
  CtxBuffer *tex = state->gstate.source_fill.texture.buffer;
#if CTX_ENABLE_CM
  if (tex->color_managed)
    tex = tex->color_managed;
#endif
  const int tex_w = tex->width;
  const int tex_h = tex->height;
  uint32_t *texels = (uint32_t *) tex->data;
  uint32_t *dst = (uint32_t *) out;
  uint32_t count = scount;
  unsigned int i = 0;

  x -= 0.5f;
  y -= 0.5f;

  /* 16.16 fixed point position and step */
  const int step_u = (int)(dx * 65536);
  const int step_v = (int)(dy * 65536);
  int32_t pos_u = (int)(x * 65536);
  int32_t pos_v = (int)(y * 65536);

  if (state->gstate.extend == CTX_EXTEND_NONE)
  {
    /* walk backwards from the last pixel, blanking while outside */
    int32_t tail_u = pos_u + step_u * (count - 1);
    int32_t tail_v = pos_v + step_v * (count - 1);
    uint32_t *tail = ((uint32_t *) out) + (count - 1);
    while (count != 0)
    {
      const int u = tail_u >> 16;
      const int v = tail_v >> 16;
      if ((u >= 0) & (v >= 0) & (u < tex_w - 1) & (v < tex_h - 1))
        break;
      *tail-- = 0;
      count--;
      tail_u -= step_u;
      tail_v -= step_v;
    }

    /* walk forwards from the first pixel, blanking while outside */
    while (i < count)
    {
      const int u = pos_u >> 16;
      const int v = pos_v >> 16;
      if ((u > 0) & (v > 0) & (u + 1 < tex_w - 1) & (v + 1 < tex_h - 1))
        break;
      *dst++ = 0;
      pos_u += step_u;
      pos_v += step_v;
      i++;
    }
  }

  for (; i < count; i++)
  {
    /* 24.8 fixed point: the low byte is handed on as the blend weight */
    const int du = pos_u >> 8;
    const int dv = pos_v >> 8;
    const uint32_t *row0 = texels + tex_w * (dv >> 8) + (du >> 8);
    const uint32_t *row1 = row0 + tex_w;

    *dst++ = ctx_bi_RGBA8_alpha (row0[0], row0[1], row1[0], row1[1], du, dv);
    pos_u += step_u;
    pos_v += step_v;
  }
}


/* Bilinear RGBA8 image fragment for the general case: the x and y source
 * positions are divided by a third coordinate z (stepped by dz) for every
 * pixel.
 *
 *   x, y, z     source position of the first pixel; 0.5 is subtracted from
 *               x and y before conversion to 16.16 fixed point
 *   dx, dy, dz  per-pixel increments
 *   out         destination, scount 32-bit pixels
 *
 * With CTX_EXTEND_NONE the out-of-texture pixels at both ends of the span
 * are written as 0 first.  In the sampling loops, coordinates outside the
 * texture are passed through _ctx_coords_restrict().  Output is multiplied
 * by the global alpha unless that is 255.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8_bi_generic (CtxRasterizer *rasterizer,
                                           float x, float y, float z,
                                           void *out, int scount,
                                           float dx, float dy, float dz)
{
        x-=0.5f;
        y-=0.5f;
  uint32_t count = scount;
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
  CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
  CtxBuffer *buffer = g->texture.buffer;
#endif
  CtxExtend extend = rasterizer->state->gstate.extend;
  const int bwidth = buffer->width;
  const int bheight = buffer->height;
  unsigned int i = 0;
  uint32_t *data = ((uint32_t*)buffer->data);

  /* positions and steps in 16.16 fixed point */
  int yi_delta = (int)(dy * 65536);
  int xi_delta = (int)(dx * 65536);
  int zi_delta = (int)(dz * 65536);
  int32_t yi = (int)(y * 65536);
  int32_t xi = (int)(x * 65536);
  int32_t zi = (int)(z * 65536);
  if (extend == CTX_EXTEND_NONE) {
    /* position of the last pixel; count is unsigned, so these products are
     * evaluated in unsigned arithmetic before conversion to int32_t */
    int32_t u1 = xi + xi_delta* (count-1);
    int32_t v1 = yi + yi_delta* (count-1);
    int32_t z1 = zi + zi_delta* (count-1);
    uint32_t *edst = ((uint32_t*)out)+(count-1);
    /* i is still 0 here: walk backwards from the end, blanking pixels and
     * shrinking count while the projected position is outside the texture */
    for (; i < count; )
    {
      /* u1 and z1 carry the same 65536 scale, so u1*z_recip is in unscaled
       * source units.
       * NOTE(review): when z1 is 0 this evaluates 0 * (1.0f/0); under
       * IEEE-754 that is 0 * inf = NaN rather than 0 -- confirm intent. */
      float z_recip = (z1!=0) * (1.0f/z1);
      if ((u1*z_recip) <0 ||
          (v1*z_recip) <0 ||
          (u1*z_recip) >= (bwidth) - 1 ||
          (v1*z_recip) >= (bheight) - 1)
      {
        *edst-- = 0;
        count --;
        u1 -= xi_delta;
        v1 -= yi_delta;
        z1 -= zi_delta;
      }
      else break;
    }

  /* walk forwards from the start, blanking pixels until the first one whose
   * projected position lies inside the texture; i ends up at that pixel */
  for (i= 0; i < count; i ++)
  {
    /* NOTE(review): same zero-z consideration as above; here the product is
     * additionally converted to int */
    float z_recip = (zi!=0) * (1.0f/zi);
    int u = (int)(xi * z_recip);
    int v = (int)(yi * z_recip);
    if ((u <= 0) | (v <= 0) | (u+1 >= bwidth-1) | (v+1 >= bheight-1))
    {
      *((uint32_t*)(rgba))= 0;
    }
    else
      break;
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;
  }
  }

  /* pointers to the four neighbouring texels used for each output pixel */
  uint32_t *src00=data;
  uint32_t *src01=data;
  uint32_t *src10=data;
  uint32_t *src11=data;

  /* two copies of the sampling loop: without and with the global alpha
   * multiply */
  if (global_alpha_u8==255)
  while (i < count)
  {
    /* the extra factor 256 leaves du/dv with 8 fractional bits; u and v are
     * the integer texel coordinates */
    float zr = (zi!=0)*(1.0f/zi) * 256;
    int du = (int)(xi * zr);
    int u = du >> 8;
    int dv = (int)(yi * zr);
    int v = dv >> 8;
    if (CTX_UNLIKELY((u < 0) | (v < 0) | (u+1 >= bwidth) | (v+1 >=bheight))) // default to next sample down and to right
    {
      int u1 = u + 1;
      int v1 = v + 1;

      /* map both the texel and its right/lower neighbour according to the
       * extend mode before computing addresses */
      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      _ctx_coords_restrict (extend, &u1, &v1, bwidth, bheight);

      src00 = data  + bwidth * v + u;
      src01 = data  + bwidth * v + u1;
      src10 = data  + bwidth * v1 + u;
      src11 = data  + bwidth * v1 + u1;
    }
    else 
    {
      /* fully inside: neighbours are adjacent in memory */
      src00 = data  + bwidth * v + u;
      src01 = src00 + 1;
      src10 = src00 + bwidth;
      src11 = src01 + bwidth;
    }
    ((uint32_t*)(&rgba[0]))[0] = ctx_bi_RGBA8_alpha (*src00,*src01,*src10,*src11, du,dv);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;

    i++;
  }
  else
  while (i < count)
  {
    /* identical to the loop above except for the final alpha multiply */
    float zr = (zi!=0)*(1.0f/zi) * 256;
    int du = (int)(xi * zr);
    int u = du >> 8;
    int dv = (int)(yi * zr);
    int v = dv >> 8;
    if (CTX_UNLIKELY((u < 0) | (v < 0) | (u+1 >= bwidth) | (v+1 >=bheight))) // default to next sample down and to right
    {
      int u1 = u + 1;
      int v1 = v + 1;

      _ctx_coords_restrict (extend, &u, &v, bwidth, bheight);
      _ctx_coords_restrict (extend, &u1, &v1, bwidth, bheight);

      src00 = data  + bwidth * v + u;
      src01 = data  + bwidth * v + u1;
      src10 = data  + bwidth * v1 + u;
      src11 = data  + bwidth * v1 + u1;
    }
    else 
    {
      src00 = data  + bwidth * v + u;
      src01 = src00 + 1;
      src10 = src00 + bwidth;
      src11 = src01 + bwidth;
    }
    ((uint32_t*)(&rgba[0]))[0] =
        ctx_RGBA8_mul_alpha_u32 (
            ctx_bi_RGBA8_alpha (*src00,*src01,*src10,*src11, du,dv), global_alpha_u8);
    xi += xi_delta;
    yi += yi_delta;
    zi += zi_delta;
    rgba += 4;

    i++;
  }
}


/* Bilinear RGBA8 sampling entry point: forwards to the most specialised
 * implementation the stepping parameters allow.
 *
 *   z == 1 and dz == 0, dy == 0, dx > 0  ->  _bi_scale
 *   z == 1 and dz == 0                   ->  _bi_affine
 *   anything else                        ->  _bi_generic
 */
static void
ctx_fragment_image_rgba8_RGBA8_bi (CtxRasterizer *rasterizer,
                                   float x, float y, float z,
                                   void *out, int icount, float dx, float dy, float dz)
{
  const unsigned int n = icount;
  /* non-zero when the third coordinate stays exactly 1 across the span */
  const int flat = (z == 1.0f) & (dz == 0.0f);

  if (flat & (dy == 0.0f) & (dx > 0.0f))
  {
    ctx_fragment_image_rgba8_RGBA8_bi_scale (rasterizer, x, y, z, out, n, dx, dy, dz);
    return;
  }

  if (flat)
  {
    ctx_fragment_image_rgba8_RGBA8_bi_affine (rasterizer, x, y, z, out, n, dx, dy, dz);
    return;
  }

  ctx_fragment_image_rgba8_RGBA8_bi_generic (rasterizer, x, y, z, out, n, dx, dy, dz);
}
#endif

/* Clamp the integer lvalue `val` in place to the range 0..255 without
 * branching.  `val` is evaluated several times, so it must be free of side
 * effects.  The expansion is wrapped in do { } while (0) so that it acts as
 * a single statement, e.g. as the body of an unbraced if/else.
 */
#define ctx_clamp_byte(val) \
  do { \
    (val) *= (val) > 0; \
    (val) = ((val) > 255) * 255 + ((val) <= 255) * (val); \
  } while (0)

#if CTX_YUV_LUTS
/* 256-entry luma table indexed by an 8-bit Y sample.  ctx_yuv_to_rgba32()
 * reads it when CTX_YUV_LUTS is enabled, where it takes the place of the
 * ((y - 16) * 76309) >> 16 term of the non-LUT path.
 */
static const int16_t ctx_y_to_cy[256]={
-19,-18,-17,-16,-14,-13,-12,-11,-10,-9,-7,-6,-5,-4,-3,
-2,0,1,2,3,4,5,6,8,9,10,11,12,13,15,
16,17,18,19,20,22,23,24,25,26,27,29,30,31,32,
33,34,36,37,38,39,40,41,43,44,45,46,47,48,50,
51,52,53,54,55,57,58,59,60,61,62,64,65,66,67,
68,69,71,72,73,74,75,76,78,79,80,81,82,83,84,
86,87,88,89,90,91,93,94,95,96,97,98,100,101,102,
103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,
121,122,123,124,125,126,128,129,130,131,132,133,135,136,137,
138,139,140,142,143,144,145,146,147,149,150,151,152,153,154,
156,157,158,159,160,161,163,164,165,166,167,168,169,171,172,
173,174,175,176,178,179,180,181,182,183,185,186,187,188,189,
190,192,193,194,195,196,197,199,200,201,202,203,204,206,207,
208,209,210,211,213,214,215,216,217,218,220,221,222,223,224,
225,227,228,229,230,231,232,234,235,236,237,238,239,241,242,
243,244,245,246,248,249,250,251,252,253,254,256,257,258,259,
260,261,263,264,265,266,267,268,270,271,272,273,274,275,277,
278};
/* 256-entry table indexed by an 8-bit U sample.  ctx_yuv_to_rgba32() adds
 * the looked-up value to the luma term to form blue when CTX_YUV_LUTS is
 * enabled, in place of ((u - 128) * 132201) >> 16 on the non-LUT path.
 */
static const int16_t ctx_u_to_cb[256]={
-259,-257,-255,-253,-251,-249,-247,-245,-243,-241,-239,-237,-234,-232,-230,
-228,-226,-224,-222,-220,-218,-216,-214,-212,-210,-208,-206,-204,-202,-200,
-198,-196,-194,-192,-190,-188,-186,-184,-182,-180,-178,-176,-174,-172,-170,
-168,-166,-164,-162,-160,-158,-156,-154,-152,-150,-148,-146,-144,-142,-140,
-138,-136,-134,-132,-130,-128,-126,-124,-122,-120,-117,-115,-113,-111,-109,
-107,-105,-103,-101,-99,-97,-95,-93,-91,-89,-87,-85,-83,-81,-79,
-77,-75,-73,-71,-69,-67,-65,-63,-61,-59,-57,-55,-53,-51,-49,
-47,-45,-43,-41,-39,-37,-35,-33,-31,-29,-27,-25,-23,-21,-19,
-17,-15,-13,-11,-9,-7,-5,-3,0,2,4,6,8,10,12,
14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,
44,46,48,50,52,54,56,58,60,62,64,66,68,70,72,
74,76,78,80,82,84,86,88,90,92,94,96,98,100,102,
104,106,108,110,112,114,116,119,121,123,125,127,129,131,133,
135,137,139,141,143,145,147,149,151,153,155,157,159,161,163,
165,167,169,171,173,175,177,179,181,183,185,187,189,191,193,
195,197,199,201,203,205,207,209,211,213,215,217,219,221,223,
225,227,229,231,233,236,238,240,242,244,246,248,250,252,254,
256};
/* 256-entry table indexed by an 8-bit V sample.  ctx_yuv_to_rgba32() adds
 * the looked-up value to the luma term to form red when CTX_YUV_LUTS is
 * enabled, in place of ((v - 128) * 104597) >> 16 on the non-LUT path.
 */
static const int16_t ctx_v_to_cr[256]={
-205,-203,-202,-200,-198,-197,-195,-194,-192,-190,-189,-187,-186,-184,-182,
-181,-179,-178,-176,-174,-173,-171,-170,-168,-166,-165,-163,-162,-160,-159,
-157,-155,-154,-152,-151,-149,-147,-146,-144,-143,-141,-139,-138,-136,-135,
-133,-131,-130,-128,-127,-125,-123,-122,-120,-119,-117,-115,-114,-112,-111,
-109,-107,-106,-104,-103,-101,-99,-98,-96,-95,-93,-91,-90,-88,-87,
-85,-83,-82,-80,-79,-77,-76,-74,-72,-71,-69,-68,-66,-64,-63,
-61,-60,-58,-56,-55,-53,-52,-50,-48,-47,-45,-44,-42,-40,-39,
-37,-36,-34,-32,-31,-29,-28,-26,-24,-23,-21,-20,-18,-16,-15,
-13,-12,-10,-8,-7,-5,-4,-2,0,1,3,4,6,7,9,
11,12,14,15,17,19,20,22,23,25,27,28,30,31,33,
35,36,38,39,41,43,44,46,47,49,51,52,54,55,57,
59,60,62,63,65,67,68,70,71,73,75,76,78,79,81,
82,84,86,87,89,90,92,94,95,97,98,100,102,103,105,
106,108,110,111,113,114,116,118,119,121,122,124,126,127,129,
130,132,134,135,137,138,140,142,143,145,146,148,150,151,153,
154,156,158,159,161,162,164,165,167,169,170,172,173,175,177,
178,180,181,183,185,186,188,189,191,193,194,196,197,199,201,
202};

#endif

/* Convert one Y/U/V sample triplet to a packed 32-bit pixel.
 *
 *   y, u, v  8-bit luma and chroma samples
 *
 * Returns red in bits 0-7, green in bits 8-15, blue in bits 16-23 and
 * 0xff in bits 24-31.  Each colour channel is clamped to 0..255.
 *
 * With CTX_YUV_LUTS the luma, red and blue terms come from lookup tables;
 * otherwise they are computed with 16.16 fixed point multipliers.
 */
static inline uint32_t ctx_yuv_to_rgba32 (uint8_t y, uint8_t u, uint8_t v)
{
#if CTX_YUV_LUTS
  int cy  = ctx_y_to_cy[y];
  int red = cy + ctx_v_to_cr[v];
  int green = cy - (((u-128) * 25674 + (v-128) * 53278) >> 16);
  int blue = cy + ctx_u_to_cb[u];
#else
  int cy  = ((y - 16) * 76309) >> 16;
  int cr  = (v - 128);
  int cb  = (u - 128);
  int red = cy + ((cr * 104597) >> 16);
  int green = cy - ((cb * 25674 + cr * 53278) >> 16);
  int blue = cy + ((cb * 132201) >> 16);
#endif
  /* clamp every channel to the 8-bit range */
  red   = red   < 0 ? 0 : (red   > 255 ? 255 : red);
  green = green < 0 ? 0 : (green > 255 ? 255 : green);
  blue  = blue  < 0 ? 0 : (blue  > 255 ? 255 : blue);

  /* assemble in unsigned arithmetic: shifting 0xff left by 24 as a signed
   * int would move a bit into the sign position, which is undefined */
  return  (uint32_t) red          |
         ((uint32_t) green << 8)  |
         ((uint32_t) blue  << 16) |
         ((uint32_t) 0xff  << 24);
}

/* Nearest-neighbour image fragment reading a planar YUV buffer and writing
 * RGBA8 pixels.
 *
 *   x, y    source position of the first pixel (0.5 is added to both)
 *   dx, dy  per-pixel source position increments
 *   out     destination, count 32-bit pixels
 *   z, dz   not used here
 *
 * The buffer is addressed as a bwidth*bheight plane of Y bytes followed by
 * two (bwidth/2)*(bheight/2) chroma planes.  Pixels at either end of the
 * span that map outside the image are written as 0.  Returns without
 * writing anything when dx or dy is inf/NaN or the buffer has no data.
 */
static void
ctx_fragment_image_yuv420_RGBA8_nearest (CtxRasterizer *rasterizer,
                                         float x, float y, float z,
                                         void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  CtxBuffer *buffer = g->texture.buffer;
#if CTX_ENABLE_CM
  if (buffer->color_managed)
    buffer = buffer->color_managed;
#endif
  uint8_t *src = (uint8_t *) buffer->data;
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int bwidth_div_2  = bwidth/2;
  int bheight_div_2  = bheight/2;
  x += 0.5f;
  y += 0.5f;

#if CTX_DITHER
  int bits = rasterizer->format->bpp;
  int scan = rasterizer->scanline / CTX_FULL_AA;
  int dither_red_blue = rasterizer->format->dither_red_blue;
  int dither_green  = rasterizer->format->dither_green;
#endif

  if (isinf(dx) || isnan(dx) || isnan (dy) || isinf (dy))
    return;

  if (!src)
    return;

  {
    int i = 0;

    /* source position of the last pixel; walk backwards blanking pixels and
     * shrinking count while that position is outside the image */
    float  u1 = x + dx * (count-1);
    float  v1 = y + dy * (count-1);
    uint32_t *edst = ((uint32_t*)out)+count - 1;
    for (; i < count; )
    {
      if ((u1 <0) | (v1 < 0) | (u1 >= bwidth) | (v1 >= bheight))
      {
        *edst-- = 0;
        count --;
        u1 -= dx;
        v1 -= dy;
      }
      else break;
    }

    /* walk forwards blanking pixels until the first one inside the image */
    for (; i < count; i ++)
    {
      int u = (int)x;
      int v = (int)y;
      if ((u < 0) | (v < 0) | (u >= bwidth) | (v >= bheight))
      {
        *((uint32_t*)(rgba))= 0;
      }
      else
      {
        break;
      }
      x += dx;
      y += dy;
      rgba += 4;
    }

    /* byte offsets of the two chroma planes, which follow the Y plane */
    uint32_t u_offset = bheight * bwidth;
    uint32_t v_offset = u_offset + bheight_div_2 * bwidth_div_2;

    /* swap_red_green is implemented by exchanging the two chroma planes */
    if (rasterizer->swap_red_green)
    {
      v_offset = bheight * bwidth;
      u_offset = v_offset + bheight_div_2 * bwidth_div_2;
    }

    // XXX this is incorrect- but fixes some bug!
    /* the horizontal start is hard-coded to 1.0 in 16.16 fixed point rather
     * than derived from x */
    int ix = 65536;//x * 65536;
    int iy = (int)(y * 65536);

    int ideltax = (int)(dx * 65536);
    int ideltay = (int)(dy * 65536);

    if (ideltay == 0)
    {
      /* constant source row: row offsets are computed once */
      int u = ix >> 16;
      int v = iy >> 16;

      /* note: this local y shadows the float parameter y */
      uint32_t y  = v * bwidth;
      uint32_t uv = (v / 2) * bwidth_div_2;

      /* bail out when the start, or the position count steps further on,
       * is outside the image.
       * NOTE(review): this return leaves the not-yet-written pixels
       * untouched and skips the global alpha step below -- confirm intent. */
      if ((v < 0) | (v >= bheight) | 
          (u < 0) | (u >= bwidth) |
          (((iy + ideltay * count)>>16) < 0) | (((iy + ideltay *count)>>16) >= bheight) | 
          (((ix + ideltax * count)>>16) < 0) | (((ix + ideltax *count)>>16) >= bwidth))
        return;

      /* always true at this point given the check above */
      if ((v >= 0) & (v < bheight))
      {
#if CTX_DITHER
       if (bits < 24)
       {
         while (i < count)// && u >= 0 && u+1 < bwidth)
         {
           *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y+u],
                        src[u_offset+uv+u/2], src[v_offset+uv+u/2]);

           ctx_dither_rgba_u8 (rgba, i, scan, dither_red_blue, dither_green);
           ix += ideltax;
           rgba += 4;
           u = ix >> 16;
           i++;
         }
        }
        else
#endif
        while (i < count)// && u >= 0 && u+1 < bwidth)
        {
          *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y+u],
                          src[u_offset+uv+u/2], src[v_offset+uv+u/2]);
  
          ix += ideltax;
          rgba += 4;
          u = ix >> 16;
          i++;
        }
      }
    }
    else
    {
      /* source row changes along the span: offsets recomputed per pixel */
      int u = ix >> 16;
      int v = iy >> 16;

      /* same bail-out as in the constant-row branch */
      if ((v < 0) | (v >= bheight) | 
          (u < 0) | (u >= bwidth) |
          (((iy + ideltay * count)>>16) < 0) | (((iy + ideltay *count)>>16) >= bheight) | 
          (((ix + ideltax * count)>>16) < 0) | (((ix + ideltax *count)>>16) >= bwidth))
        return;

#if CTX_DITHER
       if (bits < 24)
       {
         while (i < count)// && u >= 0 && v >= 0 && u < bwidth && v < bheight)
         {
           uint32_t y  = v * bwidth + u;
           uint32_t uv = (v / 2) * bwidth_div_2 + (u / 2);

           *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y],
                        src[u_offset+uv], src[v_offset+uv]);

           ctx_dither_rgba_u8 (rgba, i, scan, dither_red_blue, dither_green);
           ix += ideltax;
           iy += ideltay;
           rgba += 4;
           u = ix >> 16;
           v = iy >> 16;
           i++;
         }
       } else
#endif
       while (i < count)// && u >= 0 && v >= 0 && u < bwidth && v < bheight)
       {
          uint32_t y  = v * bwidth + u;
          uint32_t uv = (v / 2) * bwidth_div_2 + (u / 2);

          *((uint32_t*)(rgba))= ctx_yuv_to_rgba32 (src[y],
                        src[u_offset+uv], src[v_offset+uv]);

          ix += ideltax;
          iy += ideltay;
          rgba += 4;
          u = ix >> 16;
          v = iy >> 16;
          i++;
       }
    }

    /* NOTE(review): every sampling loop above runs until i reaches count,
     * so this zero fill appears to have nothing left to do */
    for (; i < count; i++)
    {
      *((uint32_t*)(rgba))= 0;
      rgba += 4;
    }
  }

  /* applied to the first count pixels of out, with count as reduced by the
   * trailing trim above */
  if (rasterizer->state->gstate.global_alpha_u8 != 255)
    ctx_RGBA8_apply_global_alpha_and_associate (rasterizer, (uint8_t*)out, count);
}

#if CTX_FRAGMENT_SPECIALIZE

/* One CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT invocation per RGBA8 image
 * fragment.  Presumably each expands to a <name>_swap_red_green variant of
 * the named function (the dispatcher below calls functions with that
 * suffix); the macro itself is defined elsewhere in this file.
 */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_box)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest)

/* nearest-neighbour specialisations */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_copy)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_scale)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_affine)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_nearest_generic)

/* bilinear specialisations */
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_scale)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_affine)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_generic)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_scale_with_alpha)
CTX_DECLARE_SWAP_RED_GREEN_FRAGMENT(ctx_fragment_image_rgba8_RGBA8_bi_affine_with_alpha)

/* RGBA8 image fragment dispatcher: selects box, bilinear or nearest
 * sampling and, for each, the plain or _swap_red_green flavour depending
 * on rasterizer->swap_red_green.
 *
 * Smoothing is ignored for textures that are one pixel wide or high.  With
 * smoothing on, a transform scale of at most 0.5 selects the box filter;
 * when CTX_ALWAYS_USE_NEAREST_FOR_SCALE1 is set, a scale strictly between
 * 0.99 and 1.01 selects nearest; everything else is bilinear.
 */
static inline void
ctx_fragment_image_rgba8_RGBA8 (CtxRasterizer *rasterizer,
                                float x, float y, float z,
                                void *out, int count, float dx, float dy, float dz)
{
  CtxState *state = rasterizer->state;
  CtxBuffer *tex = state->gstate.source_fill.texture.buffer;
#if CTX_ENABLE_CM
  if (tex->color_managed)
    tex = tex->color_managed;
#endif
  int smooth = state->gstate.image_smoothing;
  if (tex->width == 1 || tex->height == 1)
    smooth = 0;

  if (smooth)
  {
    const float factor = ctx_matrix_get_scale (&state->gstate.transform);

    if (factor <= 0.50f)
    {
      if (rasterizer->swap_red_green)
        ctx_fragment_image_rgba8_RGBA8_box_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
      else
        ctx_fragment_image_rgba8_RGBA8_box (rasterizer, x, y, z, out, count, dx, dy, dz);
      return;
    }

#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
    // XXX: also verify translate == 0 for this fast path to be valid
    /* a scale of (almost) 1 skips bilinear and falls through to nearest */
    if (!((factor > 0.99f) & (factor < 1.01f)))
#endif
    {
      if (rasterizer->swap_red_green)
        ctx_fragment_image_rgba8_RGBA8_bi_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
      else
        ctx_fragment_image_rgba8_RGBA8_bi (rasterizer, x, y, z, out, count, dx, dy, dz);
      return;
    }
  }

  if (rasterizer->swap_red_green)
    ctx_fragment_image_rgba8_RGBA8_nearest_swap_red_green (rasterizer, x, y, z, out, count, dx, dy, dz);
  else
    ctx_fragment_image_rgba8_RGBA8_nearest (rasterizer, x, y, z, out, count, dx, dy, dz);
}

/* Image fragment reading a 1-bit-per-pixel buffer and writing RGBA8.
 *
 * For each of the count output pixels the source position (x, y) is
 * truncated to integers.  Positions outside the buffer and positions whose
 * bit is set produce 0,0,0,0; positions whose bit is clear produce
 * 255,255,255,255.  Bit (u & 7) of byte u/8 in row v holds pixel u.
 */
static void
ctx_fragment_image_gray1_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxBuffer *buffer = rasterizer->state->gstate.source_fill.texture.buffer;
  uint8_t *pixel = (uint8_t *) out;

  for (int remaining = count; remaining > 0; remaining--)
  {
    const int u = (int)x;
    const int v = (int)y;
    uint8_t value = 0;

    if ((u >= 0) & (v >= 0) &
        (u < buffer->width) &
        (v < buffer->height))
    {
      const uint8_t *bits = (const uint8_t *) buffer->data;
      bits += v * buffer->stride + u / 8;
      if (!(*bits & (1 << (u & 7))))
        value = 255;
    }

    pixel[0] = pixel[1] = pixel[2] = pixel[3] = value;

    pixel += 4;
    x += dx;
    y += dy;
  }
}

#endif


#if CTX_GRADIENTS
/* Radial gradient fragment producing count RGBA8 pixels.
 *
 * The gradient parameter of each pixel is (distance from the gradient
 * centre - r0) * rdelta, looked up in the gradient cache when
 * CTX_GRADIENT_CACHE is set and evaluated directly otherwise.  Dithering is
 * applied when CTX_DITHER is set, and the result is multiplied by the
 * global alpha unless that is 255.  z and dz are not used.
 */
static void
ctx_fragment_radial_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const uint8_t alpha = rasterizer->state->gstate.global_alpha_u8;
  uint8_t *pixel = (uint8_t *) out;
#if CTX_DITHER
  const int dither_y = rasterizer->scanline / CTX_FULL_AA;
  const int dither_rb = rasterizer->format->dither_red_blue;
  const int dither_g = rasterizer->format->dither_green;
  const int dither_x = (int)x;
#endif

  const float r0 = source->radial_gradient.r0;
  const float r_scale = source->radial_gradient.rdelta;

  /* offset from the current pixel to the gradient centre */
  float rel_x = source->radial_gradient.x0 - x;
  float rel_y = source->radial_gradient.y0 - y;

  if ((alpha == 255) && (dy == 0.0f))
  {
    /* opaque span with constant y offset: its square is computed once */
    const float rel_y_sq = rel_y * rel_y;
    for (int i = 0; i < count; i++)
    {
      const float t = (ctx_sqrtf_fast (rel_x * rel_x + rel_y_sq) - r0) * (r_scale);
#if CTX_GRADIENT_CACHE
      uint32_t *cached = (uint32_t*)&(rasterizer->gradient_cache_u8[ctx_grad_index(rasterizer, t)][0]);
      *((uint32_t*)pixel) = *cached;
#else
      ctx_fragment_gradient_1d_RGBA8 (rasterizer, t, 0.0, pixel);
#endif

#if CTX_DITHER
      ctx_dither_rgba_u8 (pixel, dither_x + i, dither_y, dither_rb, dither_g);
#endif
      pixel += 4;
      rel_x -= dx;
    }
    return;
  }

  for (int i = 0; i < count; i++)
  {
    const float t = (ctx_hypotf_fast (rel_x, rel_y) - r0) * (r_scale);
#if CTX_GRADIENT_CACHE
    uint32_t *cached = (uint32_t*)&(rasterizer->gradient_cache_u8[ctx_grad_index(rasterizer, t)][0]);
    *((uint32_t*)pixel) = *cached;
#else
    ctx_fragment_gradient_1d_RGBA8 (rasterizer, t, 0.0, pixel);
#endif

#if CTX_DITHER
    ctx_dither_rgba_u8 (pixel, dither_x + i, dither_y, dither_rb, dither_g);
#endif
    if (alpha != 255)
      *((uint32_t*)pixel) =
        ctx_RGBA8_mul_alpha_u32(*((uint32_t*)pixel), alpha);
    pixel += 4;
    rel_x -= dx;
    rel_y -= dy;
  }
}

/* Conic gradient fragment producing count RGBA8 pixels.
 *
 * The gradient parameter of each pixel is the fractional part (via
 * ctx_fmod1f) of (angle around the gradient centre + start_angle + pi) *
 * cycles / (2 pi).  A cycles value below 0.01 is treated as 1.  Dithering
 * is applied when CTX_DITHER is set, and the result is multiplied by the
 * global alpha unless that is 255.  z and dz are not used.
 */
static void
ctx_fragment_conic_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *pixel = (uint8_t *) out;
#if CTX_DITHER
  const int dither_y = rasterizer->scanline / CTX_FULL_AA;
  const int dither_rb = rasterizer->format->dither_red_blue;
  const int dither_g = rasterizer->format->dither_green;
  const int dither_x = (int)x;
#endif
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const uint8_t alpha = rasterizer->state->gstate.global_alpha_u8;

  float cycles = source->conic_gradient.cycles;
  if (cycles < 0.01) cycles = 1.0f;

  const float scale = cycles/(M_PI * 2);
  const float angle_offset = source->conic_gradient.start_angle + M_PI;
#if CTX_GRADIENT_CACHE
  const float cache_scale = (rasterizer->gradient_cache_elements-1) * 256;
#endif

  /* position relative to the gradient centre */
  x -= source->conic_gradient.x;
  y -= source->conic_gradient.y;

  if ((alpha == 255) && ((dy == 0.0f) & (y != 0.0f)))
  {
    /* opaque span with constant, non-zero y: 1/y is computed once and
     * handed to ctx_atan2f_rest in place of y */
    const float y_recip = 1.0f/y;
    for (int i = 0; i < count; i++)
    {
#if CTX_GRADIENT_CACHE
      int t = ctx_fmod1f((ctx_atan2f_rest (x,y_recip) + angle_offset) * scale) * cache_scale;
      *((uint32_t*)pixel) = *((uint32_t*)(&rasterizer->gradient_cache_u8[ctx_grad_index_i (rasterizer, t)][0]));
#else
      float t = ctx_fmod1f((ctx_atan2f_rest (x,y_recip) + angle_offset) * scale);
      _ctx_fragment_gradient_1d_RGBA8 (rasterizer, t, 1.0f, pixel);
#endif
#if CTX_DITHER
      ctx_dither_rgba_u8 (pixel, dither_x + i, dither_y, dither_rb, dither_g);
#endif
      pixel += 4;
      x += dx;
    }
    return;
  }

  for (int i = 0; i < count; i++)
  {
#if CTX_GRADIENT_CACHE
    int t = ctx_fmod1f((ctx_atan2f (x,y) + angle_offset) * scale) * cache_scale;
    *((uint32_t*)pixel) = *((uint32_t*)(&rasterizer->gradient_cache_u8[ctx_grad_index_i (rasterizer, t)][0]));
#else
    float t = ctx_fmod1f((ctx_atan2f (x,y) + angle_offset) * scale);
    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, t, 1.0f, pixel);
#endif
#if CTX_DITHER
    ctx_dither_rgba_u8 (pixel, dither_x + i, dither_y, dither_rb, dither_g);
#endif
    if (alpha != 255)
      *((uint32_t*)pixel) =
        ctx_RGBA8_mul_alpha_u32(*((uint32_t*)pixel), alpha);
    pixel += 4;
    x += dx;
    y += dy;
  }
}
  
/* Linear gradient fragment producing count RGBA8 pixels.
 *
 * The gradient parameter starts at x * dx_scaled + y * dy_scaled -
 * start_scaled and advances by dx * dx_scaled + dy * dy_scaled per pixel.
 * With CTX_GRADIENT_CACHE both are kept as integers scaled by
 * (gradient_cache_elements - 1) * 256.  Dithering is applied when
 * CTX_DITHER is set, and the result is multiplied by the global alpha
 * unless that is 255.  z and dz are not used.
 */
static void
ctx_fragment_linear_gradient_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const uint8_t alpha = rasterizer->state->gstate.global_alpha_u8;
  const float grad_dx = source->linear_gradient.dx_scaled;
  const float grad_dy = source->linear_gradient.dy_scaled;
  const float grad_start = source->linear_gradient.start_scaled;
  uint8_t *pixel = (uint8_t *) out;

#if CTX_DITHER
  const int dither_rb = rasterizer->format->dither_red_blue;
  const int dither_g = rasterizer->format->dither_green;
  const int dither_y = rasterizer->scanline / CTX_FULL_AA;
  const int dither_x = (int)x;
#endif

  /* project position and step onto the gradient axis */
  float u0 = x;
  float v0 = y;
  float ud = dx;
  float vd = dy;
  u0 *= grad_dx;
  v0 *= grad_dy;
  ud *= grad_dx;
  vd *= grad_dy;

#if CTX_GRADIENT_CACHE
  int t = (int)(((u0 + v0) - grad_start) * (rasterizer->gradient_cache_elements-1) * 256);
  const int t_step = (int)((ud + vd) * (rasterizer->gradient_cache_elements-1) * 256);
#else
  float t = ((u0 + v0) - grad_start);
  const float t_step = (ud + vd);
#endif

  for (int i = 0; i < count; i++)
  {
#if CTX_GRADIENT_CACHE
    *((uint32_t*)pixel) = *((uint32_t*)(&rasterizer->gradient_cache_u8[ctx_grad_index_i (rasterizer, t)][0]));
#else
    _ctx_fragment_gradient_1d_RGBA8 (rasterizer, t, 1.0, pixel);
#endif
#if CTX_DITHER
    ctx_dither_rgba_u8 (pixel, dither_x + i, dither_y, dither_rb, dither_g);
#endif
    if (alpha != 255)
      *((uint32_t*)pixel) =
        ctx_RGBA8_mul_alpha_u32(*((uint32_t*)pixel), alpha);
    pixel += 4;
    t += t_step;
  }
}

#endif

/* Fragment for "no source": fills the count RGBA8 pixels at out with 0.
 * Only out and count are used; a count of 0 or less writes nothing.
 */
static void
ctx_fragment_none_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba_out = (uint8_t *) out;
  uint32_t blank = 0;
  /* index by i, not count: the offset count * 4 is one pixel past the end
   * of the buffer */
  for (int i = 0; i < count; i++)
    memcpy (rgba_out + i * 4, &blank, 4);
}

/* Solid colour fragment: fills the count RGBA8 pixels at out with the
 * current fill colour.
 *
 * The colour is fetched into the first pixel, passed through
 * ctx_RGBA8_associate_alpha(), has its first and third bytes exchanged when
 * rasterizer->swap_red_green is set, and is then replicated into the
 * remaining pixels.  The first pixel is written even when count is below 1,
 * as before.  Position and step arguments are not used.
 */
static void
ctx_fragment_color_RGBA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *rgba_out = (uint8_t *) out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  ctx_color_get_rgba8 (rasterizer->state, &g->color, rgba_out);
  ctx_RGBA8_associate_alpha (rgba_out);
  if (rasterizer->swap_red_green)
  {
    int tmp = rgba_out[0];
    rgba_out[0] = rgba_out[2];
    rgba_out[2] = tmp;
  }
  /* index by i, not count: the offset count * 4 is one pixel past the end
   * of the buffer and would leave pixels 1..count-1 unwritten */
  for (int i = 1; i < count; i++)
    memcpy (rgba_out + i * 4, rgba_out, 4);
}
#if CTX_ENABLE_FLOAT

#if CTX_GRADIENTS
/* Linear gradient fragment producing count pixels of four floats each.
 *
 * The gradient parameter starts at u0 * dx_scaled + v0 * dy_scaled -
 * start_scaled and advances by ud * dx_scaled + vd * dy_scaled per pixel.
 * z and dz are not used.
 */
static void
ctx_fragment_linear_gradient_RGBAF (CtxRasterizer *rasterizer, float u0, float v0, float z, void *out, int count, float ud, float vd, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const float grad_dx = source->linear_gradient.dx_scaled;
  const float grad_dy = source->linear_gradient.dy_scaled;
  const float grad_start = source->linear_gradient.start_scaled;
  float *pixel = (float *) out;

  /* project position and step onto the gradient axis */
  u0 *= grad_dx;
  v0 *= grad_dy;
  ud *= grad_dx;
  vd *= grad_dy;

  float t = ((u0 + v0) - grad_start);
  const float t_step = (ud + vd);

  for (int remaining = count; remaining > 0; remaining--)
  {
    ctx_fragment_gradient_1d_RGBAF (rasterizer, t, 1.0f, pixel);
    pixel += 4;
    t += t_step;
  }
}

/* Radial gradient fragment source producing @count float RGBA pixels.
 *
 * For each pixel the distance from (x, y) to the gradient center is taken
 * with ctx_hypotf, offset by r0 and multiplied by rdelta; the result is the
 * position handed to ctx_fragment_gradient_1d_RGBAF.  (x, y) advance by
 * (dx, dy) per pixel.
 */
static void
ctx_fragment_radial_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float center_x     = source->radial_gradient.x0;
  float center_y     = source->radial_gradient.y0;
  float inner_radius = source->radial_gradient.r0;
  float radius_scale = source->radial_gradient.rdelta;
  float *pixel = (float *) out;

  int remaining = count;
  while (remaining-- > 0)
  {
    float position = (ctx_hypotf (center_x - x, center_y - y) - inner_radius) * radius_scale;
    ctx_fragment_gradient_1d_RGBAF (rasterizer, position, 0.0f, pixel);
    pixel += 4;
    x += dx;
    y += dy;
  }
}

/* Conic gradient fragment source producing @count float RGBA pixels.
 *
 * Coordinates are made relative to the gradient center; for each pixel the
 * angle from ctx_atan2f (x, y), shifted by start_angle + M_PI and scaled by
 * cycles / (2 * M_PI), is wrapped with ctx_fmod1f and used as position for
 * ctx_fragment_gradient_1d_RGBAF.  A cycles value below 0.01 is replaced
 * by 1.
 */
static void
ctx_fragment_conic_gradient_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float *pixel = (float *) out;

  float cycles = source->conic_gradient.cycles;
  if (cycles < 0.01) cycles = 1.0f;
  float angle_to_position = cycles/(M_PI * 2);

  float angle_offset = source->conic_gradient.start_angle;
  angle_offset += M_PI;

  /* work relative to the center of the gradient */
  x -= source->conic_gradient.x;
  y -= source->conic_gradient.y;

  int remaining = count;
  while (remaining-- > 0)
  {
    float position = (ctx_atan2f (x,y) + angle_offset) * angle_to_position;
    position = ctx_fmod1f(position);
    ctx_fragment_gradient_1d_RGBAF (rasterizer, position, 0.0f, pixel);
    pixel += 4;
    x += dx;
    y += dy;
  }
}
  
#endif

/* Fragment source for "no paint" in float RGBA: sets all four components
 * of the @count pixels at @out to 0.0f.  The coordinate and delta arguments
 * are unused.
 */
static void
ctx_fragment_none_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float *pixel = (float *) out;
  for (int n = 0; n < count; n++, pixel += 4)
  {
    pixel[0] = 0.0f;
    pixel[1] = 0.0f;
    pixel[2] = 0.0f;
    pixel[3] = 0.0f;
  }
}

/* Fragment source for a solid color fill in float RGBA.
 *
 * The fill color is fetched with ctx_color_get_rgba, its first three
 * components are multiplied by the fourth (alpha), and the result is
 * written to each of the @count pixels at @out.  The coordinate and delta
 * arguments are unused.
 */
static void
ctx_fragment_color_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float color[4];
  ctx_color_get_rgba (rasterizer->state,
                      &rasterizer->state->gstate.source_fill.color,
                      color);

  /* premultiply the color components by alpha */
  color[0] *= color[3];
  color[1] *= color[3];
  color[2] *= color[3];

  float *pixel = (float *) out;
  for (; count; count--)
  {
    pixel[0] = color[0];
    pixel[1] = color[1];
    pixel[2] = color[2];
    pixel[3] = color[3];
    pixel += 4;
  }
}


/* Texture fragment source for float RGBA.
 *
 * Samples @count pixels into a temporary RGBA8 span using the RGBA8 image
 * fragment that matches the texture's bits per pixel, then converts every
 * byte to float with ctx_u8_to_float.  With CTX_ENABLE_CM the color managed
 * copy of the texture buffer is consulted for the format when it exists.
 */
static void ctx_fragment_image_RGBAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t span_u8[4 * count];
  float  *span_f = (float *) out;
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  CtxBuffer *texture = source->texture.buffer;
#if CTX_ENABLE_CM
  if (texture->color_managed)
    texture = texture->color_managed;
#endif

  switch (texture->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:
        ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, span_u8, count, dx, dy, dz);
        break;
      case 24:
        ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, span_u8, count, dx, dy, dz);
        break;
      case 32:
        ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, span_u8, count, dx, dy, dz);
        break;
#endif
      default:
        ctx_fragment_image_RGBA8 (rasterizer, x, y, z, span_u8, count, dx, dy, dz);
        break;
    }

  /* widen the whole span, four components per pixel */
  const int n_values = 4 * count;
  for (int i = 0; i < n_values; i++)
    span_f[i] = ctx_u8_to_float (span_u8[i]);
}

/* Returns the float RGBA fragment callback matching the type of the
 * current fill source; source types without a dedicated case get
 * ctx_fragment_none_RGBAF.  Gradient sources are only handled when
 * CTX_GRADIENTS is enabled.
 */
static CtxFragment ctx_rasterizer_get_fragment_RGBAF (CtxRasterizer *rasterizer)
{
  CtxFragment chosen = ctx_fragment_none_RGBAF;

  switch (rasterizer->state->gstate.source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        chosen = ctx_fragment_image_RGBAF;
        break;
      case CTX_SOURCE_COLOR:
        chosen = ctx_fragment_color_RGBAF;
        break;
      case CTX_SOURCE_NONE:
        chosen = ctx_fragment_none_RGBAF;
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        chosen = ctx_fragment_linear_gradient_RGBAF;
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        chosen = ctx_fragment_radial_gradient_RGBAF;
        break;
      case CTX_SOURCE_CONIC_GRADIENT:
        chosen = ctx_fragment_conic_gradient_RGBAF;
        break;
#endif
      default:
        break;
    }
  return chosen;
}
#endif


/* Returns 1 when the bottom row of @matrix is within 0.001 of (0, 0, 1),
 * and 0 as soon as one of the three entries deviates by more than that.
 */
static inline int
ctx_matrix_no_perspective (CtxMatrix *matrix)
{
  const float tolerance = 0.001f;

  if (fabsf (matrix->m[2][0]) > tolerance ||
      fabsf (matrix->m[2][1]) > tolerance ||
      fabsf (matrix->m[2][2] - 1.0f) > tolerance)
    return 0;

  return 1;
}

/* Returns 1 when both off-diagonal terms m[0][1] and m[1][0] are within
 * 0.001 of zero and the matrix passes ctx_matrix_no_perspective, else 0.
 * The signs of the diagonal terms are not examined, so mirrored matrices
 * (and thus a 180 degree rotation) are reported as not rotated.
 */
static inline int
ctx_matrix_no_skew_or_rotate (CtxMatrix *matrix)
{
  const float tolerance = 0.001f;

  if (fabsf (matrix->m[0][1]) > tolerance ||
      fabsf (matrix->m[1][0]) > tolerance)
    return 0;

  return ctx_matrix_no_perspective (matrix);
}

/* Forward declaration, so that code below this point can refer to
 * ctx_matrix_determinant ahead of its definition. */
static inline float
ctx_matrix_determinant (const CtxMatrix *m);

/* Cheap usability test for a transform: returns 0 when the magnitude of
 * any diagonal entry (m[0][0], m[1][1] or m[2][2]) is below 0.0001, and 1
 * otherwise.
 *
 * NOTE(review): only the diagonal is inspected, so a matrix whose diagonal
 * is (near) zero while its off-diagonal terms are not - e.g. a 90 degree
 * rotation - is reported as not sane.  A determinant based test
 * (ctx_matrix_determinant) may have been the intent; confirm before
 * changing.
 */
static int ctx_sane_transform(CtxMatrix *transform)
{
  const float epsilon = 0.0001f;

  int collapsed_x = ctx_fabsf (transform->m[0][0]) < epsilon;
  int collapsed_y = ctx_fabsf (transform->m[1][1]) < epsilon;
  int collapsed_w = ctx_fabsf (transform->m[2][2]) < epsilon;

  return !(collapsed_x | collapsed_y | collapsed_w);
}

/* Selects the fragment callback that produces RGBA8 source pixels for the
 * current fill source of @rasterizer.
 *
 * Color, none and (with CTX_GRADIENTS) gradient sources map directly to
 * their fragment function.  For textures the choice depends on:
 *   - the pixel format / bits per pixel of the texture buffer,
 *   - gstate->image_smoothing (forced off for 1 pixel wide or high
 *     textures),
 *   - the scale factor of the user transform (box filter below 0.5),
 *   - the shape of the source transform (identity scale, scale only,
 *     affine, or generic with perspective),
 *   - the extend mode, global alpha and rasterizer->swap_red_green.
 *
 * Returns ctx_fragment_none_RGBA8 when the texture buffer or its format is
 * missing, when the source transform fails ctx_sane_transform, or when the
 * source type is not handled.
 */
static CtxFragment ctx_rasterizer_get_fragment_RGBA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
  switch (gstate->source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
      {
        CtxBuffer *buffer = g->texture.buffer;
#if CTX_ENABLE_CM
        /* prefer the color managed copy when there is one; the buffer is
         * tested first so that a missing texture reaches the NULL check
         * below instead of being dereferenced here */
        if (buffer && buffer->color_managed)
          buffer = buffer->color_managed;
#endif

        if (!buffer || !buffer->format)
          return ctx_fragment_none_RGBA8;

#if CTX_FRAGMENT_SPECIALIZE
        int image_smoothing = gstate->image_smoothing;
        /* interpolation is pointless across a single row or column */
        if (buffer->width == 1 || buffer->height == 1)
          image_smoothing = 0;
#endif

        if (!ctx_sane_transform(&gstate->source_fill.transform))
          return ctx_fragment_none_RGBA8;

        if (buffer->format->pixel_format == CTX_FORMAT_YUV420)
        {
          return ctx_fragment_image_yuv420_RGBA8_nearest;
        }
        else
#if CTX_FRAGMENT_SPECIALIZE
        switch (buffer->format->bpp)
          {
            case 1: return ctx_fragment_image_gray1_RGBA8;
            case 24:
              {
                if (image_smoothing)
                {
                  float factor = ctx_matrix_get_scale (&gstate->transform);
                  if (factor < 0.5f)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_box_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_box;
                  }
#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
                  else if ((factor > 0.99f) & (factor < 1.01f))
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_nearest;
                  }
#endif
                  else
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgb8_RGBA8_bi_swap_red_green;
                    return ctx_fragment_image_rgb8_RGBA8_bi;
                  }
                }
                else
                {
                  if (rasterizer->swap_red_green)
                    return ctx_fragment_image_rgb8_RGBA8_nearest_swap_red_green;
                  return ctx_fragment_image_rgb8_RGBA8_nearest;
                }
              }
              break;
            case 32:
              {
                CtxMatrix *transform = &gstate->source_fill.transform;
                CtxExtend extend = rasterizer->state->gstate.extend;
                if (image_smoothing)
                {
                  float factor = ctx_matrix_get_scale (&gstate->transform);
                  if (factor < 0.5f)
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgba8_RGBA8_box_swap_red_green;
                    return ctx_fragment_image_rgba8_RGBA8_box;
                  }
#if CTX_ALWAYS_USE_NEAREST_FOR_SCALE1
                  else if ((factor > 0.99f) & (factor < 1.01f) & (extend == CTX_EXTEND_NONE))
                  {
                    if (rasterizer->swap_red_green)
                      return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                    return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                  }
#endif
                  else
                  {
                    if (rasterizer->swap_red_green)
                    {
                      if (ctx_matrix_no_perspective (transform))
                      {
                        if (ctx_matrix_no_skew_or_rotate (transform))
                        {
                          /* unit scale and whole pixel translation: no
                           * interpolation needed, pixels can be copied */
                          if ((int)(ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f) &
                              (ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f) &
                              (ctx_fmod1f (transform->m[0][2]) < 0.001f) &
                              (ctx_fmod1f (transform->m[1][2]) < 0.001f))
                          {
                            if (extend == CTX_EXTEND_NONE)
                              return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                            else if (extend == CTX_EXTEND_REPEAT)
                              return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat_swap_red_green;
                          }
                          return gstate->global_alpha_u8==255?
                              ctx_fragment_image_rgba8_RGBA8_bi_scale_swap_red_green
                             :ctx_fragment_image_rgba8_RGBA8_bi_scale_with_alpha_swap_red_green;
                        }
                        return gstate->global_alpha_u8==255?
                            ctx_fragment_image_rgba8_RGBA8_bi_affine_swap_red_green
                           :ctx_fragment_image_rgba8_RGBA8_bi_affine_with_alpha_swap_red_green;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_bi_generic_swap_red_green;
                    }

                    if (ctx_matrix_no_perspective (transform))
                    {
                      if (ctx_matrix_no_skew_or_rotate (transform))
                      {
                        if ((int)(ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f) &
                            (ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f) &
                            (ctx_fmod1f (transform->m[0][2]) < 0.001f) &
                            (ctx_fmod1f (transform->m[1][2]) < 0.001f))
                        {
                          if (extend == CTX_EXTEND_NONE)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                          else if (extend == CTX_EXTEND_REPEAT)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat;
                        }
                        return gstate->global_alpha_u8==255?
                               ctx_fragment_image_rgba8_RGBA8_bi_scale:
                               ctx_fragment_image_rgba8_RGBA8_bi_scale_with_alpha;
                      }
                      return gstate->global_alpha_u8==255?
                        ctx_fragment_image_rgba8_RGBA8_bi_affine:
                        ctx_fragment_image_rgba8_RGBA8_bi_affine_with_alpha;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_bi_generic;
                  }
                }
                else
                {
                  if (rasterizer->swap_red_green)
                  {
                    if (ctx_matrix_no_perspective (transform))
                    {
                      if (ctx_matrix_no_skew_or_rotate (transform))
                      {
                        if ((int)(ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f) &
                            (ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f))
                        {
                          /* honor the extend mode exactly like the
                           * non-swapped path below; an unconditional return
                           * used to sit here, which made these tests
                           * unreachable and ignored CTX_EXTEND_REPEAT */
                          if (extend == CTX_EXTEND_NONE)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_swap_red_green;
                          else if (extend == CTX_EXTEND_REPEAT)
                            return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat_swap_red_green;
                        }
                        return ctx_fragment_image_rgba8_RGBA8_nearest_scale_swap_red_green;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_nearest_affine_swap_red_green;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_nearest_generic_swap_red_green;
                  }
                  if (ctx_matrix_no_perspective (transform))
                  {
                    if (ctx_matrix_no_skew_or_rotate (transform))
                    {
                      if ((int)(ctx_fabsf (transform->m[0][0] - 1.0f) < 0.001f) &
                          (ctx_fabsf (transform->m[1][1] - 1.0f) < 0.001f))
                      {
                         if (extend == CTX_EXTEND_NONE)
                           return ctx_fragment_image_rgba8_RGBA8_nearest_copy;
                         else if (extend == CTX_EXTEND_REPEAT)
                           return ctx_fragment_image_rgba8_RGBA8_nearest_copy_repeat;
                      }
                      return ctx_fragment_image_rgba8_RGBA8_nearest_scale;
                    }
                    return ctx_fragment_image_rgba8_RGBA8_nearest_affine;
                  }
                  return ctx_fragment_image_rgba8_RGBA8_nearest_generic;
                }
              }
            default: return ctx_fragment_image_RGBA8;
          }
#else
          return ctx_fragment_image_RGBA8;
#endif
      }

      case CTX_SOURCE_COLOR:           return ctx_fragment_color_RGBA8;
      case CTX_SOURCE_NONE:            return ctx_fragment_none_RGBA8;
#if CTX_GRADIENTS
      case CTX_SOURCE_CONIC_GRADIENT: return ctx_fragment_conic_gradient_RGBA8;
      case CTX_SOURCE_LINEAR_GRADIENT: return ctx_fragment_linear_gradient_RGBA8;
      case CTX_SOURCE_RADIAL_GRADIENT: return ctx_fragment_radial_gradient_RGBA8;
#endif
    }
  return ctx_fragment_none_RGBA8;
}

/* Coverage weighted copy of a source into @dst for pixels made of
 * @components bytes each.
 *
 * When the rasterizer has a fragment callback it is invoked once per pixel
 * with non-zero coverage to fill @src before mixing; pixels with zero
 * coverage are skipped and fully covered pixels are copied verbatim.
 * Without a fragment callback the pixel already in @src is mixed into
 * every destination pixel as (src * cov + dst * (255 - cov)) / 255.
 */
static inline void
ctx_u8_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  if (CTX_UNLIKELY(rasterizer->fragment))
    {
      float u0 = 0, v0 = 0, w0 = 1;
      float ud = 0, vd = 0, wd = 0;
      ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);

      for (; count; count--, dst += components, coverage++)
      {
        const uint8_t cov = *coverage;

        if (!CTX_UNLIKELY(cov == 0))
        {
          /* sample one pixel at the current position into src */
          rasterizer->fragment (rasterizer, u0, v0, w0, src, 1, ud, vd, wd);
          if (cov == 255)
          {
            for (int c = 0; c < components; c++)
              dst[c] = src[c];
          }
          else
          {
            const uint8_t rcov = 255 - cov;
            for (int c = 0; c < components; c++)
              dst[c] = (src[c]*cov + dst[c]*rcov)/255;
          }
        }

        /* the sampling position advances for skipped pixels as well */
        u0 += ud;
        v0 += vd;
      }
      return;
    }

  for (; count; count--, dst += components, coverage++)
  {
    const uint8_t cov  = *coverage;
    const uint8_t rcov = 255-cov;
    for (int c = 0; c < components; c++)
      dst[c] = (src[c]*cov+dst[c]*rcov)/255;
  }
}

/* Fades @dst towards zero according to coverage: every component of each
 * of the @count pixels becomes (dst * (256 - cov)) >> 8.  The source is
 * not read.
 */
static void
ctx_u8_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  for (; count; count--, coverage++, dst += components)
  {
    const int keep = 256 - *coverage;
    for (int c = 0; c < components; c++)
      dst[c] = (dst[c] * keep) >> 8;
  }
}

/* Symbolic factors assigned to f_s / f_d by the ctx_porter_duff_factors
 * macro.  Going by the names they stand for the constants 0 and 1, an
 * alpha value and one minus that alpha - which alpha is meant is decided
 * by the code consuming the factors (not visible here).
 */
typedef enum {
  CTX_PORTER_DUFF_0,
  CTX_PORTER_DUFF_1,
  CTX_PORTER_DUFF_ALPHA,
  CTX_PORTER_DUFF_1_MINUS_ALPHA,
} CtxPorterDuffFactor;

/* ctx_porter_duff_factors(mode, foo, bar):
 *
 * Expands to a block that switches on @mode (a CTX_COMPOSITE_* value) and
 * assigns a CtxPorterDuffFactor to each of the variables f_s and f_d.
 * Both variables must already be declared in the scope where the macro is
 * used; presumably f_s is the source factor and f_d the destination
 * factor.  The foo and bar arguments are not used by the expansion.
 *
 * Modes without their own case share the CTX_COMPOSITE_CLEAR entry (both
 * factors 0).
 *
 * NOTE(review): CTX_COMPOSITE_COPY is given the same factors as
 * CTX_COMPOSITE_SOURCE_OVER - confirm that this is intended.
 */
#define  \
ctx_porter_duff_factors(mode, foo, bar)\
{\
  switch (mode)\
  {\
     case CTX_COMPOSITE_SOURCE_ATOP:\
        f_s = CTX_PORTER_DUFF_ALPHA;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION_ATOP:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION_IN:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_ALPHA;\
      break;\
     case CTX_COMPOSITE_DESTINATION:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_1;\
       break;\
     case CTX_COMPOSITE_SOURCE_OVER:\
        f_s = CTX_PORTER_DUFF_1;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_DESTINATION_OVER:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_1;\
       break;\
     case CTX_COMPOSITE_XOR:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_DESTINATION_OUT:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     case CTX_COMPOSITE_SOURCE_OUT:\
        f_s = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
     case CTX_COMPOSITE_SOURCE_IN:\
        f_s = CTX_PORTER_DUFF_ALPHA;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
     case CTX_COMPOSITE_COPY:\
        f_s = CTX_PORTER_DUFF_1;\
        f_d = CTX_PORTER_DUFF_1_MINUS_ALPHA;\
       break;\
     default:\
     case CTX_COMPOSITE_CLEAR:\
        f_s = CTX_PORTER_DUFF_0;\
        f_d = CTX_PORTER_DUFF_0;\
       break;\
  }\
}

/* Source-over compositing of a single color onto @count destination
 * pixels of @components bytes each.
 *
 * @src points at one pixel whose last component is alpha.  For every
 * destination pixel, with cov being its coverage value:
 *
 *   keep   = (((255 + alpha * cov) >> 8) ^ 255)
 *   dst[c] = (src[c] * cov + dst[c] * keep) >> 8
 *
 * @rasterizer and @x0 are unused.
 *
 * The source pixel is read straight from @src (which may not alias @dst,
 * both are __restrict__).  The earlier implementation copied it into a
 * 5 byte scratch array through a single uint32_t access, which always
 * transferred exactly four bytes: with five components the alpha byte was
 * left uninitialized, with fewer than four it read beyond the source
 * pixel, and the cast did not respect alignment.
 */
static inline void
ctx_u8_source_over_normal_color (int components,
                                 unsigned int count,
                                 uint8_t * __restrict__ dst,
                                 uint8_t * __restrict__ src,
                                 uint8_t * __restrict__ coverage,
                                 CtxRasterizer         *rasterizer,
                                 int                    x0)
{
  /* nothing to composite, and no alpha component to look at */
  if (components <= 0)
    return;

  const uint8_t src_alpha = src[components - 1];

  while (count--)
  {
    const uint8_t cov  = *coverage++;
    const int     keep = ((255 + src_alpha * cov) >> 8) ^ 255;
    for (int c = 0; c < components; c++)
      dst[c] = (uint8_t) ((src[c] * cov + dst[c] * keep) >> 8);
    dst += components;
  }
}

/* Copy compositing of a single color: every component of each of the
 * @count destination pixels is interpolated from its current value
 * towards the matching component of @src with ctx_lerp_u8, using the
 * pixel's coverage as weight.
 */
static inline void
ctx_u8_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  for (; count; count--, coverage++, dst += components)
  {
    for (int c = 0; c < components; c++)
      dst[c] = ctx_lerp_u8 (dst[c], src[c], coverage[0]);
  }
}

/* Source-over compositing of a span of RGBA8 source pixels in @tsrc onto
 * @dst, weighted per pixel by @coverage.
 *
 * Each 32 bit pixel is processed as two pairs of channels (the bytes
 * selected by 0x00ff00ff and by 0xff00ff00) so that two channels are
 * handled per multiplication.  Alpha is the top byte of the source pixel;
 * the destination is scaled by 255 - ((255 + alpha * cov) >> 8).
 * The @src argument is not used.
 */
static CTX_INLINE void
ctx_RGBA8_source_over_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *tsrc)
{
  for (; count; count--, coverage++, tsrc += 4, dst += 4)
  {
     const uint32_t src_px = *((uint32_t*)tsrc);
     const uint32_t dst_px = *((uint32_t*)dst);
     const uint32_t cov    = *coverage;

     const uint32_t src_rb = src_px & 0x00ff00ff;
     const uint32_t src_ga = (src_px & 0xff00ff00) >> 8;
     const uint32_t src_a  = src_ga >> 16;
     const uint32_t keep   = (255-((255+src_a*cov)>>8));

     const uint32_t dst_rb = dst_px & 0x00ff00ff;
     const uint32_t dst_ga = (dst_px & 0xff00ff00) >> 8;

     const uint32_t out_rb = ((src_rb*cov + 0xff00ff + dst_rb*keep) >> 8) & 0x00ff00ff;
     const uint32_t out_ga =  (src_ga*cov + 0xff00ff + dst_ga*keep)       & 0xff00ff00;

     *((uint32_t*)dst) = out_rb | out_ga;
  }
}

/* Source-over compositing of a span of RGBA8 source pixels in @tsrc onto
 * @dst for fully covered pixels: the coverage weight is the constant 255
 * and the destination is scaled by (alpha ^ 255).  @coverage and @src are
 * not read.
 */
static CTX_INLINE void
ctx_RGBA8_source_over_normal_full_cov_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *__restrict__ tsrc)
{
  const uint32_t *src_px = (uint32_t*)tsrc;
  uint32_t       *dst_px = (uint32_t*)dst;

  for (; count; count--, src_px++, dst_px++)
  {
     const uint32_t s      = *src_px;
     const uint32_t d      = *dst_px;
     const uint32_t src_rb = s & 0x00ff00ff;
     const uint32_t src_ga = (s & 0xff00ff00) >> 8;
     const uint32_t keep   = (src_ga >> 16) ^ 255;
     const uint32_t dst_rb = d & 0x00ff00ff;
     const uint32_t dst_ga = (d & 0xff00ff00) >> 8;

     *dst_px = (((src_rb*255 + 0xff00ff + dst_rb*keep) >> 8) & 0x00ff00ff) |
               ( (src_ga*255 + 0xff00ff + dst_ga*keep)       & 0xff00ff00);
  }
}

/* Copy compositing of a span of RGBA8 source pixels: each destination
 * pixel is replaced by ctx_lerp_RGBA8 of its current value and the
 * matching pixel of @tsrc, weighted by the pixel's coverage.  @src is not
 * used.
 */
static inline void
ctx_RGBA8_source_copy_normal_buf (CTX_COMPOSITE_ARGUMENTS, uint8_t *__restrict__ tsrc)
{
  uint32_t *src_px = (uint32_t*)tsrc;
  uint32_t *dst_px = (uint32_t*)dst;

  for (; count; count--, src_px++, dst_px++, coverage++)
    *dst_px = ctx_lerp_RGBA8 (*dst_px, *src_px, *coverage);
}

static inline void
ctx_RGBA8_source_over_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
  uint8_t _tsrc[4 * (count)];
  rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
  ctx_RGBA8_source_over_normal_buf (count,
                       dst, src, coverage, rasterizer, x0, &_tsrc[0]);
}

/* Source-over compositing of @scanlines consecutive fully covered rows,
 * with the source produced by the rasterizer's fragment callback.
 *
 * For every row the callback renders @count pixels into a temporary span
 * that is composited with ctx_RGBA8_source_over_normal_full_cov_buf, after
 * which @dst moves on by rasterizer->blit_stride.
 *
 * When the source transform has no perspective the start coordinates are
 * computed once and stepped per row with u0 -= vd, v0 += ud; otherwise
 * ctx_init_uv is called again for every row.
 */
void
CTX_SIMD_SUFFIX(ctx_RGBA8_source_over_normal_full_cov_fragment) (CTX_COMPOSITE_ARGUMENTS, int scanlines)
{
  CtxMatrix  *transform = &rasterizer->state->gstate.source_fill.transform;
  CtxFragment fragment  = rasterizer->fragment;
  const int   first_row = rasterizer->scanline /CTX_FULL_AA;
  uint8_t     span[4 * count];
  float u0, v0, ud, vd, w0, wd;

  if (!CTX_LIKELY(ctx_matrix_no_perspective (transform)))
  {
    /* perspective: start coordinates have to be recomputed for each row */
    for (int row = 0; row < scanlines; row++, dst += rasterizer->blit_stride)
    {
      ctx_init_uv (rasterizer, x0, first_row + row, &u0, &v0, &w0, &ud, &vd, &wd);
      fragment (rasterizer, u0, v0, w0, span, count, ud, vd, wd);
      ctx_RGBA8_source_over_normal_full_cov_buf (count,
                          dst, src, coverage, rasterizer, x0, span);
    }
    return;
  }

  ctx_init_uv (rasterizer, x0, first_row, &u0, &v0, &w0, &ud, &vd, &wd);
  for (int row = 0; row < scanlines; row++, dst += rasterizer->blit_stride)
  {
    fragment (rasterizer, u0, v0, w0, span, count, ud, vd, wd);
    ctx_RGBA8_source_over_normal_full_cov_buf (count,
                        dst, src, coverage, rasterizer, x0, span);
    /* step to the next row */
    u0 -= vd;
    v0 += ud;
  }
}

static inline void
ctx_RGBA8_source_copy_normal_fragment (CTX_COMPOSITE_ARGUMENTS)
{
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
  uint8_t _tsrc[4 * (count)];
  rasterizer->fragment (rasterizer, u0, v0, w0, &_tsrc[0], count, ud, vd, wd);
  ctx_RGBA8_source_copy_normal_buf (count,
                       dst, src, coverage, rasterizer, x0, &_tsrc[0]);
}


/* Source-over compositing of the rasterizer's current solid color onto
 * @count RGBA8 pixels at @dst, weighted by @coverage.
 *
 * With CTX_REFERENCE the generic per-component implementation is used.
 * Otherwise the color is taken from rasterizer->color, read as 32 bit
 * words: word 1 supplies the channel pair whose upper half is alpha (used
 * against the 0xff00ff00 bytes of the destination) and word 2 the other
 * pair, so two channels are handled per multiplication.
 */
static void
ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
#if CTX_REFERENCE
  /* arguments follow CTX_COMPOSITE_ARGUMENTS; a stray extra count used to
   * be passed here, which did not match the callee's parameter list */
  ctx_u8_source_over_normal_color (4, count, dst, src, coverage, rasterizer, x0);
#else
  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
  uint32_t si_a  = si_ga >> 16;

  while (count--)
  {
     uint32_t cov   = *coverage++;
     /* weight of the destination that remains after adding the source */
     uint32_t rcov  = (((255+si_a * cov)>>8))^255;
     uint32_t di    = *((uint32_t*)dst);
     uint32_t di_ga = ((di & 0xff00ff00) >> 8);
     uint32_t di_rb = (di & 0x00ff00ff);
     *((uint32_t*)(dst)) =
     (((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
      ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00);
     dst+=4;
  }
#endif
}

/* Copy compositing of the rasterizer's current solid color onto @count
 * RGBA8 pixels at @dst: each destination pixel is moved towards the color
 * in proportion to its coverage.
 *
 * With CTX_REFERENCE the generic per-component implementation is used.
 * Otherwise the color is taken from rasterizer->color, read as 32 bit
 * words (word 1 and word 2 hold one channel pair each), and two channels
 * are interpolated per multiplication.
 */
static void
ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
#if CTX_REFERENCE
  /* arguments in CTX_COMPOSITE_ARGUMENTS order: count, dst, src, coverage,
   * rasterizer, x0 - they used to be passed in a different order */
  ctx_u8_source_copy_normal_color (4, count, dst, src, coverage, rasterizer, x0);
#else
  uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
  uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];

  while (count--)
  {
     uint32_t cov   = *coverage++;
     uint32_t di    = *((uint32_t*)dst);
     uint32_t di_ga = (di & 0xff00ff00);
     uint32_t di_rb = (di & 0x00ff00ff);

     /* per channel-pair difference between color and destination */
     uint32_t d_rb  = si_rb - di_rb;
     uint32_t d_ga  = si_ga - (di_ga>>8);

     *((uint32_t*)(dst)) =

     (((di_rb + ((d_rb * cov)>>8)) & 0x00ff00ff))  |
      ((di_ga + ((d_ga * cov)      & 0xff00ff00)));
     dst +=4;
  }
#endif
}

/* RGBA8 variant of the clear compositor: forwards to ctx_u8_clear_normal
 * with four components per pixel. */
static void
ctx_RGBA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_clear_normal (4, count, dst, src, coverage, rasterizer, x0);
}

/* "Normal" blend mode: the blended result is the source itself.
 *
 * Copies @count pixels of @components bytes each from @src to @blended.
 * @dst is not consulted.  Nothing is written when @components or @count
 * is zero or negative.
 *
 * The copy is done with memmove instead of 16/32 bit stores through casted
 * byte pointers: with 3 or 5 byte pixels those stores landed on unaligned
 * addresses (a fault on targets without unaligned access) and the casts
 * were not valid under the aliasing rules.
 */
static void
ctx_u8_blend_normal (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
{
  if (components <= 0 || count <= 0)
    return;

  memmove (blended, src, (size_t) components * (size_t) count);
}

/* Saturating 8 bit addition without branches: returns a + b, or 255 when
 * the sum does not fit in 8 bits. */
static inline uint8_t ctx_sadd8(uint8_t a, uint8_t b)
{
  const uint16_t sum = (uint16_t)a + b;
  /* bit 8 of the sum is the carry; negating it gives an all-ones mask on
   * overflow and zero otherwise */
  const uint8_t saturate = (uint8_t)(0u - (unsigned)(sum >> 8));
  return (uint8_t)(saturate | (uint8_t)sum);
}

#if CTX_BLENDING_AND_COMPOSITING

/* ctx_u8_blend_define(name, CODE):
 *
 * Generates the function
 *
 *   static inline void ctx_u8_blend_<name> (int components, uint8_t *dst,
 *                                           uint8_t *src, uint8_t *blended,
 *                                           int count)
 *
 * which runs CODE once for each of @count pixels.  Visible to CODE are:
 *   s        pointer to the current source pixel (same as src)
 *   b        array of `components` bytes filled from the current dst
 *            pixel by ctx_u8_deassociate_alpha
 *   blended  the output pixel that CODE is expected to fill in
 *
 * After CODE has run, the last component of blended is overwritten with
 * the last component of the source pixel, ctx_u8_associate_alpha is
 * applied to blended, and all three pointers advance by one pixel.
 */
#define ctx_u8_blend_define(name, CODE) \
static inline void \
ctx_u8_blend_##name (int components, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)\
{\
  for (int j = 0; j < count; j++) { \
  uint8_t *s=src; uint8_t b[components];\
  ctx_u8_deassociate_alpha (components, dst, b);\
    CODE;\
  blended[components-1] = src[components-1];\
  ctx_u8_associate_alpha (components, blended);\
  src += components;\
  dst += components;\
  blended += components;\
  }\
}

/* ctx_u8_blend_define_seperable(name, CODE):
 * Wrapper around ctx_u8_blend_define for blend modes that treat every
 * component on its own: CODE is run with the index c going over all
 * components except the last one. */
#define ctx_u8_blend_define_seperable(name, CODE) \
        ctx_u8_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \

/* Separable blend modes.  Each line below generates ctx_u8_blend_<name>;
 * inside the expression b[c] is the backdrop component, s[c] the source
 * component and blended[c] the result, all in the range 0..255.
 */
ctx_u8_blend_define_seperable(multiply,     blended[c] = (b[c] * s[c])/255;)
ctx_u8_blend_define_seperable(screen,       blended[c] = s[c] + b[c] - (s[c] * b[c])/255;)
ctx_u8_blend_define_seperable(overlay,      blended[c] = b[c] < 127 ? (s[c] * b[c])/255 :
                                                         s[c] + b[c] - (s[c] * b[c])/255;)
ctx_u8_blend_define_seperable(darken,       blended[c] = ctx_mini (b[c], s[c]))
ctx_u8_blend_define_seperable(lighten,      blended[c] = ctx_maxi (b[c], s[c]))
ctx_u8_blend_define_seperable(color_dodge,  blended[c] = b[c] == 0 ? 0 :
                                     s[c] == 255 ? 255 : ctx_mini(255, (255 * b[c]) / (255-s[c])))
/* a full backdrop (255, the 8 bit equivalent of 1.0) stays full; the test
 * used to compare against the literal 1 */
ctx_u8_blend_define_seperable(color_burn,   blended[c] = b[c] == 255 ? 255 :
                                     s[c] == 0 ? 0 : 255 - ctx_mini(255, (255*(255 - b[c])) / s[c]))
ctx_u8_blend_define_seperable(hard_light,   blended[c] = s[c] < 127 ? (b[c] * s[c])/255 :
                                                          b[c] + s[c] - (b[c] * s[c])/255;)
/* absolute difference; a plain subtraction wrapped around in the 8 bit
 * result whenever the source was brighter than the backdrop */
ctx_u8_blend_define_seperable(difference,   blended[c] = b[c] > s[c] ? b[c] - s[c] : s[c] - b[c])
/* the quotient exceeds 255 when the backdrop is brighter than the source,
 * so it is clamped before being stored in 8 bits */
ctx_u8_blend_define_seperable(divide,       blended[c] = s[c] ? ctx_mini (255, (255 * b[c]) / s[c]) : 0)
ctx_u8_blend_define_seperable(addition,     blended[c] = ctx_sadd8 (s[c], b[c]))
ctx_u8_blend_define_seperable(subtract,     blended[c] = ctx_maxi(0, s[c]-b[c]))
ctx_u8_blend_define_seperable(exclusion,    blended[c] = b[c] + s[c] - 2 * (b[c] * s[c]/255))
ctx_u8_blend_define_seperable(soft_light,
  if (s[c] <= 255/2)
  {
    blended[c] = b[c] - (255 - 2 * s[c]) * b[c] * (255 - b[c]) / (255 * 255);
  }
  else
  {
    int d;
    if (b[c] <= 255/4)
      d = (((16 * b[c] - 12 * 255)/255 * b[c] + 4 * 255) * b[c])/255;
    else
      d = (int)(ctx_sqrtf(b[c]/255.0f) * 255.4f);
    /* backdrop plus the scaled offset: only the product is divided by 255.
     * Dividing the whole sum dropped the backdrop term from the result. */
    blended[c] = b[c] + ((2 * s[c] - 255) * (d - b[c]))/255;
  }
)

/* Returns the largest of the first components - 1 values of @c, but never
 * less than 0 (the starting value of the search).  The last component is
 * not looked at. */
static int ctx_int_get_max (int components, int *c)
{
  int largest = 0;
  for (int k = components - 2; k >= 0; k--)
    largest = (c[k] > largest) ? c[k] : largest;
  return largest;
}

/* Returns the smallest of the first components - 1 values of @c, but
 * never more than 400 (the starting value of the search).  The last
 * component is not looked at. */
static int ctx_int_get_min (int components, int *c)
{
  int smallest = 400;
  for (int k = components - 2; k >= 0; k--)
    smallest = (c[k] < smallest) ? c[k] : smallest;
  return smallest;
}

/* Luminance of a pixel given as int components.
 *
 * 3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE applied to @c.
 * 1 or 2 components: the first component.
 * anything else:     average of all components except the last one.
 */
static int ctx_int_get_lum (int components, int *c)
{
  if (components == 3 || components == 4)
    return (int)(CTX_CSS_RGB_TO_LUMINANCE(c));

  if (components == 1 || components == 2)
    return c[0];

  const int n_color = components - 1;
  int total = 0;
  for (int k = 0; k < n_color; k++)
    total += c[k];
  return total / n_color;
}

/* Luminance of a pixel given as 8 bit components.
 *
 * 3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE applied to @c.
 * 1 or 2 components: the first component.
 * anything else:     average of all components except the last one.
 */
static int ctx_u8_get_lum (int components, uint8_t *c)
{
  if (components == 3 || components == 4)
    return (int)(CTX_CSS_RGB_TO_LUMINANCE(c));

  if (components == 1 || components == 2)
    return c[0];

  const int n_color = components - 1;
  int total = 0;
  for (int k = 0; k < n_color; k++)
    total += c[k];
  return total / n_color;
}
/* Saturation of a pixel given as 8 bit components, computed as the spread
 * between the largest and the smallest color component.
 *
 * 3 or 4 components: spread of the first three components.
 * 1 or 2 components: always 0.
 * anything else:     spread over all components except the last one.
 */
static int ctx_u8_get_sat (int components, uint8_t *c)
{
  if (components == 1 || components == 2)
    return 0;

  if (components == 3 || components == 4)
  {
    int r = c[0];
    int g = c[1];
    int b = c[2];
    int highest = ctx_maxi(r, ctx_maxi(g,b));
    int lowest  = ctx_mini(r,ctx_mini(g,b));
    return highest - lowest;
  }

  int lowest  = 1000;
  int highest = -1000;
  for (int k = 0; k < components - 1; k++)
  {
    int value = c[k];
    if (value < lowest)  lowest  = value;
    if (value > highest) highest = value;
  }
  return highest - lowest;
}

/* Shifts the color components of @c (all but the last) so that the
 * pixel's luminance moves to @lum.
 *
 * The components are first offset by the difference between @lum and the
 * current luminance.  If the smallest shifted value is below 0, or the
 * largest above 255, the values are scaled around the luminance of the
 * shifted pixel so that this extreme lands on 0 or 255 respectively.  The
 * results are then stored back into @c.
 */
static void ctx_u8_set_lum (int components, uint8_t *c, uint8_t lum)
{
  const int n_color = components - 1;
  const int offset  = lum - ctx_u8_get_lum (components, c);
  int shifted[components];

  for (int k = 0; k < n_color; k++)
    shifted[k] = c[k] + offset;

  const int lum_now = ctx_int_get_lum (components, shifted);
  const int lowest  = ctx_int_get_min (components, shifted);
  const int highest = ctx_int_get_max (components, shifted);

  if (lowest < 0 && lum_now != lowest)
  {
    for (int k = 0; k < n_color; k++)
      shifted[k] = lum_now + (((shifted[k] - lum_now) * lum_now) / (lum_now - lowest));
  }

  if (highest > 255 && highest != lum_now)
  {
    for (int k = 0; k < n_color; k++)
      shifted[k] = lum_now + (((shifted[k] - lum_now) * (255 - lum_now)) / (highest - lum_now));
  }

  for (int k = 0; k < n_color; k++)
    c[k] = shifted[k];
}

/* Rescales the first three components of @c so that the largest becomes
 * @sat, the smallest becomes 0 and the middle one keeps its relative
 * position between the two.  If the largest and smallest are equal, all
 * three are set to 0.  @components is not used; later components of @c
 * are left alone.
 */
static void ctx_u8_set_sat (int components, uint8_t *c, uint8_t sat)
{
  int hi = 0;
  int md = 1;
  int lo = 2;
  int swap;

  /* order the three indices by component value */
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }
  if (c[md] > c[hi]) { swap = md; md = hi; hi = swap; }
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }

  if (c[hi] > c[lo])
  {
    const int range = c[hi] - c[lo];
    c[md] = ((c[md] - c[lo]) * sat) / range;
    c[hi] = sat;
  }
  else
  {
    c[md] = 0;
    c[hi] = 0;
  }
  c[lo] = 0;
}

/* color: the source color, with its luminance set to the luminance of the
 * backdrop (b).  The luminance used to be taken from the source itself, so
 * the ctx_u8_set_lum call merely re-applied the source's own luminance. */
ctx_u8_blend_define(color,
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_u8_set_lum(components, blended, ctx_u8_get_lum (components, b));
)

ctx_u8_blend_define(hue,
  int in_sat = ctx_u8_get_sat(components, b);
  int in_lum = ctx_u8_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_u8_set_sat(components, blended, in_sat);
  ctx_u8_set_lum(components, blended, in_lum);
)

ctx_u8_blend_define(saturation,
  int in_sat = ctx_u8_get_sat(components, s);
  int in_lum = ctx_u8_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_u8_set_sat(components, blended, in_sat);
  ctx_u8_set_lum(components, blended, in_lum);
)

ctx_u8_blend_define(luminosity,
  int in_lum = ctx_u8_get_lum(components, s);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_u8_set_lum(components, blended, in_lum);
)
#endif

/* Dispatch to the 8-bit blend implementation selected by `blend`.
 *
 * All arguments other than `blend` are forwarded unchanged to the chosen
 * ctx_u8_blend_<mode> routine.  When CTX_BLENDING_AND_COMPOSITING is
 * disabled every mode falls back to normal blending.  A `blend` value with
 * no matching case leaves `blended` untouched.
 */
CTX_INLINE static void
ctx_u8_blend (int components, CtxBlend blend, uint8_t * __restrict__ dst, uint8_t *src, uint8_t *blended, int count)
{
#if CTX_BLENDING_AND_COMPOSITING
  switch (blend)
  {
    /* plain and arithmetic modes */
    case CTX_BLEND_NORMAL:      ctx_u8_blend_normal      (components, dst, src, blended, count); return;
    case CTX_BLEND_ADDITION:    ctx_u8_blend_addition    (components, dst, src, blended, count); return;
    case CTX_BLEND_SUBTRACT:    ctx_u8_blend_subtract    (components, dst, src, blended, count); return;
    case CTX_BLEND_DIVIDE:      ctx_u8_blend_divide      (components, dst, src, blended, count); return;
    /* separable modes */
    case CTX_BLEND_MULTIPLY:    ctx_u8_blend_multiply    (components, dst, src, blended, count); return;
    case CTX_BLEND_SCREEN:      ctx_u8_blend_screen      (components, dst, src, blended, count); return;
    case CTX_BLEND_OVERLAY:     ctx_u8_blend_overlay     (components, dst, src, blended, count); return;
    case CTX_BLEND_DARKEN:      ctx_u8_blend_darken      (components, dst, src, blended, count); return;
    case CTX_BLEND_LIGHTEN:     ctx_u8_blend_lighten     (components, dst, src, blended, count); return;
    case CTX_BLEND_COLOR_DODGE: ctx_u8_blend_color_dodge (components, dst, src, blended, count); return;
    case CTX_BLEND_COLOR_BURN:  ctx_u8_blend_color_burn  (components, dst, src, blended, count); return;
    case CTX_BLEND_HARD_LIGHT:  ctx_u8_blend_hard_light  (components, dst, src, blended, count); return;
    case CTX_BLEND_SOFT_LIGHT:  ctx_u8_blend_soft_light  (components, dst, src, blended, count); return;
    case CTX_BLEND_DIFFERENCE:  ctx_u8_blend_difference  (components, dst, src, blended, count); return;
    case CTX_BLEND_EXCLUSION:   ctx_u8_blend_exclusion   (components, dst, src, blended, count); return;
    /* hue / saturation / luminosity based modes */
    case CTX_BLEND_HUE:         ctx_u8_blend_hue         (components, dst, src, blended, count); return;
    case CTX_BLEND_SATURATION:  ctx_u8_blend_saturation  (components, dst, src, blended, count); return;
    case CTX_BLEND_COLOR:       ctx_u8_blend_color       (components, dst, src, blended, count); return;
    case CTX_BLEND_LUMINOSITY:  ctx_u8_blend_luminosity  (components, dst, src, blended, count); return;
  }
#else
  /* only normal blending is compiled in; the requested mode is ignored */
  (void) blend;
  ctx_u8_blend_normal (components, dst, src, blended, count);
#endif
}

/* Generic Porter-Duff compositor for formats with 8-bit components.
 *
 * Composites `count` pixels onto `dst`, each modulated by its entry in
 * `coverage` and, when the global alpha is not 255, by the global alpha.
 * The source pixels come from the solid colour in rasterizer->color when
 * the fill source is CTX_SOURCE_COLOR, otherwise from `fragment` evaluated
 * along the scanline starting at `x0`.  The incoming `src` pointer is not
 * read; a local scratch buffer is used instead.  When `blend` is not
 * CTX_BLEND_NORMAL the source pixels are first blended against the
 * destination with ctx_u8_blend().  `compositing_mode` selects the source
 * and destination factors through ctx_porter_duff_factors().
 *
 * The callers in this file pass a constant `compositing_mode`, so that the
 * factor switches below can be folded when this function is inlined.
 */
CTX_INLINE static void
__ctx_u8_porter_duff (CtxRasterizer         *rasterizer,
                     int                    components,
                     uint8_t *              dst,
                     uint8_t *              src,
                     int                    x0,
                     uint8_t * __restrict__ coverage,
                     int                    count,
                     CtxCompositingMode     compositing_mode,
                     CtxFragment            fragment,
                     CtxBlend               blend)
{
  /* nothing to composite; also avoids declaring a zero-length scratch array */
  if (count <= 0)
    return;

  CtxPorterDuffFactor f_s, f_d;
  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
  CtxGState *gstate = &rasterizer->state->gstate;
  uint8_t global_alpha_u8 = gstate->global_alpha_u8;
  uint8_t tsrc[components * count]; /* scratch source pixels          */
  uint8_t csrc[components];         /* source pixel scaled by coverage */
  int src_step = 0;                 /* 0: every pixel shares tsrc[0..] */

  src = &tsrc[0];
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
  {
    if (blend == CTX_BLEND_NORMAL)
    {
      /* one shared source pixel; copy exactly `components` bytes so the
       * scratch buffer is never overrun when components * count < 4
       */
      memcpy (src, rasterizer->color, components);
    }
    else
    {
      /* the blended colour depends on the destination pixel it is blended
       * against, so replicate the colour and blend the whole span instead
       * of reusing the result computed for the first pixel
       */
      for (int i = 0; i < count; i++)
        memcpy (&tsrc[i * components], rasterizer->color, components);
      ctx_u8_blend (components, blend, dst, src, src, count);
      src_step = components;
    }
  }
  else
  {
    float u0 = 0; float v0 = 0;
    float ud = 0; float vd = 0;
    float w0 = 1; float wd = 0;

    ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);
    fragment (rasterizer, u0, v0, w0, src, count, ud, vd, wd);
    if (blend != CTX_BLEND_NORMAL)
      ctx_u8_blend (components, blend, dst, src, src, count);
    src_step = components;
  }

  while (count--)
  {
    uint32_t cov = *coverage;

    if (CTX_UNLIKELY(global_alpha_u8 != 255))
      cov = (cov * global_alpha_u8 + 255) >> 8;

    for (int c = 0; c < components; c++)
      csrc[c] = (src[c] * cov + 255) >> 8;

    /* the alpha channel is the last component; dst alpha is only
     * overwritten on the final iteration, so every channel sees the
     * original destination alpha.  Products are normalised with >> 8
     * rather than an exact division by 255.
     */
    for (int c = 0; c < components; c++)
    {
      uint32_t res = 0;
      switch (f_s)
      {
        case CTX_PORTER_DUFF_0:             break;
        case CTX_PORTER_DUFF_1:             res += (csrc[c] ); break;
        case CTX_PORTER_DUFF_ALPHA:         res += (csrc[c] * dst[components-1] + 255) >> 8; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (csrc[c] * (256-dst[components-1])) >> 8; break;
      }
      switch (f_d)
      {
        case CTX_PORTER_DUFF_0: break;
        case CTX_PORTER_DUFF_1:             res += dst[c]; break;
        case CTX_PORTER_DUFF_ALPHA:         res += (dst[c] * csrc[components-1] + 255) >> 8; break;
        case CTX_PORTER_DUFF_1_MINUS_ALPHA: res += (dst[c] * (256-csrc[components-1])) >> 8; break;
      }
      dst[c] = res;
    }
    coverage ++;
    src+=src_step;
    dst+=components;
  }
}

/* Thin forwarding wrapper: passes every argument unchanged to
 * __ctx_u8_porter_duff().
 */
CTX_INLINE static void
_ctx_u8_porter_duff (CtxRasterizer *rasterizer, int components,
                     uint8_t *dst, uint8_t * __restrict__ src,
                     int x0, uint8_t *coverage, int count,
                     CtxCompositingMode compositing_mode,
                     CtxFragment fragment,
                     CtxBlend blend)
{
  __ctx_u8_porter_duff (rasterizer,
                        components,
                        dst,
                        src,
                        x0,
                        coverage,
                        count,
                        compositing_mode,
                        fragment,
                        blend);
}

/* One `case` of the compositing-mode switch in _ctx_u8_porter_duffs: calls
 * __ctx_u8_porter_duff() with the mode spelled out as a constant.  Relies on
 * rasterizer, dst, src, x0, coverage and count being in scope at the point
 * of expansion.
 */
#define _ctx_u8_porter_duff_case(pd_mode, components, fragment, blend) \
     case pd_mode: \
       __ctx_u8_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
                             pd_mode, fragment, blend); \
       break;

/* Switch on the compositing mode currently set in the graphics state and
 * run __ctx_u8_porter_duff() with that mode as a literal constant.  The
 * comp_format and source parameters are accepted but not used in the
 * expansion.  Modes without a case here do nothing.
 */
#define _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend) \
   switch (rasterizer->state->gstate.compositing_mode) \
   { \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_ATOP,      components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_ATOP, components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_IN,   components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION,      components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_OVER,      components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_OVER, components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_XOR,              components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_DESTINATION_OUT,  components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_OUT,       components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_SOURCE_IN,        components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_COPY,             components, fragment, blend) \
     _ctx_u8_porter_duff_case (CTX_COMPOSITE_CLEAR,            components, fragment, blend) \
   }

/* Generating one function per compositing_mode would be slightly more
 * efficient, but on embedded targets it leads to more code bloat; here we
 * trade a small amount of performance for smaller code.
 *
 * ctx_u8_porter_duff defines a single compositing function named
 * ctx_<comp_format>_porter_duff_<source> taking CTX_COMPOSITE_ARGUMENTS.
 * Its body is the _ctx_u8_porter_duffs switch, which picks the compositing
 * mode at run time from the graphics state.
 */
#define ctx_u8_porter_duff(comp_format, components, source, fragment, blend) \
static void \
ctx_##comp_format##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
{ \
  _ctx_u8_porter_duffs(comp_format, components, source, fragment, blend);\
}

/* ctx_RGBA8_porter_duff_generic: 4-component compositor that uses whatever
 * fragment function and blend mode the rasterizer currently has set.
 */
ctx_u8_porter_duff(RGBA8, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)
//ctx_u8_porter_duff(comp_name, components,color_##blend_name,  NULL, blend_mode)


#if CTX_INLINED_NORMAL_RGBA8

/* Per-source variants (ctx_RGBA8_porter_duff_color, _linear_gradient,
 * _radial_gradient, _image).  The gradient and image variants name their
 * fragment function directly instead of going through rasterizer->fragment;
 * all of them still read the blend mode from the graphics state.
 */
ctx_u8_porter_duff(RGBA8, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_GRADIENTS
ctx_u8_porter_duff(RGBA8, 4,linear_gradient, ctx_fragment_linear_gradient_RGBA8, rasterizer->state->gstate.blend_mode)
ctx_u8_porter_duff(RGBA8, 4,radial_gradient, ctx_fragment_radial_gradient_RGBA8, rasterizer->state->gstate.blend_mode)
#endif
ctx_u8_porter_duff(RGBA8, 4,image,           ctx_fragment_image_RGBA8,           rasterizer->state->gstate.blend_mode)
#endif


/* Compositing function that deliberately does nothing.  ctx_setup_RGBA8
 * installs it as comp_op (in the CTX_INLINED_NORMAL_RGBA8 build) for normal
 * blending with a global alpha of 0, where drawing cannot change the
 * destination.
 */
static inline void
ctx_RGBA8_nop (CTX_COMPOSITE_ARGUMENTS)
{
}


/* For a solid colour source, convert the rasterizer's compositing-format
 * colour (rasterizer->color) into the target pixel format with the format's
 * from_comp callback and store it in rasterizer->color_native.  Other source
 * types are left alone.
 */
static inline void
ctx_setup_native_color (CtxRasterizer *rasterizer)
{
  if (rasterizer->state->gstate.source_fill.type != CTX_SOURCE_COLOR)
    return;

  /* convert exactly one pixel, at x position 0 */
  rasterizer->format->from_comp (rasterizer, 0,
                                 &rasterizer->color[0],
                                 &rasterizer->color_native,
                                 1);
}

/* Choose the function used to apply coverage: the pixel format's own
 * apply_coverage callback when it provides one, otherwise the currently
 * selected compositing function (comp_op).
 */
static void
ctx_setup_apply_coverage (CtxRasterizer *rasterizer)
{
  if (rasterizer->format->apply_coverage)
    rasterizer->apply_coverage = rasterizer->format->apply_coverage;
  else
    rasterizer->apply_coverage = rasterizer->comp_op;
}

/* Configure the rasterizer for compositing in RGBA8.
 *
 * Picks the fragment function, the compositing function (comp_op) and the
 * coverage path (comp) from the current graphics state: source type, blend
 * mode, compositing mode and global alpha.  The defaults are the generic
 * porter-duff compositor and CTX_COV_PATH_FALLBACK; the code below replaces
 * them where a more specific implementation is selected.  On every path the
 * function ends by resolving rasterizer->apply_coverage.
 *
 * Two variants of the selection logic exist, chosen at compile time by
 * CTX_INLINED_NORMAL_RGBA8.
 */
static void
ctx_setup_RGBA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBA8 (rasterizer);
  rasterizer->comp_op  = ctx_RGBA8_porter_duff_generic;
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  CtxSourceType source_type = (CtxSourceType)gstate->source_fill.type;

  /* only read by the non-inlined (#else) selection logic further down */
  int blend_mode       = gstate->blend_mode;
  int compositing_mode = gstate->compositing_mode;

  if (source_type == CTX_SOURCE_NONE)
  {
    /* no source: keep the generic defaults */
    ctx_setup_apply_coverage (rasterizer);
    return;
  }
  if (source_type == CTX_SOURCE_COLOR)
    {
      /* evaluate the solid colour once into rasterizer->color */
      ctx_fragment_color_RGBA8 (rasterizer, 0,0, 1,rasterizer->color, 1, 0,0,0);
      if (gstate->global_alpha_u8 != 255)
      {
        /* fold the global alpha into all four components */
        for (int c = 0; c < 4; c ++)
          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8 + 255)>>8;
      }
      /* Cache the colour split into two words that each hold two of the
       * four 8-bit components, spaced 16 bits apart, plus variants
       * multiplied by 255 with 0xff added to each half.  They are stored
       * in the 32-bit slots 1..4 following the packed pixel in
       * rasterizer->color.  The consumers are outside this function
       * (presumably the specialised source-over/copy compositors -- TODO
       * confirm).
       */
      uint32_t src_pix    = ((uint32_t*)rasterizer->color)[0];
      uint32_t si_ga      = (src_pix & 0xff00ff00) >> 8;
      uint32_t si_rb      = src_pix & 0x00ff00ff;
      uint32_t si_ga_full = si_ga * 255 + 0xff00ff;
      uint32_t si_rb_full = si_rb * 255 + 0xff00ff;

      ((uint32_t*)rasterizer->color)[1] = si_ga;
      ((uint32_t*)rasterizer->color)[2] = si_rb;
      ((uint32_t*)rasterizer->color)[3] = si_ga_full;
      ((uint32_t*)rasterizer->color)[4] = si_rb_full;
    }

#if CTX_INLINED_NORMAL_RGBA8
  /* Variant with per-source specialised compositors. */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_RGBA8_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBA8_copy_normal;
          if (gstate->source_fill.type == CTX_SOURCE_COLOR)
            rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;

        }
        else if (gstate->global_alpha_u8 == 0)
        {
          /* fully transparent drawing: nothing to composite */
          rasterizer->comp_op = ctx_RGBA8_nop;
        }
        else
        switch (source_type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
              /* an opaque colour drawn source-over can use the copy path */
              if (rasterizer->color[3] == 255)
                rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
            }
            else
            {
              rasterizer->comp_op = ctx_RGBA8_porter_duff_color_normal;
            }
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_linear_gradient_normal;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_radial_gradient_normal;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_image_normal;
            break;
          default:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        /* non-normal blend modes; source types without a case here keep
         * the generic compositor assigned at the top of the function */
        switch (source_type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_color;
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_linear_gradient;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_radial_gradient;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBA8_porter_duff_image;
            break;
        }
        break;
    }

#else

  /* Compact variant: only copy / source-over with normal blending get a
   * specialised compositor and coverage path; everything else keeps the
   * generic defaults.
   */
  if (source_type == CTX_SOURCE_COLOR)
    {
      if (blend_mode == CTX_BLEND_NORMAL)
      {
        if(compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
          rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
        }
        else if (compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
        {
          /* an opaque colour drawn source-over is equivalent to a copy */
          if (rasterizer->color[3] == 255)
          {
            rasterizer->comp_op = ctx_RGBA8_source_copy_normal_color;
            rasterizer->comp = CTX_COV_PATH_RGBA8_COPY;
          }
          else
          {
            rasterizer->comp_op = ctx_RGBA8_source_over_normal_color;
            rasterizer->comp = CTX_COV_PATH_RGBA8_OVER;
          }
        }
      }
      /* NOTE(review): this branch is only reached for non-normal blend
       * modes; CLEAR with normal blending stays on the generic compositor
       * -- confirm that this is intended. */
      else if (compositing_mode == CTX_COMPOSITE_CLEAR)
      {
        rasterizer->comp_op = ctx_RGBA8_clear_normal;
      }
  }
  else if (blend_mode == CTX_BLEND_NORMAL)
  {
    if(compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
    {
       rasterizer->comp_op = ctx_RGBA8_source_over_normal_fragment;
       rasterizer->comp = CTX_COV_PATH_RGBA8_OVER_FRAGMENT;
    }
    else if (compositing_mode == CTX_COMPOSITE_COPY)
    {
       rasterizer->comp_op = ctx_RGBA8_source_copy_normal_fragment;
       rasterizer->comp = CTX_COV_PATH_RGBA8_COPY_FRAGMENT;
    }
  }
#endif
  ctx_setup_apply_coverage (rasterizer);
}


/* Setup for a target that reuses the RGBA8 compositing setup: run the
 * RGBA8 setup, convert the solid colour to the native pixel format, then
 * force the coverage path back to CTX_COV_PATH_FALLBACK, discarding
 * whatever path ctx_setup_RGBA8 selected.
 */
static inline void
ctx_setup_RGB (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  /* must come after ctx_setup_RGBA8, which assigns comp itself */
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
}



#if CTX_ENABLE_RGB332
/* Setup for RGB332 targets: reuse the RGBA8 setup, convert the solid colour
 * to the native format, then map the RGBA8 copy coverage path onto its
 * RGB332 counterpart.  Every other coverage path becomes the fallback.
 */
static void
ctx_setup_RGB332 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY)
                       ? CTX_COV_PATH_RGB332_COPY
                       : CTX_COV_PATH_FALLBACK;
}
#endif

#if CTX_ENABLE_RGB565
/* Setup for RGB565 targets: reuse the RGBA8 setup, convert the solid colour
 * to the native format, then map the RGBA8 copy coverage path onto its
 * RGB565 counterpart.  Every other coverage path becomes the fallback.
 */
static void
ctx_setup_RGB565 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  int copy_selected = (rasterizer->comp == CTX_COV_PATH_RGBA8_COPY);

  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (copy_selected)
    rasterizer->comp = CTX_COV_PATH_RGB565_COPY;
}
#endif

#if CTX_ENABLE_RGB8
/* Setup for RGB8 targets: reuse the RGBA8 setup, convert the solid colour
 * to the native format, then map the RGBA8 copy coverage path onto its
 * RGB8 counterpart.  Every other coverage path becomes the fallback.
 */
static void
ctx_setup_RGB8 (CtxRasterizer *rasterizer)
{
  ctx_setup_RGBA8 (rasterizer);
  ctx_setup_native_color (rasterizer);

  if (rasterizer->comp != CTX_COV_PATH_RGBA8_COPY)
  {
    rasterizer->comp = CTX_COV_PATH_FALLBACK;
    return;
  }
  rasterizer->comp = CTX_COV_PATH_RGB8_COPY;
}
#endif

/* Composite through a temporary buffer in the compositing format: convert
 * `count` destination pixels with the format's to_comp callback, run the
 * selected comp_op on the temporary using rasterizer->color as source, and
 * convert the result back into `dst` with from_comp.  The temporary holds
 * count * format->ebpp bytes on the stack.
 */
static inline void
ctx_composite_convert (CTX_COMPOSITE_ARGUMENTS)
{
  uint8_t scratch[count * rasterizer->format->ebpp];
  uint8_t *comp_pixels = scratch;

  rasterizer->format->to_comp (rasterizer, x0, dst, comp_pixels, count);
  rasterizer->comp_op (count, comp_pixels, rasterizer->color, coverage, rasterizer, x0);
  rasterizer->format->from_comp (rasterizer, x0, comp_pixels, dst, count);
}

#if CTX_ENABLE_FLOAT
/* Copy compositing for float formats: every destination pixel becomes a
 * coverage-weighted mix of itself and the single source pixel at `src`
 * (the source pointer is not advanced between pixels).
 *
 * The texture coordinates produced by ctx_init_uv() are not used by the
 * loop below.
 */
static inline void
ctx_float_copy_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out   = (float*)dst;
  float *color = (float*)src;
  float u0 = 0, v0 = 0, w0 = 1;
  float ud = 0, vd = 0, wd = 0;

  ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);

  for (; count--; out += components, coverage++)
  {
    float covf = ctx_u8_to_float (*coverage);
    for (int c = 0; c < components; c++)
      out[c] = out[c]*(1.0f-covf) + color[c]*covf;
  }
}

/* Clear compositing for float formats: set every component of `count`
 * destination pixels to 0.0f.
 *
 * Coverage is ignored -- the whole span is cleared regardless of the
 * per-pixel coverage values.
 *
 * All writes go through the advancing `dstf` pointer.  Writing through the
 * byte pointer `dst`, which never moves, would clear only the first pixel
 * of the span over and over.
 */
static inline void
ctx_float_clear_normal (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *dstf = (float*)dst;
  while (count--)
  {
    for (int c = 0; c < components; c++)
      dstf[c] = 0.0f;
    dstf += components;
  }
}


/* Source-over compositing of a single colour for float formats.  For each
 * pixel the source pixel at `src` is scaled by the coverage and added to
 * the destination scaled by (1 - coverage * source alpha); alpha is the
 * last component.
 */
static inline void
ctx_float_source_over_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out   = (float*)dst;
  float *color = (float*)src;
  const int alpha = components - 1;

  for (; count--; coverage++, out += components)
  {
    float fcov = ctx_u8_to_float (*coverage);
    float keep = 1.0f - fcov * color[alpha];
    for (int c = 0; c < components; c++)
      out[c] = color[c]*fcov + out[c] * keep;
  }
}

/* Copy compositing of a single colour for float formats: each destination
 * pixel is replaced by a linear mix of the source pixel at `src` (weight =
 * coverage) and its previous value (weight = 1 - coverage).
 */
static inline void
ctx_float_source_copy_normal_color (int components, CTX_COMPOSITE_ARGUMENTS)
{
  float *out   = (float*)dst;
  float *color = (float*)src;

  for (; count--; coverage++, out += components)
  {
    float fcov = ctx_u8_to_float (*coverage);
    float keep = 1.0f - fcov;
    for (int c = 0; c < components; c++)
      out[c] = (color[c]*fcov + out[c] * keep);
  }
}

/* Normal blending for float pixels: multiply every colour component of
 * `src` by the source alpha (the last component) and store the result in
 * `blended`; the alpha itself is copied through unchanged.  `dst` is not
 * used.  `blended` may alias `src`: the alpha is read before any store.
 */
inline static void
ctx_float_blend_normal (int components, float *dst, float *src, float *blended)
{
  const int alpha_index = components - 1;
  const float alpha = src[alpha_index];
  int c = 0;

  while (c < alpha_index)
  {
    blended[c] = src[c] * alpha;
    c++;
  }
  blended[alpha_index] = alpha;
}

/* Return the largest of the first components-1 entries of `c` (the last
 * component is skipped).  The search starts from -1000.0f, which is also
 * the result when there is nothing to scan or every value is smaller.
 */
static float ctx_float_get_max (int components, float *c)
{
  const int n_color = components - 1;
  float largest = -1000.0f;

  for (int i = 0; i < n_color; i++)
    largest = (c[i] > largest) ? c[i] : largest;

  return largest;
}

/* Return the smallest of the first components-1 entries of `c` (the last
 * component is skipped).  The search starts from 400.0, which is also the
 * result when there is nothing to scan or every value is larger.
 */
static float ctx_float_get_min (int components, float *c)
{
  const int n_color = components - 1;
  float smallest = 400.0;

  for (int i = 0; i < n_color; i++)
    smallest = (c[i] < smallest) ? c[i] : smallest;

  return smallest;
}

/* Luminance of a float pixel.
 *   3 or 4 components: CTX_CSS_RGB_TO_LUMINANCE applied to `c`
 *   1 or 2 components: the first component
 *   anything else:     the mean of the first components-1 entries
 */
static float ctx_float_get_lum (int components, float *c)
{
  if (components == 3 || components == 4)
    return CTX_CSS_RGB_TO_LUMINANCE(c);

  if (components == 1 || components == 2)
    return c[0];

  float total = 0;
  for (int i = 0; i < components - 1; i ++)
    total += c[i];
  return total / (components - 1);
}

/* Saturation of a float pixel, measured as the spread between its largest
 * and smallest colour channel.
 *   3 or 4 components: spread of c[0], c[1] and c[2]
 *   1 or 2 components: always 0
 *   anything else:     spread of the first components-1 entries
 */
static float ctx_float_get_sat (int components, float *c)
{
  if (components == 1 || components == 2)
    return 0.0;

  if (components == 3 || components == 4)
  {
    float hi = ctx_maxf(c[0], ctx_maxf(c[1], c[2]));
    float lo = ctx_minf(c[0], ctx_minf(c[1], c[2]));
    return hi - lo;
  }

  float lo = 1000;
  float hi = -1000;
  for (int i = 0; i < components - 1; i ++)
  {
    if (c[i] < lo) lo = c[i];
    if (c[i] > hi) hi = c[i];
  }
  return hi - lo;
}

/* Shift the colour channels of `c` (the first components-1 entries) so that
 * their luminance becomes `lum`, then pull the result back towards its own
 * luminance wherever the shift pushed a channel below 0 or above 1.
 * The last component of `c` is left untouched.
 */
static void ctx_float_set_lum (int components, float *c, float lum)
{
  const int n_color = components - 1;
  float shifted[components];
  float delta = lum - ctx_float_get_lum (components, c);

  for (int i = 0; i < n_color; i++)
    shifted[i] = c[i] + delta;

  /* all three are measured once, before any clipping is applied */
  float l  = ctx_float_get_lum (components, shifted);
  float lo = ctx_float_get_min (components, shifted);
  float hi = ctx_float_get_max (components, shifted);

  /* the l != lo / hi != l tests keep the divisions away from zero */
  if (lo < 0.0f && l != lo)
  {
    float below = l - lo;
    for (int i = 0; i < n_color; i++)
      shifted[i] = l + (((shifted[i] - l) * l) / below);
  }

  if (hi > 1.0f && hi != l)
  {
    float above = hi - l;
    for (int i = 0; i < n_color; i++)
      shifted[i] = l + (((shifted[i] - l) * (1.0f - l)) / above);
  }

  for (int i = 0; i < n_color; i++)
    c[i] = shifted[i];
}

/* Rescale the three colour channels c[0..2] so that their spread
 * (largest minus smallest) becomes `sat`: the smallest channel is set to 0,
 * the largest to `sat` and the middle one is placed proportionally between
 * them.  When the three channels are equal they all become 0.
 *
 * `components` is not used; exactly three channels are always examined and
 * any further entries of `c` are left untouched.
 */
static void ctx_float_set_sat (int components, float *c, float sat)
{
  int hi = 0;
  int md = 1;
  int lo = 2;
  int swap;

  /* three compare-exchange steps order the indices: c[lo] <= c[md] <= c[hi] */
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }
  if (c[md] > c[hi]) { swap = md; md = hi; hi = swap; }
  if (c[lo] > c[md]) { swap = lo; lo = md; md = swap; }

  if (c[hi] > c[lo])
  {
    float range = c[hi] - c[lo];
    c[md] = ((c[md] - c[lo]) * sat) / range;
    c[hi] = sat;
  }
  else
  {
    /* no spread at all: the result is black */
    c[md] = 0.0f;
    c[hi] = 0.0f;
  }
  c[lo] = 0.0f;
}

/* Define a float blend function ctx_float_blend_<name> (components, dst,
 * src, blended).
 *
 * Inside CODE the following names are available:
 *   s        - alias of src
 *   b        - local copy of dst after ctx_float_deassociate_alpha()
 *   blended  - output pixel, to be filled in by CODE
 * After CODE has run, the alpha (last component) of `blended` is taken
 * from `s` and ctx_float_associate_alpha() is applied to `blended`.
 */
#define ctx_float_blend_define(name, CODE) \
static inline void \
ctx_float_blend_##name (int components, float * __restrict__ dst, float *src, float *blended)\
{\
  float *s = src; float b[components];\
  ctx_float_deassociate_alpha (components, dst, b);\
    CODE;\
  blended[components-1] = s[components-1];\
  ctx_float_associate_alpha (components, blended);\
}

/* Define a separable float blend function: CODE is run once per colour
 * channel, with `c` as the channel index running over every component
 * except the last (alpha).
 */
#define ctx_float_blend_define_seperable(name, CODE) \
        ctx_float_blend_define(name, for (int c = 0; c < components-1; c++) { CODE ;}) \

/* Separable float blend modes, evaluated per colour channel on b[c] and
 * s[c] as provided by ctx_float_blend_define.
 */

/* multiply: product of the two channels */
ctx_float_blend_define_seperable(multiply,    blended[c] = (b[c] * s[c]);)
/* screen: b + s - b*s */
ctx_float_blend_define_seperable(screen,      blended[c] = b[c] + s[c] - (b[c] * s[c]);)
/* overlay: multiply where b is below 0.5, screen otherwise.
 * NOTE(review): neither branch doubles its operand as the W3C overlay
 * formula does -- confirm whether this simplified form is intended. */
ctx_float_blend_define_seperable(overlay,     blended[c] = b[c] < 0.5f ? (s[c] * b[c]) :
                                                          s[c] + b[c] - (s[c] * b[c]);)
/* darken / lighten: per-channel minimum / maximum */
ctx_float_blend_define_seperable(darken,      blended[c] = ctx_minf (b[c], s[c]))
ctx_float_blend_define_seperable(lighten,     blended[c] = ctx_maxf (b[c], s[c]))
/* color dodge: b / (1 - s) limited to 1, with b == 0 and s == 1 handled
 * explicitly so that the division never sees a zero denominator */
ctx_float_blend_define_seperable(color_dodge, blended[c] = (b[c] == 0.0f) ? 0.0f :
                                     s[c] == 1.0f ? 1.0f : ctx_minf(1.0f, (b[c]) / (1.0f-s[c])))
/* color burn: 1 - min (1, (1 - b) / s), with b == 1 and s == 0 handled
 * explicitly so that the division never sees a zero denominator */
ctx_float_blend_define_seperable(color_burn,  blended[c] = (b[c] == 1.0f) ? 1.0f :
                                     s[c] == 0.0f ? 0.0f : 1.0f - ctx_minf(1.0f, ((1.0f - b[c])) / s[c]))
/* hard light: multiply where the source channel is below 0.5, screen
 * otherwise -- the same form as overlay with the roles of s and b swapped.
 * The threshold has to be 0.5: a threshold of 0 is never met for channel
 * values in [0,1], which would turn this mode into plain screen.
 * NOTE(review): as with overlay, neither branch doubles its operand as the
 * W3C formula does.
 */
ctx_float_blend_define_seperable(hard_light,  blended[c] = s[c] < 0.5f ? (b[c] * s[c]) :
                                                          b[c] + s[c] - (b[c] * s[c]);)
/* difference: absolute difference |b - s| of the two channels, written as
 * max - min so that the result can never go negative
 */
ctx_float_blend_define_seperable(difference,  blended[c] = ctx_maxf (b[c], s[c]) - ctx_minf (b[c], s[c]))

/* divide: b / s, yielding 0 when s is 0 to avoid dividing by zero */
ctx_float_blend_define_seperable(divide,      blended[c] = s[c]?(b[c]) / s[c]:0.0f)
/* addition: s + b, not limited to 1 here */
ctx_float_blend_define_seperable(addition,    blended[c] = s[c]+b[c])
/* subtract: s - b, not limited to 0 here */
ctx_float_blend_define_seperable(subtract,    blended[c] = s[c]-b[c])

/* exclusion: b + s - 2*b*s */
ctx_float_blend_define_seperable(exclusion,   blended[c] = b[c] + s[c] - 2.0f * b[c] * s[c])
/* soft light, per channel:
 *   s <= 0.5:  b - (1 - 2s) * b * (1 - b)
 *   s >  0.5:  b + (2s - 1) * (d - b)
 * where d = ((16b - 12) * b + 4) * b for b <= 0.25 and sqrt (b) otherwise.
 * `d` is a fraction in [0,1], so it must be kept as a float and the
 * threshold is 0.25 on the float scale.
 */
ctx_float_blend_define_seperable(soft_light,
  if (s[c] <= 0.5f)
  {
    blended[c] = b[c] - (1.0f - 2.0f * s[c]) * b[c] * (1.0f - b[c]);
  }
  else
  {
    float d;
    if (b[c] <= 0.25f)
      d = (((16.0f * b[c] - 12.0f) * b[c] + 4.0f) * b[c]);
    else
      d = ctx_sqrtf(b[c]);
    blended[c] = (b[c] + (2.0f * s[c] - 1.0f) * (d - b[c]));
  }
)


/* "color" blend mode: keep the colour channels (hue and saturation) of `s`
 * and give them the luminosity measured on `b`, i.e. SetLum (s, Lum (b)).
 * Taking the luminosity from `s` itself would leave the colour unchanged
 * and make this mode indistinguishable from a plain copy of the source.
 */
ctx_float_blend_define(color,
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_float_set_lum(components, blended, ctx_float_get_lum (components, b));
)

/* "hue" blend mode: start from the channels of the source `s`, then impose
 * the saturation and the luminosity measured on `b` (the de-associated
 * destination).
 */
ctx_float_blend_define(hue,
  float in_sat = ctx_float_get_sat(components, b);
  float in_lum = ctx_float_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = s[i];
  ctx_float_set_sat(components, blended, in_sat);
  ctx_float_set_lum(components, blended, in_lum);
)

/* "saturation" blend mode: start from the channels of `b` (the
 * de-associated destination), impose the saturation measured on the source
 * `s`, then restore the luminosity measured on `b`.
 */
ctx_float_blend_define(saturation,
  float in_sat = ctx_float_get_sat(components, s);
  float in_lum = ctx_float_get_lum(components, b);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_float_set_sat(components, blended, in_sat);
  ctx_float_set_lum(components, blended, in_lum);
)

/* "luminosity" blend mode: keep the channels of `b` (the de-associated
 * destination) and give them the luminosity measured on the source `s`.
 */
ctx_float_blend_define(luminosity,
  float in_lum = ctx_float_get_lum(components, s);
  for (int i = 0; i < components; i++)
    blended[i] = b[i];
  ctx_float_set_lum(components, blended, in_lum);
)

/* Dispatch to the float blend implementation selected by `blend`,
 * forwarding the remaining arguments unchanged.  A `blend` value with no
 * matching case leaves `blended` untouched.
 */
inline static void
ctx_float_blend (int components, CtxBlend blend, float * __restrict__ dst, float *src, float *blended)
{
  switch (blend)
  {
    /* plain and arithmetic modes */
    case CTX_BLEND_NORMAL:      ctx_float_blend_normal      (components, dst, src, blended); return;
    case CTX_BLEND_ADDITION:    ctx_float_blend_addition    (components, dst, src, blended); return;
    case CTX_BLEND_SUBTRACT:    ctx_float_blend_subtract    (components, dst, src, blended); return;
    case CTX_BLEND_DIVIDE:      ctx_float_blend_divide      (components, dst, src, blended); return;
    /* separable modes */
    case CTX_BLEND_MULTIPLY:    ctx_float_blend_multiply    (components, dst, src, blended); return;
    case CTX_BLEND_SCREEN:      ctx_float_blend_screen      (components, dst, src, blended); return;
    case CTX_BLEND_OVERLAY:     ctx_float_blend_overlay     (components, dst, src, blended); return;
    case CTX_BLEND_DARKEN:      ctx_float_blend_darken      (components, dst, src, blended); return;
    case CTX_BLEND_LIGHTEN:     ctx_float_blend_lighten     (components, dst, src, blended); return;
    case CTX_BLEND_COLOR_DODGE: ctx_float_blend_color_dodge (components, dst, src, blended); return;
    case CTX_BLEND_COLOR_BURN:  ctx_float_blend_color_burn  (components, dst, src, blended); return;
    case CTX_BLEND_HARD_LIGHT:  ctx_float_blend_hard_light  (components, dst, src, blended); return;
    case CTX_BLEND_SOFT_LIGHT:  ctx_float_blend_soft_light  (components, dst, src, blended); return;
    case CTX_BLEND_DIFFERENCE:  ctx_float_blend_difference  (components, dst, src, blended); return;
    case CTX_BLEND_EXCLUSION:   ctx_float_blend_exclusion   (components, dst, src, blended); return;
    /* hue / saturation / luminosity based modes */
    case CTX_BLEND_HUE:         ctx_float_blend_hue         (components, dst, src, blended); return;
    case CTX_BLEND_SATURATION:  ctx_float_blend_saturation  (components, dst, src, blended); return;
    case CTX_BLEND_COLOR:       ctx_float_blend_color       (components, dst, src, blended); return;
    case CTX_BLEND_LUMINOSITY:  ctx_float_blend_luminosity  (components, dst, src, blended); return;
  }
}

/* This is the grunt working function; when it is inlined, code-path
 * elimination makes it produce efficient code.
 *
 * Composites `count` float pixels onto `dst` (reinterpreted as float),
 * each modulated by its entry in `coverage` and, when the global alpha is
 * not 255, by the global alpha.  The source pixel is rasterizer->color for
 * a CTX_SOURCE_COLOR fill, otherwise the result of calling `fragment` once
 * per pixel; the `src` argument is not read.  When `blend` is not
 * CTX_BLEND_NORMAL the source pixel is first blended against the
 * destination pixel with ctx_float_blend().  `compositing_mode` selects the
 * source and destination factors through ctx_porter_duff_factors().
 *
 * Pixels that the compositing mode leaves unchanged are skipped: any pixel
 * with zero coverage for SOURCE_OVER, XOR, DESTINATION_OUT and SOURCE_ATOP,
 * and fully opaque destination pixels for DESTINATION_OVER.
 */
CTX_INLINE static void
ctx_float_porter_duff (CtxRasterizer         *rasterizer,
                       int                    components,
                       uint8_t * __restrict__ dst,
                       uint8_t * __restrict__ src,
                       int                    x0,
                       uint8_t * __restrict__ coverage,
                       int                    count,
                       CtxCompositingMode     compositing_mode,
                       CtxFragment            fragment,
                       CtxBlend               blend)
{
  float *dstf = (float*)dst;

  CtxPorterDuffFactor f_s, f_d;
  ctx_porter_duff_factors (compositing_mode, &f_s, &f_d);
  uint8_t global_alpha_u8 = rasterizer->state->gstate.global_alpha_u8;
  float   global_alpha_f = rasterizer->state->gstate.global_alpha_f;
  
  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR)
  {
    float tsrc[components];

    while (count--)
    {
      uint8_t cov = *coverage;
      /* the opacity test must look at the current float pixel (dstf), not
       * at the raw bytes of the first pixel of the span
       */
      if (
        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dstf[components-1] == 1.0f)||
        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
        compositing_mode == CTX_COMPOSITE_XOR               ||
        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP      
        ))))
      {
        coverage ++;
        dstf+=components;
        continue;
      }

      /* fresh copy per pixel: blending and coverage scaling modify tsrc */
      memcpy (tsrc, rasterizer->color, sizeof(tsrc));

      if (blend != CTX_BLEND_NORMAL)
        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
      float covf = ctx_u8_to_float (cov);

      if (global_alpha_u8 != 255)
        covf = covf * global_alpha_f;

      if (covf != 1.0f)
      {
        for (int c = 0; c < components; c++)
          tsrc[c] *= covf;
      }

      for (int c = 0; c < components; c++)
      {
        float res = 0.0f;
        /* these switches and this whole function is written to be
         * inlined when compiled when the enum values passed in are
         * constants.
         */
        switch (f_s)
        {
          case CTX_PORTER_DUFF_0:             break;
          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
        }
        switch (f_d)
        {
          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
        }
      }
      coverage ++;
      dstf     +=components;
    }
  }
  else
  {
    float tsrc[components];
    float u0 = 0; float v0 = 0;
    float ud = 0; float vd = 0;
    float w0 = 1; float wd = 0;
    for (int c = 0; c < components; c++) tsrc[c] = 0.0f;
    ctx_init_uv (rasterizer, x0, rasterizer->scanline/CTX_FULL_AA, &u0, &v0, &w0, &ud, &vd, &wd);

    while (count--)
    {
      uint8_t cov = *coverage;
      /* the opacity test must look at the current float pixel (dstf), not
       * at the raw bytes of the first pixel of the span
       */
      if (
        CTX_UNLIKELY((compositing_mode == CTX_COMPOSITE_DESTINATION_OVER && dstf[components-1] == 1.0f)||
        (cov == 0 && (compositing_mode == CTX_COMPOSITE_SOURCE_OVER ||
        compositing_mode == CTX_COMPOSITE_XOR               ||
        compositing_mode == CTX_COMPOSITE_DESTINATION_OUT   ||
        compositing_mode == CTX_COMPOSITE_SOURCE_ATOP      
        ))))
      {
        /* keep the source coordinates in step with the skipped pixel */
        u0 += ud;
        v0 += vd;
        w0 += wd;
        coverage ++;
        dstf+=components;
        continue;
      }

      /* the fragment is evaluated one pixel at a time, so all three
       * coordinates (including w) are stepped here between calls
       */
      fragment (rasterizer, u0, v0, w0, tsrc, 1, ud, vd, wd);
      if (blend != CTX_BLEND_NORMAL)
        ctx_float_blend (components, blend, dstf, tsrc, tsrc);
      u0 += ud;
      v0 += vd;
      w0 += wd;
      float covf = ctx_u8_to_float (cov);

      if (global_alpha_u8 != 255)
        covf = covf * global_alpha_f;

      if (covf != 1.0f)
      {
        for (int c = 0; c < components; c++)
          tsrc[c] *= covf;
      }

      for (int c = 0; c < components; c++)
      {
        float res = 0.0f;
        /* these switches and this whole function is written to be
         * inlined when compiled when the enum values passed in are
         * constants.
         */
        switch (f_s)
        {
          case CTX_PORTER_DUFF_0:             break;
          case CTX_PORTER_DUFF_1:             res = (tsrc[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         res = (tsrc[c] *       dstf[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: res = (tsrc[c] * (1.0f-dstf[components-1])); break;
        }
        switch (f_d)
        {
          case CTX_PORTER_DUFF_0: dstf[c] = res; break;
          case CTX_PORTER_DUFF_1:             dstf[c] = res + (dstf[c]); break;
          case CTX_PORTER_DUFF_ALPHA:         dstf[c] = res + (dstf[c] *       tsrc[components-1]); break;
          case CTX_PORTER_DUFF_1_MINUS_ALPHA: dstf[c] = res + (dstf[c] * (1.0f-tsrc[components-1])); break;
        }
      }
      coverage ++;
      dstf     +=components;
    }
  }
}

/* Generating one function per compositing_mode would be slightly more
 * efficient, but on embedded targets it leads to more code bloat; here we
 * trade a small amount of performance for smaller code.
 */
#define ctx_float_porter_duff(compformat, components, source, fragment, blend) \
static void \
ctx_##compformat##_porter_duff_##source (CTX_COMPOSITE_ARGUMENTS) \
{ \
   switch (rasterizer->state->gstate.compositing_mode) \
   { \
     case CTX_COMPOSITE_SOURCE_ATOP: \
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count, \
        CTX_COMPOSITE_SOURCE_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_ATOP:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_ATOP, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION_IN:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_IN, fragment, blend);\
      break;\
     case CTX_COMPOSITE_DESTINATION:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OVER:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OVER:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OVER, fragment, blend);\
       break;\
     case CTX_COMPOSITE_XOR:\
      ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_XOR, fragment, blend);\
       break;\
     case CTX_COMPOSITE_DESTINATION_OUT:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_DESTINATION_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_OUT:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_OUT, fragment, blend);\
       break;\
     case CTX_COMPOSITE_SOURCE_IN:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_SOURCE_IN, fragment, blend);\
       break;\
     case CTX_COMPOSITE_COPY:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_COPY, fragment, blend);\
       break;\
     case CTX_COMPOSITE_CLEAR:\
       ctx_float_porter_duff (rasterizer, components, dst, src, x0, coverage, count,\
        CTX_COMPOSITE_CLEAR, fragment, blend);\
       break;\
   }\
}
#endif

#if CTX_ENABLE_RGBAF

/* RGBAF (4 component) compositors ctx_RGBAF_porter_duff_color and
 * ctx_RGBAF_porter_duff_generic.  Both use rasterizer->fragment and take the
 * blend mode from the gstate when they run; they differ only in name. */
ctx_float_porter_duff(RGBAF, 4,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(RGBAF, 4,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* RGBAF compositors bound to a specific fragment function instead of
 * rasterizer->fragment; the blend mode is still taken from the gstate. */
#if CTX_GRADIENTS
ctx_float_porter_duff(RGBAF, 4,conic_gradient, ctx_fragment_conic_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(RGBAF, 4,linear_gradient, ctx_fragment_linear_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(RGBAF, 4,radial_gradient, ctx_fragment_radial_gradient_RGBAF, rasterizer->state->gstate.blend_mode)
#endif
ctx_float_porter_duff(RGBAF, 4,image,           ctx_fragment_image_RGBAF,           rasterizer->state->gstate.blend_mode)


/* ctx_float_porter_duff_blend instantiates a family of compositors named
 * ctx_<comp_name>_porter_duff_<source>_<blend_name> with the blend mode fixed
 * to the constant `blend_mode`.
 *
 * The gradient and image members are bound to the RGBAF fragment functions,
 * so the macro is only meaningful for comp_name == RGBAF.  The gradient
 * members previously named the RGBA8 fragment functions; the float
 * compositor treats the fragment output as floats, so the float fragments
 * (the same ones used for the run-time blend variants) are used instead.
 */
#if CTX_GRADIENTS
#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,                               blend_mode)\
ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,linear_gradient_##blend_name,  ctx_fragment_linear_gradient_RGBAF, blend_mode)\
ctx_float_porter_duff(comp_name, components,radial_gradient_##blend_name,  ctx_fragment_radial_gradient_RGBAF, blend_mode)\
ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
#else
#define ctx_float_porter_duff_blend(comp_name, components, blend_mode, blend_name)\
ctx_float_porter_duff(comp_name, components,color_##blend_name,            rasterizer->fragment,                               blend_mode)\
ctx_float_porter_duff(comp_name, components,generic_##blend_name,          rasterizer->fragment,               blend_mode)\
ctx_float_porter_duff(comp_name, components,image_##blend_name,            ctx_fragment_image_RGBAF,           blend_mode)
#endif

ctx_float_porter_duff_blend(RGBAF, 4, CTX_BLEND_NORMAL, normal)


/* Compositor entry point for RGBAF: forwards its arguments to
 * ctx_float_copy_normal () with the component count fixed to 4. */
static void
ctx_RGBAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (4, count, dst, src, coverage, rasterizer, x0);
}

/* Compositor entry point for RGBAF: forwards its arguments to
 * ctx_float_clear_normal () with the component count fixed to 4. */
static void
ctx_RGBAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (4, count, dst, src, coverage, rasterizer, x0);
}

#if 1
/* Compositor entry point for RGBAF: forwards to
 * ctx_float_source_over_normal_color () with 4 components.  Note that the
 * source passed on is rasterizer->color, not the `src` argument. */
static void
ctx_RGBAF_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_over_normal_color (4, count, dst, rasterizer->color, coverage, rasterizer, x0);
}
#endif
#endif

/* Selects the fragment function, compositing function (comp_op) and coverage
 * path (comp) of the rasterizer for RGBAF targets, based on the current
 * gstate: source type, compositing mode, blend mode and global alpha.
 * Every exit path calls ctx_setup_apply_coverage ().
 */
static void
ctx_setup_RGBAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 4;

  /* defaults, refined below */
  rasterizer->comp_op  = ctx_RGBAF_porter_duff_generic;
  rasterizer->fragment = ctx_rasterizer_get_fragment_RGBAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
#if 1
  /* no source: keep the defaults */
  if (gstate->source_fill.type == CTX_SOURCE_NONE)
  {
    ctx_setup_apply_coverage (rasterizer);
    return;
  }
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
      /* evaluate the color fragment once, storing the result as floats in
       * rasterizer->color */
      ctx_fragment_color_RGBAF (rasterizer, 0,0,1, rasterizer->color, 1, 0,0,0);
      /* fold global alpha into all four components */
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;

      /* also keep a copy converted to the native pixel format, when the
       * format provides a from_comp converter */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }
  else
#endif
  {
    rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
  }

#if CTX_INLINED_NORMAL
  /* pick specialised compositors; clear takes precedence over everything */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_RGBAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_RGBAF_copy_normal;
          if (gstate->source_fill.type == CTX_SOURCE_COLOR)
            rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;

        }
        else if (gstate->global_alpha_u8 == 0)
        {
          /* global alpha of zero: install the no-op compositor */
          rasterizer->comp_op = ctx_RGBA8_nop;
        }
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              rasterizer->comp_op = ctx_RGBAF_source_over_normal_color;
              /* a color with alpha of at least 0.999 also selects the
               * RGBAF copy coverage path */
              if ( ((float*)rasterizer->color)[3] >= 0.999f)
                rasterizer->comp = CTX_COV_PATH_RGBAF_COPY;
            }
            else
            {
              rasterizer->comp_op = ctx_RGBAF_porter_duff_color_normal;
            }
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient_normal;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient_normal;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_image_normal;
            break;
          default:
            /* any other source type (conic gradients included, as they have
             * no case of their own) uses rasterizer->fragment */
            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        /* non-normal blend modes: compositors that read the blend mode from
         * the gstate */
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_color;
            break;
#if CTX_GRADIENTS
          case CTX_SOURCE_LINEAR_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_linear_gradient;
            break;
          case CTX_SOURCE_RADIAL_GRADIENT:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_radial_gradient;
            break;
#endif
          case CTX_SOURCE_TEXTURE:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_image;
            break;
          default:
            rasterizer->comp_op = ctx_RGBAF_porter_duff_generic;
            break;
        }
        break;
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

#endif
#if CTX_ENABLE_GRAYAF

#if CTX_GRADIENTS
/* Fragment function: writes `count` gray+alpha float pairs to `out`, sampled
 * from the linear gradient of the current fill source.  Each sample is
 * looked up as RGBA and then reduced to gray; z and dz are unused. */
static void
ctx_fragment_linear_gradient_GRAYAF (CtxRasterizer *rasterizer, float u0, float v0, float z, void *out, int count, float ud, float vd, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const float scale_x = source->linear_gradient.dx_scaled;
  const float scale_y = source->linear_gradient.dy_scaled;
  const float start   = source->linear_gradient.start_scaled;

  /* scale start coordinate and per-pixel deltas into gradient space */
  const float u_scaled  = u0 * scale_x;
  const float v_scaled  = v0 * scale_y;
  const float ud_scaled = ud * scale_x;
  const float vd_scaled = vd * scale_y;

  float       pos  = ((u_scaled + v_scaled) - start);
  const float step = ud_scaled + vd_scaled;

  float *graya = (float *) out;
  float  rgba[4];

  for (int i = 0; i < count; i++, graya += 2, pos += step)
  {
    ctx_fragment_gradient_1d_RGBAF (rasterizer, pos, 1.0f, rgba);
    graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
    graya[1] = rgba[3];
  }
}

/* Fragment function: writes `count` gray+alpha float pairs to `out`, sampled
 * from the radial gradient of the current fill source, stepping (x, y) by
 * (dx, dy) per sample.  z and dz are unused. */
static void
ctx_fragment_radial_gradient_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  const float center_x     = source->radial_gradient.x0;
  const float center_y     = source->radial_gradient.y0;
  const float inner_radius = source->radial_gradient.r0;
  const float radius_scale = source->radial_gradient.rdelta;

  float *graya = (float *) out;
  float  rgba[4];

  for (int i = 0; i < count; i++, graya += 2, x += dx, y += dy)
  {
    const float distance = ctx_hypotf (center_x - x, center_y - y);
    const float pos      = (distance - inner_radius) * (radius_scale);

    ctx_fragment_gradient_1d_RGBAF (rasterizer, pos, 0.0, rgba);
    graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgba);
    graya[1] = rgba[3];
  }
}
#endif

/* Fragment function for an absent source: fills `count` gray+alpha float
 * pairs in `out` with zeroes.  All coordinate arguments are ignored. */
static void
ctx_fragment_none_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  float *cursor = (float *) out;

  for (int remaining = count * 2; remaining > 0; remaining--)
    *cursor++ = 0.0f;
}

/* Fragment function for a solid color source: stores the fill color as a
 * gray+alpha float pair for each of the `count` output samples.  The
 * coordinates do not influence the result. */
static void
ctx_fragment_color_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  float     *graya  = (float *) out;

  for (int i = 0; i < count; i++, graya += 2)
    ctx_color_get_graya (rasterizer->state, &source->color, graya);
}

/* Fragment function for texture sources: samples `count` pixels as 8 bit
 * RGBA through the RGBA8 image fragment functions, then writes one
 * gray+alpha float pair per pixel to `out`.
 *
 * The conversion loop previously ran 2*count times, converting one byte per
 * iteration, always deriving gray from the first pixel and writing two
 * floats each time; that wrote 4*count floats into a buffer sized for
 * 2*count and read an unset alpha.  It now converts each pixel exactly once.
 */
static void ctx_fragment_image_GRAYAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  /* a variable length array must have a positive size */
  if (count <= 0)
    return;

  uint8_t rgba[4*count];
  float *graya = (float*)out;
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
         CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
         CtxBuffer *buffer = g->texture.buffer;
#endif
  /* dispatch on the bit depth of the texture */
  switch (buffer->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);  break;
      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
#endif
      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);       break;
    }
  for (int i = 0; i < count; i ++)
  {
    float rgbaf[4];
    for (int c = 0; c < 4; c ++)
      rgbaf[c] = ctx_u8_to_float (rgba[i * 4 + c]);
    graya[0] = ctx_float_color_rgb_to_gray (rasterizer->state, rgbaf);
    graya[1] = rgbaf[3];
    graya += 2;
  }
}

/* Returns the GRAYAF fragment function matching the type of the current fill
 * source; source types without a GRAYAF fragment function get the "none"
 * fragment. */
static CtxFragment ctx_rasterizer_get_fragment_GRAYAF (CtxRasterizer *rasterizer)
{
  CtxFragment fragment = ctx_fragment_none_GRAYAF;

  switch (rasterizer->state->gstate.source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        fragment = ctx_fragment_image_GRAYAF;
        break;
      case CTX_SOURCE_COLOR:
        fragment = ctx_fragment_color_GRAYAF;
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        fragment = ctx_fragment_linear_gradient_GRAYAF;
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        fragment = ctx_fragment_radial_gradient_GRAYAF;
        break;
#endif
      default:
        break;
    }
  return fragment;
}

/* GRAYAF (2 component) compositors ctx_GRAYAF_porter_duff_color and
 * ctx_GRAYAF_porter_duff_generic; both use rasterizer->fragment and take the
 * blend mode from the gstate when they run. */
ctx_float_porter_duff(GRAYAF, 2,color,   rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff(GRAYAF, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* GRAYAF compositors with the blend mode fixed to CTX_BLEND_NORMAL. */
ctx_float_porter_duff(GRAYAF, 2,color_normal,   rasterizer->fragment, CTX_BLEND_NORMAL)
ctx_float_porter_duff(GRAYAF, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)

/* Compositor entry point for GRAYAF: forwards its arguments to
 * ctx_float_copy_normal () with the component count fixed to 2. */
static void
ctx_GRAYAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (2, count, dst, src, coverage, rasterizer, x0);
}

/* Compositor entry point for GRAYAF: forwards its arguments to
 * ctx_float_clear_normal () with the component count fixed to 2. */
static void
ctx_GRAYAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (2, count, dst, src, coverage, rasterizer, x0);
}

/* Compositor entry point for GRAYAF: forwards to
 * ctx_float_source_copy_normal_color () with 2 components.  The source
 * passed on is rasterizer->color, not the `src` argument. */
static void
ctx_GRAYAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_copy_normal_color (2, count, dst, rasterizer->color, coverage, rasterizer, x0);
}
#endif

/* Selects the fragment function, compositing function (comp_op) and coverage
 * path (comp) of the rasterizer for GRAYAF targets, based on the current
 * gstate: source type, compositing mode, blend mode and global alpha.
 * Every exit path calls ctx_setup_apply_coverage ().
 */
static void
ctx_setup_GRAYAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 2;

  /* defaults, refined below */
  rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (gstate->source_fill.type == CTX_SOURCE_NONE)
  {
    ctx_setup_apply_coverage (rasterizer);
    return;
  }

  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
      /* rasterizer->color is consumed as `components` (= 2) floats: gray
       * followed by alpha.  Fetching RGBA here put green in the alpha slot,
       * which made e.g. opaque black or red fills look fully transparent
       * to the alpha test below; fetch gray+alpha instead.
       * NOTE(review): the value is stored the way ctx_fragment_color_GRAYAF
       * emits it, without multiplying gray by alpha -- confirm whether the
       * color compositors expect premultiplied input. */
      ctx_color_get_graya (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
      /* fold global alpha into both components */
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          ((float*)rasterizer->color)[c] *= gstate->global_alpha_f;

      /* also keep a copy converted to the native pixel format, when the
       * format provides a from_comp converter */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }

#if CTX_INLINED_NORMAL
  /* pick specialised compositors; clear takes precedence over everything */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_GRAYAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_GRAYAF_copy_normal;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              /* fully transparent color: nothing to draw.
               * NOTE(review): every other alpha uses the source-copy
               * compositor, whereas ctx_setup_CMYKAF only does so for an
               * alpha of exactly 1.0 -- confirm this is intended for
               * translucent colors. */
              if (((float*)rasterizer->color)[components-1] == 0.0f)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else
                rasterizer->comp_op = ctx_GRAYAF_source_copy_normal_color;
            }
            else
            {
              rasterizer->comp_op = ctx_GRAYAF_porter_duff_color_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        /* non-normal blend modes: compositors that read the blend mode from
         * the gstate */
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_color;
            break;
          default:
            rasterizer->comp_op = ctx_GRAYAF_porter_duff_generic;
            break;
        }
        break;
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

#endif
#if CTX_ENABLE_GRAYF

static void
ctx_composite_GRAYF (CTX_COMPOSITE_ARGUMENTS)
{
  float *dstf = (float*)dst;

  float temp[count*2];
  for (unsigned int i = 0; i < count; i++)
  {
    temp[i*2] = dstf[i];
    temp[i*2+1] = 1.0f;
  }
  rasterizer->comp_op (count, (uint8_t*)temp, rasterizer->color, coverage, rasterizer, x0);
  for (unsigned int i = 0; i < count; i++)
  {
    dstf[i] = temp[i*2];
  }
}

#endif
#if CTX_ENABLE_BGRA8

/* Swaps, in place, the two 8 bit fields selected by mask 0x00ff00ff in the
 * 32 bit pixel at `rgba` (bits 0-7 and bits 16-23); the fields selected by
 * 0xff00ff00 are left untouched.  The pixel is accessed as a uint32_t. */
inline static void
ctx_swap_red_green (uint8_t *rgba)
{
  uint32_t       *pixel   = (uint32_t *) rgba;
  const uint32_t  value   = *pixel;
  const uint32_t  kept    = value & 0xff00ff00;
  const uint32_t  swapped = value & 0x00ff00ff;

  *pixel = kept | (swapped << 16) | (swapped >> 16);
}

/* Converts `count` 32 bit pixels from `buf` to `rgba`, applying
 * ctx_swap_red_green () to each.  `rasterizer` and `x` are unused. */
static void
ctx_BGRA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  uint32_t *in  = (uint32_t *) buf;
  uint32_t *out = (uint32_t *) rgba;

  for (; count; count--)
    {
      uint32_t pixel = *in++;
      ctx_swap_red_green ( (uint8_t *) &pixel);
      *out++ = pixel;
    }
}

/* Converts `count` pixels from `rgba` to `buf`.  The channel swap is its own
 * inverse, so this simply reuses ctx_BGRA8_to_RGBA8 (). */
static void
ctx_RGBA8_to_BGRA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  ctx_BGRA8_to_RGBA8 (rasterizer, x, rgba, (uint8_t *) buf, count);
}

/* Composites onto a BGRA8 target by converting the destination span into a
 * temporary RGBA8 buffer, running the active comp_op on it with
 * rasterizer->color as source, and converting the result back into `dst`.
 */
static void
ctx_composite_BGRA8 (CTX_COMPOSITE_ARGUMENTS)
{
  // for better performance, this could be done without a pre/post conversion,
  // by swapping R and B of source instead... as long as it is a color instead
  // of gradient or image
  //
  //
  uint8_t pixels[count * 4];
  ctx_BGRA8_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (count, &pixels[0], rasterizer->color, coverage, rasterizer, x0);
  // the swap is symmetric, so the same converter maps the result back
  ctx_BGRA8_to_RGBA8  (rasterizer, x0, &pixels[0], dst, count);
}


#endif
/* Composites directly onto `dst` with no format conversion: invokes the
 * active comp_op with rasterizer->color as the source (the `src` argument is
 * not forwarded). */
static inline void
ctx_composite_direct (CTX_COMPOSITE_ARGUMENTS)
{
  rasterizer->comp_op (count, dst, rasterizer->color, coverage, rasterizer, x0);
}

#if CTX_ENABLE_CMYKAF

/* Fragment function for non-color CMYKAF sources: evaluates the matching
 * RGBAF fragment function into a temporary buffer and converts each pixel
 * with ctx_rgb_to_cmyk (), writing five floats (c, m, y, k, alpha) per pixel
 * to `out`.
 *
 * Source types without a case produce zeroes for every pixel; previously
 * only the first pixel of the temporary buffer was cleared, so the
 * conversion loop read uninitialized values when count > 1.
 */
static void
ctx_fragment_other_CMYKAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  /* a variable length array must have a positive size */
  if (count <= 0)
    return;

  float *cmyka = (float*)out;
  float _rgba[4 * count];
  float *rgba = &_rgba[0];
  CtxGState *gstate = &rasterizer->state->gstate;
  switch (gstate->source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        ctx_fragment_image_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_COLOR:
        ctx_fragment_color_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_NONE:
        ctx_fragment_none_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_CONIC_GRADIENT:
        ctx_fragment_conic_gradient_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_LINEAR_GRADIENT:
        ctx_fragment_linear_gradient_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        ctx_fragment_radial_gradient_RGBAF (rasterizer, x, y, z, rgba, count, dx, dy, dz);
        break;
#endif
      default:
        /* clear the whole span, not just the first pixel */
        for (int i = 0; i < 4 * count; i++)
          rgba[i] = 0.0f;
        break;
    }
  for (int i = 0; i < count; i++)
  {
    cmyka[4]=rgba[3];
    ctx_rgb_to_cmyk (rgba[0], rgba[1], rgba[2], &cmyka[0], &cmyka[1], &cmyka[2], &cmyka[3]);
    cmyka += 5;
    rgba += 4;
  }
}

/* Fragment function for solid color CMYKAF sources: every one of the `count`
 * output pixels receives the four CMYK components of the fill color as
 * (1.0 - value), followed by the unmodified alpha. */
static void
ctx_fragment_color_CMYKAF (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  float      color[5];
  float     *pixel = (float*)out;

  ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, color);

  /* invert the ink components once, alpha stays as it is */
  for (int c = 0; c < 4; c ++)
    color[c] = (1.0f - color[c]);

  for (int i = 0; i < count; i++, pixel += 5)
  {
    for (int c = 0; c < 5; c ++)
      pixel[c] = color[c];
  }
}

/* Returns the CMYKAF fragment function for the current fill source: the
 * dedicated color fragment for solid colors, the RGBAF-converting fragment
 * for everything else. */
static CtxFragment ctx_rasterizer_get_fragment_CMYKAF (CtxRasterizer *rasterizer)
{
  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_COLOR)
    return ctx_fragment_color_CMYKAF;

  return ctx_fragment_other_CMYKAF;
}

/* CMYKAF (5 component) compositors ctx_CMYKAF_porter_duff_color and
 * ctx_CMYKAF_porter_duff_generic; both use rasterizer->fragment and take the
 * blend mode from the gstate when they run. */
ctx_float_porter_duff (CMYKAF, 5,color,           rasterizer->fragment, rasterizer->state->gstate.blend_mode)
ctx_float_porter_duff (CMYKAF, 5,generic,         rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* CMYKAF compositors with the blend mode fixed to CTX_BLEND_NORMAL. */
ctx_float_porter_duff (CMYKAF, 5,color_normal,            rasterizer->fragment, CTX_BLEND_NORMAL)
ctx_float_porter_duff (CMYKAF, 5,generic_normal,          rasterizer->fragment, CTX_BLEND_NORMAL)

/* Compositor entry point for CMYKAF: forwards its arguments to
 * ctx_float_copy_normal () with the component count fixed to 5. */
static void
ctx_CMYKAF_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_copy_normal (5, count, dst, src, coverage, rasterizer, x0);
}

/* Compositor entry point for CMYKAF: forwards its arguments to
 * ctx_float_clear_normal () with the component count fixed to 5. */
static void
ctx_CMYKAF_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_clear_normal (5, count, dst, src, coverage, rasterizer, x0);
}

/* Compositor entry point for CMYKAF: forwards to
 * ctx_float_source_copy_normal_color () with 5 components.  The source
 * passed on is rasterizer->color, not the `src` argument. */
static void
ctx_CMYKAF_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_float_source_copy_normal_color (5, count, dst, rasterizer->color, coverage, rasterizer, x0);
}
#endif

/* Selects the fragment function, compositing function (comp_op) and coverage
 * path (comp) of the rasterizer for CMYKAF processing, based on the current
 * gstate: source type, compositing mode, blend mode and global alpha.
 * Every exit path calls ctx_setup_apply_coverage ().
 */
static void
ctx_setup_CMYKAF (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 5;
  /* defaults, refined below */
  rasterizer->fragment = ctx_rasterizer_get_fragment_CMYKAF (rasterizer);
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
  if (gstate->source_fill.type == CTX_SOURCE_NONE)
  {
    ctx_setup_apply_coverage (rasterizer);
    return;
  }

  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      /* the generic compositor is kept for color sources at this point; an
       * assignment of ctx_CMYKAF_porter_duff_color used to precede this one
       * but was immediately overwritten, so it has been dropped */
      rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
      /* store the fill color as five floats in rasterizer->color */
      ctx_color_get_cmyka (rasterizer->state, &gstate->source_fill.color, (float*)rasterizer->color);
      /* global alpha is folded into the alpha component only */
      if (gstate->global_alpha_u8 != 255)
        ((float*)rasterizer->color)[components-1] *= gstate->global_alpha_f;

      /* also keep a copy converted to the native pixel format, when the
       * format provides a from_comp converter */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }

#if CTX_INLINED_NORMAL
  /* pick specialised compositors; clear takes precedence over everything */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_CMYKAF_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_CMYKAF_copy_normal;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              /* transparent: no-op; opaque: plain copy with the copy
               * coverage path; anything in between: full porter-duff */
              if (((float*)rasterizer->color)[components-1] == 0.0f)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else if (((float*)rasterizer->color)[components-1] == 1.0f)
              {
                rasterizer->comp_op = ctx_CMYKAF_source_copy_normal_color;
                rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
              }
              else
                rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
            }
            else
            {
              rasterizer->comp_op = ctx_CMYKAF_porter_duff_color_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic_normal;
            break;
        }
        break;
      default:
        /* non-normal blend modes: compositors that read the blend mode from
         * the gstate */
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_color;
            break;
          default:
            rasterizer->comp_op = ctx_CMYKAF_porter_duff_generic;
            break;
        }
        break;
    }
#else

    if (gstate->blend_mode == CTX_BLEND_NORMAL &&
        gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
        }
        /* the color was stored above as floats, so the opacity test has to
         * read the alpha as a float; comparing a raw element of
         * rasterizer->color against 255 did not look at the alpha value */
        else if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER &&
                 ((float*)rasterizer->color)[components-1] == 1.0f)
        {
          rasterizer->comp = CTX_COV_PATH_CMYKAF_COPY;
        }
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

/* Setup for CMYKA8 targets: reuses ctx_setup_CMYKAF () and, when that
 * selected the CMYKAF copy coverage path, swaps in the CMYKA8 one. */
static void
ctx_setup_CMYKA8 (CtxRasterizer *rasterizer)
{
  ctx_setup_CMYKAF (rasterizer);

  if (rasterizer->comp == CTX_COV_PATH_CMYKAF_COPY)
    rasterizer->comp = CTX_COV_PATH_CMYKA8_COPY;
}

/* Setup for CMYK8 targets: reuses ctx_setup_CMYKAF () and, when that
 * selected the CMYKAF copy coverage path, swaps in the CMYK8 one. */
static void
ctx_setup_CMYK8 (CtxRasterizer *rasterizer)
{
  ctx_setup_CMYKAF (rasterizer);
  if (rasterizer->comp == CTX_COV_PATH_CMYKAF_COPY)
    rasterizer->comp = CTX_COV_PATH_CMYK8_COPY;
}

#endif
#if CTX_ENABLE_CMYKA8

/* Converts `count` 5 byte CMYKA8 pixels to 5 float CMYKAF pixels.  Each ink
 * byte is inverted (255 - value), scaled to float and multiplied by the
 * pixel's alpha; alpha itself is only scaled to float. */
static void
ctx_CMYKA8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
{
  for (int i = 0; i < count; i++, src += 5, dst += 5)
    {
      const float alpha = ctx_u8_to_float (src[4]);

      for (int c = 0; c < 4; c++)
        {
          const float inverted = ctx_u8_to_float ( (255-src[c]) );
          dst[c] = inverted * alpha;
        }
      dst[4] = alpha;
    }
}
/* Converts `count` 5 float CMYKAF pixels back to 5 byte CMYKA8 pixels.  When
 * the 8 bit alpha is neither 0 nor 255 the components are divided by the
 * float alpha before being inverted; otherwise they are converted to 8 bit
 * first and then inverted. */
static void
ctx_CMYKAF_to_CMYKA8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
{
  for (int i = 0; i < count; i++, src += 5, dst += 5)
    {
      const int alpha       = ctx_float_to_u8 (src[4]);
      const int translucent = (alpha != 0) & (alpha != 255);

      if (translucent)
      {
        const float inv_alpha = 1.0f/src[4];
        for (int c = 0; c < 4; c++)
          dst[c] = ctx_float_to_u8 (1.0f - src[c] * inv_alpha);
      }
      else
      {
        for (int c = 0; c < 4; c++)
          dst[c] = 255 - ctx_float_to_u8 (src[c]);
      }
      dst[4] = alpha;
    }
}

/* Composites onto a CMYKA8 target: the destination span is converted to a
 * temporary CMYKAF float buffer (5 floats per pixel), the active comp_op
 * runs on it with rasterizer->color as source, and the result is converted
 * back into `dst`. */
static void
ctx_composite_CMYKA8 (CTX_COMPOSITE_ARGUMENTS)
{
  float pixels[count * 5];
  ctx_CMYKA8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
  rasterizer->comp_op (count, (uint8_t *) &pixels[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_CMYKAF_to_CMYKA8 (rasterizer, &pixels[0], dst, count);
}

#endif
#if CTX_ENABLE_CMYK8

/* Converts `count` 4 byte CMYK8 pixels to 5 float CMYKAF pixels: each ink
 * byte is inverted (255 - value) and scaled to float, alpha is set to 1.0. */
static void
ctx_CMYK8_to_CMYKAF (CtxRasterizer *rasterizer, uint8_t *src, float *dst, int count)
{
  for (int i = 0; i < count; i++, src += 4, dst += 5)
    {
      for (int c = 0; c < 4; c++)
        dst[c] = ctx_u8_to_float (255-src[c]);
      dst[4] = 1.0f;
    }
}
/* Converts `count` 5 float CMYKAF pixels to 4 byte CMYK8 pixels.  When alpha
 * is neither 0.0 nor 1.0 the components are first divided by alpha; they
 * are then inverted (1.0 - value) and converted to 8 bit.  Alpha itself is
 * not stored. */
static void
ctx_CMYKAF_to_CMYK8 (CtxRasterizer *rasterizer, float *src, uint8_t *dst, int count)
{
  for (int i = 0; i < count; i++, src += 5, dst += 4)
    {
      float       ink[4] = { src[0], src[1], src[2], src[3] };
      const float alpha  = src[4];

      if ((alpha != 0.0f) & (alpha != 1.0f))
        {
          const float inv_alpha = 1.0f/alpha;
          for (int n = 0; n < 4; n++)
            ink[n] *= inv_alpha;
        }

      for (int n = 0; n < 4; n++)
        {
          ink[n] = 1.0f - ink[n];
          dst[n] = ctx_float_to_u8 (ink[n]);
        }
    }
}

/* Composites onto a CMYK8 target: the destination span is converted to a
 * temporary CMYKAF float buffer (5 floats per pixel), the active comp_op
 * runs on it, and the result is converted back into `dst`.
 * NOTE(review): this forwards the `src` argument to comp_op, whereas
 * ctx_composite_CMYKA8 forwards rasterizer->color -- confirm which is
 * intended. */
static void
ctx_composite_CMYK8 (CTX_COMPOSITE_ARGUMENTS)
{
  float pixels[count * 5];
  ctx_CMYK8_to_CMYKAF (rasterizer, dst, &pixels[0], count);
  rasterizer->comp_op (count, (uint8_t *) &pixels[0], src, coverage, rasterizer, x0);
  ctx_CMYKAF_to_CMYK8 (rasterizer, &pixels[0], dst, count);
}
#endif


#if CTX_ENABLE_BGR8

/* Expands `count` 3 byte pixels from `buf` into 32 bit values at `rgba`:
 * source byte 1 lands in bits 0-7, byte 0 in bits 8-15, byte 2 in bits
 * 16-23 and bits 24-31 are set to 0xff.  `rasterizer` and `x` are unused. */
inline static void
ctx_BGR8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in  = (const uint8_t *) buf;
  uint32_t      *out = (uint32_t*)rgba;

  for (; count; count--, in += 3, out++)
    {
      const uint32_t low  = in[1];
      const uint32_t mid  = (uint32_t) in[0] << 8;
      const uint32_t high = (uint32_t) in[2] << 16;

      *out = low | mid | high | (((unsigned)0xff) << 24);
    }
}

/* Packs `count` 32 bit values from `rgba` into 3 byte pixels at `buf`:
 * bits 8-15 go to byte 0, bits 0-7 to byte 1 and bits 16-23 to byte 2; bits
 * 24-31 are dropped.  `rasterizer` and `x` are unused. */
inline static void
ctx_RGBA8_to_BGR8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  const uint32_t *in  = (const uint32_t*)rgba;
  uint8_t        *out = (uint8_t *) buf;

  for (; count; count--, in++, out += 3)
    {
      const uint32_t value = *in;

      out[0] = (uint8_t) ((value >> 8) & 0xff);
      out[1] = (uint8_t) (value & 0xff);
      out[2] = (uint8_t) ((value >> 16) & 0xff);
    }
}

/* Composites onto a 3 byte per pixel BGR8 target.
 *
 * Two compositors are special-cased and blended in place with ctx_lerp_u8 ():
 * for ctx_RGBA8_source_over_normal_color the coverage is scaled by the alpha
 * of rasterizer->color, for ctx_RGBA8_source_copy_normal_color the coverage
 * is used as is.  Anything else converts the span to a temporary RGBA8
 * buffer, runs comp_op on it and converts back. */
static void
ctx_composite_BGR8 (CTX_COMPOSITE_ARGUMENTS)
{
  const int over_color = (rasterizer->comp_op == ctx_RGBA8_source_over_normal_color);
  const int copy_color = (rasterizer->comp_op == ctx_RGBA8_source_copy_normal_color);

  if (over_color | copy_color)
  {
    uint8_t *color = ((uint8_t*)rasterizer->color);
    uint8_t *pixel = (uint8_t*)dst;

    for (; count; count--, pixel += 3)
    {
        uint32_t cov = *coverage++;
        if (over_color)
          cov = (cov * color[3] + 255) >> 8;
        /* color bytes 0 and 1 are swapped to match the target layout */
        pixel[0] = ctx_lerp_u8 (pixel[0], color[1], cov);
        pixel[1] = ctx_lerp_u8 (pixel[1], color[0], cov);
        pixel[2] = ctx_lerp_u8 (pixel[2], color[2], cov);
    }
    return;
  }

  uint8_t rgba[count * 4];
  ctx_BGR8_to_RGBA8 (rasterizer, x0, dst, &rgba[0], count);
  rasterizer->comp_op (count, &rgba[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_RGBA8_to_BGR8 (rasterizer, x0, &rgba[0], dst, count);
}

#endif

#if CTX_ENABLE_RGB8

/* Expands `count` 3 byte pixels from `buf` into 32 bit values at `rgba`:
 * source byte 0 lands in bits 0-7, byte 1 in bits 8-15, byte 2 in bits
 * 16-23 and bits 24-31 are set to 0xff.  `rasterizer` and `x` are unused. */
inline static void
ctx_RGB8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in  = (const uint8_t *) buf;
  uint32_t      *out = (uint32_t*)rgba;

  for (; count; count--, in += 3, out++)
    {
      const uint32_t low  = in[0];
      const uint32_t mid  = (uint32_t) in[1] << 8;
      const uint32_t high = (uint32_t) in[2] << 16;

      *out = low | mid | high | (((unsigned)0xff) << 24);
    }
}

/* Packs `count` 32 bit values from `rgba` into 3 byte pixels at `buf`:
 * bits 0-7 go to byte 0, bits 8-15 to byte 1 and bits 16-23 to byte 2; bits
 * 24-31 are dropped.  `rasterizer` and `x` are unused. */
inline static void
ctx_RGBA8_to_RGB8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  const uint32_t *in  = (const uint32_t*)rgba;
  uint8_t        *out = (uint8_t *) buf;

  for (; count; count--, in++, out += 3)
    {
      const uint32_t value = *in;

      out[0] = (uint8_t) (value & 0xff);
      out[1] = (uint8_t) ((value >> 8) & 0xff);
      out[2] = (uint8_t) ((value >> 16) & 0xff);
    }
}

/* Composites onto a 3 byte per pixel RGB8 target.
 *
 * Two compositors are special-cased and blended in place with ctx_lerp_u8 ():
 * for ctx_RGBA8_source_over_normal_color the coverage is scaled by the alpha
 * of rasterizer->color, for ctx_RGBA8_source_copy_normal_color the coverage
 * is used as is.  Anything else converts the span to a temporary RGBA8
 * buffer, runs comp_op on it and converts back. */
static void
ctx_composite_RGB8 (CTX_COMPOSITE_ARGUMENTS)
{
  const int over_color = (rasterizer->comp_op == ctx_RGBA8_source_over_normal_color);
  const int copy_color = (rasterizer->comp_op == ctx_RGBA8_source_copy_normal_color);

  if (over_color | copy_color)
  {
    uint8_t *color = ((uint8_t*)rasterizer->color);
    uint8_t *pixel = (uint8_t*)dst;

    for (; count; count--, pixel += 3)
    {
        uint32_t cov = *coverage++;
        if (over_color)
          cov = (cov * color[3] + 255) >> 8;
        for (int c = 0; c < 3; c++)
          pixel[c] = ctx_lerp_u8 (pixel[c], color[c], cov);
    }
    return;
  }

  uint8_t rgba[count * 4];
  ctx_RGB8_to_RGBA8 (rasterizer, x0, dst, &rgba[0], count);
  rasterizer->comp_op (count, &rgba[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_RGBA8_to_RGB8 (rasterizer, x0, &rgba[0], dst, count);
}

#endif
#if CTX_ENABLE_GRAY1

#if CTX_NATIVE_GRAYA8
/* Decode count GRAY1 pixels, starting at horizontal position x, into GRAYA8
 * (gray byte followed by an alpha byte that is always 255).
 *
 * Pixel x is stored in bit (x & 7) of its byte, least significant bit first;
 * buf must point at the byte that holds pixel x.  A set bit decodes to gray
 * 255, a clear bit to gray 0.  rasterizer is not used.
 */
inline static void
ctx_GRAY1_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *graya, int count)
{
  const uint8_t *pixel = (const uint8_t *) buf;
  while (count--)
    {
      int bitno = x&7;
      /* byte aligned and (after the decrement above) at least 8 pixels left:
       * decode uniform bytes in one go */
      if ((bitno == 0) & (count >= 7))
      {
        if (*pixel == 0)
        {
          for (int i = 0; i < 8; i++)
          {
            *graya++ = 0; *graya++ = 255;
          }
          x+=8; count-=7; pixel++;
          continue;
        }
        else if (*pixel == 0xff)
        {
          /* gray and alpha are both 255, so 16 identical bytes */
          for (int i = 0; i < 8 * 2; i++)
          {
            *graya++ = 255;
          }
          x+=8; count-=7; pixel++;
          continue;
        }
      }
      /* The masked bit has the value 1 << bitno, not 0/1: multiplying it by
       * 255 and truncating to uint8_t yielded 254, 252 ... 128 for bits 1..7.
       * Test the bit instead so every set pixel decodes to exactly 255. */
      *graya++ = ((*pixel) & (1<<bitno)) ? 255 : 0;
      *graya++ = 255;
      pixel+= (bitno ==7);
      x++;
    }
}

/* Encode count GRAYA8 pixels as GRAY1: bit (x & 7) of the current output byte
 * is set when the gray component is >= 128 and cleared otherwise.  The alpha
 * byte is ignored; rasterizer is not used.
 */
inline static void
ctx_GRAYA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 2)
    {
      const int bit  = x & 7;
      const int mask = 1 << bit;
      if (rgba[0] < 128)
        *out &= ~mask;
      else
        *out |= mask;
      /* move on once the last bit of this byte has been written */
      out += (bit == 7);
    }
}

#else

/* Decode count GRAY1 pixels, starting at horizontal position x, into RGBA8.
 *
 * Pixel x is stored in bit (x & 7) of its byte, least significant bit first
 * (the same layout ctx_RGBA8_to_GRAY1 writes); buf must point at the byte that
 * holds pixel x.  Set bits become opaque white, clear bits opaque black.
 * rasterizer is not used.
 */
inline static void
ctx_GRAY1_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint32_t black = 0xff000000;
  const uint32_t white = 0xffffffff;
  const uint8_t *pixel = (const uint8_t *) buf;
  uint32_t *dst = (uint32_t*)rgba;
  while (count--)
    {
      int bitno = x&7;
      uint8_t pval = *pixel;
      /* byte aligned and (after the decrement above) at least 8 pixels left */
      if ((bitno == 0) & (count >=7))
      {
        /* Special case some bit patterns when decoding: pixels [0, split) of
         * the byte get `lead`, pixels [split, 8) get `tail`.
         *
         * The previous unrolled code emitted 0x0f, 0xfc and 0x3f most
         * significant bit first, i.e. mirrored relative to the per-pixel path
         * below, so the same byte decoded differently depending on alignment
         * and remaining count.  The table follows the LSB-first layout. */
        int      split = -1;
        uint32_t lead  = black;
        uint32_t tail  = black;
        switch (pval)
        {
          case 0x00: split = 8; lead = black;               break;
          case 0xff: split = 8; lead = white;               break;
          case 0x0f: split = 4; lead = white; tail = black; break;
          case 0xfc: split = 2; lead = black; tail = white; break;
          case 0x3f: split = 6; lead = white; tail = black; break;
          default:                                          break;
        }
        if (split >= 0)
        {
          for (int i = 0; i < 8; i++)
            *dst++ = (i < split) ? lead : tail;
          x+=8; count-=7; pixel++;
          continue;
        }
      }
      *dst++ = (pval & (1<< bitno)) ? white : black;
      pixel += (bitno ==7);
      x++;
    }
}

/* Encode count RGBA8 pixels as GRAY1.  Each pixel is reduced to a gray value
 * with ctx_u8_color_rgb_to_gray and thresholded at 128 into bit (x & 7) of
 * the current output byte.  No dithering is applied.
 */
inline static void
ctx_RGBA8_to_GRAY1 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 4)
    {
      const int luma = ctx_u8_color_rgb_to_gray (rasterizer->state, rgba);
      const int bit  = x & 7;
      const int mask = 1 << bit;
      //luma += ctx_dither_mask_a (x, rasterizer->scanline/aa, 0, 127);
      if (luma < 128)
        *out &= ~mask;
      else
        *out |= mask;
      out += (bit == 7);
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY2

#if CTX_NATIVE_GRAYA8
/* Decode count GRAY2 pixels (four per byte, pixel x in bits 2*(x&3) and up)
 * into GRAYA8.  The 2-bit level is scaled by 85 to cover 0..255 and alpha is
 * always 255.  rasterizer is not used.
 */
inline static void
ctx_GRAY2_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 2)
    {
      const int slot = x & 3;
      rgba[0] = (uint8_t)(((*in >> (slot << 1)) & 3) * 85);
      rgba[1] = 255;
      if (slot == 3)
        in++;
    }
}

/* Encode count GRAYA8 pixels as GRAY2.  The gray byte is passed through
 * ctx_sadd8 (gray, 40), reduced to its top two bits and stored in bits
 * 2*(x&3) and up of the current output byte.  Alpha is ignored; rasterizer is
 * not used.
 */
inline static void
ctx_GRAYA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 2)
    {
      const int slot  = x & 3;
      const int shift = slot << 1;
      int level = rgba[0];
      level = ctx_sadd8 (level, 40) >> 6;
      *out = (*out & ~(3 << shift)) | (level << shift);
      if (slot == 3)
        out++;
    }
}
#else

/* Decode count GRAY2 pixels, starting at horizontal position x, into RGBA8.
 *
 * Four pixels share a byte; pixel x occupies bits 2*(x&3) and 2*(x&3)+1 (the
 * same layout ctx_RGBA8_to_GRAY2 writes).  buf must point at the byte that
 * holds pixel x.  The 2-bit level is scaled by 85 and replicated to R, G and
 * B; alpha is always 255.  rasterizer is not used.
 */
inline static void
ctx_GRAY2_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint32_t black = 0xff000000;
  const uint32_t white = 0xffffffff;
  const uint8_t *pixel = (const uint8_t *) buf;
  uint32_t *dst = (uint32_t*)rgba;
  while (count--)
    {
      int bitno = x & 3;
      /* byte aligned and (after the decrement above) at least 4 pixels left */
      if ((bitno == 0) & (count >=3))
      {
        /* Special case some bit patterns when decoding: pixels [0, split) of
         * the byte get `lead`, pixels [split, 4) get `tail`.
         *
         * The previous unrolled code emitted 0x0f, 0xfc and 0x3f highest
         * bit-pair first, i.e. mirrored relative to the per-pixel path below.
         * The table follows the low-bits-first layout. */
        int      split = -1;
        uint32_t lead  = black;
        uint32_t tail  = black;
        switch (*pixel)
        {
          case 0x00: split = 4; lead = black;               break;
          case 0xff: split = 4; lead = white;               break;
          case 0x55: split = 4; lead = 0xff555555;          break;
          case 0xaa: split = 4; lead = 0xffaaaaaa;          break;
          case 0x0f: split = 2; lead = white; tail = black; break;
          case 0xfc: split = 1; lead = black; tail = white; break;
          case 0x3f: split = 3; lead = white; tail = black; break;
          default:                                          break;
        }
        if (split >= 0)
        {
          for (int i = 0; i < 4; i++)
            *dst++ = (i < split) ? lead : tail;
          x+=4; count-=3; pixel++;
          continue;
        }
      }
      {
        uint8_t val = (((*pixel) >> ( (bitno) <<1)) & 3) * 85;
        *dst = val + val * 256u + val * 256u * 256u + 255u * 256u * 256u * 256u;
        if (bitno==3)
          { pixel+=1; }
        x++;
        dst++;
      }
    }
}

/* Encode count RGBA8 pixels as GRAY2: the gray value computed by
 * ctx_u8_color_rgb_to_gray is reduced to its top two bits and stored in bits
 * 2*(x&3) and up of the current output byte.
 */
inline static void
ctx_RGBA8_to_GRAY2 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  CtxState *state = rasterizer->state;
  uint8_t  *out   = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 4)
    {
      const int slot  = x & 3;
      const int shift = slot << 1;
      int level = ctx_u8_color_rgb_to_gray (state, rgba);
      level >>= 6;
      *out = (*out & ~(3 << shift)) | (level << shift);
      if (slot == 3)
        out++;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY4

#if CTX_NATIVE_GRAYA8
/* Decode count GRAY4 pixels (two per byte, even x in the low nibble) into
 * GRAYA8.  The nibble is shifted into the high four bits of the gray byte, so
 * the brightest level decodes to 240; alpha is always 255.  rasterizer is not
 * used.
 */
inline static void
ctx_GRAY4_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 2)
    {
      const int odd    = x & 1;
      const int nibble = (*in >> (odd << 2)) & 15;
      rgba[0] = (uint8_t)(nibble << 4);
      rgba[1] = 255;
      if (odd)
        in++;
    }
}

/* Encode count GRAYA8 pixels as GRAY4: the top four bits of the gray byte
 * replace the nibble selected by x & 1 (low nibble for even x).  Alpha is
 * ignored; rasterizer is not used.
 */
inline static void
ctx_GRAYA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 2)
    {
      const int odd   = x & 1;
      const int shift = odd << 2;
      const int level = rgba[0] >> 4;
      *out = (*out & ~(15 << shift)) | (level << shift);
      if (odd)
        out++;
    }
}
#else
/* Decode count GRAY4 pixels (two per byte, even x in the low nibble) into
 * RGBA8.  The nibble is shifted into the high four bits and replicated to R,
 * G and B; alpha is always 255.  rasterizer is not used.
 */
inline static void
ctx_GRAY4_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 4)
    {
      const int odd  = x & 1;
      const int gray = ((*in >> (odd << 2)) & 15) << 4;
      rgba[0] = gray;
      rgba[1] = gray;
      rgba[2] = gray;
      rgba[3] = 255;
      if (odd)
        in++;
    }
}

/* Encode count RGBA8 pixels as GRAY4: the gray value computed by
 * ctx_u8_color_rgb_to_gray is reduced to its top four bits and written to the
 * nibble selected by x & 1 (low nibble for even x).
 */
inline static void
ctx_RGBA8_to_GRAY4 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  CtxState *state = rasterizer->state;
  uint8_t  *out   = (uint8_t *) buf;
  for (; count != 0; count--, x++, rgba += 4)
    {
      const int odd   = x & 1;
      const int shift = odd << 2;
      int level = ctx_u8_color_rgb_to_gray (state, rgba);
      level >>= 4;
      *out = (*out & ~(15 << shift)) | (level << shift);
      if (odd)
        out++;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAY8

#if CTX_NATIVE_GRAYA8
/* Expand count GRAY8 pixels to GRAYA8 by appending an alpha byte of 255 to
 * every gray byte.  rasterizer and x are not used.
 */
inline static void
ctx_GRAY8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, in++, rgba += 2)
    {
      rgba[0] = *in;
      rgba[1] = 255;
    }
}

/* Reduce count GRAYA8 pixels to GRAY8 by copying the gray byte and dropping
 * the alpha byte.  rasterizer and x are not used.
 */
inline static void
ctx_GRAYA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, rgba += 2)
    *out++ = rgba[0];
}
#else
/* Expand count GRAY8 pixels to RGBA8: the gray byte is replicated to R, G and
 * B and alpha is set to 255.  rasterizer and x are not used.
 */
inline static void
ctx_GRAY8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, in++, rgba += 4)
    {
      const uint8_t gray = *in;
      rgba[0] = gray;
      rgba[1] = gray;
      rgba[2] = gray;
      rgba[3] = 255;
    }
}

/* Convert count RGBA8 pixels to GRAY8 using ctx_u8_color_rgb_to_gray on each
 * 4-byte pixel.  x is not used.
 */
inline static void
ctx_RGBA8_to_GRAY8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  CtxState      *state = rasterizer->state;
  uint8_t       *out   = (uint8_t *) buf;
  const uint8_t *in    = rgba;
  for (int left = count; left > 0; left--)
    {
      *out++ = ctx_u8_color_rgb_to_gray (state, in);
      in += 4;
    }
}
#endif

#endif
#if CTX_ENABLE_GRAYA8

/* Expand count GRAYA8 pixels to RGBA8: gray is replicated to R, G and B and
 * the alpha byte is carried over.  rasterizer and x are not used.
 */
inline static void
ctx_GRAYA8_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, in += 2, rgba += 4)
    {
      const uint8_t gray = in[0];
      rgba[0] = gray;
      rgba[1] = gray;
      rgba[2] = gray;
      rgba[3] = in[1];
    }
}

/* Convert count RGBA8 pixels to GRAYA8: gray comes from
 * ctx_u8_color_rgb_to_gray, alpha is copied unchanged.  x is not used.
 */
inline static void
ctx_RGBA8_to_GRAYA8 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  CtxState *state = rasterizer->state;
  uint8_t  *out   = (uint8_t *) buf;
  for (; count != 0; count--, out += 2, rgba += 4)
    {
      out[0] = ctx_u8_color_rgb_to_gray (state, rgba);
      out[1] = rgba[3];
    }
}

#if CTX_NATIVE_GRAYA8
/* Convert one RGBA8 pixel at in to one GRAYA8 pixel at out: gray from
 * ctx_u8_color_rgb_to_gray, alpha copied from in[3]. */
CTX_INLINE static void ctx_rgba_to_graya_u8 (CtxState *state, uint8_t *in, uint8_t *out)
{
  const uint8_t gray = ctx_u8_color_rgb_to_gray (state, in);
  out[0] = gray;
  out[1] = in[3];
}

#if CTX_GRADIENTS
/* Fragment source: write count GRAYA8 samples (2 bytes each) of the fill's
 * linear gradient to out.
 *
 * The gradient position for the first sample is
 * u0 * dx_scaled + v0 * dy_scaled - start_scaled and advances by
 * ud * dx_scaled + vd * dy_scaled per sample.  Each sample is looked up as
 * RGBA8 with ctx_fragment_gradient_1d_RGBA8 and converted with
 * ctx_rgba_to_graya_u8.  z and dz are not used.
 */
static void
ctx_fragment_linear_gradient_GRAYA8 (CtxRasterizer *rasterizer, float u0, float v0, float z, void *out, int count, float ud, float vd, float dz)
{
  CtxSource *g = &rasterizer->state->gstate.source_fill;
        uint8_t *dst = (uint8_t*)out;

  float linear_gradient_dx = g->linear_gradient.dx_scaled;
  float linear_gradient_dy = g->linear_gradient.dy_scaled;
  float linear_gradient_start = g->linear_gradient.start_scaled;

  /* project start position and per-sample step onto the gradient axis */
  u0 *= linear_gradient_dx;
  v0 *= linear_gradient_dy;
  ud *= linear_gradient_dx;
  vd *= linear_gradient_dy;

  float vv = ((u0 + v0) - linear_gradient_start);
  float ud_plus_vd = (ud + vd);

#if CTX_DITHER
  int scan = rasterizer->scanline / CTX_FULL_AA;
  /* NOTE(review): u0 has already been multiplied by dx_scaled at this point,
   * so the dither x origin is taken from the scaled value; the radial variant
   * uses the unscaled x - confirm this is intended */
  int ox = (int)u0;
#endif
  for (int i = 0; i < count;i ++)
  {
    uint8_t rgba[4];
    ctx_fragment_gradient_1d_RGBA8 (rasterizer, vv, 1.0f, rgba);
    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst);

#if CTX_DITHER
  ctx_dither_graya_u8 ((uint8_t*)dst, ox + i, scan, rasterizer->format->dither_red_blue,
                      rasterizer->format->dither_green);
#endif
    dst += 2;
    vv += ud_plus_vd;
  }
}

/* Fragment source: write count GRAYA8 samples (2 bytes each) of the fill's
 * radial gradient to out.
 *
 * For every sample the gradient position is
 * (distance from (x, y) to (x0, y0) - r0) * rdelta; it is looked up as RGBA8
 * with ctx_fragment_gradient_1d_RGBA8 and converted with ctx_rgba_to_graya_u8.
 * x and y advance by dx and dy per sample.  z and dz are not used.
 */
static void
ctx_fragment_radial_gradient_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t *dst = (uint8_t*)out;
#if CTX_DITHER
  int scan = rasterizer->scanline / CTX_FULL_AA;
  /* dither x origin: the start x before it is advanced in the loop */
  int ox = (int)x;
#endif

  CtxSource *g = &rasterizer->state->gstate.source_fill;
  float rg_x0 = g->radial_gradient.x0;
  float rg_y0 = g->radial_gradient.y0;
  float rg_r0 = g->radial_gradient.r0;
  float rg_rdelta = g->radial_gradient.rdelta;
  for (int i = 0; i < count;i ++)
  {
  float v = (ctx_hypotf (rg_x0 - x, rg_y0 - y) - rg_r0) * (rg_rdelta);
  {
    uint8_t rgba[4];
    ctx_fragment_gradient_1d_RGBA8 (rasterizer, v, 1.0, rgba);
    ctx_rgba_to_graya_u8 (rasterizer->state, rgba, dst);
  }
#if CTX_DITHER
  ctx_dither_graya_u8 ((uint8_t*)dst, ox+i, scan, rasterizer->format->dither_red_blue,
                      rasterizer->format->dither_green);
#endif
  dst += 2;
  x += dx;
  y += dy;
  }
}
#endif

/* Fragment source for a solid fill: fetch the fill color once as GRAYA8 with
 * ctx_color_get_graya_u8 and store it count times, as 16-bit units, to out.
 * The position and step arguments are not used.
 */
static void
ctx_fragment_color_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  CtxSource *source = &rasterizer->state->gstate.source_fill;
  uint16_t   packed;
  uint16_t  *cursor = (uint16_t*)out;

  ctx_color_get_graya_u8 (rasterizer->state, &source->color, (uint8_t*)&packed);
  for (int left = count; left > 0; left--)
    *cursor++ = packed;
}

/* Fragment source for texture fills: sample count pixels as RGBA8 into a
 * stack array of 4 * count bytes, then convert each one to GRAYA8 in out with
 * ctx_rgba_to_graya_u8.
 *
 * The RGBA8 sampler is chosen from the texture buffer's bits per pixel; the
 * specialised samplers are only available with CTX_FRAGMENT_SPECIALIZE.
 */
static void ctx_fragment_image_GRAYA8 (CtxRasterizer *rasterizer, float x, float y, float z, void *out, int count, float dx, float dy, float dz)
{
  uint8_t rgba[4*count];
  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
         /* use the color managed variant of the buffer when one is attached */
         CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
         CtxBuffer *buffer = g->texture.buffer;
#endif
  switch (buffer->format->bpp)
    {
#if CTX_FRAGMENT_SPECIALIZE
      case 1:  ctx_fragment_image_gray1_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
      case 24: ctx_fragment_image_rgb8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);  break;
      case 32: ctx_fragment_image_rgba8_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz); break;
#endif
      default: ctx_fragment_image_RGBA8 (rasterizer, x, y, z, rgba, count, dx, dy, dz);       break;
    }
  for (int i = 0; i < count; i++)
    ctx_rgba_to_graya_u8 (rasterizer->state, &rgba[i*4], &((uint8_t*)out)[i*2]);
}

/* Pick the GRAYA8 fragment function matching the current fill source type.
 * Source types without a dedicated case (including gradients when
 * CTX_GRADIENTS is disabled) fall back to the solid color fragment.
 */
static CtxFragment ctx_rasterizer_get_fragment_GRAYA8 (CtxRasterizer *rasterizer)
{
  CtxFragment fragment = ctx_fragment_color_GRAYA8;
  switch (rasterizer->state->gstate.source_fill.type)
    {
      case CTX_SOURCE_TEXTURE:
        fragment = ctx_fragment_image_GRAYA8;
        break;
      case CTX_SOURCE_COLOR:
        fragment = ctx_fragment_color_GRAYA8;
        break;
#if CTX_GRADIENTS
      case CTX_SOURCE_LINEAR_GRADIENT:
        fragment = ctx_fragment_linear_gradient_GRAYA8;
        break;
      case CTX_SOURCE_RADIAL_GRADIENT:
        fragment = ctx_fragment_radial_gradient_GRAYA8;
        break;
#endif
      default:
        break;
    }
  return fragment;
}

/* Instantiate the ctx_u8_porter_duff template for 2-component GRAYA8 with the
 * blend mode read from the graphics state at run time; presumably this
 * defines ctx_GRAYA8_porter_duff_generic, the default comp_op installed by
 * ctx_setup_GRAYA8. */
ctx_u8_porter_duff(GRAYA8, 2,generic, rasterizer->fragment, rasterizer->state->gstate.blend_mode)

#if CTX_INLINED_NORMAL
/* Same template with the blend mode fixed to CTX_BLEND_NORMAL; presumably
 * defines ctx_GRAYA8_porter_duff_generic_normal used by ctx_setup_GRAYA8. */
ctx_u8_porter_duff(GRAYA8, 2,generic_normal, rasterizer->fragment, CTX_BLEND_NORMAL)

/* GRAYA8 compositing op for copy compositing with normal blending: forwards
 * to the shared ctx_u8_copy_normal with a component count of 2. */
static void
ctx_GRAYA8_copy_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_copy_normal (2, count, dst, src, coverage, rasterizer, x0);
}

/* GRAYA8 compositing op for clear compositing: forwards to the shared
 * ctx_u8_clear_normal with a component count of 2. */
static void
ctx_GRAYA8_clear_normal (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_clear_normal (2, count, dst, src, coverage, rasterizer, x0);
}

/* GRAYA8 compositing op for a solid color, source-over, normal blending.
 *
 * The active branch forwards to the shared ctx_u8_source_over_normal_color
 * with 2 components and rasterizer->color as the source (the src argument is
 * not used).  The #else branch is a disabled hand-written variant.
 */
static void
ctx_GRAYA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
#if 1
  ctx_u8_source_over_normal_color (2, count, dst, rasterizer->color, coverage, rasterizer, x0);
#else
  uint8_t tsrc[5];
  *((uint32_t*)tsrc) = *((uint32_t*)src);

  while (count--)
  {
    uint32_t cov = *coverage++;
    uint32_t common =(((((255+(tsrc[1] * cov))>>8))^255 ));
    dst[0] =  ((((tsrc[0] * cov)) + (dst[0] * common ))>>8);
    dst[1] =  ((((tsrc[1] * cov)) + (dst[1] * common ))>>8);
    dst+=2;
  }
#endif
}

/* GRAYA8 compositing op for a solid color, source-copy, normal blending:
 * forwards to the shared ctx_u8_source_copy_normal_color with 2 components
 * and rasterizer->color as the source (the src argument is not used). */
static void
ctx_GRAYA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS)
{
  ctx_u8_source_copy_normal_color (2, count, dst, rasterizer->color, coverage, rasterizer, x0);
}
#endif

/* Returns 1 when the fill is a solid color whose GRAYA8 alpha is 255 and the
 * global alpha is 255 as well; returns 0 in every other case. */
inline static int
ctx_is_opaque_color (CtxRasterizer *rasterizer)
{
  CtxGState *gs = &rasterizer->state->gstate;
  uint8_t    graya[2];

  if (gs->global_alpha_u8 != 255 || gs->source_fill.type != CTX_SOURCE_COLOR)
    return 0;

  ctx_color_get_graya_u8 (rasterizer->state, &gs->source_fill.color, graya);
  return graya[1] == 255;
}

/* Configure the rasterizer for compositing in GRAYA8.
 *
 * Selects the fragment function, the compositing op (comp_op) and the
 * coverage path (comp) from the current graphics state, precomputes the solid
 * fill color, and finishes by calling ctx_setup_apply_coverage.
 */
static void
ctx_setup_GRAYA8 (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int components = 2;
  /* defaults: generic porter-duff op and the fallback coverage path */
  rasterizer->fragment = ctx_rasterizer_get_fragment_GRAYA8 (rasterizer);
  rasterizer->comp_op  = ctx_GRAYA8_porter_duff_generic;
  rasterizer->comp = CTX_COV_PATH_FALLBACK;
  if (gstate->source_fill.type == CTX_SOURCE_NONE)
  {
    ctx_setup_apply_coverage (rasterizer);
    return;
  }
  if (gstate->source_fill.type == CTX_SOURCE_COLOR)
    {
      /* evaluate the solid color once into rasterizer->color, scale every
       * component by the global alpha when it is not 255 ... */
      ctx_fragment_color_GRAYA8 (rasterizer, 0,0, 1,rasterizer->color, 1, 0,0,0);
      if (gstate->global_alpha_u8 != 255)
        for (int c = 0; c < components; c ++)
          rasterizer->color[c] = (rasterizer->color[c] * gstate->global_alpha_u8)/255;

      /* ... and, if the target format has a from_comp converter, also store
       * the color in the target's own pixel format in color_native */
      if (rasterizer->format->from_comp)
        rasterizer->format->from_comp (rasterizer, 0,
          &rasterizer->color[0],
          &rasterizer->color_native,
          1);
    }

#if CTX_INLINED_NORMAL
  /* Specialised ops exist for clear compositing and for normal blending only;
   * any other blend mode keeps the generic op selected above. */
  if (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)
    rasterizer->comp_op = ctx_GRAYA8_clear_normal;
  else
    switch (gstate->blend_mode)
    {
      case CTX_BLEND_NORMAL:
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp_op = ctx_GRAYA8_copy_normal;
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
        else if (gstate->global_alpha_u8 == 0)
          rasterizer->comp_op = ctx_RGBA8_nop;
        else
        switch (gstate->source_fill.type)
        {
          case CTX_SOURCE_COLOR:
            if (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER)
            {
              /* dispatch on the alpha of the prepared color: 0 draws nothing,
               * 255 is treated as a copy, anything else needs source-over */
              if (rasterizer->color[components-1] == 0)
                rasterizer->comp_op = ctx_RGBA8_nop;
              else if (rasterizer->color[components-1] == 255)
              {
                rasterizer->comp_op = ctx_GRAYA8_source_copy_normal_color;
                rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
              }
              else
                rasterizer->comp_op = ctx_GRAYA8_source_over_normal_color;
            }
            else
            {
              rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
            }
            break;
          default:
            rasterizer->comp_op = ctx_GRAYA8_porter_duff_generic_normal;
            break;
        }
        break;
    }
#else
    /* without inlined ops only the coverage path is specialised: the copy
     * path is used for a solid color with normal blending that is either
     * copied or drawn source-over with alpha 255 */
    if ((gstate->blend_mode == CTX_BLEND_NORMAL) &
        (gstate->source_fill.type == CTX_SOURCE_COLOR))
    {
        if (gstate->compositing_mode == CTX_COMPOSITE_COPY)
        {
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
        else if ((gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OVER) &
                 (rasterizer->color[components-1] == 255))
        {
          rasterizer->comp = CTX_COV_PATH_GRAYA8_COPY;
        }
    }
#endif
  ctx_setup_apply_coverage (rasterizer);
}

#if CTX_ENABLE_GRAY4
/* Set up for GRAY4 by running the GRAYA8 setup and then remapping the
 * coverage path: the GRAYA8 copy path becomes the GRAY4 copy path, anything
 * else becomes the fallback path. */
static void
ctx_setup_GRAY4 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                   ? CTX_COV_PATH_GRAY4_COPY
                   : CTX_COV_PATH_FALLBACK;
}
#endif

#if CTX_ENABLE_GRAY2
/* Set up for GRAY2 by running the GRAYA8 setup and then remapping the
 * coverage path: the GRAYA8 copy path becomes the GRAY2 copy path, anything
 * else becomes the fallback path. */
static void
ctx_setup_GRAY2 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                   ? CTX_COV_PATH_GRAY2_COPY
                   : CTX_COV_PATH_FALLBACK;
}
#endif

#if CTX_ENABLE_GRAY1
/* Set up for GRAY1 by running the GRAYA8 setup and then remapping the
 * coverage path: the GRAYA8 copy path becomes the GRAY1 copy path, anything
 * else becomes the fallback path. */
static void
ctx_setup_GRAY1 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                   ? CTX_COV_PATH_GRAY1_COPY
                   : CTX_COV_PATH_FALLBACK;
}
#endif

/* Set up for GRAY8 by running the GRAYA8 setup and then remapping the
 * coverage path: the GRAYA8 copy path becomes the GRAY8 copy path, anything
 * else becomes the fallback path. */
static void
ctx_setup_GRAY8 (CtxRasterizer *rasterizer)
{
  ctx_setup_GRAYA8 (rasterizer);
  rasterizer->comp = (rasterizer->comp == CTX_COV_PATH_GRAYA8_COPY)
                   ? CTX_COV_PATH_GRAY8_COPY
                   : CTX_COV_PATH_FALLBACK;
}

#endif

#endif


/* Pack 8-bit red, green and blue into one RGB332 byte: red in bits 7..5,
 * green in bits 4..2, blue in bits 1..0.  Each component first goes through
 * ctx_sadd8 (component, 15) before its top bits are taken. */
static inline uint8_t
ctx_332_pack (uint8_t red,
              uint8_t green,
              uint8_t blue)
{
  const int r3 = ctx_sadd8 (red,   15) >> 5;
  const int g3 = ctx_sadd8 (green, 15) >> 5;
  const int b2 = ctx_sadd8 (blue,  15) >> 6;
  return (r3 << 5) | (g3 << 2) | b2;
}
#if CTX_ENABLE_RGB332

/* Pack the first three bytes, in memory order, of a 32-bit pixel as RGB332. */
static inline uint8_t
ctx_888_to_332 (uint32_t in)
{
  const uint8_t *channel = (const uint8_t*)(&in);
  const uint8_t  red     = channel[0];
  const uint8_t  green   = channel[1];
  const uint8_t  blue    = channel[2];
  return ctx_332_pack (red, green, blue);
}

/* Unpack an RGB332 byte into a 32-bit pixel whose bytes, in memory order, are
 * red, green, blue and an alpha of 255. */
static inline uint32_t
ctx_332_to_888 (uint8_t in)
{
  uint32_t packed  = 0;
  uint8_t *channel = (uint8_t*)&packed;
  ctx_332_unpack (in, channel + 0, channel + 1, channel + 2);
  channel[3] = 255;
  return packed;
}

/* Decode count RGB332 bytes to RGBA8 with ctx_332_unpack.  Alpha is 255,
 * except that with CTX_RGB332_ALPHA a decoded pixel of R=255, G=0, B=255 is
 * given alpha 0.  rasterizer and x are not used.
 */
static inline void
ctx_RGB332_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint8_t *in = (const uint8_t *) buf;
  for (; count != 0; count--, in++, rgba += 4)
    {
      ctx_332_unpack (*in, &rgba[0], &rgba[1], &rgba[2]);
      rgba[3] = 255;
#if CTX_RGB332_ALPHA
      /* color key: this exact color stands for "fully transparent" */
      if ((rgba[0]==255) & (rgba[2] == 255) & (rgba[1]==0))
        rgba[3] = 0;
#endif
    }
}

/* Encode count RGBA8 pixels as RGB332 with ctx_332_pack.  With
 * CTX_RGB332_ALPHA a pixel whose alpha is 0 is stored as the packed form of
 * R=255, G=0, B=255; otherwise alpha is ignored.  rasterizer and x are not
 * used.
 */
static inline void
ctx_RGBA8_to_RGB332 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint8_t *out = (uint8_t *) buf;
  for (; count != 0; count--, out++, rgba += 4)
    {
#if CTX_RGB332_ALPHA
      const int keyed = (rgba[3] == 0);
#else
      const int keyed = 0;
#endif
      if (keyed)
        *out = ctx_332_pack (255, 0, 255);
      else
        *out = ctx_332_pack (rgba[0], rgba[1], rgba[2]);
    }
}

/* Composite a span onto an RGB332 destination by expanding it to RGBA8 in a
 * stack array of count * 4 bytes, running the configured compositing op and
 * packing the result back.
 */
static void
ctx_composite_RGB332 (CTX_COMPOSITE_ARGUMENTS)
{
  /* disabled: direct source-over on the packed pixels, kept for reference */
#if 0 // it is slower
  if (CTX_LIKELY(rasterizer->comp_op == ctx_RGBA8_source_over_normal_color))
  {
    uint32_t si_ga = ((uint32_t*)rasterizer->color)[1];
    uint32_t si_rb = ((uint32_t*)rasterizer->color)[2];
    uint32_t si_a  = si_ga >> 16;

    while (count--)
    {
      uint32_t cov   = *coverage++;
      uint32_t rcov  = (((255+si_a * cov)>>8))^255;
      uint32_t di    = ctx_332_to_888 (*((uint8_t*)dst));
      uint32_t di_ga = ((di & 0xff00ff00) >> 8);
      uint32_t di_rb = (di & 0x00ff00ff);
      *((uint8_t*)(dst)) =
      ctx_888_to_332((((si_rb * cov + 0xff00ff + di_rb * rcov) & 0xff00ff00) >> 8)  |
       ((si_ga * cov + 0xff00ff + di_ga * rcov) & 0xff00ff00));
       dst+=1;
    }
    return;
  }
#endif
  uint8_t pixels[count * 4];
  ctx_RGB332_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (count, &pixels[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_RGBA8_to_RGB332 (rasterizer, x0, &pixels[0], dst, count);
}

#endif
#if CTX_ENABLE_RGB565 | CTX_ENABLE_RGB565_BYTESWAPPED

/* Split an RGB565 pixel into 8-bit components.
 *
 * When byteswap is non-zero the two bytes of pixel are exchanged first.  Red
 * is taken from bits 15..11, green from bits 10..5 and blue from bits 4..0;
 * each field is shifted up to the top of its byte and the low bits are left
 * zero, so the maximum outputs are 248, 252 and 248 rather than 255.
 */
static inline void
ctx_565_unpack (const uint16_t pixel,
                uint8_t *red,
                uint8_t *green,
                uint8_t *blue,
                const int byteswap)
{
  const uint16_t native = byteswap ? (uint16_t)((pixel >> 8) | (pixel << 8))
                                   : pixel;
  const uint8_t b8 = (uint8_t)((native & 31) << 3);
  const uint8_t g8 = (uint8_t)(((native >> 5) & 63) << 2);
  const uint8_t r8 = (uint8_t)(((native >> 11) & 31) << 3);

  *blue  = b8;
  *green = g8;
  *red   = r8;
}


/* Pack the first three bytes, in memory order, of a 32-bit pixel as RGB565,
 * forwarding the byteswap flag to ctx_565_pack. */
static inline uint16_t
ctx_888_to_565 (uint32_t in, int byteswap)
{
  const uint8_t *channel = (const uint8_t*)(&in);
  const uint8_t  red     = channel[0];
  const uint8_t  green   = channel[1];
  const uint8_t  blue    = channel[2];
  return ctx_565_pack (red, green, blue, byteswap);
}

/* Unpack an RGB565 pixel into a 32-bit value whose first three bytes, in
 * memory order, are red, green and blue.  The fourth byte is left at 0. */
static inline uint32_t
ctx_565_to_888 (uint16_t in, int byteswap)
{
  uint32_t packed  = 0;
  uint8_t *channel = (uint8_t*)&packed;
  ctx_565_unpack (in, channel + 0, channel + 1, channel + 2, byteswap);
  //channel[3]=255;
  return packed;
}

#endif
#if CTX_ENABLE_RGB565


/* Decode count RGB565 pixels to RGBA8, storing the 32-bit result of
 * ctx_565_unpack_32 (pixel, 0) for each one.  With CTX_RGB565_ALPHA a decoded
 * pixel of R=255, G=0, B=255 additionally gets alpha 0.  rasterizer and x are
 * not used.
 */
static inline void
ctx_RGB565_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count)
{
  const uint16_t *in = (const uint16_t *) buf;
  for (; count != 0; count--, in++, rgba += 4)
    {
      // XXX : checking the raw value for alpha before unpack will be faster
      *(uint32_t*)rgba = ctx_565_unpack_32 (*in, 0);
#if CTX_RGB565_ALPHA
      if ((rgba[0]==255) & (rgba[2] == 255) & (rgba[1]==0))
        rgba[3] = 0;
#endif
    }
}

/* Encode count RGBA8 pixels as RGB565 with ctx_565_pack (no byte swap).  With
 * CTX_RGB565_ALPHA a pixel whose alpha is 0 is stored as the packed form of
 * R=255, G=0, B=255; otherwise alpha is ignored.  rasterizer and x are not
 * used.
 */
static inline void
ctx_RGBA8_to_RGB565 (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count)
{
  uint16_t *out = (uint16_t *) buf;
  for (; count != 0; count--, out++, rgba += 4)
    {
#if CTX_RGB565_ALPHA
      const int keyed = (rgba[3] == 0);
#else
      const int keyed = 0;
#endif
      if (keyed)
        *out = ctx_565_pack (255, 0, 255, 0);
      else
        *out = ctx_565_pack (rgba[0], rgba[1], rgba[2], 0);
    }
}

/* forward declarations of the RGBA8 solid-color compositing ops, which are
 * defined further down in the file */
static void
ctx_RGBA8_source_over_normal_color (CTX_COMPOSITE_ARGUMENTS);
static void
ctx_RGBA8_source_copy_normal_color (CTX_COMPOSITE_ARGUMENTS);

/* Composite a span onto an RGB565 destination by expanding it to RGBA8 in a
 * stack array of count * 4 bytes, running the configured compositing op and
 * packing the result back. */
static void
ctx_composite_RGB565 (CTX_COMPOSITE_ARGUMENTS)
{
  uint8_t pixels[count * 4];
  ctx_RGB565_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (count, &pixels[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_RGBA8_to_RGB565 (rasterizer, x0, &pixels[0], dst, count);
}
#endif
#if CTX_ENABLE_RGB565_BYTESWAPPED

/* forward declarations of the byteswapped RGB565 converters used below */
static void
ctx_RGB565_BS_to_RGBA8 (CtxRasterizer *rasterizer, int x, const void *buf, uint8_t *rgba, int count);
static void
ctx_RGBA8_to_RGB565_BS (CtxRasterizer *rasterizer, int x, const uint8_t *rgba, void *buf, int count);

/* Composite a span onto a byteswapped RGB565 destination by expanding it to
 * RGBA8 in a stack array of count * 4 bytes, running the configured
 * compositing op and packing the result back. */
static void
ctx_composite_RGB565_BS (CTX_COMPOSITE_ARGUMENTS)
{
  uint8_t pixels[count * 4];
  ctx_RGB565_BS_to_RGBA8 (rasterizer, x0, dst, &pixels[0], count);
  rasterizer->comp_op (count, &pixels[0], rasterizer->color, coverage, rasterizer, x0);
  ctx_RGBA8_to_RGB565_BS (rasterizer, x0, &pixels[0], dst, count);
}
#endif


/* Blend one 32-bit RGBA8 pixel src over dst with coverage cov (0..255).
 *
 * The four channels are processed two at a time: bytes 0 and 2 in one word,
 * bytes 1 and 3 (shifted down by 8) in another.  Source channels are scaled
 * by cov, destination channels by the complement of the covered source alpha
 * (alpha is the top byte), and 0xff is added per channel before the result is
 * divided by 256.
 */
static inline uint32_t
ctx_over_RGBA8 (uint32_t dst, uint32_t src, uint32_t cov)
{
  const uint32_t even_mask = 0x00ff00ff;
  const uint32_t odd_mask  = 0xff00ff00;

  const uint32_t src_even = src & even_mask;
  const uint32_t src_odd  = (src & odd_mask) >> 8;
  const uint32_t dst_even = dst & even_mask;
  const uint32_t dst_odd  = (dst & odd_mask) >> 8;

  const uint32_t src_alpha = src_odd >> 16;
  const uint32_t keep      = ((255 + src_alpha * cov) >> 8) ^ 255;

  const uint32_t out_even = ((src_even * cov) + 0xff00ff + (dst_even * keep)) & odd_mask;
  const uint32_t out_odd  = ((src_odd  * cov) + 0xff00ff + (dst_odd  * keep)) & odd_mask;

  return (out_even >> 8) | out_odd;
}


/* Blend one 32-bit RGBA8 pixel src over dst at full coverage.
 *
 * Same two-channels-per-word arithmetic as the coverage variant, with the
 * source scaled by 255 and the destination by 255 minus the source alpha
 * (alpha is the top byte).
 */
static inline uint32_t
ctx_over_RGBA8_full (uint32_t dst, uint32_t src)
{
  const uint32_t even_mask = 0x00ff00ff;
  const uint32_t odd_mask  = 0xff00ff00;

  const uint32_t src_even = src & even_mask;
  const uint32_t src_odd  = (src & odd_mask) >> 8;
  const uint32_t dst_even = dst & even_mask;
  const uint32_t dst_odd  = (dst & odd_mask) >> 8;

  const uint32_t keep = (src_odd >> 16) ^ 255;

  const uint32_t out_even = ((src_even * 255) + 0xff00ff + (dst_even * keep)) & odd_mask;
  const uint32_t out_odd  = ((src_odd  * 255) + 0xff00ff + (dst_odd  * keep)) & odd_mask;

  return (out_even >> 8) | out_odd;
}

/* Blend a pre-split source over dst with coverage cov (0..255).
 *
 * The caller passes the source as si_rb (bytes 0 and 2), si_ga (bytes 1 and
 * 3, shifted down by 8) and si_a (alpha).  The destination is scaled by the
 * complement of si_a * cov / 255.
 */
static inline uint32_t
ctx_over_RGBA8_2 (uint32_t dst, uint32_t si_ga, uint32_t si_rb, uint32_t si_a, uint32_t cov)
{
  const uint32_t odd_mask = 0xff00ff00;
  const uint32_t dst_even = dst & 0x00ff00ff;
  const uint32_t dst_odd  = (dst & odd_mask) >> 8;
  const uint32_t keep     = ((si_a * cov) / 255) ^ 255;

  const uint32_t out_even = ((si_rb * cov) + 0xff00ff + (dst_even * keep)) & odd_mask;
  const uint32_t out_odd  = ((si_ga * cov) + 0xff00ff + (dst_odd  * keep)) & odd_mask;

  return (out_even >> 8) | out_odd;
}


/* Store val into count consecutive 32-bit pixels starting at dst_pix. */
static inline void ctx_span_set_colorb  (uint32_t *dst_pix, uint32_t val, int count)
{
  for (; count != 0; count--)
    {
      *dst_pix = val;
      dst_pix++;
    }
}

/* Store val into count consecutive 32-bit pixels starting at dst_pix; same as
 * ctx_span_set_colorb but with an unsigned count. */
static inline void ctx_span_set_colorbu (uint32_t *dst_pix, uint32_t val, unsigned int count)
{
  for (; count != 0; count--)
    {
      *dst_pix = val;
      dst_pix++;
    }
}

#if CTX_FAST_FILL_RECT

#if 1

/* Fill the rectangle (x0,y0)..(x1,y1), both corners inclusive, of an RGBA8
 * target straight from the fill texture, one texel per pixel.
 *
 * With copy set, rows are memcpy'd; otherwise each row is composited with
 * ctx_RGBA8_source_over_normal_full_cov_buf.  Only the texture start
 * coordinate from ctx_init_uv is used (truncated to int); the per-pixel deltas
 * are ignored.  Rows stop once the texture's last row has been used.
 */
static inline void ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect_copy (CtxRasterizer *rasterizer, int x0, int y0, int x1, int y1, const int copy)
{
#if 1
  float u0 = 0; float v0 = 0;
  float ud = 0; float vd = 0;
  float w0 = 1; float wd = 0;
  ctx_init_uv (rasterizer, x0, y0,&u0, &v0, &w0, &ud, &vd, &wd);
#endif

  /* first destination pixel; blit_stride is in bytes, so /4 gives pixels.
   * NOTE(review): x0 is added without subtracting a blit x origin, while y0
   * is made relative to blit_y - confirm against callers */
  uint32_t *dst = ( (uint32_t *) rasterizer->buf);
  int blit_stride = rasterizer->blit_stride/4;
  dst += (y0 - rasterizer->blit_y) * blit_stride;
  dst += (x0);

  unsigned int width = x1-x0+1;
  unsigned int height = y1-y0+1;

  CtxSource *g = &rasterizer->state->gstate.source_fill;
#if CTX_ENABLE_CM
         /* use the color managed variant of the buffer when one is attached */
         CtxBuffer *buffer = g->texture.buffer->color_managed?g->texture.buffer->color_managed:g->texture.buffer;
#else
         CtxBuffer *buffer = g->texture.buffer;
#endif
  int bwidth  = buffer->width;
  int bheight = buffer->height;
  int u = u0;// + 0.5f;
  int v = v0;// + 0.5f;

  /* NOTE(review): computed before u is clamped below and without any check
   * for v < 0, so this can point before the texture data - confirm callers
   * guarantee non-negative start coordinates */
  uint32_t *src = ((uint32_t*)buffer->data) + bwidth * v + u;

  /* pre: number of leading pixels that lie left of the texture (u < 0),
   * limited to the rectangle width */
  int pre = ctx_mini(ctx_maxi(-u,0), width);

  width-=pre;
  u+=pre;

  /* core: pixels per row that overlap the texture */
  int core = ctx_mini (width, bwidth - u);

  if (core<0)
    return;
  if (copy)
  {
      uint32_t *t_dst = dst;
      /* NOTE(review): src skips the pre pixels here but t_dst does not, and
       * the compositing branch below skips neither - looks inconsistent when
       * pre != 0 */
      src += pre;
      for (unsigned int y = 0; (y < height) & (v < bheight); y++)
      {
         memcpy (t_dst, src, core * 4);
         v++;
         src += bwidth;
         t_dst += blit_stride;
      }
  }
  else
  {
      uint32_t *t_dst = dst;
      for (unsigned int y = 0; (y < height) & (v < bheight); y++)
      {
         ctx_RGBA8_source_over_normal_full_cov_buf (core,
             (uint8_t*)t_dst, NULL, NULL, rasterizer, x0 + pre, (uint8_t*)src);
         v++;
         src += bwidth;
         t_dst += blit_stride;
      }
  }
}
#endif


static CTX_INLINE void
ctx_composite_fill_rect_aligned (CtxRasterizer *rasterizer,
                                 int            x0,
                                 int            y0,
                                 int            x1,
                                 int            y1,
                                 const uint8_t  cov)
{
  int blit_x      = rasterizer->blit_x;
  int blit_y      = rasterizer->blit_y;
  int blit_width  = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;
  int blit_stride = rasterizer->blit_stride;

  x0 = ctx_maxi (x0, blit_x);
  x1 = ctx_mini (x1, blit_x + blit_width - 1);
  y0 = ctx_maxi (y0, blit_y);
  y1 = ctx_mini (y1, blit_y + blit_height - 1);

  const int width = x1 - x0 + 1;
  const int height= y1 - y0 + 1;
  //
  if (((width <=0) | (height <= 0)))
    return;

  CtxCovPath comp = rasterizer->comp;
  uint8_t *dst;

  // this could be done here, but is not used
  // by a couple of the cases
#define INIT_ENV do {\
  rasterizer->scanline = y0 * CTX_FULL_AA; \
  dst = ( (uint8_t *) rasterizer->buf); \
  dst += (y0 - blit_y) * blit_stride; \
  dst += (x0 * rasterizer->format->bpp)/8;}while(0);

  if (cov == 255)
  {
    switch (comp)
    {
    case CTX_COV_PATH_RGBA8_COPY:
    {
      uint32_t color = ((uint32_t*)(rasterizer->color))[0];
      INIT_ENV;
      if (CTX_UNLIKELY(width == 1))
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          *dst_i = color;
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          ctx_span_set_colorbu ((uint32_t*)&dst[0], color, width);
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBAF_COPY:
    case CTX_COV_PATH_GRAY8_COPY:
    case CTX_COV_PATH_GRAYA8_COPY:
    case CTX_COV_PATH_GRAYAF_COPY:
    case CTX_COV_PATH_CMYKAF_COPY:
    case CTX_COV_PATH_RGB565_COPY:
    case CTX_COV_PATH_RGB332_COPY:
    case CTX_COV_PATH_RGB8_COPY:
    case CTX_COV_PATH_CMYK8_COPY:
    case CTX_COV_PATH_CMYKA8_COPY:
    {
      uint8_t *color = (uint8_t*)&rasterizer->color_native;
      unsigned int bytes = rasterizer->format->bpp/8;
      INIT_ENV;

      switch (bytes)
      {
        case 1:
          {
          uint8_t col = *color;
          if (width == 1)
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            *dst = col;
            dst += blit_stride;
          }
          else
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
#if 0
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++) *dst_i++ = col;
#else
            memset (dst, col, width);
#endif
            dst += blit_stride;
          }
          }
          break;
        case 2:
          {
            uint16_t val = ((uint16_t*)color)[0];
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              uint16_t *dst_i = (uint16_t*)&dst[0];
              for (int x = 0; x < width; x++)
                 *dst_i++ = val;
              dst += blit_stride;
            }
          }
          break;
        case 3:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
                for (unsigned int b = 0; b < 3; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        case 4:
          {
            uint32_t val = ((uint32_t*)color)[0];
            if (width == 1)
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              *((uint32_t*)&dst[0]) = val;
              dst += blit_stride;
            }
            else
            for (unsigned int y = y0; y <= (unsigned)y1; y++)
            {
              //uint32_t *dst_i = (uint32_t*)&dst[0];
              ctx_span_set_colorbu ((uint32_t*)&dst[0], val, width);
              dst += blit_stride;
            }
          }
          break;
        case 5:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
               for (unsigned int b = 0; b < 5; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        case 16:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)for (unsigned int b = 0; b < 16; b++) *dst_i++ = color[b];
            dst += blit_stride;
          }
          break;
        default:
          for (unsigned int y = y0; y <= (unsigned)y1; y++)
          {
            uint8_t *dst_i = (uint8_t*)&dst[0];
            for (int x = 0; x < width; x++)
              for (unsigned int b = 0; b < bytes; b++)
                *dst_i++ = color[b];
            dst += blit_stride;
          }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_OVER:
    {
      uint32_t si_ga_full = ((uint32_t*)rasterizer->color)[3];
      uint32_t si_rb_full = ((uint32_t*)rasterizer->color)[4];
      uint32_t si_a  = rasterizer->color[3];
      INIT_ENV;

      if (width == 1)
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          ((uint32_t*)(dst))[0] = ctx_over_RGBA8_full_2 (
             ((uint32_t*)(dst))[0], si_ga_full, si_rb_full, si_a);
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
            dst_i[i] = ctx_over_RGBA8_full_2 (dst_i[i], si_ga_full, si_rb_full, si_a);
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
    {
      CtxFragment fragment = rasterizer->fragment;
      CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;
      //CtxExtend extend = rasterizer->state->gstate.extend;
      INIT_ENV;

#if CTX_FRAGMENT_SPECIALIZE
      if (fragment == ctx_fragment_image_rgba8_RGBA8_nearest_copy)
      {
        ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect_copy (rasterizer, x0, y0, x1, y1, 1);
        return;
      }
#endif
#if 0
      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_scale)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_scaled_fill_rect (rasterizer, x0, y0, x1,
y1, 1);
        return;
      }
#endif

      if (CTX_LIKELY(ctx_matrix_no_perspective (transform)))
      {
        int scan = rasterizer->scanline/CTX_FULL_AA;
        float u0, v0, ud, vd, w0, wd;
        ctx_init_uv (rasterizer, x0, scan, &u0, &v0, &w0, &ud, &vd, &wd);
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          fragment (rasterizer, u0, v0, w0, &dst[0], width, ud, vd, wd);
          u0 -= vd;
          v0 += ud;
          dst += blit_stride;
        }
      }
      else
      {
        int scan = rasterizer->scanline/CTX_FULL_AA;
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          float u0, v0, ud, vd, w0, wd;
          ctx_init_uv (rasterizer, x0, scan + y-y0, &u0, &v0, &w0, &ud, &vd, &wd);
          fragment (rasterizer, u0, v0, w0, &dst[0], width, ud, vd, wd);
          dst += blit_stride;
        }
      }
      return;
    }
    case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
    {
#if CTX_FRAGMENT_SPECIALIZE
      CtxFragment fragment = rasterizer->fragment;
      //CtxExtend extend = rasterizer->state->gstate.extend;
      if (fragment == ctx_fragment_image_rgba8_RGBA8_nearest_copy)
      {
        ctx_RGBA8_image_rgba8_RGBA8_nearest_fill_rect_copy (rasterizer, x0, y0, x1, y1, 0);
        return;
      }
      else
#endif
#if 0
      if (fragment == ctx_fragment_image_rgba8_RGBA8_bi_scale)
      {
        ctx_RGBA8_image_rgba8_RGBA8_bi_scaled_fill_rect (rasterizer, x0, y0, x1,
y1, 0);
        return;
      }
#endif
      INIT_ENV;
      CTX_SIMD_SUFFIX(ctx_RGBA8_source_over_normal_full_cov_fragment) (width,
                         &dst[0], NULL, NULL, rasterizer, x0, y1-y0+1);
      return;
    }
    break;
    default:
    break;
    }
  }
  else
  {
    switch (comp)
    {
    case CTX_COV_PATH_RGBA8_COPY:
    {
      uint32_t color = ((uint32_t*)(rasterizer->color))[0];
      INIT_ENV;
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
            dst_i[i] = ctx_lerp_RGBA8 (dst_i[i], color, cov);
          dst += blit_stride;
        }
        return;
      }
    }
    case CTX_COV_PATH_RGBAF_COPY:
    {
      float *color = ((float*)rasterizer->color);
      float covf = cov / 255.0f;
      INIT_ENV;
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          float *dst_f = (float*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
          {
            for (unsigned int c = 0; c < 4; c++)
              dst_f[i*4+c] = ctx_lerpf (dst_f[i*4+c], color[c], covf);
          }
          dst += blit_stride;
        }
        return;
      }
    }
    case CTX_COV_PATH_RGBA8_OVER:
    {
      uint32_t color = ((uint32_t*)(rasterizer->color))[0];
      INIT_ENV;
      if (width == 1)
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          *dst_i = ctx_over_RGBA8 (*dst_i, color, cov);
          dst += blit_stride;
        }
      }
      else
      {
        for (unsigned int y = y0; y <= (unsigned)y1; y++)
        {
          uint32_t *dst_i = (uint32_t*)&dst[0];
          for (unsigned int i = 0; i < (unsigned)width; i++)
            dst_i[i] = ctx_over_RGBA8 (dst_i[i], color, cov);
          dst += blit_stride;
        }
      }
      return;
    }
    break;
    default:
    break;
    }
  }

  INIT_ENV;
#undef INIT_ENV


  /* fallback */
  {
    uint8_t coverage[width];
    memset (coverage, cov, sizeof (coverage) );
    uint8_t *rasterizer_src = rasterizer->color;
    ctx_apply_coverage_fun apply_coverage =
      rasterizer->apply_coverage;

    for (unsigned int y = y0; y <= (unsigned)y1; y++)
    {
      apply_coverage (width, &dst[0], rasterizer_src, coverage, rasterizer, (int)x0);
      rasterizer->scanline += CTX_FULL_AA;
      dst += blit_stride;
    }
  }
}


/**
 * ctx_composite_fill_rect:
 * @rasterizer: rasterizer whose target buffer (rasterizer->buf) is written
 * @x0: left edge, in the same coordinate space as rasterizer->blit_x
 * @y0: top edge, in the same coordinate space as rasterizer->blit_y
 * @x1: right edge
 * @y1: bottom edge
 * @cov: coverage requested by the caller. NOTE(review): the body never
 *       reads this value, the interior is always composited at 255.
 *
 * Composites a rectangle with the current source of the rasterizer.
 *
 * When all four coordinates are within 0.01 of an integer the work is
 * handed straight to ctx_composite_fill_rect_aligned(). Otherwise the
 * rectangle is split into up to five parts: a partially covered top row
 * and bottom row (composited through rasterizer->apply_coverage with a
 * per-pixel coverage array), partially covered left and right columns,
 * and a fully covered interior (the latter three go through
 * ctx_composite_fill_rect_aligned()).
 */
void
CTX_SIMD_SUFFIX (ctx_composite_fill_rect) (CtxRasterizer *rasterizer,
                          float          x0,
                          float          y0,
                          float          x1,
                          float          y1,
                          uint8_t        cov)
{
  /* fractional part of each coordinate, used both for the alignment test
   * and for the edge coverages further down */
  float x0_fm = ctx_fmod1f (x0);
  float y0_fm = ctx_fmod1f (y0);
  float x1_fm = ctx_fmod1f (x1);
  float y1_fm = ctx_fmod1f (y1);

  if(((int)(x0_fm < 0.01f) | (x0_fm > 0.99f)) &
     ((int)(y0_fm < 0.01f) | (y0_fm > 0.99f)) &
     ((int)(x1_fm < 0.01f) | (x1_fm > 0.99f)) &
     ((int)(y1_fm < 0.01f) | (y1_fm > 0.99f)))
  {
    /* best-case scenario axis aligned rectangle; the aligned helper takes
     * inclusive pixel coordinates, hence the -1 on the far edges */
    int ix0 = (int)x0;
    int iy0 = (int)y0;
    int ix1 = (int)x1-1;
    int iy1 = (int)y1-1;
    if ((ix1 >= ix0) & (iy1 >= iy0))
      ctx_composite_fill_rect_aligned (rasterizer, ix0, iy0, ix1, iy1, 255);
    return;
  }

  int blit_x = rasterizer->blit_x;
  int blit_y = rasterizer->blit_y;
  int blit_stride = rasterizer->blit_stride;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;
  uint8_t *rasterizer_src = rasterizer->color;
  ctx_apply_coverage_fun apply_coverage =
    rasterizer->apply_coverage;

  y1 += 1.0f;
  x1 += 7.0f/8.0f;

  /* coverage of the four partially covered edges, derived from the
   * fractional parts taken before the adjustment above */
  uint8_t left = (int)(255-x0_fm * 255);
  uint8_t top  = (int)(255-y0_fm * 255);
  uint8_t right  = (int)(x1_fm * 255);
  uint8_t bottom = (int)(y1_fm * 255);

  int has_top    = (top < 255);
  int has_bottom = (bottom < 255);
  int has_right  = (right > 0);
  int has_left   = (left > 0);

  /* an edge that falls outside the blit area has no partial row/column */
  has_right *= !(x1 >= blit_x + blit_width);
  has_bottom *= !(y1 >= blit_y + blit_height);

  /* clamp to the blit area and snap to whole pixels */
  x0 = ctx_maxi (x0, blit_x);
  x1 = ctx_mini (x1, blit_x + blit_width);
  y0 = ctx_maxi (y0, blit_y);
  y1 = ctx_mini (y1, blit_y + blit_height);
  x0 = ctx_floorf (x0);
  y0 = ctx_floorf (y0);
  x1 = ctx_floorf (x1);
  y1 = ctx_floorf (y1);

  int width = (int)(x1 - x0);
  int height = (int)(y1 - y0);

  if ((width >0) & (height>0))
  {
    uint8_t *dst = ( (uint8_t *) rasterizer->buf);
    uint8_t coverage[width+2];
    /* bounds of the fully covered interior (x1i/y1i are exclusive) */
    uint32_t x0i = (int)x0+has_left;
    uint32_t x1i = (int)x1-has_right;
    uint32_t y0i = (int)y0+has_top;
    uint32_t y1i = (int)y1-has_bottom;
    dst += (((int)y0) - blit_y) * blit_stride;
    dst += ((int)x0) * rasterizer->format->bpp/8;

    if (has_top)
    {
      /* top row: corner, span, corner */
      int i = 0;
      if (has_left)
      {
        coverage[i++] = (top * left + 255) >> 8;
      }
      for (unsigned int x = x0i; x < x1i; x++)
        coverage[i++] = top;
      if (has_right)
        coverage[i++]= (top * right + 255) >> 8;

      apply_coverage (width, dst, rasterizer_src, coverage, rasterizer, (int)x0);
      dst += blit_stride;
    }

    if (y1-y0-has_top-has_bottom > 0)
    {
      if (has_left)
        ctx_composite_fill_rect_aligned (rasterizer, (int)x0, y0i,
                                                     (int)x0, y1i-1, left);
      if (has_right)
        ctx_composite_fill_rect_aligned (rasterizer, (int)x1-1, y0i,
                                                     (int)x1-1, y1i-1, right);

      if (width - has_left - has_right > 0)
        ctx_composite_fill_rect_aligned (rasterizer, x0i,y0i,
                                            x1i-1,y1i-1,255);

      /* skip dst past the rows handled by the aligned fills */
      dst += blit_stride * (y1i-y0i);
    }

    if (has_bottom)
    {
      /* bottom row, built the same way as the top row; the right corner is
       * only emitted when there is a partial right column, so nothing is
       * written past the 'width' entries that apply_coverage consumes */
      int i = 0;
      if (has_left)
        coverage[i++] = (bottom * left + 255) >> 8;
      for (unsigned int x = x0i; x < x1i; x++)
        coverage[i++] = bottom;
      if (has_right)
        coverage[i++]= (bottom * right + 255) >> 8;

      apply_coverage (width, dst, rasterizer_src, coverage, rasterizer, (int)x0);
    }
  }
}

#if CTX_FAST_STROKE_RECT

/**
 * ctx_composite_stroke_rect:
 * @rasterizer: rasterizer to composite into
 * @x0: left coordinate of the stroked outline
 * @y0: top coordinate of the stroked outline
 * @x1: right coordinate of the stroked outline
 * @y1: bottom coordinate of the stroked outline
 * @line_width: stroke width
 *
 * Strokes an axis aligned rectangle by compositing a handful of filled
 * rectangles instead of going through the general path rasterizer.
 *
 * When the line width is (nearly) integral and the outline lands on pixel
 * boundaries - on half pixels for odd widths - the stroke is made up of four
 * pixel aligned bands drawn with ctx_composite_fill_rect_aligned(), which
 * takes inclusive pixel coordinates. Otherwise four sides and four corners
 * are drawn with ctx_composite_fill_rect().
 */
void
CTX_SIMD_SUFFIX(ctx_composite_stroke_rect) (CtxRasterizer *rasterizer,
                           float          x0,
                           float          y0,
                           float          x1,
                           float          y1,
                           float          line_width)
{
  float lwmod = ctx_fmod1f (line_width);
  int lw = (int)ctx_floorf (line_width + 0.5f);
  int is_compat_even = (lw % 2 == 0) && (lwmod < 0.1f); // integral, even line width
  int is_compat_odd = (lw % 2 == 1) && (lwmod < 0.1f);  // integral, odd line width

  float off_x = 0;
  float off_y = 0;

  if (is_compat_odd)
  {
    /* odd widths are pixel aligned when the outline sits on half pixels */
    off_x = 0.5f;
    off_y = (CTX_FULL_AA/2)*1.0f / (CTX_FULL_AA);
  }

  if((is_compat_odd | is_compat_even) &

     (((int)(ctx_fmod1f (x0-off_x) < 0.01f) | (ctx_fmod1f(x0-off_x) > 0.99f)) &
     ((int)(ctx_fmod1f (y0-off_y) < 0.01f) | (ctx_fmod1f(y0-off_y) > 0.99f)) &
     ((int)(ctx_fmod1f (x1-off_x) < 0.01f) | (ctx_fmod1f(x1-off_x) > 0.99f)) &
     ((int)(ctx_fmod1f (y1-off_y) < 0.01f) | (ctx_fmod1f(y1-off_y) > 0.99f))))
  {
    /* bwb pixels of the stroke lie before the outline coordinate, bw pixels
     * from the coordinate onwards */
    int bw = lw/2+1;
    int bwb = lw/2;

    if (is_compat_even)
    {
      bw = lw/2;
    }

    /* The four bands must not overlap, otherwise a non-opaque source is
     * composited twice on the shared pixels:
     *
     *   top    - full width, rows y0-bwb .. y0+bw-1
     *   right  - right column, from below the top band down to the last row
     *   bottom - bottom rows, from the left edge up to the right column
     *   left   - left column, between the top band and the bottom band
     */

    /* top */
    ctx_composite_fill_rect_aligned (rasterizer,
                                     (int)x0-bwb, (int)y0-bwb,
                                     (int)x1+bw-1, (int)y0+bw-1, 255);
    /* bottom */
    ctx_composite_fill_rect_aligned (rasterizer,
                                     (int)x0-bwb, (int)y1-bwb,
                                     (int)x1-bwb-1, (int)y1+bw-1, 255);

    /* left */
    ctx_composite_fill_rect_aligned (rasterizer,
                                     (int)x0-bwb, (int)y0+bw,
                                     (int)x0+bw-1, (int)y1-bwb-1, 255);
    /* right */
    ctx_composite_fill_rect_aligned (rasterizer,
                                     (int)x1-bwb, (int)y0+bw,
                                     (int)x1+bw-1, (int)y1+bw-1, 255);
  }
  else
  {
    float hw = line_width/2;

    /* top */
    ctx_composite_fill_rect (rasterizer,
                             x0+hw, y0-hw,
                             x1-hw, y0+hw, 255);
    /* bottom */
    ctx_composite_fill_rect (rasterizer,
                             x0+hw, y1-hw,
                             x1-hw, y1+hw, 255);

    /* left */
    ctx_composite_fill_rect (rasterizer,
                             x0-hw, y0+hw,
                             x0+hw, y1-hw, 255);
    /* right */
    ctx_composite_fill_rect (rasterizer,
                             x1-hw, y0+hw,
                             x1+hw, y1-hw, 255);

    /* corners */
    ctx_composite_fill_rect (rasterizer,
                             x0-hw, y0-hw,
                             x0+hw, y0+hw, 255);
    ctx_composite_fill_rect (rasterizer,
                             x1-hw, y1-hw,
                             x1+hw, y1+hw, 255);
    ctx_composite_fill_rect (rasterizer,
                             x1-hw, y0-hw,
                             x1+hw, y0+hw, 255);
    ctx_composite_fill_rect (rasterizer,
                             x0-hw, y1-hw,
                             x0+hw, y1+hw, 255);
  }
}
#endif
#endif



/**
 * ctx_composite_setup:
 * @rasterizer: rasterizer to prepare
 *
 * Prepares the rasterizer for compositing with its current fill source.
 * Does nothing when rasterizer->comp_op is already set. Otherwise, when
 * gradients and the gradient cache are compiled in, the gradient cache is
 * primed for gradient sources and the source transform is recomputed and
 * inverted for texture sources; finally the setup hook of the target pixel
 * format is run.
 */
void
CTX_SIMD_SUFFIX (ctx_composite_setup) (CtxRasterizer *rasterizer)
{
  if (rasterizer->comp_op != NULL)
    return;

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
  /* NOTE(review): the texture transform update below is only compiled in
   * together with the gradient cache - confirm that this is intended */
  if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_TEXTURE)
  {
    CtxMatrix *transform = &rasterizer->state->gstate.source_fill.transform;

    _ctx_matrix_multiply (transform,
                          &rasterizer->state->gstate.transform,
                          &rasterizer->state->gstate.source_fill.set_transform);
    ctx_matrix_invert (transform);
  }
  else if (rasterizer->state->gstate.source_fill.type == CTX_SOURCE_CONIC_GRADIENT  ||
           rasterizer->state->gstate.source_fill.type == CTX_SOURCE_LINEAR_GRADIENT ||
           rasterizer->state->gstate.source_fill.type == CTX_SOURCE_RADIAL_GRADIENT)
  {
    ctx_gradient_cache_prime (rasterizer);
  }
#endif
#endif

  rasterizer->format->setup (rasterizer);
}


/* Table of the pixel formats compiled into this build, terminated by a
 * CTX_FORMAT_NONE entry.
 *
 * Each entry is a positional initializer; judging from the values the
 * fields appear to be (TODO confirm against the CtxPixelFormatInfo
 * declaration):
 *
 *   pixel format, component count, bits per pixel, bytes per pixel of the
 *   compositing representation, two dither parameters, compositing format,
 *   conversion to the compositing format, conversion back, apply_coverage
 *   function, setup function
 *
 * A NULL conversion/apply_coverage slot means the entry supplies no such
 * function.
 */
const CtxPixelFormatInfo CTX_SIMD_SUFFIX(ctx_pixel_formats)[]=
{
#if CTX_ENABLE_RGBA8
  {
    CTX_FORMAT_RGBA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
    NULL, NULL, NULL, ctx_setup_RGBA8
  },
#endif
#if CTX_ENABLE_BGRA8
  {
    CTX_FORMAT_BGRA8, 4, 32, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_BGRA8_to_RGBA8, ctx_RGBA8_to_BGRA8, ctx_composite_BGRA8, ctx_setup_RGBA8,
  },
#endif
#if CTX_ENABLE_GRAYF
  {
    CTX_FORMAT_GRAYF, 1, 32, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
    NULL, NULL, ctx_composite_GRAYF, ctx_setup_GRAYAF,
  },
#endif
#if CTX_ENABLE_GRAYAF
  {
    CTX_FORMAT_GRAYAF, 2, 64, 4 * 2, 0, 0, CTX_FORMAT_GRAYAF,
    NULL, NULL, NULL, ctx_setup_GRAYAF,
  },
#endif
#if CTX_ENABLE_RGBAF
  {
    CTX_FORMAT_RGBAF, 4, 128, 4 * 4, 0, 0, CTX_FORMAT_RGBAF,
    NULL, NULL, NULL, ctx_setup_RGBAF,
  },
#endif
#if CTX_ENABLE_GRAY1
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY1, 1, 1, 2, 1, 1, CTX_FORMAT_GRAYA8,
    ctx_GRAY1_to_GRAYA8, ctx_GRAYA8_to_GRAY1, ctx_composite_convert, ctx_setup_GRAY1,
#else
    CTX_FORMAT_GRAY1, 1, 1, 4, 1, 1, CTX_FORMAT_RGBA8,
    ctx_GRAY1_to_RGBA8, ctx_RGBA8_to_GRAY1, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY2
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY2, 1, 2, 2, 4, 4, CTX_FORMAT_GRAYA8,
    ctx_GRAY2_to_GRAYA8, ctx_GRAYA8_to_GRAY2, ctx_composite_convert, ctx_setup_GRAY2,
#else
    CTX_FORMAT_GRAY2, 1, 2, 4, 4, 4, CTX_FORMAT_RGBA8,
    ctx_GRAY2_to_RGBA8, ctx_RGBA8_to_GRAY2, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY4
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY4, 1, 4, 2, 16, 16, CTX_FORMAT_GRAYA8,
    ctx_GRAY4_to_GRAYA8, ctx_GRAYA8_to_GRAY4, ctx_composite_convert, ctx_setup_GRAY4,
#else
    /* composited as RGBA8, matching the 4 byte ebpp and the RGBA8
     * converters below - same as the other non-native gray entries */
    CTX_FORMAT_GRAY4, 1, 4, 4, 16, 16, CTX_FORMAT_RGBA8,
    ctx_GRAY4_to_RGBA8, ctx_RGBA8_to_GRAY4, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAY8
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAY8, 1, 8, 2, 0, 0, CTX_FORMAT_GRAYA8,
    ctx_GRAY8_to_GRAYA8, ctx_GRAYA8_to_GRAY8, ctx_composite_convert, ctx_setup_GRAY8,
#else
    CTX_FORMAT_GRAY8, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_GRAY8_to_RGBA8, ctx_RGBA8_to_GRAY8, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_GRAYA8
  {
#if CTX_NATIVE_GRAYA8
    CTX_FORMAT_GRAYA8, 2, 16, 2, 0, 0, CTX_FORMAT_GRAYA8,
    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, NULL, ctx_setup_GRAYA8,
#else
    CTX_FORMAT_GRAYA8, 2, 16, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_GRAYA8_to_RGBA8, ctx_RGBA8_to_GRAYA8, ctx_composite_convert, ctx_setup_RGB,
#endif
  },
#endif
#if CTX_ENABLE_RGB332
  {
    CTX_FORMAT_RGB332, 3, 8, 4, 12, 12, CTX_FORMAT_RGBA8,
    ctx_RGB332_to_RGBA8,  ctx_RGBA8_to_RGB332,
    ctx_composite_RGB332, ctx_setup_RGB332,
  },
#endif
#if CTX_ENABLE_RGB565
  {
    CTX_FORMAT_RGB565, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
    ctx_RGB565_to_RGBA8,  ctx_RGBA8_to_RGB565,
    ctx_composite_RGB565, ctx_setup_RGB565,
  },
#endif
#if CTX_ENABLE_RGB8
  {
    CTX_FORMAT_RGB8, 3, 24, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_RGB8_to_RGBA8, ctx_RGBA8_to_RGB8,
    ctx_composite_RGB8, ctx_setup_RGB8,
  },
#endif
#if CTX_ENABLE_BGR8
  {
    CTX_FORMAT_BGR8, 3, 24, 4, 0, 0, CTX_FORMAT_RGBA8,
    ctx_RGB8_to_RGBA8, ctx_RGBA8_to_RGB8,
    ctx_composite_BGR8, ctx_setup_RGB8,
  },
#endif
#if CTX_ENABLE_RGB565_BYTESWAPPED
  {
    CTX_FORMAT_RGB565_BYTESWAPPED, 3, 16, 4, 16, 32, CTX_FORMAT_RGBA8,
    ctx_RGB565_BS_to_RGBA8,
    ctx_RGBA8_to_RGB565_BS,
    ctx_composite_RGB565_BS, ctx_setup_RGB565,
  },
#endif
#if CTX_ENABLE_CMYKAF
  {
    CTX_FORMAT_CMYKAF, 5, 160, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, NULL, ctx_setup_CMYKAF,
  },
#endif
#if CTX_ENABLE_CMYKA8
  {
    CTX_FORMAT_CMYKA8, 5, 40, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, ctx_composite_CMYKA8, ctx_setup_CMYKA8,
  },
#endif
#if CTX_ENABLE_CMYK8
  {
    CTX_FORMAT_CMYK8, 5, 32, 4 * 5, 0, 0, CTX_FORMAT_CMYKAF,
    NULL, NULL, ctx_composite_CMYK8, ctx_setup_CMYK8,
  },
#endif
#if CTX_ENABLE_YUV420
  {
    CTX_FORMAT_YUV420, 1, 8, 4, 0, 0, CTX_FORMAT_RGBA8,
    NULL, NULL, ctx_composite_convert, ctx_setup_RGB,
  },
#endif
  /* terminator */
  {
    CTX_FORMAT_NONE, 0, 0, 0, 0, 0, (CtxPixelFormat)0, NULL, NULL, NULL, NULL,
  }
};

#endif // CTX_COMPOSITE

#ifndef __clang__
#if CTX_COMPOSITE_O3
#pragma GCC pop_options
#endif
#if CTX_COMPOSITE_O2
#pragma GCC pop_options
#endif
#endif

#endif // CTX_IMPLEMENTATION

#ifndef __clang__
#if CTX_RASTERIZER_O3
#pragma GCC push_options
#pragma GCC optimize("O3")
#endif
#if CTX_RASTERIZER_O2
#pragma GCC push_options
#pragma GCC optimize("O2")
#endif
#endif

#if CTX_IMPLEMENTATION || CTX_SIMD_BUILD
#if CTX_COMPOSITE 

/* half of the vertical sub-scanline count CTX_FULL_AA, plus one */
#define CTX_AA_HALFSTEP    ((CTX_FULL_AA/2)+1)
/* half of CTX_FULL_AA; added to the scanline when the initial x position
 * of a newly fed edge is computed */
#define CTX_AA_HALFSTEP2   (CTX_FULL_AA/2)


/* added to rasterizer->scanline before edge y coordinates are compared
 * against it */
#define CTX_MAGIC_OFFSET  1 // without this we get scanline glitches

/* Drops active edges that end above the current scanline and recounts
 * rasterizer->ending_edges: the number of active and pending edges that
 * end before the next full scanline.
 *
 * Removal swaps the last active edge into the freed slot, so the order of
 * the active edge list is not preserved.
 */
static inline void ctx_rasterizer_discard_edges (CtxRasterizer *rasterizer)
{
  const int cur_y  = rasterizer->scanline + CTX_MAGIC_OFFSET;
  const int next_y = cur_y + CTX_FULL_AA;
  CtxSegment *segs = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
  int *edge_ids = rasterizer->edges;
  unsigned int n_active = rasterizer->active_edges;
  int n_ending = 0;

  unsigned int slot = 0;
  while (slot < n_active)
    {
      CtxSegment *seg = segs + edge_ids[slot];
      const int end_y = seg->y1;

      if (end_y >= cur_y)
        {
          /* edge stays; note whether it ends within this scanline */
          n_ending += (end_y < next_y);
          slot++;
          continue;
        }

      /* edge is finished: refill the slot from the tail and look at the
       * same slot again on the next iteration */
      edge_ids[slot] = edge_ids[n_active - 1];
      rasterizer->scan_aa[seg->aa]--;
      n_active--;
    }
  rasterizer->active_edges = n_active;

  /* pending edges are stored from the end of the edges array downwards */
  const unsigned int n_pending = rasterizer->pending_edges;
  for (unsigned int k = 0; k < n_pending; k++)
    {
      const int end_y = segs[edge_ids[CTX_MAX_EDGES-1-k]].y1;
      n_ending += (end_y < next_y);
    }
  rasterizer->ending_edges = n_ending;
}

/* Advances the current x value (val) of every active and every pending
 * edge by count times its per-step delta.
 */
CTX_INLINE static void ctx_rasterizer_increment_edges (CtxRasterizer *rasterizer, int count)
{
  CtxSegment *__restrict__ segments = &((CtxSegment*)(rasterizer->edge_list.entries))[0];
  unsigned int n_active  = rasterizer->active_edges;
  unsigned int n_pending = rasterizer->pending_edges;
  /* pending edges occupy the last n_pending slots of the edges array */
  unsigned int first_pending = CTX_MAX_EDGES-n_pending;
  unsigned int k;

  for (k = 0; k < n_active; k++)
    {
      CtxSegment *seg = &segments[rasterizer->edges[k]];
      seg->val += seg->delta * count;
    }

  for (k = 0; k < n_pending; k++)
    {
      CtxSegment *seg = &segments[rasterizer->edges[first_pending+k]];
      seg->val += seg->delta * count;
    }
}

/* Sorts the active edge indices by ascending current x value (val) of the
 * segments they refer to, using an in-place insertion sort.
 */
CTX_INLINE static void ctx_rasterizer_sort_active_edges (CtxRasterizer *rasterizer)
{
  CtxSegment *segments= (CtxSegment*)rasterizer->edge_list.entries;
  int *entries = rasterizer->edges;
  unsigned int count = rasterizer->active_edges;

  for (unsigned int i = 1; i < count; i++)
   {
     const int moving = entries[i];
     const int key = segments[moving].val;
     int slot = i;

     /* shift larger entries one step right until the spot for 'moving'
      * is found */
     for (; slot > 0 && key - segments[entries[slot-1]].val < 0; slot--)
       entries[slot] = entries[slot-1];

     entries[slot] = moving;
   }
}

/* Moves pending edges whose start (y0) has been reached by the current
 * scanline into the active edge list, as long as there is room for them,
 * and then discards the active edges that have ended.
 *
 * Pending edges live at the end of the edges array, growing downwards;
 * removing one swaps the last pending entry into its slot.
 */
CTX_INLINE static void ctx_rasterizer_feed_pending_edges (CtxRasterizer *rasterizer)
{
  CtxSegment *__restrict__ entries = (CtxSegment*)&rasterizer->edge_list.entries[0];
  int *edges = rasterizer->edges;
  unsigned int pending_edges = rasterizer->pending_edges;
  int active_edges = rasterizer->active_edges;
  const int scanline = rasterizer->scanline + CTX_MAGIC_OFFSET;

  unsigned int i = 0;
  while (i < pending_edges)
    {
      const unsigned int slot = CTX_MAX_EDGES-1-i;

      if ((entries[edges[slot]].y0 <= scanline) &
          (active_edges < CTX_MAX_EDGES-2))
        {
          /* activate, then refill the slot with the last pending edge;
           * the same slot is examined again on the next iteration */
          edges[active_edges++] = edges[slot];
          edges[slot] = edges[CTX_MAX_EDGES-pending_edges];
          pending_edges--;
        }
      else
        {
          i++;
        }
    }

  rasterizer->active_edges = active_edges;
  rasterizer->pending_edges = pending_edges;
  ctx_rasterizer_discard_edges (rasterizer);
}

// makes us up-to date with ready to render rasterizer->scanline
//
// First activates pending edges that have been reached and discards ended
// ones (ctx_rasterizer_feed_pending_edges), optionally maintains the shadow
// edge list, then takes in new edges from the edge list (or from the scan
// bin of this scanline when CTX_SCANBIN is set), computing the slope (delta),
// start position (val) and anti-aliasing class (aa) of each.
//
// with_shadow:  when non-zero, rasterizer->shadow_edges is updated too
// blur_radius:  truncated to int and used to widen the scanline window in
//               which shadow edges are kept
//
// Returns -1 when there are neither active nor pending edges, 0 when no
// edge starts, ends or is horizontal around this scanline, and otherwise
// the vertical anti-aliasing level to use (3, 5 or 15).
inline static int ctx_rasterizer_feed_edges_full (CtxRasterizer *rasterizer,
                                                  int with_shadow,
                                                  float blur_radius)
{
  int miny;
  const int max_vaa = rasterizer->aa;
  ctx_rasterizer_feed_pending_edges (rasterizer);
  CtxSegment *__restrict__ entries = (CtxSegment*)&rasterizer->edge_list.entries[0];
  int *edges = rasterizer->edges;
  unsigned int pending_edges   = rasterizer->pending_edges;
  int scanline = rasterizer->scanline + CTX_MAGIC_OFFSET;

  int active_edges = rasterizer->active_edges;
  int horizontal_edges = 0;

  if (with_shadow)
  {
  // note: this 'edges' shadows the outer one and refers to the shadow list
  int shadow_active_edges = rasterizer->shadow_active_edges;
  int *edges = rasterizer->shadow_edges;
  int blur_scanline_start = scanline - CTX_FULL_AA * (int)blur_radius;
  int next_scanline = scanline + CTX_FULL_AA * (int)blur_radius;
  unsigned int edge_pos = rasterizer->shadow_edge_pos;
  unsigned int edge_count = rasterizer->edge_list.count;
  // drop shadow edges that end above the blur window (swap-remove)
  for (int i = 0; i < shadow_active_edges;i++)
  {
    if (entries[edges[i]].y1 < blur_scanline_start)
    {
       edges[i]=edges[shadow_active_edges-1];
       shadow_active_edges--;
       i--;
    }
  }

  // take in edges that start at or above the lower end of the blur window
  while ((edge_pos < edge_count &&
         (miny=entries[edge_pos].y0)  <= next_scanline))
  {
      int y1 = entries[edge_pos].y1;
      if ((shadow_active_edges < CTX_MAX_EDGES-2) &
        (y1 >= blur_scanline_start))
        {
          edges[shadow_active_edges++] = edge_pos;
        }
      edge_pos++;
  }
  rasterizer->shadow_edge_pos     = edge_pos;
  rasterizer->shadow_active_edges = shadow_active_edges;
  }


  // the loop header differs between the two configurations, the loop body
  // below is shared
#if CTX_SCANBIN
   int scan = scanline / CTX_FULL_AA;
   int count = rasterizer->scan_bin_count[scan];
   if (count)
   for (int i = 0; i < count; i++)
   {
       int edge_pos = rasterizer->scan_bins[scan][i];
       miny = entries[edge_pos].y0;
#else
  int next_scanline = scanline + CTX_FULL_AA;
  unsigned int edge_pos = rasterizer->edge_pos;
  unsigned int edge_count = rasterizer->edge_list.count;
  while ((edge_pos < edge_count &&
         (miny=entries[edge_pos].y0)  <= next_scanline))
  {
#endif
      int y1 = entries[edge_pos].y1;
      if ((active_edges < CTX_MAX_EDGES-2) &
        (y1 >= scanline))
        {
          int dy = (y1 - miny);
          if (dy)
            {
              // x increment per step and x position at this scanline
              int yd = (scanline + CTX_AA_HALFSTEP2) - miny;
              unsigned int index = edges[active_edges] = edge_pos;
              int x0 = entries[index].x0;
              int x1 = entries[index].x1;
              int dx_dy = CTX_RASTERIZER_EDGE_MULTIPLIER * (x1 - x0) / dy;
              entries[index].delta = dx_dy;
              entries[index].val = x0 * CTX_RASTERIZER_EDGE_MULTIPLIER + (yd * dx_dy);

              {
                dx_dy = abs(dx_dy);

                // slope thresholds above which an edge is put in a higher
                // anti-aliasing class; only the innermost #else set is active
#if 0
#define CTX_RASTERIZER_AA_SLOPE_LIMIT3           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*1.3)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT5           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*14)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT15          ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*15)/CTX_SUBDIV/15/1024)
#else
#if 0
#define CTX_RASTERIZER_AA_SLOPE_LIMIT3           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT5           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*3)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT15          ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*5)/CTX_SUBDIV/15/1024)
#else
#define CTX_RASTERIZER_AA_SLOPE_LIMIT3           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*0.95)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT5           ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*6.5)/CTX_SUBDIV/15/1024)
#define CTX_RASTERIZER_AA_SLOPE_LIMIT15          ((65536*CTX_RASTERIZER_EDGE_MULTIPLIER*10.5)/CTX_SUBDIV/15/1024)
#endif
#endif

                // aa class 0..3, capped according to the rasterizer's
                // maximum vertical anti-aliasing setting
                int aa = 0;
                if (max_vaa > 5)
                aa = (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT3) 
                   +  (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5) 
                   +  (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT15);
                else
                aa = (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT3) 
                   +  (dx_dy > CTX_RASTERIZER_AA_SLOPE_LIMIT5) * (max_vaa>3);
                
                // scan_aa counts the edges of each class currently in play
                rasterizer->scan_aa[aa]++;
                entries[index].aa = aa;
              }

              if ((miny > scanline) &
                  (pending_edges < CTX_MAX_PENDING-1))
              {
                  /* it is a pending edge - we add it to the end of the array
                     and keep a different count for items stored here, like
                     a heap and stack growing against each other
                  */
                  edges[CTX_MAX_EDGES-1-pending_edges] = edges[active_edges];
                  pending_edges++;
                  // cancels the unconditional increment below
                  active_edges--;
              }
              active_edges++;
            }
            else
            {
              // zero height: not added to any list, only counted
              horizontal_edges++;
            }
        }
#if CTX_SCANBIN
#else
      edge_pos++;
#endif
  }
#if CTX_SCANBIN==0
    rasterizer->edge_pos         = edge_pos;
#endif
    rasterizer->active_edges     = active_edges;
    rasterizer->pending_edges    = pending_edges;
    if (active_edges + pending_edges == 0)
      return -1;

    if (rasterizer->ending_edges|pending_edges|horizontal_edges)
    {
      // highest anti-aliasing class present among the counted edges
      const unsigned int *scan_aa = rasterizer->scan_aa;
      int aa = scan_aa[3]?15:scan_aa[2]?5:3;
      return aa;
      //return ctx_mini(aa, rasterizer->aa);
    }
    return 0;
}

/* Applies the clip mask to a freshly generated row of coverage values.
 *
 * Only does work when clipping is compiled in, a clip buffer exists and the
 * clip is not a plain rectangle. The coverage entries minx..maxx (inclusive)
 * are then scaled by the matching row of the clip buffer.
 *
 * rasterizer: provides the clip buffer, blit geometry and current scanline
 * minx, maxx: inclusive range of coverage indices to process
 * coverage:   coverage values, modified in place
 * first_col, last_col: not used by this function
 */
static inline void ctx_coverage_post_process (CtxRasterizer *rasterizer, const unsigned int minx, const unsigned int maxx, uint8_t *coverage, int *first_col, int *last_col)
{
#if CTX_ENABLE_CLIP
  if (CTX_UNLIKELY((rasterizer->clip_buffer!=NULL) &  (!rasterizer->clip_rectangle)))
  {
    /* the post process runs after coverage generation has already
     * incremented the scanline, so step back one full scanline */
    int scanline     = rasterizer->scanline - CTX_FULL_AA;
    /* perhaps not working right for clear? */
    int y = scanline / CTX_FULL_AA;
    uint8_t *clip_line = &((uint8_t*)(rasterizer->clip_buffer->data))[rasterizer->blit_width*y];
#if CTX_1BIT_CLIP==0
    int blit_x = rasterizer->blit_x;
#endif
    for (unsigned int x = minx; x <= maxx; x ++)
    {
#if CTX_1BIT_CLIP
       /* one clip bit per pixel: byte x/8, bit x&7. The mask has to be
        * x&7 - with x&8 the shift is either 0 or 8, and 1<<8 can never
        * match a bit of an 8 bit value.
        * NOTE(review): assumes the clip buffer is written least significant
        * bit first - confirm against the code that fills it */
       coverage[x] = (coverage[x] * ((clip_line[x/8]&(1<<(x&7)))?255:0))/255;
#else
       coverage[x] = (255 + coverage[x] * clip_line[x-blit_x])>>8;
#endif
    }
  }
#endif
}

/* UPDATE_PARITY: update the fill state when passing `segment`.
 *
 * Expands in place and relies on the locals `scanline`, `segment`,
 * `is_winding` and `parity` being in scope where it is used.
 *
 * Nothing happens when scanline equals segment->y0-1.  Otherwise, with the
 * winding rule parity is incremented for segments whose code is
 * CTX_EDGE_FLIPPED and decremented for all other segments; without the
 * winding rule parity toggles between 0 and 1.
 */
#define UPDATE_PARITY \
        if (scanline!=segment->y0-1)\
        { \
          if (is_winding)\
             parity = parity + -1+2*(segment->code == CTX_EDGE_FLIPPED);\
          else\
             parity = 1-parity; \
        }


/* Accumulate coverage for the rasterizer's current scanline.
 *
 * Walks pairs of neighbouring active edges; whenever the fill state between
 * a pair is non-zero, the pixels between the two edge positions receive
 * coverage: partial amounts on the first and last pixel and @fraction on
 * every pixel in between.
 *
 * @minx, @maxx: inclusive column range; spans are clamped to it
 * @coverage:    buffer whose element 0 corresponds to column @minx
 * @is_winding:  non-zero to use the winding rule (read by UPDATE_PARITY)
 * @aa_factor:   divisor applied to the 0..255 sub-pixel position
 * @fraction:    amount added to a pixel that is fully inside a span
 * @ret_c0, @ret_c1: in/out, lowered to the smallest first column and raised
 *               to the largest last column that was visited
 */
CTX_INLINE static void
ctx_rasterizer_generate_coverage (CtxRasterizer *rasterizer,
                                  int            minx,
                                  int            maxx,
                                  uint8_t       *coverage,
                                  int            is_winding,
                                  const uint8_t  aa_factor,
                                  const uint8_t  fraction,
                                  int *ret_c0,
                                  int *ret_c1
                                  )
{
  CtxSegment *entries    = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int        *edges      = rasterizer->edges;
  int         scanline   = rasterizer->scanline; /* used by UPDATE_PARITY */
  const int   pair_count = rasterizer->active_edges - 1;
  int         parity     = 0;
  int         col_lo     = *ret_c0;
  int         col_hi     = *ret_c1;

  /* from here on coverage is indexed with absolute column numbers */
  coverage -= minx;

  for (int t = 0; t < pair_count; t++)
  {
    CtxSegment *segment = &entries[edges[t]];
    UPDATE_PARITY;

    if (!parity)
      continue;

    CtxSegment *right_edge = &entries[edges[t+1]];
    const int   left_x     = segment->val;
    const int   right_x    = right_edge->val;

    /* positions in 1/256 pixel units: high bits column, low 8 bits fraction */
    int gray_in  = left_x  / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
    int gray_out = right_x / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
    int first    = gray_in  >> 8;
    int last     = gray_out >> 8;

    if (first < minx)
    {
      first   = minx;
      gray_in = 0;
    }
    if (last > maxx)
    {
      last     = maxx;
      gray_out = 255;
    }

    gray_in  = fraction - (gray_in & 0xff) / aa_factor;
    gray_out = (gray_out & 0xff) / aa_factor;

    if (first == last)
    {
      /* both edges inside one pixel */
      coverage[first] += (gray_in - fraction + gray_out);
    }
    else if (first < last)
    {
      coverage[first] += gray_in;
      for (int col = first + 1; col < last; col++)
        coverage[col] += fraction;
      coverage[last] += gray_out;
    }

    col_lo = ctx_mini (first, col_lo);
    col_hi = ctx_maxi (last, col_hi);
  }

  *ret_c0 = col_lo;
  *ret_c1 = col_hi;
}

/* Squared distance from the point (x, y) to the line segment running from
 * (x1, y1) to (x2, y2).
 *
 * The point is projected onto the segment; projections falling before the
 * start or after the end are measured against the respective end point.
 * A zero-length segment is measured against (x1, y1).
 */
static inline float ctx_p_line_sq_dist (float x, float y, float x1, float y1, float x2, float y2) {
  const float rel_x  = x - x1;
  const float rel_y  = y - y1;
  const float seg_dx = x2 - x1;
  const float seg_dy = y2 - y1;

  const float projection = rel_x * seg_dx + rel_y * seg_dy;
  const float seg_len_sq = seg_dx * seg_dx + seg_dy * seg_dy;

  /* -1 selects the start point when the segment has no length */
  float t = -1.0f;
  if (seg_len_sq != 0.0f)
    t = projection / seg_len_sq;

  float near_x = x1;
  float near_y = y1;
  if (t > 1.0f)
  {
    near_x = x2;
    near_y = y2;
  }
  else if (!(t < 0.0f))
  {
    near_x = x1 + t * seg_dx;
    near_y = y1 + t * seg_dy;
  }

  const float off_x = x - near_x;
  const float off_y = y - near_y;
  return off_x * off_x + off_y * off_y;
}

/* Squared distance from (u, v) to the edge entries[edge_no].
 *
 * The edge's x coordinates are rescaled by CTX_FULL_AA / CTX_SUBDIV before
 * measuring; its y coordinates are used as stored.
 */
static inline float dist_to_edge_sq (int u, int v, CtxSegment *__restrict__ entries, int edge_no)
{
  const CtxSegment *edge    = &entries[edge_no];
  const float       x_scale = (1.0f * CTX_FULL_AA / CTX_SUBDIV );

  const float start_x = edge->x0 * x_scale;
  const float end_x   = edge->x1 * x_scale;
  const float start_y = edge->y0;
  const float end_y   = edge->y1;

  return ctx_p_line_sq_dist (u, v, start_x, start_y, end_x, end_y);
}

/* Distance from (u, v) to the edge entries[edge_no]: the square root
 * (via ctx_sqrtf_fast) of dist_to_edge_sq(). */
static inline float dist_to_edge (int u, int v, CtxSegment *__restrict__ entries, int edge_no)
{
  const float squared = dist_to_edge_sq (u, v, entries, edge_no);
  return ctx_sqrtf_fast (squared);
}

static inline float smin_exp( float a, float b, float k )
{
    k *= 1.0;
    float r = exp2(-a/k) + exp2(-b/k);
    return -k*log2(r);
}

/* Smooth minimum of a and b.  With k4 = 4*k, the plain minimum is lowered by
 * h*h*k4/4 where h = max(k4 - |a-b|, 0) / k4, so the result only differs from
 * the plain minimum when a and b are closer than k4. */
static inline float smin_cubic( float a, float b, float k )
{
  const float k4   = k * 4.0f;
  const float room = k4 - ctx_fabsf (a - b);
  /* multiplying by the comparison result zeroes non-positive values */
  const float h    = (room * (room > 0)) / k4;
  return ctx_minf (a, b) - h * h * k4 * 0.25f;
}

/* Distance from (u, v) to the nearest of the edge_count edges listed in
 * edges[], multiplied by sign.
 *
 * With CTX_RASTERIZER_BLUR_FUDGE the per-edge distances are combined with
 * smin_cubic() using blur/2 as smoothing amount; otherwise the smallest
 * squared distance is tracked and a single square root is taken at the end.
 */
static CTX_INLINE float ctx_sdf_f (CtxSegment *entries, int u, int v, float sign, int edge_count, float blur, int *edges)
{
#if CTX_RASTERIZER_BLUR_FUDGE
  float nearest = 2048 * 15;
  for (int e = 0; e < edge_count; e++)
  {
     const float d = dist_to_edge (u, v, entries, edges[e]);
     nearest = smin_cubic (nearest, d, blur/2);
  }
#else
  float nearest_sq = 2048 * 2048 * 15 * 15;
  for (int e = 0; e < edge_count; e++)
  {
     const float d_sq = dist_to_edge_sq (u, v, entries, edges[e]);
     nearest_sq = ctx_minf (nearest_sq, d_sq);
  }
  float nearest = ctx_sqrtf_fast (nearest_sq);
#endif
  return nearest * sign;
}
/* Odd, S-shaped approximation function: x is scaled by 2/sqrt(pi), run
 * through an odd polynomial p, and p / sqrt(1 + p*p) is returned (computed
 * with ctx_invsqrtf_fast).
 * NOTE(review): the name suggests an error-function approximation - confirm
 * the intended accuracy before relying on it elsewhere. */
static inline float ctx_erf2(float x)
{
  #define CTX_2_SQRTPI 1.12837916709551257390f  /* 2/sqrt(pi) */
  const float s  = x * CTX_2_SQRTPI;
  const float s2 = s * s;
  const float p  = s + (0.24295f + (0.03395f + 0.0104f * s2) * s2) * (s * s2);
  return p * ctx_invsqrtf_fast (1.0f + p * p);
}

/* Map x to an 8 bit level: ctx_erf2(x) + 0.5 is clamped to [0, 1] and scaled
 * to 0..255 (values above 1 give 255, values below 0 give 0). */
static inline uint8_t gaussian_approximation(float x)
{
  const float level = ctx_erf2 (x) + 0.5f;

  if (level > 1.0f)
    return 255;
  if (level < 0.0f)
    return 0;
  return level * 255.0f;
}

#ifndef CTX_RASTERIZER_SDF_SKIP
#define CTX_RASTERIZER_SDF_SKIP 1
#endif

/* Fill `coverage` for the current scanline with values derived from the
 * distance to the nearest shadow edge (rasterizer->shadow_edges).
 *
 * For every pair of neighbouring active edges the pixels between them are
 * evaluated with COMPUTE_SDF, using a positive sign when the fill state
 * (parity) is non-zero and a negative sign otherwise.  Afterwards the columns
 * to the left of the first span and to the right of the last span are
 * evaluated with a negative sign.
 *
 * @minx, @maxx: column range; spans are clamped to it
 * @coverage:    buffer whose element 0 corresponds to column @minx;
 *               values are assigned, not accumulated
 * @is_winding:  non-zero to use the winding rule (read by UPDATE_PARITY)
 * @blur:        scales the distance-to-coverage mapping through inv_blur
 *               and determines skip_len
 */
inline static void
ctx_rasterizer_generate_sdf (CtxRasterizer *rasterizer,
                                       const int      minx,
                                       const int      maxx,
                                       uint8_t       *coverage,
                                       const int      is_winding,
                                       float          blur)
{
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int *edges  = rasterizer->edges;
  int active_edges    = rasterizer->active_edges;
  int *shadow_edges  = rasterizer->shadow_edges;
  int shadow_active_edges    = rasterizer->shadow_active_edges;
  int scanline        = rasterizer->scanline;
  int parity        = 0;
  float inv_blur = 1.0/(blur * CTX_FULL_AA);
#if CTX_RASTERIZER_SDF_SKIP
  const int skip_len = blur / 2 + 1;
  // how far ahead we jump when looking for runs of identical coverage;
  // lets fully solid / fully blank stretches be filled without
  // evaluating every pixel
#endif
  /* from here on coverage is indexed with absolute column numbers */
  coverage -= minx;


  /* c0/c1 end up as the smallest first and largest last column of any span */
  int c0 = maxx;
  int c1 = minx;

  for (int t = 0; t < active_edges -1;t++)
    {
      CtxSegment   *segment = &entries[edges[t]];
      UPDATE_PARITY;

      CtxSegment   *next_segment = &entries[edges[t+1]];
      int x0        = segment->val;
      const int x1  = next_segment->val;

      int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
      int first     = graystart >> 8;
      int last      = grayend   >> 8;

      if (first < minx)
        first = minx;
      if (last > maxx)
        last = maxx;

      if (first <= last)
      {
        /* horizontal sample position in 1/15 pixel units; advanced by 15
         * per pixel below */
        int u = x0 * 15 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);

/* COMPUTE_SDF picks up `sign`, `entries`, `shadow_edges`,
 * `shadow_active_edges`, `blur` and `inv_blur` from the enclosing scope.
 * It stays defined after this function. */
#define COMPUTE_SDF(u,v) \
        (gaussian_approximation(ctx_sdf_f(entries,(u),(v), sign, shadow_active_edges, blur, shadow_edges) * inv_blur))

        int i;
#if CTX_RASTERIZER_SDF_SKIP
        int prev = -1;
#endif
        /* positive distances where the fill state is non-zero */
        float sign = parity?1.0f:-1.0f;
        for (i = first; i <= last; i++)
        {
          coverage[i] = COMPUTE_SDF(u,scanline);

#if CTX_RASTERIZER_SDF_SKIP
          /* two identical saturated values in a row: probe skip_len and
           * skip_len/2 pixels ahead and, if they match too, fill the pixels
           * in between without evaluating them */
          if ((prev == coverage[i]) & ((prev == 0)|(prev==255)))
          {
            if (last-i > skip_len
                && COMPUTE_SDF(u+15*skip_len, scanline) == prev
                && COMPUTE_SDF(u+15*skip_len/2, scanline) == prev)
            {
              for (int j = 1; j < skip_len; j++)
                coverage[i+j] = prev;
              u += 15 * skip_len;
              i += (skip_len-1);
              continue;
            }
          }
          prev = coverage[i];
#endif
          u += 15;
        }
      }
      c0 = ctx_mini (c0, first);
      c1 = ctx_maxi (c1, last);
   }

  /* everything outside the spans is evaluated with a negative sign */
  float sign = -1.0f;
   
  {
     int i = minx;

#if CTX_RASTERIZER_SDF_SKIP
  int prev = -1;
#endif
  /* columns left of the first span */
  for (; i < c0; i++)
  {
     coverage[i] = COMPUTE_SDF(i*15, scanline);
#if CTX_RASTERIZER_SDF_SKIP
     /* NOTE(review): unlike the span loop above, this skip only compares the
      * look-ahead sample with prev, not with coverage[i] - confirm intended */
     if (c0-i > skip_len &&
         COMPUTE_SDF((i+skip_len)*15, scanline) == prev)
     {
        for (int j = 1; j < skip_len; j++)
          coverage[i+j] = prev;
        i += (skip_len-1);
        continue;
     }
     prev = coverage[i];
#endif
  }
#if CTX_RASTERIZER_SDF_SKIP
  prev = -1;
#endif
  /* columns right of the last span.
   * NOTE(review): the loop stops before maxx, while spans above may write
   * coverage[maxx] - confirm whether column maxx is deliberately left out */
  for (int i = c1+1; i < maxx; i++)
  {
     coverage[i] = COMPUTE_SDF(i*15, scanline);
#if CTX_RASTERIZER_SDF_SKIP
     if (maxx-i > skip_len && COMPUTE_SDF((i+skip_len)*15, scanline) == prev)
     {
        for (int j = 1; j < skip_len; j++)
          coverage[i+j] = prev;
        i += (skip_len-1);
        continue;
     }
     prev = coverage[i];
#endif
  }
  }
}


/* Generate coverage for the current scanline, rendering edges flagged with
 * a non-zero `aa` field as horizontal coverage ramps.
 *
 * For every pair of neighbouring active edges with a non-zero fill state:
 *  - an edge with aa == 0 contributes a single partial pixel,
 *  - an edge with aa != 0 contributes a linear ramp over the columns between
 *    x - delta * CTX_AA_HALFSTEP2 and x + delta * CTX_AA_HALFSTEP (clamped
 *    to the column range),
 *  - the pixels between the two edge treatments are set to 255.
 *
 * @minx, @maxx: inclusive column range
 * @coverage:    buffer whose element 0 corresponds to column @minx
 * @is_winding:  non-zero to use the winding rule (read by UPDATE_PARITY)
 * @c0_ret, @c1_ret: out, lowest and highest column recorded while writing
 */
inline static void
ctx_rasterizer_generate_coverage_grads (CtxRasterizer *rasterizer,
                                            const int      minx,
                                            const int      maxx,
                                            uint8_t       *coverage,
                                            const int      is_winding,
                                            int           *c0_ret,
                                            int           *c1_ret)
{
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);
  int *edges  = rasterizer->edges;
  int scanline        = rasterizer->scanline;
  int active_edges    = rasterizer->active_edges;
  int parity        = 0;

  /* from here on coverage is indexed with absolute column numbers */
  coverage -= minx;

  /* column limits expressed in edge (segment->val) units */
  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;
  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;

  int c0 = maxx;
  int c1 = minx;

  for (int t = 0; t < active_edges -1;t++)
    {
      CtxSegment   *segment = &entries[edges[t]];
      UPDATE_PARITY;

       if (parity)
        {
          CtxSegment   *next_segment = &entries[edges[t+1]];
          const int x0        = segment->val;
          const int x1        = next_segment->val;

          /* positions in 1/256 pixel units: column in the high bits,
           * sub-pixel fraction in the low 8 bits */
          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256);
          int first     = graystart >> 8;
          int last      = grayend   >> 8;

          if (first < minx)
          { 
            first = minx;
            graystart=0;
          }
          if (last > maxx)
          {
            last = maxx;
            grayend=255;
          }
          /* coverage of the first pixel is the part right of the edge
           * (255 - fraction); of the last pixel the part left of the edge */
          graystart = (graystart&0xff) ^ 255;
          grayend   = (grayend & 0xff);

          if (first < last)
          {
            /* pre/post: number of pixels at the start/end of the span that
             * are handled by the edge code and excluded from the solid fill */
            int pre = 1;
            int post = 1;

            if (segment->aa == 0)
            {
              coverage[first] += graystart;
              c0 = ctx_mini(first, c0);
            }
            else
            {
              /* left edge rendered as a rising ramp */
              const int delta0    = segment->delta;
              int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2;
              int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP;
              unsigned int u0x0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));
              unsigned int u1x0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));

              int us = u0x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);

              int mod = ((u0x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *
                         (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);
              int sum = ((u1x0-u0x0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);

              /* `a` is the ramp value scaled by 65536 (see the >>16 below);
               * it grows by `recip` for every pixel */
              int recip = (65535)/sum;
              int a = mod * recip;
              recip *= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV;
              c0 = ctx_mini(us, c0);
              for (unsigned int u = u0x0; u < u1x0; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
              {
                coverage[us ++] += a>>16;
                a += recip;
              }
              pre = (us-1)-first+1;
            }
  
            if (next_segment->aa == 0)
            {
               coverage[last] += grayend;
               c1 = ctx_maxi(last, c1);
            }
            else
            {
              /* right edge rendered as a falling ramp */
              const int delta1    = next_segment->delta;
              int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2;
              int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP;
              unsigned int u0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));
              unsigned int u1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));

              int us = u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);
              int mod = ((((u0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255)) *
                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));
              int sum = ((u1-u0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);
              int recip = (65535) / sum;
              /* starts from full coverage (255 scaled by 65536) and shrinks
               * by `recip` for every pixel */
              int a = (65536 * 255) - mod * recip;
              recip *= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV;
              post = last-us;
              for (unsigned int u = u0; u < u1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)
              {
                coverage[us ++] += (a>>16);
                a -= recip;
              }
              c1 = ctx_maxi(us, c1);
            }
            /* fully covered interior between the two edge treatments;
             * assigned rather than accumulated */
            last-=post;
            for (int i = first + pre; i <= last; i++)
              coverage[i] = 255;
          }
          else if (first == last)
          {
            /* both edges fall inside the same pixel */
            coverage[last]+=(graystart-(grayend^255));
            c0 = ctx_mini(first, c0);
            c1 = ctx_maxi(last, c1);
          }
        }
   }

  *c0_ret = c0;
  *c1_ret = c1;
}

#define CTX_RASTERIZER_MAX_EMPTIES  16
#define CTX_RASTERIZER_MAX_SOLID    16

/* Generate coverage for the current scanline (with edge ramps, as in
 * ctx_rasterizer_generate_coverage_grads) and composite it directly.
 *
 * The function body is assembled from three macros that are also reused by
 * the specialised ctx_rasterizer_apply_grads_* variants:
 *
 *   CTX_APPLY_GRAD_A  declares the shared locals.
 *   CTX_APPLY_GRAD_B  opens the loop over edge pairs and stops inside the
 *                     branch taken when the solid interior of a span is
 *                     wider than CTX_RASTERIZER_MAX_SOLID * solid_factor,
 *                     after pending coverage has been flushed.
 *   (code between B and C renders the solid run: `width` pixels starting at
 *    column `first + pre`)
 *   CTX_APPLY_GRAD_C  handles short interiors through the coverage buffer,
 *                     the right edge, closes the loop and flushes what is
 *                     left in the coverage buffer.
 *
 * cov_min/cov_max track the range of coverage entries written but not yet
 * passed to apply_coverage.
 *
 * @minx, @maxx:     inclusive column range
 * @coverage:        buffer whose element 0 corresponds to column @minx
 * @is_winding:      non-zero to use the winding rule (read by UPDATE_PARITY)
 * @apply_coverage:  compositing callback, used both for flushed coverage and,
 *                   in this generic variant, for solid runs
 */
inline static void
ctx_rasterizer_apply_grads_generic (CtxRasterizer *rasterizer,
                                                     const int      minx,
                                                     const int      maxx,
                                                     uint8_t       *coverage,
                                                     const int      is_winding,
                                                     ctx_apply_coverage_fun apply_coverage)
{
/* shared local declarations; note active_edges here is the number of edge
 * pairs (rasterizer->active_edges - 1) */
#define CTX_APPLY_GRAD_A \
  CtxSegment *entries = (CtxSegment*)(&rasterizer->edge_list.entries[0]);\
  int *edges  = rasterizer->edges;\
  uint8_t *rasterizer_src = rasterizer->color;\
  int scanline        = rasterizer->scanline;\
  unsigned int active_edges    = rasterizer->active_edges - 1;\
  int parity        = 0;\
\
  uint8_t *dst = ( (uint8_t *) rasterizer->buf) +\
         (rasterizer->blit_stride * (scanline / CTX_FULL_AA));\
  coverage -= minx;\
\
  const int minx_ = minx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;\
  const int maxx_ = maxx * CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV;\
\
  int cov_min = maxx;\
  int cov_max = minx;\
  const int  bpp      = rasterizer->format->bpp;
  CTX_APPLY_GRAD_A

/* empty_factor scales the gap (CTX_RASTERIZER_MAX_EMPTIES) after which
 * pending coverage is flushed before starting a new span; solid_factor
 * scales the interior width (CTX_RASTERIZER_MAX_SOLID) above which the
 * interior is rendered directly instead of through the coverage buffer */
#define CTX_APPLY_GRAD_B(empty_factor, solid_factor) \
  for (unsigned int t = 0; t < active_edges;t++) \
    { \
      CtxSegment   *segment = &entries[edges[t]]; \
      UPDATE_PARITY; \
\
       if (parity)\
        {\
          CtxSegment   *next_segment = &entries[edges[t+1]]; \
          const int x0        = segment->val; \
          const int x1        = next_segment->val;\
\
          int graystart = x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256); \
          int grayend   = x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256); \
          int first     = graystart >> 8; \
          int last      = grayend   >> 8; \
 \
          if (CTX_UNLIKELY (first < minx)) \
          {  \
            first = minx; \
            graystart=0; \
          } \
          if (CTX_UNLIKELY (last > maxx)) \
          { \
            last = maxx; \
            grayend=255; \
          } \
          graystart = (graystart&0xff) ^ 255; \
          grayend   = (grayend & 0xff); \
\
          if (first < last)\
          {\
            const int delta1 = next_segment->delta; \
            int x1_start = x1 - delta1 * CTX_AA_HALFSTEP2; \
            int x1_end   = x1 + delta1 * CTX_AA_HALFSTEP; \
            unsigned int u0x1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x1_start, x1_end)));\
\
            unsigned int pre = 1;\
            unsigned int post = 1;\
\
            if (first - cov_max > CTX_RASTERIZER_MAX_EMPTIES * empty_factor)\
            {\
                 if (cov_max>=cov_min)\
                 {\
                   apply_coverage (cov_max-cov_min+1, &dst[((cov_min) * bpp)/8], rasterizer_src,\
                                   &coverage[cov_min], rasterizer, cov_min);\
                   cov_min = maxx;\
                   cov_max = minx;\
                 }\
            }\
\
            if (segment->aa == 0)\
            {\
              coverage[first] += graystart;\
              cov_min = ctx_mini (cov_min, first);\
              cov_max = ctx_maxi (cov_max, first);\
            }\
            else\
            {\
              const int delta0    = segment->delta; \
              int x0_start = x0 - delta0 * CTX_AA_HALFSTEP2; \
              int x0_end   = x0 + delta0 * CTX_AA_HALFSTEP; \
              unsigned int u0x0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_mini (x0_start, x0_end)));\
              unsigned int u1x0 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x0_start, x0_end)));\
\
              int us = u0x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);\
              int mod = ((u0x0 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255) *\
                         (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255);\
              int sum = ((u1x0-u0x0+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);\
\
              int recip = (65535)/sum;\
              int a = mod * recip;\
              recip *= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV;\
\
              cov_min = ctx_mini (cov_min, us);\
              for (unsigned int u = u0x0; u < u1x0; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)\
              {\
                coverage[us ++] += a>>16;\
                a += recip;\
              }\
              cov_max = us;\
\
              pre = (us-1)-first+1;\
            }\
            if (next_segment->aa != 0) \
            { \
              post = last - u0x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV); \
            }\
            {\
               int width = (last-post) - (first+pre) + 1;\
               if (width > CTX_RASTERIZER_MAX_SOLID * solid_factor)\
               {\
                 if (cov_max>=cov_min)\
                 {\
                   apply_coverage (cov_max-cov_min+1, &dst[((cov_min) * bpp)/8], rasterizer_src,\
                                   &coverage[cov_min], rasterizer, cov_min);\
                   cov_min = maxx;\
                   cov_max = minx;\
                 }
  CTX_APPLY_GRAD_B(1, 1)
                       /* generic solid run: composite `width` pixels with a
                        * scratch coverage buffer holding 255 everywhere */
                       {
#if static_OPAQUE
                       uint8_t *opaque = &rasterizer->opaque[0];
#else
                       uint8_t opaque[width];
                       memset (opaque, 255, sizeof (opaque));
#endif
                       apply_coverage (width,
                                   &dst[((first + pre) * bpp)/8],
                                   rasterizer_src,
                                   opaque,
                                   rasterizer,
                                   first + pre);
                       }
/* continues exactly where CTX_APPLY_GRAD_B stopped: closes the solid-run
 * branch, then handles short interiors, the right edge and the final flush */
#define CTX_APPLY_GRAD_C \
                 }\
               else\
               {\
                 for (int i = 0; i < width; i++)\
                   coverage[first + pre + i] = 255;\
                 cov_min = ctx_mini (cov_min, first + pre);\
                 cov_max = first + pre + width;\
               }\
            }\
  \
            if (next_segment->aa == 0)\
            {\
               coverage[last] += grayend;\
               cov_min = ctx_mini (cov_min, last);\
               cov_max = last;\
            }\
            else\
            {\
              unsigned int u1x1 = ctx_mini (maxx_, ctx_maxi (minx_, ctx_maxi (x1_start, x1_end)));\
              int us = u0x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV);\
              int mod = ((((u0x1 / (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/256) % 256)^255)) *\
                    (CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV/255));\
              int sum = ((u1x1-u0x1+CTX_RASTERIZER_EDGE_MULTIPLIER * CTX_SUBDIV)/255);\
              int recip = (65535) / sum;\
              int a = (65536 * 255) - mod * recip;\
              recip *= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV;\
\
              cov_min = ctx_mini (cov_min, us);\
              for (unsigned int u = u0x1; u < u1x1; u+= CTX_RASTERIZER_EDGE_MULTIPLIER*CTX_SUBDIV)\
              {\
                coverage[us ++] += (a>>16);\
                a -= recip;\
              }\
              cov_max = us;\
            }\
          }\
          else if (first == last)\
          {\
            coverage[last]+=(graystart-(grayend^255)); \
            cov_min = ctx_mini (cov_min, first); \
            cov_max = last;\
          }\
        }\
   }\
  if (cov_max>=cov_min)\
     apply_coverage (cov_max-cov_min+1, &dst[(cov_min*bpp)/8], rasterizer_src, \
                     &coverage[cov_min], rasterizer, cov_min);
  CTX_APPLY_GRAD_C
}

/* Variant of the CTX_APPLY_GRAD_* scanline walker for 4-byte pixels where
 * long solid runs are written by storing one precomputed 32 bit pixel
 * (the first word of rasterizer->color) with ctx_span_set_color(). */
inline static void
ctx_rasterizer_apply_grads_RGBA8_copy_normal_color (CtxRasterizer *rasterizer,
                                                                     const int      minx,
                                                                     const int      maxx,
                                                                     uint8_t       *coverage,
                                                                     const int      is_winding,
                                                                     ctx_apply_coverage_fun apply_coverage)
{
  CTX_APPLY_GRAD_A
  const uint32_t solid_pixel = *((uint32_t*) rasterizer_src);
  CTX_APPLY_GRAD_B(1, 1)
  {
    /* solid run: width pixels starting at column first + pre */
    uint32_t *run_start = (uint32_t*) (dst + (first + pre) * 4);
    ctx_span_set_color (run_start, solid_pixel, width);
  }
  CTX_APPLY_GRAD_C
}

/* Variant of the CTX_APPLY_GRAD_* scanline walker for 4-byte pixels where
 * long solid runs are blended per pixel with ctx_over_RGBA8_full_2(), using
 * source components read from words 1, 3 and 4 of rasterizer->color. */
inline static void
ctx_rasterizer_apply_grads_RGBA8_over_normal_color (CtxRasterizer *rasterizer,
                                                                     const int      minx,
                                                                     const int      maxx,
                                                                     uint8_t       *coverage,
                                                                     const int      is_winding,
                                                                     ctx_apply_coverage_fun apply_coverage)
{
  CTX_APPLY_GRAD_A
  const uint32_t *src_words  = (uint32_t*) rasterizer_src;
  const uint32_t  si_ga      = src_words[1];
  const uint32_t  si_ga_full = src_words[3];
  const uint32_t  si_rb_full = src_words[4];
  const uint32_t  si_a       = si_ga >> 16;
  CTX_APPLY_GRAD_B(1, 1)
  {
    /* solid run: width pixels starting at column first + pre */
    uint32_t *px = (uint32_t*)(&dst[(first+pre) *4]);
    for (unsigned int remaining = width; remaining; remaining--, px++)
      *px = ctx_over_RGBA8_full_2(*px, si_ga_full, si_rb_full, si_a);
  }
  CTX_APPLY_GRAD_C
}

/* Variant of the CTX_APPLY_GRAD_* scanline walker for any whole-byte pixel
 * size where long solid runs are written by repeating the bytes of
 * rasterizer->color_native.  Dedicated store loops exist for pixel sizes of
 * 1, 2, 3, 4 and 16 bytes; any other size is copied byte by byte. */
inline static void
ctx_rasterizer_apply_grads_copy_normal_color (CtxRasterizer *rasterizer,
                                                                     const int      minx,
                                                                     const int      maxx,
                                                                     uint8_t       *coverage,
                                                                     const int      is_winding,
                                                                     const CtxCovPath comp,
                                                                     ctx_apply_coverage_fun apply_coverage)
{
  CTX_APPLY_GRAD_A
  unsigned int bytes = bpp/8;

  CTX_APPLY_GRAD_B(1, 1)
  {
    /* solid run: width pixels starting at column first + pre */
    uint8_t *out    = &dst[(first+pre) * bytes];
    uint8_t *native = (uint8_t*) &rasterizer->color_native;

    switch (bytes)
    {
      case 1:
      {
        const uint8_t value = native[0];
        for (int n = 0; n < width; n++)
          out[n] = value;
        break;
      }
      case 2:
      {
        const uint16_t value = ((uint16_t*) native)[0];
        uint16_t *out16 = (uint16_t*) out;
        for (int n = 0; n < width; n++)
          out16[n] = value;
        break;
      }
      case 3:
      {
        const uint8_t c0 = native[0];
        const uint8_t c1 = native[1];
        const uint8_t c2 = native[2];
        for (int n = 0; n < width; n++, out += 3)
        {
          out[0] = c0;
          out[1] = c1;
          out[2] = c2;
        }
        break;
      }
      case 4:
        ctx_span_set_color ((uint32_t*) out, ((uint32_t*) native)[0], width);
        break;
      case 16:
        ctx_span_set_color_x4 ((uint32_t*) out, (uint32_t*) native, width);
        break;
      default:
        for (int n = 0; n < width; n++)
          for (unsigned int k = 0; k < bytes; k++)
            *out++ = native[k];
        break;
    }
  }
  CTX_APPLY_GRAD_C
}

/* Variant of the CTX_APPLY_GRAD_* scanline walker for 4-byte pixels where
 * long solid runs are produced by calling rasterizer->fragment directly into
 * the destination, with start values and per-pixel increments obtained from
 * ctx_init_uv(). */
inline static void
ctx_rasterizer_apply_grads_RGBA8_copy_fragment (CtxRasterizer *rasterizer,
                                                                 const int      minx,
                                                                 const int      maxx,
                                                                 uint8_t       *coverage,
                                                                 const int      is_winding,
                                                                 ctx_apply_coverage_fun apply_coverage)
{
  CTX_APPLY_GRAD_A
  CTX_APPLY_GRAD_B(1, 1)
  {
    /* defaults in case ctx_init_uv leaves a value untouched */
    float start_u = 0, start_v = 0, start_w = 1;
    float step_u  = 0, step_v  = 0, step_w  = 0;

    ctx_init_uv (rasterizer, first+pre, scanline/CTX_FULL_AA,
                 &start_u, &start_v, &start_w, &step_u, &step_v, &step_w);
    rasterizer->fragment (rasterizer, start_u, start_v, start_w,
                          &dst[(first+pre)*4], width,
                          step_u, step_v, step_w);
  }
  CTX_APPLY_GRAD_C
}

/* Variant of the CTX_APPLY_GRAD_* scanline walker for 4-byte pixels where
 * long solid runs are handed to the full-coverage source-over fragment
 * compositor (ctx_RGBA8_source_over_normal_full_cov_fragment). */
inline static void
ctx_rasterizer_apply_grads_RGBA8_over_fragment (CtxRasterizer *rasterizer,
                                                                 const int      minx,
                                                                 const int      maxx,
                                                                 uint8_t       *coverage,
                                                                 const int      is_winding,
                                                                 ctx_apply_coverage_fun apply_coverage)
{
  CTX_APPLY_GRAD_A
  CTX_APPLY_GRAD_B(1, 1)
  {
    /* solid run: width pixels starting at column first + pre */
    uint8_t *run_start = &dst[(first+pre)*4];
    CTX_SIMD_SUFFIX(ctx_RGBA8_source_over_normal_full_cov_fragment) (
        width, run_start, NULL, NULL, rasterizer, first + pre, 1);
  }
  CTX_APPLY_GRAD_C
}

#undef CTX_APPLY_GRAD_A
#undef CTX_APPLY_GRAD_B
#undef CTX_APPLY_GRAD_C


/* Dispatch to the apply_grads implementation matching @comp.
 *
 * Does nothing when fewer than two edges are active.  The specialised
 * variants are only selectable when CTX_RASTERIZER_SWITCH_DISPATCH is
 * enabled; every other value of @comp uses the generic implementation.
 */
inline static void
ctx_rasterizer_apply_grads (CtxRasterizer *rasterizer,
                                             const int      minx,
                                             const int      maxx,
                                             uint8_t       *coverage,
                                             const int      is_winding,
                                             const CtxCovPath comp,
                                             ctx_apply_coverage_fun apply_coverage)
{
  if (rasterizer->active_edges < 2)
  {
    return;
  }

  switch (comp)
  {
#if CTX_RASTERIZER_SWITCH_DISPATCH
    case CTX_COV_PATH_RGBA8_OVER:
      ctx_rasterizer_apply_grads_RGBA8_over_normal_color (rasterizer, minx, maxx, coverage, is_winding, apply_coverage);
      return;

    case CTX_COV_PATH_RGBA8_COPY:
      ctx_rasterizer_apply_grads_RGBA8_copy_normal_color (rasterizer, minx, maxx, coverage, is_winding, apply_coverage);
      return;

    /* plain colour copies for all remaining pixel formats */
    case CTX_COV_PATH_RGB565_COPY:
    case CTX_COV_PATH_RGBAF_COPY:
    case CTX_COV_PATH_RGB332_COPY:
    case CTX_COV_PATH_GRAY8_COPY:
    case CTX_COV_PATH_RGB8_COPY:
    case CTX_COV_PATH_GRAYA8_COPY:
    case CTX_COV_PATH_GRAYAF_COPY:
    case CTX_COV_PATH_CMYKAF_COPY:
    case CTX_COV_PATH_CMYK8_COPY:
    case CTX_COV_PATH_CMYKA8_COPY:
      ctx_rasterizer_apply_grads_copy_normal_color (rasterizer, minx, maxx, coverage, is_winding, comp, apply_coverage);
      return;

    case CTX_COV_PATH_RGBA8_COPY_FRAGMENT:
      ctx_rasterizer_apply_grads_RGBA8_copy_fragment (rasterizer, minx, maxx, coverage, is_winding, apply_coverage);
      return;

    case CTX_COV_PATH_RGBA8_OVER_FRAGMENT:
      ctx_rasterizer_apply_grads_RGBA8_over_fragment (rasterizer, minx, maxx, coverage, is_winding, apply_coverage);
      return;
#endif
    default:
      break;
  }

  ctx_rasterizer_apply_grads_generic (rasterizer, minx, maxx, coverage, is_winding, apply_coverage);
}

/* Rewind the scan state: scanline and shadow_edge_pos (and edge_pos when
 * CTX_SCANBIN is 0) are set to 0.  The edge list itself is not touched. */
static inline void
ctx_rasterizer_reset_soft (CtxRasterizer *rasterizer)
{
  rasterizer->scanline        = 0;
  rasterizer->shadow_edge_pos = 0;
#if CTX_SCANBIN==0
  rasterizer->edge_pos        = 0;
#endif
  /* comp_op is deliberately not cleared here, so that it stays cached
   * between rasterizations whose rendering attributes do not change */
}


/* Reset the rasterizer so that a new set of edges can be added.
 *
 * Rewinds the scan state, empties the edge list, forgets the first edge and
 * the previous point flag.  Unless rasterizer->preserve is set, the recorded
 * extents (scan_min/scan_max, col_min/col_max) are reset to an empty range.
 */
static inline void
_ctx_rasterizer_reset (CtxRasterizer *rasterizer)
{
  ctx_rasterizer_reset_soft (rasterizer);

  rasterizer->first_edge      = -1;

  rasterizer->scanline        = 0;
  rasterizer->shadow_edge_pos = 0;
#if CTX_SCANBIN==0
  rasterizer->edge_pos        = 0;
#endif
  rasterizer->edge_list.count = 0;  /* ready for new edges */
  rasterizer->has_prev        = 0;

  if (CTX_LIKELY(!rasterizer->preserve))
  {
    /* min above max marks the extents as empty */
    rasterizer->col_min  = 50000000;
    rasterizer->scan_min = rasterizer->col_min;
    rasterizer->col_max  = -50000000;
    rasterizer->scan_max = rasterizer->col_max;
  }
  /* comp_op is deliberately not cleared here, so that it stays cached
   * between rasterizations whose rendering attributes do not change */
}

#if CTX_SCANBIN==0
static CTX_INLINE int ctx_compare_edge (const void *ap, int by0)
{
  return ((const CtxSegment *) ap)->y0 - by0;
}

/* Partition A[low..high] around the y0 of its middle element.
 *
 * Two cursors move towards each other from both ends, swapping elements
 * that sit on the wrong side of the pivot.  Returns the position of the
 * left cursor once the cursors have crossed.
 */
static CTX_INLINE int ctx_edge_qsort_partition (CtxSegment *A, int low, int high)
{
  const int pivot_y0 = A[ (high+low) /2].y0;
  int left  = low;
  int right = high;

  while (left <= right)
    {
      /* skip elements that are already on the correct side */
      for (; ctx_compare_edge (&A[left], pivot_y0) < 0; left++)
        { }
      for (; ctx_compare_edge (&A[right], pivot_y0) > 0; right--)
        { }

      if (left > right)
        break;

      CtxSegment held = A[left];
      A[left]  = A[right];
      A[right] = held;
      left++;
      right--;
    }
  return left;
}

/* Quicksort entries[low..high] by ascending y0.  The left partition is
 * sorted by recursion, the right partition by continuing the loop. */
static void ctx_edge_qsortb (CtxSegment *entries, int low, int high)
{
  for (;;)
    {
      const int split = ctx_edge_qsort_partition (entries, low, high);

      if (split - 1 > low)
        ctx_edge_qsortb (entries, low, split - 1);
      if (high <= low)
        return;

      low = split;
    }
}

/* Inlinable entry point of the edge quicksort: sorts entries[low..high] by
 * ascending y0, recursing into ctx_edge_qsortb() for left partitions and
 * looping over right partitions. */
static CTX_INLINE void ctx_edge_qsort (CtxSegment *entries, int low, int high)
{
  for (;;)
    {
      const int split = ctx_edge_qsort_partition (entries, low, high);

      if (split - 1 > low)
        ctx_edge_qsortb (entries, low, split - 1);
      if (high <= low)
        return;

      low = split;
    }
}

/* Sort every segment in the rasterizer's edge list by ascending y0. */
static CTX_INLINE void ctx_sort_edges (CtxRasterizer *rasterizer)
{
  CtxSegment *segments   = (CtxSegment*)& (rasterizer->edge_list.entries[0]);
  const int   last_index = rasterizer->edge_list.count-1;

  ctx_edge_qsort (segments, 0, last_index);
}
#endif


/* Rasterizes the edges currently held in rasterizer->edge_list into
 * rasterizer->buf, one pixel row at a time.
 *
 * @rasterizer:   rasterizer holding the edge list, target buffer and state
 * @fill_rule:    compared against CTX_FILL_RULE_WINDING to select winding
 *                versus the other fill rule
 * @allow_direct: when nonzero, rows taking the aa level 0/1 path are handed
 *                to ctx_rasterizer_apply_grads together with the compositing
 *                callback, instead of going through the coverage buffer and
 *                the shared apply_coverage call at the end of the loop body
 *
 * Vertical positions (scan_start, scan_end, rasterizer->scanline) are kept
 * in units of 1/CTX_FULL_AA pixel rows; horizontal extents come from
 * col_min/col_max divided by CTX_SUBDIV.
 */
static void
ctx_rasterizer_rasterize_edges2 (CtxRasterizer *rasterizer, const int fill_rule, const int allow_direct)
{
  rasterizer->pending_edges   =   
  rasterizer->active_edges    =   0;
  CtxGState     *gstate     = &rasterizer->state->gstate;
  const int      is_winding  = fill_rule == CTX_FILL_RULE_WINDING;
  const CtxCovPath comp = rasterizer->comp;
  uint8_t  *dst         = ((uint8_t *) rasterizer->buf);
  int       scan_start  = rasterizer->blit_y * CTX_FULL_AA;
  int       scan_end    = scan_start + (rasterizer->blit_height - 1) * CTX_FULL_AA;
  const int blit_width  = rasterizer->blit_width;
  const int blit_max_x  = rasterizer->blit_x + blit_width;
  int       minx        = rasterizer->col_min / CTX_SUBDIV - rasterizer->blit_x;
  int       maxx        = (rasterizer->col_max + CTX_SUBDIV-1) / CTX_SUBDIV -
                          rasterizer->blit_x;
  const int bpp = rasterizer->format->bpp;
  const int blit_stride = rasterizer->blit_stride;

  uint8_t *rasterizer_src = rasterizer->color;

  /* clamp the horizontal span to the blit rectangle and the clip rectangle */
  if (maxx > blit_max_x - 1)
    { maxx = blit_max_x - 1; }

  minx = ctx_maxi (gstate->clip_min_x, minx);
  maxx = ctx_mini (gstate->clip_max_x, maxx);
  /* branch-free clamp: a negative minx becomes 0 */
  minx *= (minx>0);
 
  int pixs = maxx - minx + 1;
  if (pixs <= 0)
  {
    //assert(0);
    //
    // sometimes reached by stroking code
    return;
  }
  /* per-row coverage buffer, one byte per pixel in [minx,maxx] plus padding */
  uint8_t _coverage[pixs + 32]; // XXX this might hide some valid asan warnings
  uint8_t *coverage = &_coverage[0];
  ctx_apply_coverage_fun apply_coverage = rasterizer->apply_coverage;

  /* snap scan_min to a multiple of CTX_FULL_AA (truncating toward zero),
   * then narrow the vertical range to the extent of the path, advancing
   * dst by the number of whole rows skipped */
  rasterizer->scan_min -= (rasterizer->scan_min % CTX_FULL_AA);
  {
     if (rasterizer->scan_min > scan_start)
       {
          dst += (blit_stride * (rasterizer->scan_min-scan_start) / CTX_FULL_AA);
          scan_start = rasterizer->scan_min;
       }
      scan_end = ctx_mini (rasterizer->scan_max, scan_end);
  }

  /* narrow the vertical range further to the clip rectangle */
  if (CTX_UNLIKELY(gstate->clip_min_y * CTX_FULL_AA > scan_start ))
    { 
       dst += (blit_stride * (gstate->clip_min_y * CTX_FULL_AA -scan_start) / CTX_FULL_AA);
       scan_start = gstate->clip_min_y * CTX_FULL_AA; 
    }
  scan_end = ctx_mini (gstate->clip_max_y * CTX_FULL_AA, scan_end);
  if (CTX_UNLIKELY((minx >= maxx) | (scan_start > scan_end) |
      (scan_start > (rasterizer->blit_y + (rasterizer->blit_height-1)) * CTX_FULL_AA) |
      (scan_end < (rasterizer->blit_y) * CTX_FULL_AA)))
  { 
    /* not affecting this rasterizers scanlines */
    return;
  }
  rasterizer->scan_aa[1]=
  rasterizer->scan_aa[2]=
  rasterizer->scan_aa[3]=0;

#if CTX_SCANBIN
  /* bin edge indices by the pixel row derived from their y0, restricted to
   * the rows [ss, se) */
  int ss = scan_start/CTX_FULL_AA;
  int se = scan_end/CTX_FULL_AA;
  if (ss < 0)ss =0;
  if (se >= CTX_MAX_SCANLINES) se = CTX_MAX_SCANLINES-1;

  for (int i = ss; i < se; i++)
    rasterizer->scan_bin_count[i]=0;

  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
  {
    CtxSegment *segment = & ((CtxSegment*)rasterizer->edge_list.entries)[i];
    int scan = (segment->y0-CTX_FULL_AA+2) / CTX_FULL_AA;
    if (scan < ss) scan = ss;
    if (scan < se)
      rasterizer->scan_bins[scan][rasterizer->scan_bin_count[scan]++]=i;
  }
#else
  ctx_sort_edges (rasterizer);
#endif

  rasterizer->scanline = scan_start;

  /* one iteration per pixel row; every path through the body advances
   * rasterizer->scanline by CTX_FULL_AA in total and dst by one stride */
  while (rasterizer->scanline <= scan_end)
    {
      /* c0..c1 is the pixel span that will be composited for this row */
      int c0 = minx;
      int c1 = maxx;
      /* the return value selects the anti-aliasing strategy for this row */
      int aa = ctx_rasterizer_feed_edges_full (rasterizer, 0, 0.0f);
      switch (aa)
      {
        case -1: /* no edges */
          rasterizer->scanline += CTX_FULL_AA;
          dst += blit_stride;
          continue;
        case 0: /* the scanline transitions does not contain multiple intersections - each aa segment is a linear ramp */
        case 1: /* level-1 aa is good enough - use same case for less iteration of edges */
        { 
          rasterizer->scanline += CTX_AA_HALFSTEP2;
          ctx_rasterizer_feed_pending_edges (rasterizer);
          ctx_rasterizer_sort_active_edges (rasterizer);
    
          memset (coverage, 0, pixs);
          if (allow_direct)
          {
            /* direct path: the row is finished here, the shared
             * post-process/apply_coverage step below is skipped */
            ctx_rasterizer_apply_grads (rasterizer, minx, maxx, coverage, is_winding, comp, apply_coverage);
            rasterizer->scanline += CTX_AA_HALFSTEP;
            ctx_rasterizer_increment_edges (rasterizer, CTX_FULL_AA);
    
            dst += blit_stride;
            continue;
          }
          ctx_rasterizer_generate_coverage_grads (rasterizer, minx, maxx, coverage, is_winding, &c0, &c1);
          rasterizer->scanline += CTX_AA_HALFSTEP;
          ctx_rasterizer_increment_edges (rasterizer, CTX_FULL_AA);
          break;
        }
#if 1
        /* The three cases below are the same code with raa fixed to a
         * compile-time constant (3, 5 or 15 sub-scanlines per row); the
         * generic version is kept in the disabled #else branch. */
        case 3:
        { /* level of oversampling based on lowest steepness edges */
          const int raa=3;
          ctx_rasterizer_increment_edges (rasterizer, -CTX_AA_HALFSTEP2);
          memset (coverage, 0, pixs);
          const int scanline_increment = 15/raa;
          const uint8_t fraction = 255/raa;

          /* start with an empty span; c0/c1 are passed by address to
           * ctx_rasterizer_generate_coverage */
          c0 = maxx;
          c1 = minx;
          for (int i = 1; i <= raa; i++)
          {
            ctx_rasterizer_sort_active_edges (rasterizer);
            ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, raa, fraction, &c0, &c1);
            rasterizer->scanline += scanline_increment;
            ctx_rasterizer_increment_edges (rasterizer, scanline_increment + CTX_AA_HALFSTEP2 * (i==raa));
            ctx_rasterizer_feed_pending_edges (rasterizer);
          }
        }
        break;
        case 5:
        { /* level of oversampling based on lowest steepness edges */
          const int raa=5;
          ctx_rasterizer_increment_edges (rasterizer, -CTX_AA_HALFSTEP2);
          memset (coverage, 0, pixs);
          const int scanline_increment = 15/raa;
          const uint8_t fraction = 255/raa;

          c0 = maxx;
          c1 = minx;
          for (int i = 1; i <= raa; i++)
          {
            ctx_rasterizer_sort_active_edges (rasterizer);
            ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, raa, fraction, &c0, &c1);
            rasterizer->scanline += scanline_increment;
            ctx_rasterizer_increment_edges (rasterizer, scanline_increment + CTX_AA_HALFSTEP2 * (i==raa));
            ctx_rasterizer_feed_pending_edges (rasterizer);
          }
        }
        break;
        case 15:
        { /* level of oversampling based on lowest steepness edges */
          const int raa=15;
          ctx_rasterizer_increment_edges (rasterizer, -CTX_AA_HALFSTEP2);
          memset (coverage, 0, pixs);
          const int scanline_increment = 15/raa;
          const uint8_t fraction = 255/raa;

          c0 = maxx;
          c1 = minx;
          for (int i = 1; i <= raa; i++)
          {
            ctx_rasterizer_sort_active_edges (rasterizer);
            ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, raa, fraction, &c0, &c1);
            rasterizer->scanline += scanline_increment;
            ctx_rasterizer_increment_edges (rasterizer, scanline_increment + CTX_AA_HALFSTEP2 * (i==raa));
            ctx_rasterizer_feed_pending_edges (rasterizer);
          }
        }
        break;
#else
        default:
        { /* level of oversampling based on lowest steepness edges */
          const int raa=aa;
          ctx_rasterizer_increment_edges (rasterizer, -CTX_AA_HALFSTEP2);
          memset (coverage, 0, pixs);
          const int scanline_increment = 15/raa;
          const uint8_t fraction = 255/raa;

          c0 = maxx;
          c1 = minx;
          for (int i = 1; i <= raa; i++)
          {
            ctx_rasterizer_sort_active_edges (rasterizer);
            ctx_rasterizer_generate_coverage (rasterizer, minx, maxx, coverage, is_winding, raa, fraction, &c0, &c1);
            rasterizer->scanline += scanline_increment;
            ctx_rasterizer_increment_edges (rasterizer, scanline_increment + CTX_AA_HALFSTEP2 * (i==raa));
            ctx_rasterizer_feed_pending_edges (rasterizer);
          }
        }
#endif
      }
  
      /* composite the touched span; the coverage pointer handed to the
       * post-process step is rebased so that it can be indexed by x */
      if (c1 >= c0)
      {
        ctx_coverage_post_process (rasterizer, c0, c1, coverage - minx, NULL, NULL);
        apply_coverage (c1-c0+1,
                        &dst[(c0 * bpp) /8],
                        rasterizer_src,
                        coverage + (c0-minx),
                        rasterizer, c0);
      }
      dst += blit_stride;

#if CONFIG_IDF_TARGET_ESP32C3
    if (rasterizer->scanline % (CTX_FULL_AA*10) == 0)
      taskYIELD();
#endif
    }

#if CTX_BLENDING_AND_COMPOSITING
  /* For these compositing modes, pixels outside the shape are also run
   * through apply_coverage, using an all-zero coverage row. */
  if (CTX_UNLIKELY((gstate->compositing_mode == CTX_COMPOSITE_SOURCE_OUT) |
      (gstate->compositing_mode == CTX_COMPOSITE_SOURCE_IN) |
      (gstate->compositing_mode == CTX_COMPOSITE_DESTINATION_IN) |
      (gstate->compositing_mode == CTX_COMPOSITE_DESTINATION_ATOP) |
      (gstate->compositing_mode == CTX_COMPOSITE_CLEAR)))
  {
     /* fill in the rest of the blitrect when compositing mode permits it */
     uint8_t nocoverage[rasterizer->blit_width];
     int gscan_start = gstate->clip_min_y * CTX_FULL_AA;
     //int gscan_end = gstate->clip_max_y * CTX_FULL_AA;
     memset (nocoverage, 0, sizeof(nocoverage));
     int startx   = gstate->clip_min_x;
     int endx     = gstate->clip_max_x;
     int clipw    = endx-startx + 1;
     uint8_t *dst = ( (uint8_t *) rasterizer->buf);

     /* rows between the top of the clip rectangle and the first rasterized row */
     dst = (uint8_t*)(rasterizer->buf) + blit_stride * (gscan_start / CTX_FULL_AA);
     for (rasterizer->scanline = gscan_start; rasterizer->scanline < scan_start;)
     {
       apply_coverage (clipw,
                       &dst[ (startx * rasterizer->format->bpp) /8],
                       rasterizer_src, nocoverage, rasterizer, 0);
       rasterizer->scanline += CTX_FULL_AA;
       dst += blit_stride;
     }

     /* disabled: region left of the shape; NOTE(review): this loop never
      * advances rasterizer->scanline and would not terminate if enabled */
     if (0)//(minx > startx) & (minx<maxx))
     {
     dst = (uint8_t*)(rasterizer->buf) + blit_stride * (scan_start / CTX_FULL_AA);
     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
     {
       apply_coverage (minx-startx,
                       &dst[ (startx * rasterizer->format->bpp) /8],
                       rasterizer_src,
                       nocoverage, rasterizer, 0);
       dst += blit_stride;
     }
     }

     /* region to the right of the shape, for the rasterized rows */
     if (endx > maxx)
     {
     dst = (uint8_t*)(rasterizer->buf) + blit_stride * (scan_start / CTX_FULL_AA);
     for (rasterizer->scanline = scan_start; rasterizer->scanline < scan_end;)
     {
       apply_coverage (endx-maxx,
                       &dst[ (maxx * rasterizer->format->bpp) /8],
                       rasterizer_src, nocoverage, rasterizer, 0);

       rasterizer->scanline += CTX_FULL_AA;
       dst += blit_stride;
     }
     }
#if 0
     dst = (uint8_t*)(rasterizer->buf) + blit_stride * (scan_end / CTX_FULL_AA);
     for (rasterizer->scanline = scan_end; rasterizer->scanline < gscan_end;)
     {
       apply_coverage (clipw-1,
                       &dst[ (startx * rasterizer->format->bpp) /8],
                       rasterizer_src,
                       nocoverage, rasterizer, 0);

       rasterizer->scanline += CTX_FULL_AA;
       dst += blit_stride;
     }
#endif
  }
#endif
}

#if CTX_ENABLE_SHADOW_BLUR
/* Shadow-blur variant of the edge rasterizer: every pixel row is rendered
 * through ctx_rasterizer_generate_sdf using rasterizer->feather as the blur
 * radius, and the full [minx,maxx] span is composited for every row.
 *
 * @rasterizer: rasterizer holding the edge list, target buffer and state
 * @fill_rule:  compared against CTX_FILL_RULE_WINDING to select the rule
 *
 * Vertical positions are kept in units of 1/CTX_FULL_AA pixel rows.
 */
static void
ctx_rasterizer_rasterize_edges3 (CtxRasterizer *rasterizer, const int fill_rule)
{
  rasterizer->pending_edges   =
  rasterizer->active_edges    =   0;
  rasterizer->shadow_active_edges =   0;
  CtxGState *gstate     = &rasterizer->state->gstate;
  float blur_radius = rasterizer->feather;
  const int  is_winding = fill_rule == CTX_FILL_RULE_WINDING;
  uint8_t  *dst         = ((uint8_t *) rasterizer->buf);

  int       scan_start  = rasterizer->blit_y * CTX_FULL_AA;
  int       scan_end    = scan_start + (rasterizer->blit_height - 1) * CTX_FULL_AA;
  const int blit_width  = rasterizer->blit_width;
  const int blit_max_x  = rasterizer->blit_x + blit_width;
  int       minx        = rasterizer->col_min / CTX_SUBDIV - rasterizer->blit_x;
  int       maxx        = (rasterizer->col_max + CTX_SUBDIV-1) / CTX_SUBDIV -
                          rasterizer->blit_x;
  const int bpp = rasterizer->format->bpp;
  const int blit_stride = rasterizer->blit_stride;

  uint8_t *rasterizer_src = rasterizer->color;

  /* clamp the horizontal span to the blit rectangle and the clip rectangle */
  if (maxx > blit_max_x - 1)
    { maxx = blit_max_x - 1; }

  minx = ctx_maxi (gstate->clip_min_x, minx);
  maxx = ctx_mini (gstate->clip_max_x, maxx);
  /* branch-free clamp: a negative minx becomes 0 */
  minx *= (minx>0);

  /* snap scan_min to a multiple of CTX_FULL_AA (truncating toward zero);
   * done before the empty-span check so the early exit leaves the same
   * rasterizer state behind as the later "not affecting" exit did */
  rasterizer->scan_min -= (rasterizer->scan_min % CTX_FULL_AA);

  int pixs = maxx - minx + 1;
  if (pixs <= 0)
  {
    /* Empty span: nothing can be drawn (the minx >= maxx test further down
     * would return as well).  Leaving here keeps the variable length array
     * below from being declared with a zero or negative size, which is
     * undefined behaviour. */
    return;
  }
  /* per-row coverage buffer, one byte per pixel in [minx,maxx] plus padding */
  uint8_t _coverage[pixs+16]; // XXX this might hide some valid asan warnings
  uint8_t *coverage = &_coverage[0];
  ctx_apply_coverage_fun apply_coverage = rasterizer->apply_coverage;

  /* narrow the vertical range to the extent of the path, advancing dst by
   * the number of whole rows skipped */
  if (rasterizer->scan_min > scan_start)
    {
      dst += (blit_stride * (rasterizer->scan_min-scan_start) / CTX_FULL_AA);
      scan_start = rasterizer->scan_min;
    }
  scan_end = ctx_mini (rasterizer->scan_max, scan_end);

  /* narrow the vertical range further to the clip rectangle */
  if (CTX_UNLIKELY(gstate->clip_min_y * CTX_FULL_AA > scan_start ))
    {
       dst += (blit_stride * (gstate->clip_min_y * CTX_FULL_AA -scan_start) / CTX_FULL_AA);
       scan_start = gstate->clip_min_y * CTX_FULL_AA;
    }
  scan_end = ctx_mini (gstate->clip_max_y * CTX_FULL_AA, scan_end);
  if (CTX_UNLIKELY((minx >= maxx) | (scan_start > scan_end) |
      (scan_start > (rasterizer->blit_y + (rasterizer->blit_height-1)) * CTX_FULL_AA) |
      (scan_end < (rasterizer->blit_y) * CTX_FULL_AA)))
  {
    /* not affecting this rasterizers scanlines */
    return;
  }
  rasterizer->scan_aa[1]=
  rasterizer->scan_aa[2]=
  rasterizer->scan_aa[3]=0;

#if CTX_SCANBIN
  /* bin edge indices by the pixel row derived from their y0, restricted to
   * the rows [ss, se) */
  int ss = scan_start/CTX_FULL_AA;
  int se = scan_end/CTX_FULL_AA;
  if (ss < 0)ss =0;
  if (se >= CTX_MAX_SCANLINES) se = CTX_MAX_SCANLINES-1;

  for (int i = ss; i < se; i++)
    rasterizer->scan_bin_count[i]=0;

  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
  {
    CtxSegment *segment = & ((CtxSegment*)rasterizer->edge_list.entries)[i];
    int scan = (segment->y0-CTX_FULL_AA+2) / CTX_FULL_AA;
    if (scan < ss) scan = ss;
    if (scan < se)
      rasterizer->scan_bins[scan][rasterizer->scan_bin_count[scan]++]=i;
  }
#else
  ctx_sort_edges (rasterizer);
#endif

  rasterizer->scanline = scan_start;

  /* one iteration per pixel row: scanline advances by CTX_FULL_AA in total
   * (CTX_AA_HALFSTEP2 + CTX_AA_HALFSTEP) and dst by one stride */
  while (rasterizer->scanline <= scan_end)
    {
      /* the whole clamped span is composited for every row */
      int c0 = minx;
      int c1 = maxx;
      ctx_rasterizer_feed_edges_full (rasterizer, 1, blur_radius);
      {
        rasterizer->scanline += CTX_AA_HALFSTEP2;
        ctx_rasterizer_feed_pending_edges (rasterizer);

        memset (coverage, 0, pixs);
        ctx_rasterizer_sort_active_edges (rasterizer);
        ctx_rasterizer_generate_sdf (rasterizer, minx, maxx, coverage, is_winding, blur_radius);
        rasterizer->scanline += CTX_AA_HALFSTEP;
        ctx_rasterizer_increment_edges (rasterizer, CTX_FULL_AA);
      }

      {
        /* the coverage pointer handed to the post-process step is rebased
         * so that it can be indexed by x */
        ctx_coverage_post_process (rasterizer, c0, c1, coverage - minx, NULL, NULL);
        apply_coverage (c1-c0+1,
                        &dst[(c0 * bpp) /8],
                        rasterizer_src,
                        coverage + (c0-minx),
                        rasterizer, c0);
      }
      dst += blit_stride;
    }
}
#endif




#if CTX_INLINE_FILL_RULE

// this can shave 1-2% off execution time, at the cost of increased code size
/* Entry point for rasterizing the current edge list (variant used when
 * CTX_INLINE_FILL_RULE is enabled).
 *
 * Every call into the workers passes literal 0/1 arguments, so each
 * fill_rule / allow_direct combination is a separate call site.
 *
 * Direct rendering is not used when CTX_RASTERIZER_ALLOW_DIRECT is off, when
 * a non-rectangular clip buffer is active, or while rendering a shadow.
 */
void
CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule)
{
#if CTX_RASTERIZER_ALLOW_DIRECT
  int direct_blocked = 0;
#if CTX_ENABLE_CLIP
  direct_blocked |= ((rasterizer->clip_buffer!=NULL) & (!rasterizer->clip_rectangle));
#endif
#if CTX_ENABLE_SHADOW_BLUR
  direct_blocked |= rasterizer->in_shadow;
#endif
  const int allow_direct = !direct_blocked;
#else
  const int allow_direct = 0;  // temporarily disabled
                               // we seem to overrun our scans
#endif

#if CTX_ENABLE_SHADOW_BLUR
  /* shadows always go through the blur rasterizer */
  if (rasterizer->in_shadow)
  {
    if (fill_rule)
      ctx_rasterizer_rasterize_edges3 (rasterizer, 1);
    else
      ctx_rasterizer_rasterize_edges3 (rasterizer, 0);
    return;
  }
#endif

  if (fill_rule)
  {
    if (allow_direct)
      ctx_rasterizer_rasterize_edges2 (rasterizer, 1, 1);
    else
      ctx_rasterizer_rasterize_edges2 (rasterizer, 1, 0);
  }
  else
  {
    if (allow_direct)
      ctx_rasterizer_rasterize_edges2 (rasterizer, 0, 1);
    else
      ctx_rasterizer_rasterize_edges2 (rasterizer, 0, 0);
  }
}
#else

/* Entry point for rasterizing the current edge list (variant used when
 * CTX_INLINE_FILL_RULE is disabled): forwards fill_rule as a run-time value.
 *
 * Direct rendering is only allowed when CTX_RASTERIZER_ALLOW_DIRECT is
 * enabled and no non-rectangular clip buffer is active, matching the
 * CTX_INLINE_FILL_RULE variant of this function.  Shadows are routed to the
 * blur rasterizer.
 */
void
CTX_SIMD_SUFFIX (ctx_rasterizer_rasterize_edges) (CtxRasterizer *rasterizer, const int fill_rule)
{
#if CTX_RASTERIZER_ALLOW_DIRECT
  int allow_direct = !(0
#if CTX_ENABLE_CLIP
         | ((rasterizer->clip_buffer!=NULL) & (!rasterizer->clip_rectangle))
#endif
         );
#else
  /* honour the build-time switch, as the inlined variant does */
  const int allow_direct = 0;
#endif
#if CTX_ENABLE_SHADOW_BLUR
  if (rasterizer->in_shadow)
    ctx_rasterizer_rasterize_edges3 (rasterizer, fill_rule);
  else
#endif
    ctx_rasterizer_rasterize_edges2 (rasterizer, fill_rule, allow_direct);
}

#endif



extern const CtxPixelFormatInfo *ctx_pixel_formats;
void CTX_SIMD_SUFFIX(ctx_simd_setup)(void);

/* Points the shared dispatch variables at the implementations compiled
 * with this translation unit's CTX_SIMD_SUFFIX.
 */
void CTX_SIMD_SUFFIX(ctx_simd_setup)(void)
{
  ctx_rasterizer_rasterize_edges = CTX_SIMD_SUFFIX(ctx_rasterizer_rasterize_edges);
  ctx_composite_setup            = CTX_SIMD_SUFFIX(ctx_composite_setup);
  ctx_pixel_formats              = CTX_SIMD_SUFFIX(ctx_pixel_formats);
#if CTX_FAST_FILL_RECT
  ctx_composite_fill_rect        = CTX_SIMD_SUFFIX(ctx_composite_fill_rect);
#endif
  /* the stroke fast path is only hooked up when the fill fast path is */
#if CTX_FAST_FILL_RECT && CTX_FAST_STROKE_RECT
  ctx_composite_stroke_rect      = CTX_SIMD_SUFFIX(ctx_composite_stroke_rect);
#endif
}


#endif
#endif
#if CTX_IMPLEMENTATION
#if CTX_RASTERIZER


/* Writes a colour stop at position @pos with colour @rgba (four floats,
 * indices 0..3) into the state's gradient.
 *
 * The stop is written at index n_stops; n_stops only advances while it is
 * below CTX_MAX_GRADIENT_STOPS-1, so once the table is full further stops
 * keep overwriting the last slot.
 *
 * FIXME XXX there is only one gradient, but separate gradients might be
 * needed for fill and stroke.
 */
static void
_ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba)
{
  CtxGradient     *grad = &rasterizer->state->gradient;
  CtxGradientStop *slot = grad->stops + grad->n_stops;

  slot->pos = pos;
  ctx_color_set_rgba (rasterizer->state, &slot->color,
                      rgba[0], rgba[1], rgba[2], rgba[3]);

  if (grad->n_stops >= CTX_MAX_GRADIENT_STOPS-1)
    return; /* out of stops: the next call reuses this slot */
  grad->n_stops++;
}

/* Non-static entry point; forwards to _ctx_rasterizer_gradient_add_stop. */
void
ctx_rasterizer_gradient_add_stop (CtxRasterizer *rasterizer, float pos, float *rgba)
{
  _ctx_rasterizer_gradient_add_stop (rasterizer, pos, rgba);
}

/* Records (x, y) as the current inner point and grows the tracked bounds
 * (col_min/col_max horizontally, scan_min/scan_max vertically) to include
 * it.
 */
static inline void ctx_rasterizer_update_inner_point (CtxRasterizer *rasterizer, int x, int y)
{
  /* horizontal extent */
  rasterizer->col_min  = ctx_mini (x, rasterizer->col_min);
  rasterizer->col_max  = ctx_maxi (x, rasterizer->col_max);
  /* vertical extent */
  rasterizer->scan_min = ctx_mini (y, rasterizer->scan_min);
  rasterizer->scan_max = ctx_maxi (y, rasterizer->scan_max);
  /* remember the point itself */
  rasterizer->inner_y = y;
  rasterizer->inner_x = x;
}

/* Appends an edge from the current inner point to (x1, y1) to the edge
 * list; x1 is first made relative to the blit origin.
 *
 * When no previous point exists (has_prev <= 0) the last entry of the list
 * is tagged CTX_NEW_EDGE and remembered as first_edge.
 *
 * Returns the value returned by ctx_edgelist_add_single.
 */
static CTX_INLINE int ctx_rasterizer_add_point (CtxRasterizer *rasterizer, int x1, int y1)
{
  CtxSegment edge = {{CTX_EDGE, 0, 0, 0, 0, 0}};

  x1 -= rasterizer->blit_x * CTX_SUBDIV;

  /* the edge starts where the previous one ended */
  edge.x0 = rasterizer->inner_x;
  edge.y0 = rasterizer->inner_y;
  edge.x1 = x1;
  edge.y1 = y1;

  ctx_rasterizer_update_inner_point (rasterizer, x1, y1);

  const int added = ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&edge);

  if (CTX_LIKELY(rasterizer->has_prev > 0))
    return added;

  /* first edge of a new sub-path */
  CtxSegment *all_edges = (CtxSegment*)rasterizer->edge_list.entries;
  all_edges[rasterizer->edge_list.count-1].code = CTX_NEW_EDGE;
  rasterizer->has_prev   = 1;
  rasterizer->first_edge = rasterizer->edge_list.count-1;
  return added;
}

/* Compacts the edge list in place: CTX_CLOSE_EDGE entries are dropped and
 * every remaining segment whose y1 is smaller than its y0 is rewritten with
 * its end points swapped and the code CTX_EDGE_FLIPPED, so that y0 <= y1
 * holds for all kept segments.  The list count is reduced by the number of
 * dropped entries.
 */
static inline void ctx_rasterizer_poly_to_edges (CtxRasterizer *rasterizer)
{
  CtxSegment  *edges = (CtxSegment*)&rasterizer->edge_list.entries[0];
  unsigned int total = rasterizer->edge_list.count;
  unsigned int kept  = 0;

  for (unsigned int in = 0; in < total; in++)
    {
      const CtxSegment *src = &edges[in];

      if (src->code == CTX_CLOSE_EDGE)
        continue;

      if (src->y1 < src->y0)
        edges[kept] = ctx_segment_s16 (CTX_EDGE_FLIPPED,
                                       src->x1, src->y1,
                                       src->x0, src->y0);
      else
        edges[kept] = *src;
      kept++;
    }
  rasterizer->edge_list.count = kept;
}

/* Closes the current sub-path, if there is one.
 *
 * Does nothing unless first_edge refers to an entry tagged CTX_NEW_EDGE.
 * Otherwise it appends an edge from the current inner point back to the
 * start of that first segment, followed by a copy of the first segment
 * tagged CTX_CLOSE_EDGE, resets has_prev/first_edge, and moves the inner
 * point and the user-space position (rasterizer->x, rasterizer->y) to the
 * start of the sub-path.
 */
static inline void
_ctx_rasterizer_close_path (CtxRasterizer *rasterizer)
{
  if (rasterizer->first_edge < 0)
    return;
  if (rasterizer->edge_list.entries == NULL)
    return;

  const int x0 = rasterizer->inner_x;
  const int y0 = rasterizer->inner_y;

  const CtxSegment *segment = & ((CtxSegment*)rasterizer->edge_list.entries)[rasterizer->first_edge];
  if (segment->code != CTX_NEW_EDGE)
    return;

  /* Snapshot the first segment before appending: `segment` points into the
   * edge list, and an append that has to grow the list may relocate its
   * storage, which would leave the pointer dangling. */
  CtxSegment first = *segment;

  CtxSegment entry = {{CTX_EDGE, 0, 0, 0, 0, 0}};
  const int x1 = first.x0;
  const int y1 = first.y0;
  entry.x0 = x0;
  entry.y0 = y0;
  entry.x1 = x1;
  entry.y1 = y1;
  // XXX
  rasterizer->has_prev = 0;
  rasterizer->first_edge = -1;
  ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&entry);

  /* marker entry: copy of the first segment with the close code */
  first.code = CTX_CLOSE_EDGE;
  ctx_edgelist_add_single (&rasterizer->edge_list, (CtxEntry*)&first);

  ctx_rasterizer_update_inner_point (rasterizer, x1, y1);

  /* convert the device position back to user space for rasterizer->x/y */
  float nx = x1 * (1.0f / CTX_SUBDIV);
  float ny = y1 * (1.0f / CTX_FULL_AA);
  ctx_device_to_user(rasterizer->backend.ctx, &nx, &ny);
  rasterizer->x = nx;
  rasterizer->y = ny;
}

/* Non-static entry point; forwards to _ctx_rasterizer_close_path. */
void
ctx_rasterizer_close_path (CtxRasterizer *rasterizer)
{
  _ctx_rasterizer_close_path (rasterizer);
}

//#define MIN_Y -100
//#define MAX_Y 3800
//#define MIN_X -100
//#define MAX_X 3600*10


/* Starts a new sub-path at user-space (x, y): stores the position, marks
 * that no previous point exists (has_prev = -1) and moves the inner point
 * to the transformed position, relative to the blit origin.
 */
static inline void _ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  int dev_x = 0;
  int dev_y = 0;

  rasterizer->x          = x;
  rasterizer->y          = y;
  rasterizer->has_prev   = -1;
  rasterizer->first_edge = rasterizer->edge_list.count - 1; // ?

  _ctx_user_to_device_prepped (rasterizer->state, x, y, &dev_x, &dev_y);

  ctx_rasterizer_update_inner_point (rasterizer,
                                     dev_x - rasterizer->blit_x * CTX_SUBDIV,
                                     dev_y);
}

/* Non-static entry point; forwards to _ctx_rasterizer_move_to. */
void ctx_rasterizer_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  _ctx_rasterizer_move_to (rasterizer, x, y);
}

/* Adds an edge to the fixed-point position (x, y) after running it through
 * _ctx_user_to_device_prepped_fixed.  Unlike _ctx_rasterizer_line_to this
 * does not update rasterizer->x / rasterizer->y.
 */
static CTX_INLINE void
ctx_rasterizer_line_to_fixed (CtxRasterizer *rasterizer, int x, int y)
{
  int dev_x = 0;
  int dev_y = 0;

  _ctx_user_to_device_prepped_fixed (rasterizer->state, x, y, &dev_x, &dev_y);
  ctx_rasterizer_add_point (rasterizer, dev_x, dev_y);
}

/* Adds an edge from the current point to user-space (x, y) and stores
 * (x, y) as the rasterizer's current user-space position.
 */
static CTX_INLINE void
_ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  int dev_x = 0;
  int dev_y = 0;

  rasterizer->x = x;
  rasterizer->y = y;

  _ctx_user_to_device_prepped (rasterizer->state, x, y, &dev_x, &dev_y);
  ctx_rasterizer_add_point (rasterizer, dev_x, dev_y);
}

/* Non-static entry point; forwards to _ctx_rasterizer_line_to. */
void
ctx_rasterizer_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  _ctx_rasterizer_line_to (rasterizer, x, y);
}

/* Evaluates one coordinate of a cubic bezier with control values x0..x3 at
 * parameter @dt by repeated linear interpolation (three levels of
 * ctx_lerpf).
 */
CTX_INLINE static float
ctx_bezier_sample_1d (float x0, float x1, float x2, float x3, float dt)
{
  /* first level: between neighbouring control values */
  const float p01 = ctx_lerpf (x0, x1, dt);
  const float p12 = ctx_lerpf (x1, x2, dt);
  const float p23 = ctx_lerpf (x2, x3, dt);
  /* second level */
  const float p012 = ctx_lerpf (p01, p12, dt);
  const float p123 = ctx_lerpf (p12, p23, dt);
  /* third level: the point on the curve */
  return ctx_lerpf (p012, p123, dt);
}

/* Evaluates the cubic bezier with control points (x0,y0)..(x3,y3) at
 * parameter @dt and stores the resulting point in *x and *y.
 */
CTX_INLINE static void
ctx_bezier_sample (float x0, float y0,
                   float x1, float y1,
                   float x2, float y2,
                   float x3, float y3,
                   float dt, float *x, float *y)
{
  const float px = ctx_bezier_sample_1d (x0, x1, x2, x3, dt);
  *x = px;
  const float py = ctx_bezier_sample_1d (y0, y1, y2, y3, dt);
  *y = py;
}

/* Recursively subdivides the parameter range [s, e] of the cubic bezier
 * (ox,oy) (x0,y0) (x1,y1) (x2,y2) and emits a line_to for every split
 * point, in curve order.
 *
 * (sx,sy) and (ex,ey) are the curve points at s and e.  The range is split
 * at its midpoint when fewer than two levels have been done, or - up to a
 * depth of six - while the squared distance between the chord midpoint and
 * the curve point exceeds @tolerance.
 */
static inline void
ctx_rasterizer_bezier_divide (CtxRasterizer *rasterizer,
                              float ox, float oy,
                              float x0, float y0,
                              float x1, float y1,
                              float x2, float y2,
                              float sx, float sy,
                              float ex, float ey,
                              float s,
                              float e,
                              int   iteration,
                              float tolerance)
{
  const float mid = (s + e) * 0.5f;
  float px, py;

  ctx_bezier_sample (ox, oy, x0, y0, x1, y1, x2, y2, mid, &px, &py);

  /* offset between the chord midpoint and the curve at mid */
  const float off_x = (sx+ex)/2 - px;
  const float off_y = (sy+ey)/2 - py;

  const int forced = (iteration<2);
  const int wanted = (iteration < 6) & (off_x*off_x+off_y*off_y > tolerance);
  if (!(forced | wanted))
    return;

  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
                                sx, sy, px, py, s, mid, iteration + 1,
                                tolerance);
  _ctx_rasterizer_line_to (rasterizer, px, py);
  ctx_rasterizer_bezier_divide (rasterizer, ox, oy, x0, y0, x1, y1, x2, y2,
                                px, py, ex, ey, mid, e, iteration + 1,
                                tolerance);
}

                      

/* Fixed-point linear interpolation between v0 and v1.  @dx is the
 * interpolation factor scaled by 1<<CTX_FIX_SHIFT; the product is biased by
 * (1<<CTX_FIX_SHIFT)-1 before being shifted back down.
 */
CTX_INLINE static int
ctx_lerp_fixed (int v0, int v1, int dx)
{
  const int span = v1-v0;
  const int bias = (1<<CTX_FIX_SHIFT)-1;
  return v0 + ((span * dx + bias) >> CTX_FIX_SHIFT);
}

/* Fixed-point counterpart of ctx_bezier_sample_1d: evaluates one coordinate
 * of a cubic bezier with control values x0..x3 at the fixed-point parameter
 * @dt using three levels of ctx_lerp_fixed.
 */
CTX_INLINE static int
ctx_bezier_sample_1d_fixed (int x0, int x1, int x2, int x3, int dt)
{
  /* first level: between neighbouring control values */
  const int p01 = ctx_lerp_fixed (x0, x1, dt);
  const int p12 = ctx_lerp_fixed (x1, x2, dt);
  const int p23 = ctx_lerp_fixed (x2, x3, dt);
  /* second level */
  const int p012 = ctx_lerp_fixed (p01, p12, dt);
  const int p123 = ctx_lerp_fixed (p12, p23, dt);
  /* third level: the point on the curve */
  return ctx_lerp_fixed (p012, p123, dt);
}

/* The four control points of a cubic bezier, stored as integers
 * (x/y interleaved, point 0 first).
 */
typedef struct CtxFixedBezier
{
  int x0;  /* first control point */
  int y0;
  int x1;  /* second control point */
  int y1;
  int x2;  /* third control point */
  int y2;
  int x3;  /* fourth control point */
  int y3;
} CtxFixedBezier;

/* Evaluates the fixed-point cubic bezier @b at parameter @dt and stores
 * the resulting point in *x and *y.
 */
CTX_INLINE static void
ctx_bezier_sample_fixed (const CtxFixedBezier *b,
                         int dt, int *x, int *y)
{
  const int px = ctx_bezier_sample_1d_fixed (b->x0, b->x1, b->x2, b->x3, dt);
  *x = px;
  const int py = ctx_bezier_sample_1d_fixed (b->y0, b->y1, b->y2, b->y3, dt);
  *y = py;
}

/* Fixed-point counterpart of ctx_rasterizer_bezier_divide: recursively
 * subdivides the parameter range [s, e] of bezier @b and emits a
 * ctx_rasterizer_line_to_fixed for every split point, in curve order.
 *
 * (sx,sy) and (ex,ey) are the curve points at s and e.  The range is split
 * at its midpoint when fewer than two levels have been done, or - up to a
 * depth of six - while the squared distance between the chord midpoint and
 * the curve point exceeds @tolerance.
 */
static inline void
ctx_rasterizer_bezier_divide_fixed (CtxRasterizer *rasterizer,
                                    const CtxFixedBezier *b,
                                    int sx, int sy,
                                    int ex, int ey,
                                    int s,
                                    int e,
                                    int iteration, long int tolerance)
{
  int t = (s + e) / 2;
  int x, y;

  ctx_bezier_sample_fixed (b, t, &x, &y);

  /* offset between the chord midpoint and the curve at t */
  const int dx = (sx+ex)/2 - x;
  const int dy = (sy+ey)/2 - y;

  /* Both squares are computed in 64 bits: the operands are fixed-point
   * values, so an int product can overflow (previously only dx*dx was
   * widened, and only to long, which is 32 bits on many targets). */
  const int64_t dist_sq = (int64_t) dx * dx + (int64_t) dy * dy;

  if ((iteration < 2) | ((iteration < 6) & (dist_sq > tolerance)))
  {
    ctx_rasterizer_bezier_divide_fixed (rasterizer, b,
                                  sx, sy, x, y, s, t, iteration+1, tolerance
                                  );
    ctx_rasterizer_line_to_fixed (rasterizer, x, y);
    ctx_rasterizer_bezier_divide_fixed (rasterizer, b,
                                  x, y, ex, ey, t, e, iteration+1, tolerance
                                  );
  }
}

/* Appends a cubic bezier from the state's current position
 * (rasterizer->state->x/y) through the control points (x0,y0) and (x1,y1)
 * to (x2,y2), as a series of line segments.
 *
 * The interior points come from the recursive subdivision (fixed-point or
 * float, depending on CTX_RASTERIZER_BEZIER_FIXED_POINT); the end point is
 * always added with a final line_to.
 */
static inline void
_ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
                          float x0, float y0,
                          float x1, float y1,
                          float x2, float y2)
{
  const float ox = rasterizer->state->x;
  const float oy = rasterizer->state->y;

#if CTX_RASTERIZER_BEZIER_FIXED_POINT
  /* control points scaled by CTX_FIX_SCALE and truncated to int */
  const int fox = (int)(ox * CTX_FIX_SCALE);
  const int foy = (int)(oy * CTX_FIX_SCALE);
  const int fx0 = (int)(x0 * CTX_FIX_SCALE);
  const int fy0 = (int)(y0 * CTX_FIX_SCALE);
  const int fx1 = (int)(x1 * CTX_FIX_SCALE);
  const int fy1 = (int)(y1 * CTX_FIX_SCALE);
  const int fx2 = (int)(x2 * CTX_FIX_SCALE);
  const int fy2 = (int)(y2 * CTX_FIX_SCALE);

  CtxFixedBezier b = { fox, foy, fx0, fy0, fx1, fy1, fx2, fy2 };

  ctx_rasterizer_bezier_divide_fixed (rasterizer, &b,
                                      fox, foy, fx2, fy2,
                                      0, CTX_FIX_SCALE, 0,
                                      rasterizer->state->gstate.tolerance_fixed);
#else
  ctx_rasterizer_bezier_divide (rasterizer,
                                ox, oy, x0, y0,
                                x1, y1, x2, y2,
                                ox, oy, x2, y2,
                                0.0f, 1.0f, 0,
                                rasterizer->state->gstate.tolerance);
#endif
  _ctx_rasterizer_line_to (rasterizer, x2, y2);
}

/* Non-static entry point; forwards to _ctx_rasterizer_curve_to. */
void
ctx_rasterizer_curve_to (CtxRasterizer *rasterizer,
                          float x0, float y0,
                          float x1, float y1,
                          float x2, float y2)
{
  _ctx_rasterizer_curve_to (rasterizer, x0,  y0, x1,  y1, x2,  y2);
}

/* Relative move_to: (x, y) is an offset from the rasterizer's current
 * user-space position (rasterizer->x, rasterizer->y).  A zero offset is
 * not special-cased.
 */
static inline void
_ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  const float abs_x = x + rasterizer->x;
  const float abs_y = y + rasterizer->y;
  _ctx_rasterizer_move_to (rasterizer, abs_x, abs_y);
}

/* Non-static entry point; forwards to _ctx_rasterizer_rel_move_to. */
void
ctx_rasterizer_rel_move_to (CtxRasterizer *rasterizer, float x, float y)
{
  _ctx_rasterizer_rel_move_to (rasterizer,x,y);
}

/* Relative line_to: (x, y) is an offset from the rasterizer's current
 * user-space position (rasterizer->x, rasterizer->y).  A zero offset is
 * not special-cased.
 */
static inline void
_ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  const float abs_x = x + rasterizer->x;
  const float abs_y = y + rasterizer->y;
  _ctx_rasterizer_line_to (rasterizer, abs_x, abs_y);
}
/* Non-static entry point; forwards to _ctx_rasterizer_rel_line_to. */
void
ctx_rasterizer_rel_line_to (CtxRasterizer *rasterizer, float x, float y)
{
  _ctx_rasterizer_rel_line_to (rasterizer, x, y);
}

/* Relative curve_to: all three points are offsets from the rasterizer's
 * current user-space position (rasterizer->x, rasterizer->y).
 */
static inline void
_ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
                             float x0, float y0, float x1, float y1, float x2, float y2)
{
  const float base_x = rasterizer->x;
  const float base_y = rasterizer->y;

  _ctx_rasterizer_curve_to (rasterizer,
                            x0 + base_x, y0 + base_y,
                            x1 + base_x, y1 + base_y,
                            x2 + base_x, y2 + base_y);
}
/* Non-static entry point; forwards to _ctx_rasterizer_rel_curve_to. */
void
ctx_rasterizer_rel_curve_to (CtxRasterizer *rasterizer,
                             float x0, float y0, float x1, float y1, float x2, float y2)
{
  _ctx_rasterizer_rel_curve_to (rasterizer, x0, y0, x1, y1, x2, y2);
}



/* Looks up a texture by its @eid string in the rasterizer's texture source.
 * Only slots that have both data and an eid are considered.
 *
 * Returns the slot index of the first match, or -1 when none matches.
 */
static int
ctx_rasterizer_find_texture (CtxRasterizer *rasterizer,
                             const char *eid)
{
  for (int slot = 0; slot < CTX_MAX_TEXTURES; slot++)
  {
    if (!rasterizer->texture_source->texture[slot].data)
      continue;
    if (!rasterizer->texture_source->texture[slot].eid)
      continue;
    if (strcmp (rasterizer->texture_source->texture[slot].eid, eid) == 0)
      return slot;
  }
  return -1;
}

/* Makes the texture identified by @eid the current source, translated by
 * (x, y).
 *
 * The stroke source is targeted when state->source is nonzero, the fill
 * source otherwise; state->source is reset to 0 either way.  The existing
 * source is cleared first, except when it already is a texture and @eid
 * starts with '!'.  When the lookup fails, slot 0 is used instead; if the
 * chosen slot has no data the function returns with the source as it is at
 * that point.
 */
static void
ctx_rasterizer_set_texture (CtxRasterizer *rasterizer,
                            const char *eid,
                            float x,
                            float y)
{
  CtxSource *source;
  if (rasterizer->state->source != 0)
    source = &rasterizer->state->gstate.source_stroke;
  else
    source = &rasterizer->state->gstate.source_fill;
  rasterizer->state->source = 0;

  /* keep the current texture only for '!' ids on a texture source */
  if (source->type != CTX_SOURCE_TEXTURE || eid[0] != '!')
  {
    source->type = CTX_SOURCE_NONE;
    source->texture.buffer = NULL;
  }

  int no = ctx_rasterizer_find_texture (rasterizer, eid);
  if (no < 0 || no >= CTX_MAX_TEXTURES)
    no = 0;

  if (rasterizer->texture_source->texture[no].data == NULL)
    return;

  /* stamp the slot with the texture source's current frame */
  rasterizer->texture_source->texture[no].frame = rasterizer->texture_source->frame;

  source->texture.buffer = &rasterizer->texture_source->texture[no];
  if (!source->texture.buffer)
    return;

  source->type = CTX_SOURCE_TEXTURE;
  ctx_matrix_identity (&source->set_transform);
  ctx_matrix_translate (&source->set_transform, x, y);
}

/* Registers pixel data as a texture under @eid in the rasterizer's texture
 * source and makes it the current source (at offset 0,0).
 *
 * @eid:        identifier the texture is stored and looked up under
 * @width:      width passed to ctx_texture_init and used for the stride
 * @height:     height passed to ctx_texture_init
 * @format:     pixel format, cast to CtxPixelFormat
 * @data:       the pixel data
 * @steal_data: selects the user data handed to ctx_buffer_pixels_free:
 *              NULL when nonzero, the marker value 23 when zero
 *
 * The whole operation runs between _ctx_texture_lock and
 * _ctx_texture_unlock.
 */
void
ctx_rasterizer_define_texture (CtxRasterizer *rasterizer,
                               const char    *eid,
                               int            width,
                               int            height,
                               int            format,
                               char unsigned *data,
                               int            steal_data)
{

  _ctx_texture_lock (); // we're using the same texture_source from all threads, keeping allocations down
                        // need synchronizing (it could be better to do a pre-pass)
  ctx_texture_init (rasterizer->texture_source,
                    eid,
                    width,
                    height,
                    ctx_pixel_format_get_stride ((CtxPixelFormat)format, width),
                    (CtxPixelFormat)format,
#if CTX_ENABLE_CM
                    (void*)rasterizer->state->gstate.texture_space,
#else
                    NULL,
#endif
                    data,
                    ctx_buffer_pixels_free, (steal_data?(void*)0:(void*)23));
                    /*  when userdata for ctx_buffer_pixels_free is 23, texture_init dups the data on
                     *  use
                     */

  /* sample the fill/stroke selector before ctx_rasterizer_set_texture
   * resets state->source to 0 */
  int is_stroke = (rasterizer->state->source != 0);
  ctx_rasterizer_set_texture (rasterizer, eid, 0.0f, 0.0f);
#if CTX_ENABLE_CM
  CtxSource *source = is_stroke?
                        &rasterizer->state->gstate.source_stroke:
                        &rasterizer->state->gstate.source_fill;
  /* prepare color management once per buffer */
  if (source->texture.buffer &&
      !source->texture.buffer->color_managed)
  {
    _ctx_texture_prepare_color_management (rasterizer->state,
      source->texture.buffer);
  }
#else
  /* is_stroke is only needed with color management; this keeps it "used" */
  if (is_stroke){};
#endif
  _ctx_texture_unlock ();
}


/* Returns 1 when drawing with the current state cannot change any pixel:
 * either the global alpha is zero, or the fill source is a plain color whose
 * alpha (as 8 bit gray+alpha) is zero.  Returns 0 otherwise.
 *
 * The `stroke` argument is not consulted; only source_fill is inspected.
 */
inline static int
ctx_is_transparent (CtxRasterizer *rasterizer, int stroke)
{
  CtxGState *gstate = &rasterizer->state->gstate;

  if (gstate->global_alpha_u8 == 0)
    return 1;

  if (gstate->source_fill.type != CTX_SOURCE_COLOR)
    return 0;

  uint8_t gray_alpha[2];
  ctx_color_get_graya_u8 (rasterizer->state, &gstate->source_fill.color, gray_alpha);
  return (gray_alpha[1] == 0) ? 1 : 0;
}

/* Perp-dot product (2D cross product) of the vectors (ax,ay) and (bx,by),
 * computed in int arithmetic. */
static CTX_INLINE int ctx_perpdot(int ax,int ay,int bx, int by)
{
  int lhs = ax * by;
  int rhs = ay * bx;
  return lhs - rhs;
}

/* Fills the path currently accumulated in rasterizer->edge_list.
 *
 * - When rasterizer->preserve is set, the edge list is copied aside before
 *   rasterizing and put back afterwards; the flag is always cleared on exit.
 * - While rendering a shadow (in_shadow) the edges and the scan/column bounds
 *   are temporarily offset by the feather parameters.
 * - Nothing is drawn when the source is fully transparent or the path bounds
 *   lie outside the blit rectangle.
 * - With CTX_FAST_FILL_RECT, an unclipped 5-segment axis-aligned rectangle is
 *   composited directly instead of going through the edge rasterizer.
 * - The ink bounding box in rasterizer->state is grown to cover the path.
 */
static void
ctx_rasterizer_fill (CtxRasterizer *rasterizer)
{
  CtxGState     *gstate     = &rasterizer->state->gstate;
  unsigned int   edge_count = rasterizer->edge_list.count;
  int            preserved  = (rasterizer->preserve != 0);
  /* number of segments that actually have to be saved and restored; stays 0
   * for an empty path so that no stale segment is copied or resurrected */
  unsigned int   saved_count = (preserved && edge_count) ? edge_count : 0;
  int blit_x = rasterizer->blit_x;
  int blit_y = rasterizer->blit_y;
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;

  /* copy of already built up path's poly line; a VLA needs at least one
   * element.
   * XXX - by building a large enough path the stack can be smashed!
   */
  CtxSegment temp[saved_count ? saved_count : 1];
  if (saved_count)
    memcpy (temp, rasterizer->edge_list.entries, sizeof (CtxSegment)*saved_count);

#if CTX_ENABLE_SHADOW_BLUR
  if (CTX_UNLIKELY(rasterizer->in_shadow))
  {
  /* shift the whole path by the shadow offset, in sub-pixel units */
  for (unsigned int i = 0; i < rasterizer->edge_list.count; i++)
    {
      CtxSegment *segment = &((CtxSegment*)rasterizer->edge_list.entries)[i];
      segment->x0 += rasterizer->feather_x * CTX_SUBDIV;
      segment->y0 += rasterizer->feather_y * CTX_FULL_AA;
      segment->x1 += rasterizer->feather_x * CTX_SUBDIV;
      segment->y1 += rasterizer->feather_y * CTX_FULL_AA;
    }
    /* widen the bounds; undone symmetrically at the end of the function */
    rasterizer->scan_min += ((rasterizer->feather_y - rasterizer->feather) +1) * CTX_FULL_AA;
    rasterizer->scan_max += ((rasterizer->feather_y + rasterizer->feather) +1) * CTX_FULL_AA;
    rasterizer->col_min  += ((rasterizer->feather_x - rasterizer->feather)+ 1) * CTX_SUBDIV;
    rasterizer->col_max  += ((rasterizer->feather_x + rasterizer->feather)+ 1) * CTX_SUBDIV;
  }
#endif

  if (CTX_UNLIKELY(ctx_is_transparent (rasterizer, 0) |
      (rasterizer->scan_min > CTX_FULL_AA * (blit_y + blit_height)) |
      (rasterizer->scan_max < CTX_FULL_AA * blit_y) |
      (rasterizer->col_min > CTX_SUBDIV * (blit_x + blit_width)) |
      (rasterizer->col_max < CTX_SUBDIV * blit_x)))
    {
      /* invisible: transparent source or path entirely outside the target */
    }
  else
  {
    ctx_composite_setup (rasterizer);

    /* grow the ink bounding box; each bound accumulates against itself */
    rasterizer->state->ink_min_x = ctx_mini (rasterizer->state->ink_min_x, rasterizer->col_min / CTX_SUBDIV);
    rasterizer->state->ink_max_x = ctx_maxi (rasterizer->state->ink_max_x, rasterizer->col_max / CTX_SUBDIV);
    rasterizer->state->ink_min_y = ctx_mini (rasterizer->state->ink_min_y, rasterizer->scan_min / CTX_FULL_AA);
    rasterizer->state->ink_max_y = ctx_maxi (rasterizer->state->ink_max_y, rasterizer->scan_max / CTX_FULL_AA);

#if CTX_FAST_FILL_RECT
  if (rasterizer->edge_list.count == 5)
    {
      CtxSegment *entry0 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[0];
      CtxSegment *entry1 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[1];
      CtxSegment *entry2 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[2];
      CtxSegment *entry3 = &(((CtxSegment*)(rasterizer->edge_list.entries)))[3];


      /* unclipped, and the first four end points form an axis-aligned box */
      if (
          (!(gstate->clipped != 0)) &
          (entry0->x1 == entry1->x1) &
          (entry0->y1 == entry3->y1) &
          (entry1->y1 == entry2->y1) &
          (entry2->x1 == entry3->x1)
#if CTX_ENABLE_SHADOW_BLUR
           & (!rasterizer->in_shadow)
#endif
         )
       {
         float x0 = entry3->x1 * (1.0f / CTX_SUBDIV);
         float y0 = entry3->y1 * (1.0f / CTX_FULL_AA);
         float x1 = entry1->x1 * (1.0f / CTX_SUBDIV);
         float y1 = entry1->y1 * (1.0f / CTX_FULL_AA);

         /* clamp the rectangle to the blit area */
         x0 = ctx_maxf (x0, blit_x);
         y0 = ctx_maxf (y0, blit_y);
         x1 = ctx_minf (x1, blit_x + blit_width);
         y1 = ctx_minf (y1, blit_y + blit_height);

         if ((x1 > x0) & (y1 > y0))
         {
           ctx_composite_fill_rect (rasterizer, x0, y0, x1, y1, 255);
           goto done;
         }
       }
    }
#endif


    _ctx_rasterizer_close_path (rasterizer);
    ctx_rasterizer_poly_to_edges (rasterizer);

    ctx_rasterizer_rasterize_edges (rasterizer, gstate->fill_rule);
  }
#if CTX_FAST_FILL_RECT
done:
#endif
  if (preserved)
    {
      /* put the saved path back; an empty path stays empty */
      if (saved_count)
        memcpy (rasterizer->edge_list.entries, temp, sizeof (CtxSegment)*saved_count);
      rasterizer->edge_list.count = saved_count;
    }
#if CTX_ENABLE_SHADOW_BLUR
  if (CTX_UNLIKELY(rasterizer->in_shadow))
  {
    rasterizer->scan_min -= ((rasterizer->feather_y - rasterizer->feather) +1) * CTX_FULL_AA;
    rasterizer->scan_max -= ((rasterizer->feather_y + rasterizer->feather) +1) * CTX_FULL_AA;
    rasterizer->col_min  -= ((rasterizer->feather_x - rasterizer->feather)+ 1) * CTX_SUBDIV;
    rasterizer->col_max  -= ((rasterizer->feather_x + rasterizer->feather)+ 1) * CTX_SUBDIV;
  }
#endif
  rasterizer->preserve = 0;
}


#if CTX_BRAILLE_TEXT
/* Returns the terminal glyph cell stored for (col, row) in
 * rasterizer->glyphs.  When no cell exists yet, a zero-initialized one is
 * allocated, appended to the list, tagged with col/row and returned.
 *
 * NOTE(review): the allocation result is used without a NULL check.
 */
static CtxTermGlyph *
ctx_rasterizer_find_term_glyph (CtxRasterizer *rasterizer, int col, int row)
{
    CtxList *node = rasterizer->glyphs;

    while (node)
    {
      CtxTermGlyph *candidate = (CtxTermGlyph*)node->data;
      if (candidate->col == col && candidate->row == row)
        return candidate;
      node = node->next;
    }

    /* no cell at this position yet: create one */
    CtxTermGlyph *fresh = (CtxTermGlyph*)ctx_calloc (1, sizeof (CtxTermGlyph));
    ctx_list_append (&rasterizer->glyphs, fresh);
    fresh->col = col;
    fresh->row = row;
    return fresh;
}
#endif

/* Renders a single glyph at the current pen position.
 *
 * A box spanning one font_size around the pen is transformed to device space
 * and used for a cheap visibility test; glyphs entirely outside the blit
 * rectangle are skipped.
 *
 * With CTX_TERM and CTX_BRAILLE_TEXT, when term_glyphs is enabled, the glyph
 * is filled (not stroked) and the device-space font size is within 0.5 of the
 * terminal cell height, the glyph is recorded in the terminal cell grid
 * together with the fill color instead of being rasterized.
 */
static void
ctx_rasterizer_glyph (CtxRasterizer *rasterizer, uint32_t unichar, int stroke)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  float left   = rasterizer->state->x;
  float top    = rasterizer->state->y - gstate->font_size;
  float right  = rasterizer->state->x + gstate->font_size;
  float bottom = rasterizer->state->y + gstate->font_size;
  _ctx_user_to_device (rasterizer->state, &left, &top);
  _ctx_user_to_device (rasterizer->state, &right, &bottom);

  /* completely left of / above the target */
  if (right < rasterizer->blit_x || bottom < rasterizer->blit_y)
    return;
  /* completely right of / below the target */
  if (left > rasterizer->blit_x + rasterizer->blit_width ||
      top  > rasterizer->blit_y + rasterizer->blit_height)
    return;

#if CTX_TERM
#if CTX_BRAILLE_TEXT
  float device_font_size = 0;
  int cell_height = 1;
  int cell_width = 1;

  if (rasterizer->term_glyphs)
  {
    float unused_dx = 0;
    device_font_size = gstate->font_size;

    cell_height = (int)ctx_term_get_cell_height (rasterizer->backend.ctx);
    cell_width = (int)ctx_term_get_cell_width (rasterizer->backend.ctx);

    _ctx_user_to_device_distance (rasterizer->state, &unused_dx, &device_font_size);
  }

  if (rasterizer->term_glyphs != 0 && !stroke &&
      fabsf (device_font_size - cell_height) < 0.5f)
  {
    float pen_x = rasterizer->x;
    float pen_y = rasterizer->y;
    _ctx_user_to_device (rasterizer->state, &pen_x, &pen_y);
    int col = (int)(pen_x / cell_width + 1);
    int row = (int)(pen_y / cell_height + 1);
    CtxTermGlyph *cell = ctx_rasterizer_find_term_glyph (rasterizer, col, row);

    cell->unichar = unichar;
    ctx_color_get_rgba8 (rasterizer->state, &gstate->source_fill.color,
                         &cell->rgba_fg[0]);
    return;
  }
#endif
#endif
  _ctx_glyph (rasterizer->backend.ctx, unichar, stroke);
}

/* Renders a string starting at the current pen position.
 *
 * With CTX_TERM and CTX_BRAILLE_TEXT, when term_glyphs is enabled, the text is
 * filled (not stroked) and the device-space font size is within 0.5 of the
 * terminal cell height, every byte of the string is stored in consecutive
 * terminal cells of one row together with the fill color.  Otherwise the
 * string is handed to _ctx_text.
 *
 * NOTE(review): the terminal path stores each `char` of the string as one
 * unichar, so multi-byte sequences are not decoded here - confirm whether
 * callers only pass ASCII on this path.
 */
static void
ctx_rasterizer_text (CtxRasterizer *rasterizer, const char *string, int stroke)
{
#if CTX_TERM
#if CTX_BRAILLE_TEXT
  float device_font_size = 0;
  if (rasterizer->term_glyphs)
  {
    float unused_dx = 0;
    device_font_size = rasterizer->state->gstate.font_size;
    _ctx_user_to_device_distance (rasterizer->state, &unused_dx, &device_font_size);
  }
  int cell_height = (int)ctx_term_get_cell_height (rasterizer->backend.ctx);
  int cell_width  = (int)ctx_term_get_cell_width (rasterizer->backend.ctx);

  if (rasterizer->term_glyphs != 0 && !stroke &&
      fabsf (device_font_size - cell_height) < 0.5f)
  {
    float pen_x = rasterizer->x;
    float pen_y = rasterizer->y;
    _ctx_user_to_device (rasterizer->state, &pen_x, &pen_y);
    int col = (int)(pen_x / cell_width + 1);
    int row = (int)(pen_y / cell_height + 1);

    /* one cell per byte, advancing a column at a time */
    for (const char *p = string; *p; p++)
    {
      CtxTermGlyph *cell = ctx_rasterizer_find_term_glyph (rasterizer, col, row);

      cell->unichar = *p;
      ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
                      cell->rgba_fg);
      col++;
    }
    return;
  }
#endif
#endif
  _ctx_text (rasterizer->backend.ctx, string, stroke, 1);
}

/* prototype for the font setter used below */
void
_ctx_font (Ctx *ctx, const char *name);

/* Sets the font named `font_name` on the Ctx that owns this rasterizer by
 * forwarding to _ctx_font. */
void
ctx_rasterizer_set_font (CtxRasterizer *rasterizer, const char *font_name)
{
  Ctx *owner = rasterizer->backend.ctx;
  _ctx_font (owner, font_name);
}

/* Appends a circular arc, approximated by line segments, to the current path.
 *
 * x, y          center of the circle
 * radius        radius; arcs with radius <= 0.0001 are ignored
 * start_angle,
 * end_angle     angles in radians, each clamped to [-30, 30]
 * anticlockwise non-zero to step with decreasing angle
 *
 * The number of segments for a full turn scales with the transformed radius
 * and is kept between 24 and CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS.  If the path
 * has no previous point the arc starts with a move_to, otherwise it is
 * connected with a line.  The arc always ends with a line to the exact end
 * point.
 */
void
ctx_rasterizer_arc (CtxRasterizer *rasterizer,
                    float          x,
                    float          y,
                    float          radius,
                    float          start_angle,
                    float          end_angle,
                    int            anticlockwise)
{
  float factor = ctx_matrix_get_scale (&rasterizer->state->gstate.transform);

  /* segments per full turn: roughly a quarter of the device circumference */
  int full_segments = (int)(factor * radius * CTX_PI * 2 / 4.0f);
  if (full_segments > CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS)
    full_segments = CTX_RASTERIZER_MAX_CIRCLE_SEGMENTS;
  if (full_segments < 24)
    full_segments = 24;

  float step = CTX_PI*2.0f/full_segments;
  int steps;

  /* keep the angles in a bounded range */
  if (start_angle < -30.0f) start_angle = -30.0f;
  if (start_angle >  30.0f) start_angle =  30.0f;
  if (end_angle   < -30.0f) end_angle   = -30.0f;
  if (end_angle   >  30.0f) end_angle   =  30.0f;

  if (radius <= 0.0001f)
    return;

  if (end_angle == start_angle)
    {
      /* degenerate arc: a single point
       * XXX also detect arcs fully outside render view */
      float px = x + ctx_cosf (end_angle) * radius;
      float py = y + ctx_sinf (end_angle) * radius;
      if (rasterizer->has_prev != 0)
        _ctx_rasterizer_line_to (rasterizer, px, py);
      else
        _ctx_rasterizer_move_to (rasterizer, px, py);
      return;
    }

  /* a sweep of (almost exactly) one full turn in either direction is treated
   * as a full circle, whatever the drawing direction */
  int forward_full_turn  = fabsf((end_angle - start_angle) - CTX_PI*2) < 0.01f;
  int backward_full_turn = fabsf((start_angle - end_angle) - CTX_PI*2) < 0.01f;

  if (forward_full_turn || backward_full_turn)
    {
      steps = full_segments - 1;
    }
  else if (anticlockwise)
    {
      steps = (int)((start_angle - end_angle) / (CTX_PI*2) * full_segments);
    }
  else
    {
      steps = (int)((end_angle - start_angle) / (CTX_PI*2) * full_segments);
    }

  if (anticlockwise)
    step = -step;

  if (steps == 0)
    {
      /* shorter than one segment: only make sure there is a start point */
      float xv = x + ctx_cosf (start_angle) * radius;
      float yv = y + ctx_sinf (start_angle) * radius;
      if (!rasterizer->has_prev)
        ctx_rasterizer_move_to (rasterizer, xv, yv);
    }
  else
    {
      float angle = start_angle;
      for (int i = 0; i < steps; i++)
        {
          float xv = x + ctx_cosf (angle) * radius;
          float yv = y + ctx_sinf (angle) * radius;
          /* only the very first point may start a new sub-path */
          if (i == 0 && !rasterizer->has_prev)
            ctx_rasterizer_move_to (rasterizer, xv, yv);
          else
            _ctx_rasterizer_line_to (rasterizer, xv, yv);
          angle += step;
        }
    }

  /* finish exactly on the end angle */
  _ctx_rasterizer_line_to (rasterizer, x + ctx_cosf (end_angle) * radius,
                           y + ctx_sinf (end_angle) * radius);
}

/* Appends a quadratic bezier from the current point to (x, y) with control
 * point (cx, cy), by converting it to the equivalent cubic: the cubic control
 * points lie two thirds of the way from each end point to (cx, cy). */
void
ctx_rasterizer_quad_to (CtxRasterizer *rasterizer,
                        float        cx,
                        float        cy,
                        float        x,
                        float        y)
{
  float c1x = (cx * 2 + rasterizer->x) / 3.0f;
  float c1y = (cy * 2 + rasterizer->y) / 3.0f;
  float c2x = (cx * 2 + x) / 3.0f;
  float c2y = (cy * 2 + y) / 3.0f;

  _ctx_rasterizer_curve_to (rasterizer, c1x, c1y, c2x, c2y, x, y);
}

/* Relative variant of ctx_rasterizer_quad_to: the control point and the end
 * point are given as offsets from the rasterizer's current point. */
void
ctx_rasterizer_rel_quad_to (CtxRasterizer *rasterizer,
                            float cx, float cy,
                            float x,  float y)
{
  float abs_cx = cx + rasterizer->x;
  float abs_cy = cy + rasterizer->y;
  float abs_x  = x  + rasterizer->x;
  float abs_y  = y  + rasterizer->y;

  ctx_rasterizer_quad_to (rasterizer, abs_cx, abs_cy, abs_x, abs_y);
}

/* Forward declaration (defined elsewhere in this file); needed here because
 * ctx_rasterizer_stroke calls it when emitting square line caps. */
static void
ctx_rasterizer_rectangle_reverse (CtxRasterizer *rasterizer,
                                  float x,
                                  float y,
                                  float width,
                                  float height);

#if CTX_STROKE_1PX


/* Composites a single pixel with coverage `cov` using the rasterizer's
 * current color and apply_coverage function.
 *
 * XXX : x and y are expected to be relative to the blit origin, i.e. the
 *       caller has already subtracted rasterizer->blit_x / blit_y.
 *
 * Pixels outside 0 <= x < blit_width, 0 <= y < blit_height are ignored, so
 * this is the bounds-checked path for pixels near the target's edges.
 */
static void
ctx_rasterizer_pset (CtxRasterizer *rasterizer, int x, int y, uint8_t cov)
{
  /* column 0 and row 0 are valid pixels; only negative coordinates are out */
  if ((x < 0) | (y < 0) | (x >= rasterizer->blit_width) |
      (y >= rasterizer->blit_height))
    { return; }

  /* NOTE(review): fg_color is never read below; the call is kept in case the
   * color conversion has side effects - confirm and remove if it has none. */
  uint8_t fg_color[4];
  ctx_color_get_rgba8 (rasterizer->state, &rasterizer->state->gstate.source_fill.color,
fg_color);

  int blit_stride = rasterizer->blit_stride;
  int pitch = rasterizer->format->bpp / 8; /* bytes per pixel */

  uint8_t *dst = ( (uint8_t *) rasterizer->buf) + y * blit_stride + x * pitch;
  rasterizer->apply_coverage (1, dst, rasterizer->color, &cov, rasterizer, x);
}


/* Draws one anti-aliased, one pixel wide line segment from (x0,y0) to (x1,y1)
 * (device coordinates) directly into the target buffer.
 *
 * The segment is walked one pixel at a time along its major axis; the minor
 * coordinate is tracked in 16.16 fixed point and its fractional part is used
 * to split the coverage between the two pixels straddling the line.
 *
 * Each direction proceeds in phases:
 *   1. skip steps before the target along the major axis,
 *   2. skip steps outside the target along the minor axis,
 *   3. draw bounds-checked (ctx_rasterizer_pset) while touching the edge,
 *   4. draw without per-pixel checks while both pixels are inside,
 *   5. draw one final bounds-checked pixel pair.
 */
static inline void
ctx_rasterizer_stroke_1px_segment (CtxRasterizer *rasterizer,
                                   float x0, float y0,
                                   float x1, float y1)
{
  ctx_apply_coverage_fun apply_coverage = rasterizer->apply_coverage;
  uint8_t *rasterizer_src = rasterizer->color;
  int pitch = rasterizer->format->bpp / 8;
  int blit_stride = rasterizer->blit_stride;

  /* make coordinates relative to the blit origin */
  x0 -= rasterizer->blit_x;
  x1 -= rasterizer->blit_x;
  y0 -= rasterizer->blit_y;
  y1 -= rasterizer->blit_y;

  x1 += 0.5f;
  x0 += 0.5f;

  y1 += 0.5f;
  y0 += 0.5f;

  float dxf = (x1 - x0);
  float dyf = (y1 - y0);
  int tx = (int)((x0)* 65536);
  int ty = (int)((y0)* 65536);

  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;

  if (dxf*dxf>dyf*dyf)
  {
    /* mostly horizontal: step in x, interpolate y */
    int length = abs((int)dxf);
    /* a segment shorter than one pixel has no steps; avoid dividing by zero
     * (and converting the resulting inf/NaN to int) */
    int dy = length ? (int)((dyf * 65536)/(length)) : 0;
    int x = tx >> 16;

    if (dxf < 0.0f)
    {
      /* always walk towards increasing x: start from the other end */
      ty = (int)((y1)* 65536);
      x = (int)x1; 
      dy *= -1;
    }
    int i = 0;
    int sblit_height = blit_height << 16;

    for (; (i < length) & (x < 0); ++i, ++x, ty += dy);
    for (; (i < length) & (x < blit_width) & ((ty<0) | (ty>=sblit_height+1))
         ; ++i, ++x, ty += dy);

    for (; i < length && x < blit_width && (ty<65536 || (ty>=sblit_height))
         ; ++i, ++x, ty += dy)
    {
      int y = ty>>16;
      int ypos = (ty >> 8) & 0xff;

      ctx_rasterizer_pset (rasterizer, x, y-1, 255-ypos);
      ctx_rasterizer_pset (rasterizer, x, y, ypos);
    }

      {
       /* rows (ty>>16)-1 and (ty>>16) are both inside the target here;
        * ty == 65536 belongs to this range, otherwise neither this loop nor
        * the one above would run and the rest of the line would be lost */
       for (; (i < length) & (x < blit_width) & ((ty>=65536) & (ty<sblit_height))
            ; ++i, ++x, ty += dy)
       {
         uint8_t *dst = ( (uint8_t *) rasterizer->buf)
                        + ((ty>>16)-1) * blit_stride + x * pitch;
         uint8_t ypos = (ty >> 8) & 0xff;
         uint8_t rcov=255-ypos;
         apply_coverage (1, dst, rasterizer_src, &rcov, rasterizer, x);
         dst += blit_stride;
         apply_coverage (1, dst, rasterizer_src, &ypos, rasterizer, x);
       }
      }

    { /* one more pixel pair at the position the walk stopped at */
      int y = ty>>16;
      int ypos = (ty >> 8) & 0xff;
      ctx_rasterizer_pset (rasterizer, x, y-1, 255-ypos);
      ctx_rasterizer_pset (rasterizer, x, y, ypos);
    }

  }
  else
  {
    /* mostly vertical: step in y, interpolate x */
    int length = abs((int)dyf);
    /* see above: no steps, no division */
    int dx = length ? (int)((dxf * 65536)/(length)) : 0;
    int y = ty >> 16;

    if (dyf < 0.0f)
    {
      /* always walk towards increasing y: start from the other end */
      tx = (int)((x1)* 65536);
      y = (int)y1; 
      dx *= -1;
    }
    int i = 0;

    int sblit_width = blit_width << 16;

    for (; (i < length) & (y < 0); ++i, ++y, tx += dx);

    for (; (i < length) & (y < blit_height) & ((tx<0) | (tx>=sblit_width+1))
         ; ++i, ++y, tx += dx);
    for (; (i < length) & (y < blit_height) & ((tx<65536) | (tx>=sblit_width))
         ; ++i, ++y, tx += dx)
    {
      int x = tx>>16;
      int xpos = (tx >> 8) & 0xff;
      ctx_rasterizer_pset (rasterizer, x-1, y, 255-xpos);
      ctx_rasterizer_pset (rasterizer, x, y, xpos);
    }

      {
       /* columns (tx>>16)-1 and (tx>>16) are both inside the target here;
        * tx == 65536 is included for the same reason as in the x-major case */
       for (; (i < length) & (y < blit_height) & ((tx>=65536) & (tx<sblit_width))
            ; ++i, ++y, tx += dx)
       {
         int x = tx>>16;
         uint8_t *dst = ( (uint8_t *) rasterizer->buf)
                       + y * blit_stride + (x-1) * pitch;
         int xpos = (tx >> 8) & 0xff;
         uint8_t cov[2]={255-xpos, xpos};
         apply_coverage (2, dst, rasterizer_src, cov, rasterizer, x);
       }
      }
    //for (; i <= length; ++i, ++y, tx += dx)
    { // better do one too many than one too little
      int x = tx>>16;
      int xpos = (tx >> 8) & 0xff;
      ctx_rasterizer_pset (rasterizer, x-1, y, 255-xpos);
      ctx_rasterizer_pset (rasterizer, x, y, xpos);
    }
  }
}

/* Strokes the current path with one pixel wide lines.
 *
 * Every segment in the edge list is drawn from the previous end point to its
 * own end point; a segment with code CTX_NEW_EDGE starts a new sub-path at
 * its (x0, y0).  Segment coordinates are converted from sub-pixel units by
 * dividing by CTX_SUBDIV / CTX_FULL_AA.  The rasterizer is reset afterwards.
 */
static inline void
ctx_rasterizer_stroke_1px (CtxRasterizer *rasterizer)
{
  CtxSegment *segments = (CtxSegment*)rasterizer->edge_list.entries;
  int n_segments = rasterizer->edge_list.count;
  float pen_x = 0.0f;
  float pen_y = 0.0f;

  for (int i = 0; i < n_segments; i++)
    {
      CtxSegment *segment = &segments[i];

      if (segment->code == CTX_NEW_EDGE)
        {
          /* start of a sub-path: jump to its first point */
          pen_x = segment->x0 * 1.0f / CTX_SUBDIV;
          pen_y = segment->y0 * 1.0f / CTX_FULL_AA;
        }

      float to_x = segment->x1 * 1.0f / CTX_SUBDIV;
      float to_y = segment->y1 * 1.0f / CTX_FULL_AA;

      ctx_rasterizer_stroke_1px_segment (rasterizer, pen_x, pen_y, to_x, to_y);
      pen_x = to_x;
      pen_y = to_y;
    }

  _ctx_rasterizer_reset (rasterizer);
}

#endif

/* segments shorter than this (in device pixels) do not contribute their own
 * outline pieces when a stroke outline is built */
#define CTX_MIN_STROKE_LEN  0.2f

/* Strokes the path currently accumulated in rasterizer->edge_list.
 *
 * The stroke is rendered by turning the path into an outline polygon in
 * device space (the transform is temporarily set to identity) and filling
 * that polygon with the winding fill rule:
 *   - each sub-path is walked forwards emitting one side of the outline and
 *     backwards emitting the other side,
 *   - line caps (square / round) and round joins are added as extra shapes.
 *
 * Shortcuts:
 *   - CTX_STROKE_1PX: line widths that scale to <= 0 (and > -10) are drawn
 *     with the one pixel line drawer on targets with at least 8 bpp,
 *   - CTX_FAST_STROKE_RECT: an unclipped, untextured 5-segment axis-aligned
 *     rectangle is composited directly.
 *
 * While stroking, the stroke source temporarily replaces the fill source
 * (unless the stroke source inherits the fill); it is restored, and the
 * cached compositing setup invalidated, on every exit that swapped it.
 * When rasterizer->preserve is set the original path is restored afterwards.
 */
static void
ctx_rasterizer_stroke (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxSource source_backup;
  int count = rasterizer->edge_list.count;
  if (count == 0)
    return;
  int preserved = rasterizer->preserve;
  float factor = ctx_matrix_get_scale (&gstate->transform);
  float line_width = gstate->line_width * factor;
  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
  {
    /* the compositing code reads source_fill; swap the stroke source in */
    source_backup = gstate->source_fill;
    gstate->source_fill = gstate->source_stroke;
  }

  rasterizer->comp_op = NULL;
  ctx_composite_setup (rasterizer);

#if CTX_STROKE_1PX
  if ((gstate->line_width * factor <= 0.0f) &
      (gstate->line_width * factor > -10.0f) &
      (rasterizer->format->bpp >= 8))
  {
    ctx_rasterizer_stroke_1px (rasterizer);
    if (preserved)
    {
      rasterizer->preserve = 0;
    }
    else
    {
      rasterizer->edge_list.count = 0;
    }
    if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
      {
        /* same clean-up as the regular exit: the compositing setup was made
         * for the stroke source and must not be reused for the fill source */
        gstate->source_fill = source_backup;
        rasterizer->comp_op = NULL;
        rasterizer->fragment = NULL;
      }

    return;
  }
#endif

  CtxSegment temp[count]; /* copy of already built up path's poly line  */
  memcpy (temp, rasterizer->edge_list.entries, sizeof (temp) );
#if CTX_FAST_FILL_RECT
#if CTX_FAST_STROKE_RECT
  if (rasterizer->edge_list.count == 5)
    {
      CtxSegment *entry0 = &((CtxSegment*)rasterizer->edge_list.entries)[0];
      CtxSegment *entry1 = &((CtxSegment*)rasterizer->edge_list.entries)[1];
      CtxSegment *entry2 = &((CtxSegment*)rasterizer->edge_list.entries)[2];
      CtxSegment *entry3 = &((CtxSegment*)rasterizer->edge_list.entries)[3];

      /* unclipped axis-aligned rectangle with a non-texture source */
      if (!rasterizer->state->gstate.clipped &
          (entry0->x1 == entry1->x1) &
          (entry0->y1 == entry3->y1) &
          (entry1->y1 == entry2->y1) &
          (entry2->x1 == entry3->x1)
#if CTX_ENABLE_SHADOW_BLUR
           & (!rasterizer->in_shadow)
#endif
           & (rasterizer->state->gstate.source_fill.type != CTX_SOURCE_TEXTURE)
         )
       {
        float x0 = entry3->x1 * 1.0f / CTX_SUBDIV;
        float y0 = entry3->y1 * 1.0f / CTX_FULL_AA;
        float x1 = entry1->x1 * 1.0f / CTX_SUBDIV;
        float y1 = entry1->y1 * 1.0f / CTX_FULL_AA;

        ctx_composite_stroke_rect (rasterizer, x0, y0, x1, y1, line_width);

        goto done;
       }
    }
#endif
#endif
  
    {
    {
      _ctx_rasterizer_reset (rasterizer); /* then start afresh with our stroked shape  */
      /* temp already holds device coordinates, so the outline is built with
       * an identity transform; the real one is restored before returning */
      CtxMatrix transform_backup = gstate->transform;
      _ctx_matrix_identity (&gstate->transform);
      _ctx_transform_prime (rasterizer->state);
      float prev_x = 0.0f;
      float prev_y = 0.0f;
      float half_width_x = line_width/2;
      float half_width_y = half_width_x;

      if (CTX_UNLIKELY(line_width <= 0.0f))
        { // makes negative width be 1px in user-space; hairline
          half_width_x = .5f;
          half_width_y = .5f;
        }
      int start = 0;
      int end   = 0;
      /* one iteration per sub-path: [start, end] is its segment range */
      while (start < count)
        {
          int started = 0;
          int i;
          /* forward pass: emit one side of the outline */
          for (i = start; i < count; i++)
            {
              CtxSegment *segment= &temp[i];
              float x, y;
              if (segment->code == CTX_NEW_EDGE)
                {
                  if (CTX_LIKELY(started))
                    {
                      /* next sub-path begins here */
                      end = i - 1;
                      goto foo;
                    }
                  prev_x = segment->x0 * 1.0f / CTX_SUBDIV;
                  prev_y = segment->y0 * 1.0f / CTX_FULL_AA;
                  started = 1;
                  start = i;
                }
              x = segment->x1 * 1.0f / CTX_SUBDIV;
              y = segment->y1 * 1.0f/ CTX_FULL_AA;
              float dx = x - prev_x;
              float dy = y - prev_y;
              float length = ctx_hypotf (dx, dy);
              if ((length>CTX_MIN_STROKE_LEN) | (segment->code == CTX_NEW_EDGE))
                {
                  /* (dx,dy) becomes the segment direction scaled to half the
                   * line width; (-dy,dx) is the offset to the outline */
                  float recip_length = 1.0f/length;
                  dx = dx * recip_length * half_width_x;
                  dy = dy * recip_length * half_width_y;
                  if (segment->code == CTX_NEW_EDGE)
                    {
                      _ctx_rasterizer_close_path (rasterizer);
                      _ctx_rasterizer_move_to (rasterizer, prev_x+dy, prev_y-dx);
                    }
                  _ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
                  
                  _ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
                }
                  prev_x = x;
                  prev_y = y;
            }
          end = i-1;
foo:
          /* backward pass: emit the other side of the outline */
          for (int i = end; i >= start; i--)
            {
              CtxSegment *segment = &temp[i];
              float x, y, dx, dy;
              x = segment->x1 * 1.0f / CTX_SUBDIV;
              y = segment->y1 * 1.0f / CTX_FULL_AA;
              dx = x - prev_x;
              dy = y - prev_y;
              float length = ctx_hypotf (dx, dy);
              if (length>CTX_MIN_STROKE_LEN)
                {
                  float recip_length = 1.0f/length;
                  dx = dx * recip_length * half_width_x;
                  dy = dy * recip_length * half_width_y;
                  _ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);

                  // XXX possible miter line-to
             //   ctx_rasterizer_line_to (rasterizer, prev_x-dy+10, prev_y+dx+10);
                  ctx_rasterizer_line_to (rasterizer, x-dy,      y+dx);
                  prev_x = x;
                  prev_y = y;
                }
              if (CTX_UNLIKELY(segment->code == CTX_NEW_EDGE))
                {
                  /* back at the first segment: also return to its start */
                  x = segment->x0 * 1.0f / CTX_SUBDIV;
                  y = segment->y0 * 1.0f / CTX_FULL_AA;
                  dx = x - prev_x;
                  dy = y - prev_y;
                  length = ctx_hypotf (dx, dy);
                  if (CTX_LIKELY(length>CTX_MIN_STROKE_LEN))
                    {
                      float recip_length = 1.0f/length;
                      dx = dx * recip_length * half_width_x;
                      dy = dy * recip_length * half_width_y;
                      _ctx_rasterizer_line_to (rasterizer, prev_x-dy, prev_y+dx);
                      _ctx_rasterizer_line_to (rasterizer, x-dy, y+dx);
                    }
                  prev_x = x;
                  prev_y = y;
                }
            }
          start = end+1;
        }
      _ctx_rasterizer_close_path (rasterizer);
      switch (gstate->line_cap)
        {
          case CTX_CAP_SQUARE: // XXX: incorrect - if rectangles were in
                               //                  reverse order - rotation would be off
                               //                  better implement correct here
            {
              float x = 0, y = 0;
              int has_prev = 0;
              for (int i = 0; i < count; i++)
                {
                  CtxSegment *segment = &temp[i];
                  if (CTX_UNLIKELY(segment->code == CTX_NEW_EDGE))
                    {
                      if (has_prev)
                        {
                          /* end cap of the previous sub-path: a full
                           * line-width square centred on its last point,
                           * like the start cap and the final end cap */
                          ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, half_width_y * 2);
                          _ctx_rasterizer_close_path (rasterizer);
                        }
                      /* start cap of this sub-path */
                      x = segment->x0 * 1.0f / CTX_SUBDIV;
                      y = segment->y0 * 1.0f / CTX_FULL_AA;
                      ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, half_width_y * 2);
                      _ctx_rasterizer_close_path (rasterizer);
                    }
                  x = segment->x1 * 1.0f / CTX_SUBDIV;
                  y = segment->y1 * 1.0f / CTX_FULL_AA;
                  has_prev = 1;
                }
              /* end cap of the last sub-path */
              ctx_rasterizer_rectangle_reverse (rasterizer, x - half_width_x, y - half_width_y, half_width_x * 2, half_width_y * 2);
              _ctx_rasterizer_close_path (rasterizer);
            }
            break;
          case CTX_CAP_NONE: /* nothing to do */
            break;
          case CTX_CAP_ROUND:
            {
              float x = 0, y = 0;
              int has_prev = 0;
              for (int i = 0; i < count; i++)
                {
                  CtxSegment *segment = &temp[i];
                  if (CTX_UNLIKELY(segment->code == CTX_NEW_EDGE))
                    {
                      if (has_prev)
                        {
                          /* end cap of the previous sub-path */
                          ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*3, 0, 1);
                          _ctx_rasterizer_close_path (rasterizer);
                        }
                      /* start cap of this sub-path */
                      x = segment->x0 * 1.0f / CTX_SUBDIV;
                      y = segment->y0 * 1.0f / CTX_FULL_AA;
                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
                      _ctx_rasterizer_close_path (rasterizer);
                    }
                  x = segment->x1 * 1.0f / CTX_SUBDIV;
                  y = segment->y1 * 1.0f / CTX_FULL_AA;
                  has_prev = 1;
                }
              /* end cap of the last sub-path */
              _ctx_rasterizer_move_to (rasterizer, x, y);
              ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
              _ctx_rasterizer_close_path (rasterizer);
              break;
            }
        }
      switch (gstate->line_join)
        {
          case CTX_JOIN_BEVEL:
          case CTX_JOIN_MITER:
            break;
          case CTX_JOIN_ROUND:
            {
              /* a disc at every vertex that is followed by another segment
               * of the same sub-path */
              float x = 0, y = 0;
              for (int i = 0; i < count-1; i++)
                {
                  CtxSegment *segment = &temp[i];
                  x = segment->x1 * 1.0f / CTX_SUBDIV;
                  y = segment->y1 * 1.0f / CTX_FULL_AA;
                  if (CTX_UNLIKELY(segment[1].code == CTX_EDGE))
                    {
                      ctx_rasterizer_arc (rasterizer, x, y, half_width_x, CTX_PI*2, 0, 1);
                      _ctx_rasterizer_close_path (rasterizer);
                    }
                }
              break;
            }
        }
      /* fill the outline with the winding rule so that overlapping pieces
       * do not cancel each other out */
      CtxFillRule rule_backup = gstate->fill_rule;
      gstate->fill_rule = CTX_FILL_RULE_WINDING;
      rasterizer->preserve = 0; // so fill isn't tripped
      int aa = rasterizer->aa;
      rasterizer->aa = 3 + (aa>5)*2;
      ctx_rasterizer_fill (rasterizer);
      rasterizer->aa = aa;
      gstate->fill_rule = rule_backup;
      gstate->transform = transform_backup;
      _ctx_transform_prime (rasterizer->state);
    }
  }
#if CTX_FAST_FILL_RECT
#if CTX_FAST_STROKE_RECT
  done:
#endif
#endif
  if (preserved)
    {
      /* put the original (unstroked) path back */
      memcpy (rasterizer->edge_list.entries, temp, sizeof (temp) );
      rasterizer->edge_list.count = count;
      rasterizer->preserve = 0;
    }
  if (gstate->source_stroke.type != CTX_SOURCE_INHERIT_FILL)
  {
    /* undo the source swap and invalidate the compositing setup made for it */
    gstate->source_fill = source_backup;
    rasterizer->comp_op = NULL;
    rasterizer->fragment = NULL;
  }
}

/* Pixel format used for clip buffers: CTX_FORMAT_GRAY1 when CTX_1BIT_CLIP is
 * enabled, CTX_FORMAT_GRAY8 otherwise. */
#if CTX_1BIT_CLIP
#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY1
#else
#define CTX_CLIP_FORMAT CTX_FORMAT_GRAY8
#endif


/* Removes any clipping: destroys the clip buffer (when clipping support is
 * compiled in) and sets the clip rectangle in the graphics state to the full
 * blit rectangle, with inclusive maximum coordinates. */
static void
ctx_rasterizer_clip_reset (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  int left   = rasterizer->blit_x;
  int top    = rasterizer->blit_y;

#if CTX_ENABLE_CLIP
  if (rasterizer->clip_buffer)
    {
      ctx_buffer_destroy (rasterizer->clip_buffer);
    }
  rasterizer->clip_buffer = NULL;
#endif

  gstate->clip_min_x = left;
  gstate->clip_min_y = top;
  gstate->clip_max_x = left + rasterizer->blit_width - 1;
  gstate->clip_max_y = top + rasterizer->blit_height - 1;
}

/* Intersect the current clip of the rasterizer with the path described by
 * `edges`.
 *
 * edges[0] is a header entry whose u32[0] holds the number of segments;
 * the segments themselves start at edges[1].
 *
 * In every configuration the bounding box of the path is intersected with
 * gstate->clip_{min,max}_{x,y}.  When CTX_ENABLE_CLIP is set the function
 * additionally maintains rasterizer->clip_buffer (a coverage mask of
 * blit_width x blit_height in CTX_CLIP_FORMAT) and the
 * rasterizer->clip_rectangle flag.
 */
static void
ctx_rasterizer_clip_apply (CtxRasterizer *rasterizer,
                           CtxSegment    *edges)
{
  unsigned int count = edges[0].u32[0];
  CtxGState *gstate = &rasterizer->state->gstate;

  /* bounding box accumulators, seeded with sentinels that any real
   * coordinate is expected to replace */
  int minx = 5000;
  int miny = 5000;
  int maxx = -5000;
  int maxy = -5000;
  int prev_x = 0;
  int prev_y = 0;
#if CTX_ENABLE_CLIP
  int blit_width = rasterizer->blit_width;
  int blit_height = rasterizer->blit_height;
#endif

  /* end points of the first (up to) six segments, used by the rectangle
   * test below */
  float coords[6][2];

  /* pass 1: compute the bounding box; x is divided by CTX_SUBDIV and y by
   * CTX_FULL_AA to leave the rasterizer's internal coordinates */
  for (unsigned int i = 0; i < count; i++)
    {
      CtxSegment *segment = &edges[i+1];
      float x, y;
      if (segment->code == CTX_NEW_EDGE)
        {
          /* a new sub-path: its start point also contributes */
          prev_x = segment->x0 / CTX_SUBDIV;
          prev_y = segment->y0 / CTX_FULL_AA;
          if (prev_x < minx) { minx = prev_x; }
          if (prev_y < miny) { miny = prev_y; }
          if (prev_x > maxx) { maxx = prev_x; }
          if (prev_y > maxy) { maxy = prev_y; }
        }
      x = segment->x1 * 1.0f / CTX_SUBDIV;
      y = segment->y1 * 1.0f / CTX_FULL_AA;
      /* compared as float, stored truncated towards zero */
      if (x < minx) { minx = (int)x; }
      if (y < miny) { miny = (int)y; }
      if (x > maxx) { maxx = (int)x; }
      if (y > maxy) { maxy = (int)y; }

      if (i < 6)
      {
        coords[i][0] = x;
        coords[i][1] = y;
      }
    }

#if CTX_ENABLE_CLIP

  /* fast path: while the clip so far is a plain rectangle (or there is no
   * clip buffer at all), a five segment path whose first four end points
   * pair up on shared x / shared y coordinates is handled purely by
   * shrinking the clip bounds - no mask is rendered */
  if (((rasterizer->clip_rectangle==1) | (!rasterizer->clip_buffer))
      )
  {
    if (count == 5)
    {
      if ((coords[0][0] == coords[1][0]) &
          (coords[0][1] == coords[3][1]) &
          (coords[1][1] == coords[2][1]) &
          (coords[2][0] == coords[3][0])
          )
      {
#if 0
        printf ("%d,%d %dx%d\n", minx, miny,
                                       maxx-minx+1, maxy-miny+1);
#endif

         gstate->clip_min_x =
            ctx_maxi (minx, gstate->clip_min_x);
         gstate->clip_min_y =
            ctx_maxi (miny, gstate->clip_min_y);
         gstate->clip_max_x =
            ctx_mini (maxx, gstate->clip_max_x);
         gstate->clip_max_y =
            ctx_mini (maxy, gstate->clip_max_y);

         rasterizer->clip_rectangle = 1;

#if 0
         if (!rasterizer->clip_buffer)
           rasterizer->clip_buffer = ctx_buffer_new (blit_width,
                                                     blit_height,
                                                     CTX_CLIP_FORMAT);

         memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
         int i = 0;
         for (int y = gstate->clip_min_y;
                  y <= gstate->clip_max_y;
                  y++)
         for (int x = gstate->clip_min_x;
                  x <= gstate->clip_max_x;
                  x++, i++)
         {
           ((uint8_t*)(rasterizer->clip_buffer->data))[i] = 255;
         }
#endif

         return;
      }
#if 0
      else
      {
        printf ("%d,%d %dx%d  0,0:%.2f 0,1:%.2f 1,0:%.2f 11:%.2f 20:%.2f 21:%2.f 30:%.2f 31:%.2f 40:%.2f 41:%.2f\n", minx, miny,
                                       maxx-minx+1, maxy-miny+1
                                       
         ,coords[0][0] ,  coords[0][1]
         ,coords[1][0] ,  coords[1][1]
         ,coords[2][0] ,  coords[2][1]
         ,coords[3][0] ,  coords[3][1]
         ,coords[4][0] ,  coords[4][1]
         );
      }
#endif
    }
  }
  rasterizer->clip_rectangle = 0;

  /* a path with a zero-extent bounding box, or fewer than two segments,
   * resets the clip instead of applying one */
  if ((minx == maxx) | (miny == maxy) || count < 2) // XXX : reset hack
  {
    ctx_rasterizer_clip_reset (rasterizer);
    return;
  }

  /* we_made_it records whether clip_buffer below is the rasterizer's own
   * (newly created) buffer or a temporary that must be destroyed again */
  int we_made_it = 0;
  CtxBuffer *clip_buffer;

  if (!rasterizer->clip_buffer)
  {
    rasterizer->clip_buffer = ctx_buffer_new (blit_width,
                                              blit_height,
                                              CTX_CLIP_FORMAT);
    clip_buffer = rasterizer->clip_buffer;
    we_made_it = 1;
    if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height/8);
    else
      memset (rasterizer->clip_buffer->data, 0, blit_width * blit_height);
  }
  else
  {
    clip_buffer = ctx_buffer_new (blit_width, blit_height,
                                  CTX_CLIP_FORMAT);
  }

  /* render the path, filled with gray 1.0, into clip_buffer using a
   * temporary framebuffer context */
  {

  float prev_x = 0;
  float prev_y = 0;

    Ctx *ctx = ctx_new_for_framebuffer (clip_buffer->data, blit_width, blit_height,
       blit_width,
       CTX_CLIP_FORMAT);

  for (unsigned int i = 0; i < count; i++)
    {
      CtxSegment *segment = &edges[i+1];
      float x, y;
      if (segment->code == CTX_NEW_EDGE)
        {
          prev_x = segment->x0 * 1.0f / CTX_SUBDIV;
          prev_y = segment->y0 * 1.0f / CTX_FULL_AA;
          ctx_move_to (ctx, prev_x, prev_y);
        }
      x = segment->x1 * 1.0f / CTX_SUBDIV;
      y = segment->y1 * 1.0f / CTX_FULL_AA;
      ctx_line_to (ctx, x, y);
    }
    ctx_gray (ctx, 1.0f);
    ctx_fill (ctx);
    ctx_destroy (ctx);
  }

  int maybe_rect = 1;
  rasterizer->clip_rectangle = 0;

  if (CTX_CLIP_FORMAT == CTX_FORMAT_GRAY1)
  {
    /* 1 bit per pixel: combine by AND-ing whole bytes into the
     * rasterizer's clip buffer; note that this `count` shadows the
     * segment count of the enclosing scope */
    unsigned int count = blit_width * blit_height / 8;
    for (unsigned int i = 0; i < count; i++)
    {
      ((uint8_t*)rasterizer->clip_buffer->data)[i] =
      (((uint8_t*)rasterizer->clip_buffer->data)[i] &
      ((uint8_t*)clip_buffer->data)[i]);
    }
  }
  else
  {
    /* one byte per pixel: multiply the coverages, and while doing so scan
     * the result in stages to find out whether it is a single solid
     * rectangle (only values 0 and 255, one rectangular block of 255).
     *
     * NOTE(review): the products are stored into `data`, i.e. the freshly
     * rendered buffer, not into p_data; when that buffer is a temporary it
     * is destroyed further down - confirm this is the intended target. */
    int count = blit_width * blit_height;


    int i;
    int x0 = 0;
    int y0 = 0;
    int width = -1;
    int next_stage = 0;
    uint8_t *p_data = (uint8_t*)rasterizer->clip_buffer->data;
    uint8_t *data = (uint8_t*)clip_buffer->data;

    i=0;
    /* find upper left */
    for (; (i < count) & maybe_rect & (!next_stage); i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;
      switch (val)
      {
        case 255:
          x0 = i % blit_width;
          y0 = i / blit_width;
          next_stage = 1;
          break;
        case 0: break;
        default:
          /* partial coverage: cannot be a plain rectangle */
          maybe_rect = 0;
          break;
      }
    }

    next_stage = 0;
    /* figure out width: walk the remainder of row y0 */
    for (; (i < count) & (!next_stage) & maybe_rect; i++)
    {
      int x = i % blit_width;
      int y = i / blit_width;
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (y == y0)
      {
        switch (val)
        {
          case 255:
            width = x - x0 + 1;
            break;
          case 0:
            next_stage = 1;
            break;
          default:
            maybe_rect = 0;
            break;
        }
        /* the last column of the row also ends this stage */
        if (x % blit_width == blit_width - 1) next_stage = 1;
      }
      else next_stage = 1;
    }

    next_stage = 0;
    /* body: columns outside [x0, x0+width) must be 0; inside, a 0 ends the
     * stage while any value other than 0 or 255 rules out a rectangle */
    for (; (i < count) & maybe_rect & (!next_stage); i++)
    {
      int x = i % blit_width;
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (x < x0)
      {
        if (val != 0){ maybe_rect = 0; next_stage = 1; }
      } else if (x < x0 + width)
      {
        if (val != 255){ if (val != 0) maybe_rect = 0; next_stage = 1; }
      } else {
        if (val != 0){ maybe_rect = 0; next_stage = 1; }
      }
    }

    next_stage = 0;
    /* foot: everything after the body must be 0 */
    for (; (i < count) & maybe_rect & (!next_stage); i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;

      if (val != 0){ maybe_rect = 0; next_stage = 1; }
    }


    /* finish the multiplication for whatever the staged scan did not
     * reach */
    for (; i < count; i++)
    {
      uint8_t val = (p_data[i] * data[i])/255;
      data[i] = val;
    }

    if (maybe_rect)
       rasterizer->clip_rectangle = 1;
  }
  /* a temporary buffer is no longer needed once it has been combined */
  if (!we_made_it)
   ctx_buffer_destroy (clip_buffer);
#else
  /* reference coords so that it does not count as unused when clip
   * support is compiled out */
  if (coords[0][0]){};
#endif
  
  /* in all cases: shrink the clip bounds to the bounding box of the path */
  gstate->clip_min_x = ctx_maxi (minx,
                                         gstate->clip_min_x);
  gstate->clip_min_y = ctx_maxi (miny,
                                         gstate->clip_min_y);
  gstate->clip_max_x = ctx_mini (maxx,
                                         gstate->clip_max_x);
  gstate->clip_max_y = ctx_mini (maxy,
                                         gstate->clip_max_y);
}


/* Turn the path currently held in the edge list into a clip.
 *
 * A snapshot of the edge list, preceded by a header entry carrying the edge
 * count, is handed to ctx_state_set_blob under the SQZ_clip key and then
 * applied with ctx_rasterizer_clip_apply.  The rasterizer is reset
 * afterwards; if `preserve` was set the edge list is put back from the
 * snapshot and the flag is cleared.
 */
static void
_ctx_rasterizer_clip (CtxRasterizer *rasterizer)
{
  int         n_edges = rasterizer->edge_list.count;
  /* slot 0: header, slots 1..n_edges: copy of the poly line built so far */
  CtxSegment  snapshot[n_edges + 1];
  CtxSegment *saved_edges = &snapshot[1];
  size_t      edge_bytes  = sizeof (snapshot) - sizeof (snapshot[0]);
  CtxState   *state       = rasterizer->state;

  state->has_clipped    = 1;
  state->gstate.clipped = 1;

  memcpy (saved_edges, rasterizer->edge_list.entries, edge_bytes);
  snapshot[0].code   = CTX_NOP;
  snapshot[0].u32[0] = n_edges;
  ctx_state_set_blob (state, SQZ_clip, (char*)snapshot, sizeof (snapshot));

  ctx_rasterizer_clip_apply (rasterizer, snapshot);
  _ctx_rasterizer_reset (rasterizer);

  if (!rasterizer->preserve)
    return;

  /* the caller asked to keep the path: restore it after the reset */
  memcpy (rasterizer->edge_list.entries, saved_edges, edge_bytes);
  rasterizer->edge_list.count = n_edges;
  rasterizer->preserve = 0;
}

/* Externally visible entry point for clipping to the current path; forwards
 * to the file-local _ctx_rasterizer_clip. */
void
ctx_rasterizer_clip (CtxRasterizer *rasterizer)
{
  _ctx_rasterizer_clip (rasterizer);
}

#if 0
/* Disabled code: load a PNG from `path` into texture slot 0 of the context
 * and select that texture as source at (x, y). */
static void
ctx_rasterizer_load_image (CtxRasterizer *rasterizer,
                           const char  *path,
                           float x,
                           float y)
{
  // decode the PNG and store it in an image slot
  // (the code below uses index 0),
  // magic width height stride format data
  ctx_buffer_load_png (&rasterizer->backend.ctx->texture[0], path);
  ctx_rasterizer_set_texture (rasterizer, 0, x, y);
}
#endif

/* Append a closed rectangle to the current path, visiting the corners in
 * the opposite order of _ctx_rasterizer_rectangle: down, right, up and
 * finally back left to the starting corner before closing the path. */
static void
ctx_rasterizer_rectangle_reverse (CtxRasterizer *rasterizer,
                                  float x,
                                  float y,
                                  float width,
                                  float height)
{
  const float step[4][2] = {
    {   0.0f,  height },
    {  width,    0.0f },
    {   0.0f, -height },
    { -width,    0.0f }
  };

  _ctx_rasterizer_move_to (rasterizer, x, y);
  for (int i = 0; i < 4; i++)
    _ctx_rasterizer_rel_line_to (rasterizer, step[i][0], step[i][1]);
  _ctx_rasterizer_close_path (rasterizer);
}

/* Append a closed rectangle to the current path: start at (x, y), go right
 * by width, down by height, back left by width, then close the path (the
 * closing edge supplies the fourth side). */
static void
_ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
                          float x,
                          float y,
                          float width,
                          float height)
{
  const float side[3][2] = {
    {  width,   0.0f },
    {   0.0f, height },
    { -width,   0.0f }
  };

  _ctx_rasterizer_move_to (rasterizer, x, y);
  for (int i = 0; i < 3; i++)
    _ctx_rasterizer_rel_line_to (rasterizer, side[i][0], side[i][1]);
  _ctx_rasterizer_close_path (rasterizer);
}
/* Externally visible entry point for adding a rectangle to the current
 * path; forwards to the file-local _ctx_rasterizer_rectangle. */
void
ctx_rasterizer_rectangle (CtxRasterizer *rasterizer,
                          float x,
                          float y,
                          float width,
                          float height)
{
  _ctx_rasterizer_rectangle (rasterizer, x, y, width, height);
}                          

/* Paint a single pixel at (x, y) with the given 8 bit RGBA color.
 *
 * The fill source is switched to the solid color, the cached comp_op is
 * cleared, and a 1x1 rectangle is filled.  A direct pixel write through
 * ctx_rasterizer_pset (rasterizer, x, y, 255) is deliberately not used: it
 * doesn't take transforms into account and has received less testing than
 * the code paths that are part of the protocol, while using a rectangle
 * properly will trigger the fillrect fastpath.
 */
void
ctx_rasterizer_set_pixel (CtxRasterizer *rasterizer,
                          uint16_t x,
                          uint16_t y,
                          uint8_t r,
                          uint8_t g,
                          uint8_t b,
                          uint8_t a)
{
  CtxState *state = rasterizer->state;

  state->gstate.source_fill.type = CTX_SOURCE_COLOR;
  ctx_color_set_RGBA8 (state, &state->gstate.source_fill.color, r, g, b, a);
  rasterizer->comp_op = NULL;

  _ctx_rasterizer_rectangle (rasterizer, x, y, 1.0f, 1.0f);
  ctx_rasterizer_fill (rasterizer);
}

/* Append a rectangle with rounded corners to the current path.
 *
 * The corner radius is limited to half the width and half the height.  The
 * outline is built from four quarter arcs (top-right, bottom-right,
 * bottom-left, top-left) between two close_path calls.
 */
static void
_ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius)
{
  const float degrees = CTX_PI / 180.0f;
  const float half_w  = width * 0.5f;
  const float half_h  = height * 0.5f;
  float       radius  = corner_radius;

  if (radius > half_w) radius = half_w;
  if (radius > half_h) radius = half_h;

  const float left   = x + radius;
  const float right  = x + width - radius;
  const float top    = y + radius;
  const float bottom = y + height - radius;

  /* arc center x, center y, start angle, end angle */
  const float corner[4][4] = {
    { right, top,    -90 * degrees,   0 * degrees },
    { right, bottom,   0 * degrees,  90 * degrees },
    { left,  bottom,  90 * degrees, 180 * degrees },
    { left,  top,    180 * degrees, 270 * degrees }
  };

  _ctx_rasterizer_close_path (rasterizer);
  for (int i = 0; i < 4; i++)
    ctx_rasterizer_arc (rasterizer, corner[i][0], corner[i][1], radius,
                        corner[i][2], corner[i][3], 0);
  _ctx_rasterizer_close_path (rasterizer);
}

/* Externally visible entry point for adding a rounded rectangle to the
 * current path; forwards to the file-local
 * _ctx_rasterizer_round_rectangle. */
void
ctx_rasterizer_round_rectangle (CtxRasterizer *rasterizer, float x, float y, float width, float height, float corner_radius)
{
  _ctx_rasterizer_round_rectangle (rasterizer, x, y, width, height, corner_radius);
}


#if CTX_COMPOSITING_GROUPS
/* Begin a compositing group.
 *
 * A new buffer of blit_width x blit_height in the composite format of the
 * target is allocated in the first free slot of rasterizer->group[] and made
 * the current render target; a CTX_SAVE is then processed.  When the first
 * group is opened the original target is remembered in saved_buf.  If all
 * CTX_GROUP_MAX slots are in use the call does nothing.
 */
static void
ctx_rasterizer_start_group (CtxRasterizer *rasterizer) /* add a radius? */
{
  CtxEntry save_command = ctx_void(CTX_SAVE);
  // allocate buffer, and set it as temporary target
  int no;
  if (rasterizer->group[0] == NULL) // first group
  {
    rasterizer->saved_buf = rasterizer->buf;
  }
  /* find the first free slot; the bound is checked before the slot is read
   * so that group[CTX_GROUP_MAX] is never touched when all slots are taken */
  for (no = 0; no < CTX_GROUP_MAX && rasterizer->group[no]; no++);

  if (no >= CTX_GROUP_MAX)
     return;
  rasterizer->group[no] = ctx_buffer_new (rasterizer->blit_width,
                                          rasterizer->blit_height,
                                          rasterizer->format->composite_format);
  rasterizer->buf = rasterizer->group[no]->data;
  ctx_rasterizer_process (rasterizer->backend.ctx, (CtxCommand*)&save_command);
}

/* End the innermost compositing group and composite it onto its parent.
 *
 * The compositing mode, blend mode, extend mode and global alpha that are
 * active inside the group are captured, the state is restored and saved
 * again, and the captured values are re-applied.  The render target is
 * switched back to the parent group (or to saved_buf for the outermost
 * group), the group buffer is registered as texture ".ctx-group", and a
 * rectangle covering the blit area is filled with it.  Finally the group
 * buffer is destroyed and the state restored.  Does nothing when no group
 * is open.
 */
static void
ctx_rasterizer_end_group (CtxRasterizer *rasterizer)
{
  CtxGState *gstate = &rasterizer->state->gstate;
  CtxEntry restore_command = ctx_void(CTX_RESTORE);
  CtxEntry save_command = ctx_void(CTX_SAVE);
  int no = 0;
  /* locate the last used slot; the bound is checked before the slot is read
   * so that group[CTX_GROUP_MAX] is never touched when all slots are taken */
  for (no = 0; no < CTX_GROUP_MAX && rasterizer->group[no]; no++);
  no--;

  if (no < 0)
    return;

  Ctx *ctx = rasterizer->backend.ctx;

  // fetch compositing, blending, global alpha
  CtxCompositingMode comp = gstate->compositing_mode;
  CtxBlend blend = gstate->blend_mode;
  CtxExtend extend = gstate->extend;
  float global_alpha = gstate->global_alpha_f;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
  ctx_rasterizer_process (ctx, (CtxCommand*)&save_command);
  /* re-apply the values captured inside the group for the composite */
  CtxEntry set_state[4]=
  {
    ctx_u32 (CTX_COMPOSITING_MODE, comp,  0),
    ctx_u32 (CTX_BLEND_MODE,       blend, 0),
    ctx_u32 (CTX_EXTEND,          extend, 0),
    ctx_f  (CTX_GLOBAL_ALPHA,     global_alpha, 0.0)
  };
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[0]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[1]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[2]);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_state[3]);

  /* render into the parent target again */
  if (no == 0)
  {
    rasterizer->buf = rasterizer->saved_buf;
  }
  else
  {
    rasterizer->buf = rasterizer->group[no-1]->data;
  }
  // XXX use texture_source ?
   ctx_texture_init (ctx, ".ctx-group", 
                  rasterizer->blit_width, 
                  rasterizer->blit_height,
                                         
                  rasterizer->blit_width * rasterizer->format->bpp/8,
                  rasterizer->format->pixel_format,
                  NULL, // space
                  (uint8_t*)rasterizer->group[no]->data,
                  NULL, NULL);
  {
     /* select the group texture as source; the eid string is stored in the
      * continuation entries following the CTX_DATA entry */
     const char *eid = ".ctx-group";
     int   eid_len = ctx_strlen (eid);

     CtxEntry commands[4] =
      {
       ctx_f   (CTX_TEXTURE, rasterizer->blit_x, rasterizer->blit_y), 
       ctx_u32 (CTX_DATA, eid_len, eid_len/9+1),
       ctx_u32 (CTX_CONT, 0,0),
       ctx_u32 (CTX_CONT, 0,0)
      };
     memcpy( (char *) &commands[2].data.u8[0], eid, eid_len);
     ( (char *) (&commands[2].data.u8[0]) ) [eid_len]=0;

     ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  {
    /* a rectangle covering the whole blit area ... */
    CtxEntry commands[2]=
    {
      ctx_f (CTX_RECTANGLE, rasterizer->blit_x, rasterizer->blit_y),
      ctx_f (CTX_CONT,      rasterizer->blit_width, rasterizer->blit_height)
    };
    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  {
    /* ... filled with the group texture */
    CtxEntry commands[1] = { ctx_void (CTX_FILL) };
    ctx_rasterizer_process (ctx, (CtxCommand*)commands);
  }
  //ctx_texture_release (rasterizer->backend.ctx, ".ctx-group");
  ctx_buffer_destroy (rasterizer->group[no]);
  rasterizer->group[no] = 0;
  ctx_rasterizer_process (ctx, (CtxCommand*)&restore_command);
}
#endif

#if CTX_ENABLE_SHADOW_BLUR
/* Stroke the current path as a shadow.
 *
 * Between a CTX_SAVE and a CTX_RESTORE the color is set to the shadow color
 * (opaque black unless SQZ_shadowColor can be fetched), the feather values
 * are derived from the shadow offsets and blur multiplied by the scale of
 * the current transform, and the path is stroked with `preserve` and
 * `in_shadow` set.
 */
static void
ctx_rasterizer_shadow_stroke (CtxRasterizer *rasterizer)
{
  CtxEntry save    = ctx_void(CTX_SAVE);
  Ctx     *ctx     = rasterizer->backend.ctx;
  float    rgba[4] = {0, 0, 0, 1.0};
  CtxColor shadow_color;

  if (ctx_get_color (ctx, SQZ_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry set_color[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 0.0f)
  };
  CtxEntry restore = ctx_void(CTX_RESTORE);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_color[0]);
  rasterizer->in_shadow = 1;

  /* shadow geometry follows the scale of the current transform */
  CtxGState *gstate = &rasterizer->state->gstate;
  float      scale  = ctx_matrix_get_scale (&gstate->transform);
  rasterizer->feather_x = gstate->shadow_offset_x * scale;
  rasterizer->feather_y = gstate->shadow_offset_y * scale;
  rasterizer->feather   = gstate->shadow_blur * scale;

  rasterizer->preserve = 1;
  ctx_rasterizer_stroke (rasterizer);
  rasterizer->in_shadow = 0;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore);
}

/* Render `str` as a shadow at the current position.
 *
 * Between a CTX_SAVE and a CTX_RESTORE the shadow color is selected (opaque
 * black unless SQZ_shadowColor can be fetched), the pen is moved to the
 * position it had on entry, the feather values are derived from the shadow
 * settings multiplied by the scale of the current transform, and the text
 * is rendered with `in_shadow` set.  After the restore the pen is moved
 * back to the entry position once more.
 */
static void
ctx_rasterizer_shadow_text (CtxRasterizer *rasterizer, const char *str)
{
  const float start_x = rasterizer->state->x;
  const float start_y = rasterizer->state->y;
  CtxEntry    save    = ctx_void(CTX_SAVE);
  Ctx        *ctx     = rasterizer->backend.ctx;
  float       rgba[4] = {0, 0, 0, 1.0};
  CtxColor    shadow_color;

  if (ctx_get_color (ctx, SQZ_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry set_color[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 0)
  };
  CtxEntry move_to[1] =
  {
    ctx_f (CTX_MOVE_TO, start_x, start_y),
  };
  CtxEntry restore = ctx_void(CTX_RESTORE);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save);

  move_to[0].data.f[0]   = start_x;
  move_to[0].data.f[1]   = start_y;
  set_color[2].data.f[0] = rgba[3];
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_color);
  ctx_rasterizer_process (ctx, (CtxCommand*)&move_to);
  rasterizer->in_shadow = 1;

  /* shadow geometry follows the scale of the current transform */
  CtxGState *gstate = &rasterizer->state->gstate;
  float      scale  = ctx_matrix_get_scale (&gstate->transform);
  rasterizer->feather_x = gstate->shadow_offset_x * scale;
  rasterizer->feather_y = gstate->shadow_offset_y * scale;
  rasterizer->feather   = gstate->shadow_blur * scale;

  ctx_rasterizer_text (rasterizer, str, 0);
  rasterizer->in_shadow = 0;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore);

  /* put the pen back where it was on entry */
  move_to[0].data.f[0] = start_x;
  move_to[0].data.f[1] = start_y;
  ctx_rasterizer_process (ctx, (CtxCommand*)&move_to);
}

/* Fill the current path as a shadow.
 *
 * Between a CTX_SAVE and a CTX_RESTORE the shadow color is selected (opaque
 * black unless SQZ_shadowColor can be fetched) and the path is filled with
 * `preserve` and `in_shadow` set, followed by a soft reset.  The feather
 * values are the shadow settings multiplied by the scale of the current
 * transform; while inside text rendering that factor is additionally
 * divided by font_size / CTX_BAKE_FONT_SIZE.  `in_shadow` is only cleared
 * again when not inside text rendering.
 */
static void
ctx_rasterizer_shadow_fill (CtxRasterizer *rasterizer)
{
  Ctx     *ctx     = rasterizer->backend.ctx;
  CtxEntry save    = ctx_void(CTX_SAVE);
  float    rgba[4] = {0, 0, 0, 1.0};
  CtxColor shadow_color;

  if (ctx_get_color (ctx, SQZ_shadowColor, &shadow_color) == 0)
    ctx_color_get_rgba (rasterizer->state, &shadow_color, rgba);

  CtxEntry set_color[3] =
  {
    ctx_f (CTX_COLOR, CTX_RGBA, rgba[0]),
    ctx_f (CTX_CONT, rgba[1], rgba[2]),
    ctx_f (CTX_CONT, rgba[3], 1.0f)
  };
  CtxEntry restore = ctx_void(CTX_RESTORE);

  ctx_rasterizer_process (ctx, (CtxCommand*)&save);
  ctx_rasterizer_process (ctx, (CtxCommand*)&set_color);

  rasterizer->preserve  = 1;
  rasterizer->in_shadow = 1;

  CtxGState *gstate = &rasterizer->state->gstate;
  float      scale  = ctx_matrix_get_scale (&gstate->transform);
  if (rasterizer->in_text)
    scale /= gstate->font_size / CTX_BAKE_FONT_SIZE;
  rasterizer->feather_x = gstate->shadow_offset_x * scale;
  rasterizer->feather_y = gstate->shadow_offset_y * scale;
  rasterizer->feather   = gstate->shadow_blur * scale;

  ctx_rasterizer_fill (rasterizer);
  ctx_rasterizer_reset_soft (rasterizer);

  if (!rasterizer->in_text)
    rasterizer->in_shadow = 0;

  ctx_rasterizer_process (ctx, (CtxCommand*)&restore);
}
#endif

/* Set the dash pattern of the graphics state.
 *
 * A NULL `dashes` disables dashing.  Otherwise at most CTX_MAX_DASHES
 * entries are stored, and every entry smaller than 0.0001 is raised to
 * 0.0001 as protection against hanging in the dash loop.
 */
static void
ctx_rasterizer_line_dash (CtxRasterizer *rasterizer, unsigned int count, const float *dashes)
{
  CtxGState *gstate = &rasterizer->state->gstate;

  if (dashes == NULL)
  {
    gstate->n_dashes = 0;
    return;
  }

  count = CTX_MIN(count, CTX_MAX_DASHES);
  gstate->n_dashes = count;

  /* copy and clamp in one go */
  for (unsigned int i = 0; i < count; i++)
  {
    float len = dashes[i];
    gstate->dashes[i] = (len < 0.0001f) ? 0.0001f : len; // hang protection
  }
}


void
ctx_rasterizer_process (Ctx *ctx, const CtxCommand *c)
{
  const CtxEntry *entry      = &c->entry;
  CtxRasterizer  *rasterizer = (CtxRasterizer *) ctx->backend;
  CtxState       *state      = rasterizer->state;
  int             clear_clip = 0;

  switch (c->code)
    {
      case CTX_LINE_HEIGHT:
      case CTX_WRAP_LEFT:
      case CTX_WRAP_RIGHT:
      case CTX_LINE_DASH_OFFSET:
      case CTX_STROKE_POS:
      case CTX_FEATHER:
      case CTX_LINE_WIDTH:
      case CTX_SHADOW_BLUR:
      case CTX_SHADOW_OFFSET_X:
      case CTX_SHADOW_OFFSET_Y:
      case CTX_LINE_CAP:
      case CTX_FILL_RULE:
      case CTX_LINE_JOIN:
      case CTX_TEXT_ALIGN:
      case CTX_TEXT_BASELINE:
      case CTX_TEXT_DIRECTION:
      case CTX_GLOBAL_ALPHA:
      case CTX_FONT_SIZE:
      case CTX_MITER_LIMIT:
      case CTX_COLOR_SPACE:
      case CTX_STROKE_SOURCE:
        ctx_interpret_style (state, entry, NULL);
        break;
#if CTX_ENABLE_SHADOW_BLUR
      case CTX_SHADOW_COLOR:
        {
          CtxColor  col;
          CtxColor *color = &col;
          //state->gstate.source_fill.type = CTX_SOURCE_COLOR;
          switch ((int)c->rgba.model)
            {
              case CTX_RGB:
                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, 1.0f);
                break;
              case CTX_RGBA:
                //ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                ctx_color_set_rgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                break;
              case CTX_DRGBA:
                ctx_color_set_drgba (state, color, c->rgba.r, c->rgba.g, c->rgba.b, c->rgba.a);
                break;
#if CTX_ENABLE_CMYK
              case CTX_CMYKA:
                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
                break;
              case CTX_CMYK:
                ctx_color_set_cmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
                break;
              case CTX_DCMYKA:
                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, c->cmyka.a);
                break;
              case CTX_DCMYK:
                ctx_color_set_dcmyka (state, color, c->cmyka.c, c->cmyka.m, c->cmyka.y, c->cmyka.k, 1.0f);
                break;
#endif
              case CTX_GRAYA:
                ctx_color_set_graya (state, color, c->graya.g, c->graya.a);
                break;
              case CTX_GRAY:
                ctx_color_set_graya (state, color, c->graya.g, 1.0f);
                break;
            }
          ctx_set_color (rasterizer->backend.ctx, SQZ_shadowColor, color);
        }
        break;
#endif
      case CTX_LINE_DASH:
        if (c->line_dash.count)
          {
            ctx_rasterizer_line_dash (rasterizer, c->line_dash.count, c->line_dash.data);
          }
        else
        ctx_rasterizer_line_dash (rasterizer, 0, NULL);
        break;


      case CTX_LINE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_LINE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_rel_line_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_MOVE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_REL_MOVE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_rel_move_to (rasterizer, c->c.x0, c->c.y0);
        break;
      case CTX_CURVE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_curve_to (rasterizer, c->c.x0, c->c.y0,
                                  c->c.x1, c->c.y1,
                                  c->c.x2, c->c.y2);
        break;
      case CTX_REL_CURVE_TO:
        if (ctx->bail) break;
        _ctx_rasterizer_rel_curve_to (rasterizer, c->c.x0, c->c.y0,
                                      c->c.x1, c->c.y1,
                                      c->c.x2, c->c.y2);
        break;
      case CTX_QUAD_TO:
        if (ctx->bail) break;
        ctx_rasterizer_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_REL_QUAD_TO:
        if (ctx->bail) break;
        ctx_rasterizer_rel_quad_to (rasterizer, c->c.x0, c->c.y0, c->c.x1, c->c.y1);
        break;
      case CTX_ARC:
        if (ctx->bail) break;
        ctx_rasterizer_arc (rasterizer, c->arc.x, c->arc.y, c->arc.radius, c->arc.angle1, c->arc.angle2, (int)c->arc.direction);
        break;
      case CTX_RECTANGLE:
        if (ctx->bail) break;
        _ctx_rasterizer_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                   c->rectangle.width, c->rectangle.height);
        break;
      case CTX_ROUND_RECTANGLE:
        if (ctx->bail) break;
        ctx_rasterizer_round_rectangle (rasterizer, c->rectangle.x, c->rectangle.y,
                                        c->rectangle.width, c->rectangle.height,
                                        c->rectangle.radius);
        break;
      case CTX_SET_PIXEL:
        ctx_rasterizer_set_pixel (rasterizer, c->set_pixel.x, c->set_pixel.y,
                                  c->set_pixel.rgba[0],
                                  c->set_pixel.rgba[1],
                                  c->set_pixel.rgba[2],
                                  c->set_pixel.rgba[3]);
        break;
      case CTX_DEFINE_TEXTURE:
        {
          uint8_t *pixel_data = ctx_define_texture_pixel_data (entry);

          ctx_rasterizer_define_texture (rasterizer, c->define_texture.eid,
                                         c->define_texture.width, c->define_texture.height,
                                         c->define_texture.format,
                                         pixel_data, 0);
          rasterizer->comp_op = NULL;
          rasterizer->fragment = NULL;
        }
        break;
      case CTX_TEXTURE:
        ctx_rasterizer_set_texture (rasterizer, c->texture.eid,
                                    c->texture.x, c->texture.y);
        rasterizer->comp_op = NULL;
        rasterizer->fragment = NULL;
        break;
      case CTX_SOURCE_TRANSFORM:
        ctx_matrix_set (&state->gstate.source_fill.set_transform,
                        ctx_arg_float (0), ctx_arg_float (1),
                        ctx_arg_float (2), ctx_arg_float (3),
                        ctx_arg_float (4), ctx_arg_float (5),
                        ctx_arg_float (6), ctx_arg_float (7),
                        ctx_arg_float (8));
        rasterizer->comp_op = NULL;
        break;
#if 0
      case CTX_LOAD_IMAGE:
        ctx_rasterizer_load_image (rasterizer, ctx_arg_string(),
                                   ctx_arg_float (0), ctx_arg_float (1) );
        break;
#endif
#if CTX_GRADIENTS
      case CTX_GRADIENT_STOP:
        {
          float rgba[4]= {ctx_u8_to_float (ctx_arg_u8 (4) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+1) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+2) ),
                          ctx_u8_to_float (ctx_arg_u8 (4+3) )
                         };
          ctx_rasterizer_gradient_add_stop (rasterizer,
                                            ctx_arg_float (0), rgba);
          rasterizer->comp_op = NULL;
        }
        break;
      case CTX_CONIC_GRADIENT:
      case CTX_LINEAR_GRADIENT:
      case CTX_RADIAL_GRADIENT:
        ctx_interpret_style (state, entry, NULL);
        ctx_state_gradient_clear_stops (state);
#if CTX_GRADIENT_CACHE
        rasterizer->gradient_cache_valid = 0;
#endif
        rasterizer->comp_op = NULL;
        break;
#endif
      case CTX_PRESERVE:
        rasterizer->preserve = 1;
        break;
      case CTX_COLOR:
      case CTX_COMPOSITING_MODE:
      case CTX_BLEND_MODE:
      case CTX_EXTEND:
      case CTX_SET_RGBA_U8:
        ctx_interpret_style (state, entry, NULL);
        rasterizer->comp_op = NULL;
        break;
#if CTX_COMPOSITING_GROUPS
      case CTX_START_GROUP:
        ctx_rasterizer_start_group (rasterizer);
        break;
      case CTX_END_GROUP:
        ctx_rasterizer_end_group (rasterizer);
        break;
#endif

      case CTX_RESTORE:
        for (unsigned int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
             i < state->gstate.keydb_pos; i++)
        {
          if (state->keydb[i].key == SQZ_clip)
          {
            clear_clip = 1;
          }
        }
        /* FALLTHROUGH */
      case CTX_ROTATE:
      case CTX_SCALE:
      case CTX_APPLY_TRANSFORM:
      case CTX_TRANSLATE:
      case CTX_IDENTITY:
        /* FALLTHROUGH */
      case CTX_SAVE:
        rasterizer->comp_op = NULL;
        ctx_interpret_transforms (state, entry, NULL);
        if (clear_clip)
        {
          ctx_rasterizer_clip_reset (rasterizer);
          for (unsigned int i = state->gstate_no?state->gstate_stack[state->gstate_no-1].keydb_pos:0;
             i < state->gstate.keydb_pos; i++)
        {
          if (state->keydb[i].key == SQZ_clip)
          {
            int idx = ctx_float_to_string_index (state->keydb[i].value);
            if (idx >=0)
            {
              CtxSegment *edges = (CtxSegment*)&state->stringpool[idx];
              ctx_rasterizer_clip_apply (rasterizer, edges);
            }
          }
        }
        }
        break;
      case CTX_STROKE:
          if (rasterizer->edge_list.count == 0)break;
#if CTX_ENABLE_SHADOW_BLUR
        if ((state->gstate.shadow_blur > 0.0f) & (!rasterizer->in_text))
          ctx_rasterizer_shadow_stroke (rasterizer);
#endif
        {
        int count = rasterizer->edge_list.count;
        if (state->gstate.n_dashes)
        {
          int n_dashes = state->gstate.n_dashes;
          float *dashes = state->gstate.dashes;
          float factor = ctx_matrix_get_scale (&state->gstate.transform);

          CtxSegment temp[count]; /* copy of already built up path's poly line  */
          memcpy (temp, rasterizer->edge_list.entries, sizeof (temp));
          int start = 0;
          int end   = 0;
      CtxMatrix transform_backup = state->gstate.transform;
      _ctx_matrix_identity (&state->gstate.transform);
      _ctx_transform_prime (state);
      _ctx_rasterizer_reset (rasterizer); /* for dashing we create
                                            a dashed path to stroke */
      float prev_x = 0.0f;
      float prev_y = 0.0f;
      //float pos = 0.0;

      int   dash_no  = 0.0;
      float dash_lpos = state->gstate.line_dash_offset * factor;
      int   is_down = 0;
      float lr = state->gstate.line_width * factor / 2;

          while (start < count)
          {
            int started = 0;
            int i;
            is_down = 0;

            if (!is_down)
            {
              CtxSegment *segment = &temp[0];
              prev_x = segment->x0 * 1.0f / CTX_SUBDIV;
              prev_y = segment->y0 * 1.0f / CTX_FULL_AA;
              ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
              is_down = 1;
            }

            for (i = start; i < count; i++)
            {
              CtxSegment *segment = &temp[i];
              float x, y;
              if (segment->code == CTX_NEW_EDGE)
                {
                  if (started)
                    {
                      end = i - 1;
                      dash_no = 0;
                      dash_lpos = 0.0;
                      goto foo;
                    }
                  prev_x = segment->x0 * 1.0f / CTX_SUBDIV;
                  prev_y = segment->y0 * 1.0f / CTX_FULL_AA;
                  started = 1;
                  start = i;
                  is_down = 1;
                  ctx_rasterizer_move_to (rasterizer, prev_x, prev_y);
                }
   int max_again = 40;
again:
   max_again--;
              x = segment->x1 * 1.0f / CTX_SUBDIV;
              y = segment->y1 * 1.0f / CTX_FULL_AA;
              float dx = x - prev_x;
              float dy = y - prev_y;
              float length = ctx_hypotf (dx, dy);

              if (dash_lpos + length >= dashes[dash_no] * factor)
              {
                float p = (dashes[dash_no] * factor - dash_lpos) / length;
                float splitx = x * p + (1.0f - p) * prev_x;
                float splity = y * p + (1.0f - p) * prev_y;

                /* TODO : check for intersection rather than just end points being in raster-rect */
                if ( ((splitx - lr >= rasterizer->blit_x) &
                      (prev_x - lr >= rasterizer->blit_x) &
                      (splity - lr >= rasterizer->blit_y) &
                      (prev_y - lr >= rasterizer->blit_y)) |

                     ((splitx + lr < rasterizer->blit_x + rasterizer->blit_width) &
                      (prev_x + lr < rasterizer->blit_x + rasterizer->blit_width) &
                      (splity + lr < rasterizer->blit_y + rasterizer->blit_height) &
                      (prev_y + lr < rasterizer->blit_y + rasterizer->blit_height)))
                {
                  if (is_down)
                  {
                    ctx_rasterizer_line_to (rasterizer, splitx, splity);
                    is_down = 0;
                  }
                  else
                  {
                    _ctx_rasterizer_move_to (rasterizer, splitx, splity);
                    is_down = 1;
                  }
                }
                prev_x = splitx;
                prev_y = splity;
                dash_no++;
                dash_lpos=0;
                if (dash_no >= n_dashes) dash_no = 0;
                if (max_again > 0)
                  goto again;
              }
              else
              {
                //pos += length;
                dash_lpos += length;
                {
                  if (is_down)
                    ctx_rasterizer_line_to (rasterizer, x, y);
                }
              }
              prev_x = x;
              prev_y = y;
            }
          end = i-1;
foo:
          start = end+1;
        }
        state->gstate.transform = transform_backup;
        _ctx_transform_prime (state);
        }
        ctx_rasterizer_stroke (rasterizer);
        }
        _ctx_rasterizer_reset (rasterizer);

        break;
      case CTX_FONT:
        ctx_interpret_style (state, entry, NULL);
        ctx_rasterizer_set_font (rasterizer, ctx_arg_string() );
        break;
      case CTX_TEXT:
        if (ctx->bail)
        {
          _ctx_text (rasterizer->backend.ctx, ctx_arg_string(), 0, 0);
          break;
        }

        rasterizer->in_text++;
#if CTX_ENABLE_SHADOW_BLUR
        if (state->gstate.shadow_blur > 0.0)
          ctx_rasterizer_shadow_text (rasterizer, ctx_arg_string ());
#endif
        ctx_rasterizer_text (rasterizer, ctx_arg_string(), 0);
        rasterizer->in_text--;
        _ctx_rasterizer_reset (rasterizer);
#if CONFIG_IDF_TARGET_ESP32C3
        taskYIELD();
#endif
        break;
      case CTX_GLYPH:
        if (ctx->bail) break;
        {
        uint32_t unichar = entry[0].data.u32[0];
        uint32_t stroke = unichar &  ((uint32_t)1<<31);
        if (stroke) unichar -= stroke;
        ctx_rasterizer_glyph (rasterizer, entry[0].data.u32[0], stroke);
        }
#if CONFIG_IDF_TARGET_ESP32C3
        taskYIELD();
#endif
        break;
      case CTX_PAINT:
        // XXX simplify this with a special case
        _ctx_rasterizer_rectangle (rasterizer, -1000.0, -1000.0, 11000, 11000);
        ctx_rasterizer_fill (rasterizer);
        _ctx_rasterizer_reset (rasterizer);
#if CONFIG_IDF_TARGET_ESP32C3
        taskYIELD();
#endif
        break;
      case CTX_FILL:
        if (!ctx->bail)
        {
          if (rasterizer->edge_list.count == 0)break;
          int preserve = rasterizer->preserve;
#if CTX_ENABLE_SHADOW_BLUR
        if ((state->gstate.shadow_blur > 0.0f) & (!rasterizer->in_text) & (!rasterizer->in_shadow))
        {
          ctx_rasterizer_shadow_fill (rasterizer);
        }
#endif
        ctx_rasterizer_fill (rasterizer);
        if (preserve)
          ctx_rasterizer_reset_soft (rasterizer);
        else
          _ctx_rasterizer_reset (rasterizer);
#if CONFIG_IDF_TARGET_ESP32C3
        taskYIELD();
#endif
        }
        break;
      case CTX_START_FRAME:
      case CTX_RESET_PATH:
        _ctx_rasterizer_reset (rasterizer);
        break;
      case CTX_CLIP:
        _ctx_rasterizer_clip (rasterizer);
#if CONFIG_IDF_TARGET_ESP32C3
        taskYIELD();
#endif
        break;
      case CTX_CLOSE_PATH:
        _ctx_rasterizer_close_path (rasterizer);
        break;
      case CTX_IMAGE_SMOOTHING:
        ctx_interpret_style (state, entry, NULL);
        rasterizer->comp_op = NULL;
        break;
      case CTX_VIEW_BOX:
        { // XXX : this can screw with transforms if one is not careful
           float x = ctx_arg_float(0),
                       y = ctx_arg_float(1),
                       width = ctx_arg_float(2),
                       height = ctx_arg_float(3);
           float factor = ctx_width (ctx)/width;
           float factorh = ctx_height (ctx)/height;

           if (factorh <= factor) factor = factorh;

           ctx_translate (ctx, -x, -y);
           ctx_scale (ctx, factor, factor);
        }
        break;
    }
  ctx_interpret_pos_bare (state, entry, NULL);
}


//static CtxFont *ctx_fonts;
/* Releases the resources held inside @rasterizer - its edge list and, when
 * clipping support is compiled in, its clip buffer - without freeing the
 * rasterizer structure itself.
 */
void
ctx_rasterizer_deinit (CtxRasterizer *rasterizer)
{
  ctx_drawlist_deinit (&rasterizer->edge_list);
#if CTX_ENABLE_CLIP
  /* nothing more to do when no clip buffer was ever allocated */
  if (rasterizer->clip_buffer == NULL)
    return;

  ctx_buffer_destroy (rasterizer->clip_buffer);
  rasterizer->clip_buffer = NULL; /* guard against a later double destroy */
#endif
}

void
ctx_rasterizer_destroy (void *r)
{
  CtxRasterizer *rasterizer = (CtxRasterizer*)r;
  ctx_rasterizer_deinit (rasterizer);
  ctx_free (rasterizer);
}

/* Reports the antialiasing level of @ctx.
 *
 * A context whose backend is not the rasterizer always reports
 * CTX_ANTIALIAS_DEFAULT.  Otherwise the rasterizer's aa value is mapped
 * back to the enum: 0 and 1 give NONE, 3 gives FAST, 5 gives GOOD and every
 * other value (15 included) gives FULL.
 */
CtxAntialias ctx_get_antialias (Ctx *ctx)
{
  CtxRasterizer *rasterizer;

  if (ctx_backend_type (ctx) != CTX_BACKEND_RASTERIZER)
    return CTX_ANTIALIAS_DEFAULT;

  rasterizer = (CtxRasterizer*)(ctx->backend);

  if ((rasterizer->aa == 0) || (rasterizer->aa == 1))
    return CTX_ANTIALIAS_NONE;
  if (rasterizer->aa == 3)
    return CTX_ANTIALIAS_FAST;
  if (rasterizer->aa == 5)
    return CTX_ANTIALIAS_GOOD;

  return CTX_ANTIALIAS_FULL;
}

/* Translates a CtxAntialias enum value into the integer stored in the
 * rasterizer's aa field: NONE=1, FAST=3, GOOD=5, FULL=15.  DEFAULT and any
 * unrecognized value yield the compile-time CTX_RASTERIZER_AA setting.
 */
static int _ctx_antialias_to_aa (CtxAntialias antialias)
{
  if (antialias == CTX_ANTIALIAS_NONE)
    return 1;
  if (antialias == CTX_ANTIALIAS_FAST)
    return 3;
  if (antialias == CTX_ANTIALIAS_GOOD)
    return 5;
  if (antialias == CTX_ANTIALIAS_FULL)
    return 15;

  return CTX_RASTERIZER_AA;
}

/* Sets the antialiasing level used by the rasterizer backend of @ctx.
 * The call is a no-op for contexts whose backend is not the rasterizer.
 */
void
ctx_set_antialias (Ctx *ctx, CtxAntialias antialias)
{
  if (ctx_backend_type (ctx) == CTX_BACKEND_RASTERIZER)
  {
    CtxRasterizer *rasterizer = (CtxRasterizer*)(ctx->backend);

    rasterizer->aa = _ctx_antialias_to_aa (antialias);
  }
}

/* (Re)initializes @rasterizer as a rasterizer backend drawing into @data.
 *
 * Any clip buffer or edge list already referenced by the structure is
 * released first, so the memory passed in must either be zero-filled or a
 * previously initialized rasterizer.  The structure is then cleared and
 * set up from the arguments.
 *
 * @rasterizer:     storage to initialize
 * @ctx:            context that owns this backend
 * @texture_source: context to use as texture source; @ctx when NULL
 * @state:          state to attach; it is reset with ctx_state_init()
 * @data:           target pixel buffer
 * @x, @y, @width, @height: blit rectangle, also used as the initial clip
 * @stride:         stored as blit_stride
 * @pixel_format:   target format; BGRA8/BGR8 are handled as RGBA8/RGB8 with
 *                  the swap_red_green flag set
 * @antialias:      NOTE(review): not consulted anywhere in this function
 *                  body - confirm whether that is intentional
 *
 * Returns @rasterizer.
 */
CtxRasterizer *
ctx_rasterizer_init (CtxRasterizer *rasterizer, Ctx *ctx, Ctx *texture_source, CtxState *state, void *data, int x, int y, int width, int height, int stride, CtxPixelFormat pixel_format, CtxAntialias antialias)
{
  CtxBackend *base = (CtxBackend*)rasterizer;

  /* drop whatever a previous initialization left behind */
#if CTX_ENABLE_CLIP
  if (rasterizer->clip_buffer)
    ctx_buffer_destroy (rasterizer->clip_buffer);
#endif
  if (rasterizer->edge_list.size)
    ctx_drawlist_deinit (&rasterizer->edge_list);
  memset (rasterizer, 0, sizeof (CtxRasterizer));

  /* generic backend part */
  base->type    = CTX_BACKEND_RASTERIZER;
  base->process = ctx_rasterizer_process;
  base->destroy = (CtxDestroyNotify)ctx_rasterizer_destroy;
  base->ctx     = ctx;

  rasterizer->edge_list.flags |= CTX_DRAWLIST_EDGE_LIST;
  rasterizer->state = state;
  if (texture_source)
    rasterizer->texture_source = texture_source;
  else
    rasterizer->texture_source = ctx;

  /* the state must be reset before the clip rectangle is written into it */
  ctx_state_init (state);
  state->gstate.clip_min_x = x;
  state->gstate.clip_min_y = y;
  state->gstate.clip_max_x = x + width - 1;
  state->gstate.clip_max_y = y + height - 1;

  /* render target */
  rasterizer->buf         = data;
  rasterizer->blit_x      = x;
  rasterizer->blit_y      = y;
  rasterizer->blit_width  = width;
  rasterizer->blit_height = height;
  rasterizer->blit_stride = stride;

  /* empty scanline range: min above max */
  rasterizer->scan_min = 5000;
  rasterizer->scan_max = -5000;

  /* BGR(A) targets are rendered as RGB(A) with the channels swapped */
  switch (pixel_format)
  {
    case CTX_FORMAT_BGRA8:
      pixel_format = CTX_FORMAT_RGBA8;
      rasterizer->swap_red_green = 1;
      break;
    case CTX_FORMAT_BGR8:
      pixel_format = CTX_FORMAT_RGB8;
      rasterizer->swap_red_green = 1;
      break;
    default:
      break;
  }
  rasterizer->format = ctx_pixel_format_info (pixel_format);

#if CTX_GRADIENTS
#if CTX_GRADIENT_CACHE
  rasterizer->gradient_cache_elements = CTX_GRADIENT_CACHE_ELEMENTS;
  rasterizer->gradient_cache_valid = 0;
#endif
#endif

#if static_OPAQUE
  memset (rasterizer->opaque, 255, sizeof (rasterizer->opaque));
#endif

  return rasterizer;
}

/* Re-targets the rasterizer backend of @ctx at a new pixel buffer/region.
 *
 * This is a lighter alternative to calling ctx_rasterizer_init() again:
 * the rasterizer's state is reset with ctx_state_init() and only the
 * fields assigned below (target buffer, blit rectangle, clip rectangle,
 * stride, scanline range, gradient cache validity, channel swap flag and
 * pixel format info) are updated.
 *
 * @ctx:          context whose backend is re-targeted; nothing happens when
 *                it has no backend.  NOTE(review): the backend is assumed
 *                to be a CtxRasterizer - its type is not checked here.
 * @fb:           new target pixel buffer
 * @x, @y, @width, @height: new blit rectangle, also used as the clip
 * @stride:       stored as blit_stride
 * @pixel_format: new target format; BGRA8/BGR8 are handled as RGBA8/RGB8
 *                with the swap_red_green flag set
 */
void
ctx_rasterizer_reinit (Ctx *ctx,
                       void *fb,
                       int x,
                       int y,
                       int width,
                       int height,
                       int stride,
                       CtxPixelFormat pixel_format)
{
  CtxBackend *backend = (CtxBackend*)ctx_get_backend (ctx);
  CtxRasterizer *rasterizer = (CtxRasterizer*)backend;
  if (!backend) return;

  ctx_state_init (rasterizer->state);
  rasterizer->buf         = fb;
  rasterizer->blit_x      = x;
  rasterizer->blit_y      = y;
  rasterizer->blit_width  = width;
  rasterizer->blit_height = height;
  rasterizer->state->gstate.clip_min_x  = x;
  rasterizer->state->gstate.clip_min_y  = y;
  rasterizer->state->gstate.clip_max_x  = x + width - 1;
  rasterizer->state->gstate.clip_max_y  = y + height - 1;
  rasterizer->blit_stride = stride;
  rasterizer->scan_min    = 5000;
  rasterizer->scan_max    = -5000;
#if CTX_GRADIENT_CACHE
  rasterizer->gradient_cache_valid = 0;
#endif

  /* The swap flag is derived from pixel_format alone.  Unlike
   * ctx_rasterizer_init() there is no memset here, so clear it explicitly;
   * otherwise a rasterizer that previously targeted a BGR(A) buffer would
   * keep swapping channels after being re-pointed at a non-BGR format.
   */
  rasterizer->swap_red_green = 0;
  if (pixel_format == CTX_FORMAT_BGRA8)
  {
    pixel_format = CTX_FORMAT_RGBA8;
    rasterizer->swap_red_green = 1;
  }
  else if (pixel_format == CTX_FORMAT_BGR8)
  {
    pixel_format = CTX_FORMAT_RGB8;
    rasterizer->swap_red_green = 1;
  }

  rasterizer->format = ctx_pixel_format_info (pixel_format);
}

/* Creates a context that rasterizes into the pixels of @buffer.
 *
 * A drawlist context with the buffer's dimensions is created and given a
 * freshly allocated rasterizer backend covering the whole buffer, using the
 * buffer's stride and pixel format and default antialiasing.
 *
 * Returns the new context.
 */
Ctx *
ctx_new_for_buffer (CtxBuffer *buffer)
{
  Ctx *ctx = ctx_new_drawlist (buffer->width, buffer->height);
  /* zero-filled storage, as ctx_rasterizer_init() inspects it before use */
  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx_calloc (1, sizeof (CtxRasterizer));

  ctx_rasterizer_init (rasterizer, ctx, NULL, &ctx->state,
                       buffer->data, 0, 0, buffer->width, buffer->height,
                       buffer->stride, buffer->format->pixel_format,
                       CTX_ANTIALIAS_DEFAULT);
  ctx_set_backend (ctx, rasterizer);
  return ctx;
}


/* Creates a context that rasterizes directly into caller-provided pixels.
 *
 * @data:         start of the pixel data
 * @width:        width in pixels
 * @height:       height in pixels
 * @stride:       passed on to the rasterizer as its blit stride
 * @pixel_format: pixel format of @data
 *
 * Returns the new context, backed by a freshly allocated rasterizer.
 */
Ctx *
ctx_new_for_framebuffer (void *data, int width, int height,
                         int stride,
                         CtxPixelFormat pixel_format)
{
  Ctx *ctx = ctx_new_drawlist (width, height);
  /* zero-filled storage, as ctx_rasterizer_init() inspects it before use */
  CtxRasterizer *storage = (CtxRasterizer *) ctx_calloc (1, sizeof (CtxRasterizer));

  ctx_set_backend (ctx,
                   ctx_rasterizer_init (storage, ctx, NULL, &ctx->state,
                                        data, 0, 0, width, height,
                                        stride, pixel_format,
                                        CTX_ANTIALIAS_DEFAULT));

  /* XXX 1-bit gray targets misbehave with antialiasing enabled (something
   *     goes amiss with offsets), so it is switched off for them.
   */
  if (pixel_format == CTX_FORMAT_GRAY1)
    ctx_set_antialias (ctx, CTX_ANTIALIAS_NONE);

  return ctx;
}

// ctx_new_for_stream (FILE *stream);

#if 0
/* Disabled (inside #if 0): would allocate a state and a rasterizer and
 * initialize them for the given pixel buffer.
 *
 * NOTE(review): stale as written and must not be re-enabled unchanged:
 *  - the rasterizer is allocated with sizeof (CtxBackend), not
 *    sizeof (CtxRasterizer);
 *  - the ctx_rasterizer_init() call lacks the ctx and texture_source
 *    arguments of the current signature;
 *  - nothing is returned despite the CtxRasterizer * return type.
 */
CtxRasterizer *ctx_rasterizer_new (void *data, int x, int y, int width, int height,
                                   int stride, CtxPixelFormat pixel_format)
{
  CtxState    *state    = (CtxState *) ctx_malloc (sizeof (CtxState) );
  CtxRasterizer *rasterizer = (CtxRasterizer *) ctx_malloc (sizeof (CtxBackend) );
  ctx_rasterizer_init (rasterizer, state, data, x, y, width, height,
                       stride, pixel_format, CTX_ANTIALIAS_DEFAULT);
}
#endif

#else

#endif


/* Empties the gradient of @state by resetting its stop count to zero; the
 * stop storage itself is left untouched.
 */
static void
ctx_state_gradient_clear_stops (CtxState *state)
{
  state->gradient.n_stops = 0;
}


#ifndef __clang__
#if CTX_RASTERIZER_O3
#pragma GCC pop_options
#endif
#if CTX_RASTERIZER_O2
#pragma GCC pop_options
#endif
#endif

/* Externally visible wrapper that forwards to _ctx_rasterizer_reset() for
 * @rasterizer.
 */
void
ctx_rasterizer_reset (CtxRasterizer *rasterizer)
{
  _ctx_rasterizer_reset (rasterizer);
}

#endif

#if CTX_IMPLEMENTATION



/**************************************************************************
 *
 * Copyright 2013-2014 RAD Game Tools and Valve Software
 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/



/* Compile-time size checks: each typedef declares an array type whose size
 * is 1 when the corresponding mz_uintNN type has the expected width and -1
 * (a compile error) when it does not.
 */
typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1];
typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1];
typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1];

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- zlib-style API's */

/* Updates a running Adler-32 checksum with buf_len bytes starting at ptr.
 *
 * adler:   checksum so far (low 16 bits = byte sum, high 16 bits = sum of
 *          sums)
 * ptr:     data to add; when NULL, MZ_ADLER32_INIT is returned
 * buf_len: number of bytes at ptr
 *
 * Returns the updated checksum.
 *
 * The input is consumed in chunks of at most 5552 bytes with a modulo
 * 65521 reduction after each chunk.  The first chunk takes the remainder
 * (buf_len % 5552) so that every later chunk is a full 5552 bytes.
 */
mz_ulong mz_adler32(mz_ulong adler, const unsigned char *ptr, size_t buf_len)
{
    mz_uint32 lo = (mz_uint32)(adler & 0xffff);
    mz_uint32 hi = (mz_uint32)(adler >> 16);
    size_t chunk = buf_len % 5552;

    if (!ptr)
        return MZ_ADLER32_INIT;

    while (buf_len)
    {
        size_t left = chunk;

        /* eight bytes per iteration */
        while (left >= 8)
        {
            lo += ptr[0], hi += lo;
            lo += ptr[1], hi += lo;
            lo += ptr[2], hi += lo;
            lo += ptr[3], hi += lo;
            lo += ptr[4], hi += lo;
            lo += ptr[5], hi += lo;
            lo += ptr[6], hi += lo;
            lo += ptr[7], hi += lo;
            ptr += 8;
            left -= 8;
        }

        /* up to seven leftover bytes of this chunk */
        for (; left; --left)
        {
            lo += *ptr++;
            hi += lo;
        }

        lo %= 65521U;
        hi %= 65521U;

        buf_len -= chunk;
        chunk = 5552;
    }

    return (hi << 16) + lo;
}

/* Karl Malbrain's compact CRC-32. See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": http://www.geocities.com/malbrain/ */
#if 0
    /* Disabled (inside #if 0) compact CRC-32 variant: uses a 16-entry
     * table and processes each input byte as two 4-bit halves.  Returns
     * MZ_CRC32_INIT when ptr is NULL.
     */
    mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len)
    {
        static const mz_uint32 s_crc32[16] = { 0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c,
                                               0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c };
        mz_uint32 crcu32 = (mz_uint32)crc;
        if (!ptr)
            return MZ_CRC32_INIT;
        /* the running value is kept inverted while bytes are folded in */
        crcu32 = ~crcu32;
        while (buf_len--)
        {
            mz_uint8 b = *ptr++;
            /* low nibble first, then high nibble */
            crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)];
            crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)];
        }
        return ~crcu32;
    }
#elif defined(USE_EXTERNAL_MZCRC)
/* If USE_EXTERNAL_MZCRC is defined, an external module will export the
 * mz_crc32() symbol for us to use, e.g. an SSE-accelerated version.
 * Depending on the implementation, it may be necessary to ~ the input/output
 * crc values.
 */
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len);
#else
/* Faster, but larger CPU cache footprint.
 */
/* Updates a running CRC-32 with buf_len bytes starting at ptr, using a
 * 256-entry lookup table (one table access per input byte).
 *
 * crc:     checksum so far
 * ptr:     data to add
 * buf_len: number of bytes at ptr
 *
 * Returns the updated checksum.  The value is inverted on entry and again
 * on exit, so the non-inverted form is what callers pass in and get back.
 */
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8 *ptr, size_t buf_len)
{
    static const mz_uint32 s_crc_table[256] =
        {
          0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535,
          0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD,
          0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D,
          0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,
          0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4,
          0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
          0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC,
          0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F,
          0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB,
          0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F,
          0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB,
          0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
          0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA,
          0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE,
          0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A,
          0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9,
          0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409,
          0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
          0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739,
          0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
          0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268,
          0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0,
          0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8,
          0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
          0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF,
          0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703,
          0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7,
          0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A,
          0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE,
          0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
          0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6,
          0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45,
          0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D,
          0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5,
          0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605,
          0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
          0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
        };

    mz_uint32 acc = (mz_uint32)crc ^ 0xFFFFFFFF;
    const mz_uint8 *cur = (const mz_uint8 *)ptr;
    size_t quads = buf_len >> 2; /* number of whole 4-byte groups */
    size_t tail = buf_len & 3;   /* bytes left after the groups   */

    for (; quads; --quads, cur += 4)
    {
        acc = (acc >> 8) ^ s_crc_table[(acc ^ cur[0]) & 0xFF];
        acc = (acc >> 8) ^ s_crc_table[(acc ^ cur[1]) & 0xFF];
        acc = (acc >> 8) ^ s_crc_table[(acc ^ cur[2]) & 0xFF];
        acc = (acc >> 8) ^ s_crc_table[(acc ^ cur[3]) & 0xFF];
    }

    for (; tail; --tail, ++cur)
        acc = (acc >> 8) ^ s_crc_table[(acc ^ cur[0]) & 0xFF];

    return ~acc;
}
#endif

/* Releases the block p through the MZ_FREE macro. */
void mz_free(void *p)
{
    MZ_FREE(p);
}

/* Default allocation callback: allocates room for items elements of size
 * bytes each through MZ_MALLOC.
 *
 * opaque: user pointer of the stream, unused
 * items:  number of elements
 * size:   size of one element in bytes
 *
 * Returns the new block, or NULL when the allocation fails or when
 * items * size does not fit in a size_t.
 */
MINIZ_EXPORT void *miniz_def_alloc_func(void *opaque, size_t items, size_t size)
{
    (void)opaque, (void)items, (void)size;
    /* A wrapped product would silently allocate a block that is too small. */
    if ((size != 0) && (items > ((size_t)-1) / size))
        return NULL;
    return MZ_MALLOC(items * size);
}
/* Default deallocation callback: releases address through MZ_FREE.  The
 * opaque user pointer is unused.
 */
MINIZ_EXPORT void miniz_def_free_func(void *opaque, void *address)
{
    (void)opaque, (void)address;
    MZ_FREE(address);
}
/* Default reallocation callback: resizes address to hold items elements of
 * size bytes each through MZ_REALLOC.
 *
 * opaque:  user pointer of the stream, unused
 * address: block to resize
 * items:   number of elements
 * size:    size of one element in bytes
 *
 * Returns the result of MZ_REALLOC, or NULL - without touching address -
 * when items * size does not fit in a size_t.
 */
MINIZ_EXPORT void *miniz_def_realloc_func(void *opaque, void *address, size_t items, size_t size)
{
    (void)opaque, (void)address, (void)items, (void)size;
    /* A wrapped product would silently shrink the block below what the
     * caller is going to use. */
    if ((size != 0) && (items > ((size_t)-1) / size))
        return NULL;
    return MZ_REALLOC(address, items * size);
}

/* Returns the library version string, MZ_VERSION. */
const char *mz_version(void)
{
    return MZ_VERSION;
}

#ifndef MINIZ_NO_ZLIB_APIS

#ifndef MINIZ_NO_DEFLATE_APIS

/* Initializes pStream for compression at the given level.  Convenience
 * form of mz_deflateInit2() using MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, a
 * mem_level of 9 and MZ_DEFAULT_STRATEGY; returns whatever that call
 * returns.
 */
int mz_deflateInit(mz_streamp pStream, int level)
{
    return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY);
}

/* Initializes pStream for compression.
 *
 * pStream:     stream to set up; zalloc/zfree are filled in with the
 *              default callbacks when unset
 * level:       compression level, forwarded to
 *              tdefl_create_comp_flags_from_zip_params()
 * method:      must be MZ_DEFLATED
 * window_bits: must be MZ_DEFAULT_WINDOW_BITS or its negation
 * mem_level:   must lie in 1..9
 * strategy:    forwarded to tdefl_create_comp_flags_from_zip_params()
 *
 * Returns MZ_OK on success, MZ_STREAM_ERROR when pStream is NULL,
 * MZ_PARAM_ERROR for invalid arguments or when tdefl_init() fails, and
 * MZ_MEM_ERROR when the compressor cannot be allocated.
 */
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy)
{
    mz_uint flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy);
    tdefl_compressor *compressor;

    if (!pStream)
        return MZ_STREAM_ERROR;

    /* argument validation, in the order method / mem_level / window_bits */
    if (method != MZ_DEFLATED)
        return MZ_PARAM_ERROR;
    if ((mem_level < 1) || (mem_level > 9))
        return MZ_PARAM_ERROR;
    if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))
        return MZ_PARAM_ERROR;

    /* reset the bookkeeping fields of the stream */
    pStream->total_in = 0;
    pStream->total_out = 0;
    pStream->reserved = 0;
    pStream->data_type = 0;
    pStream->msg = NULL;
    pStream->adler = MZ_ADLER32_INIT;

    /* fall back to the default allocator callbacks */
    if (!pStream->zalloc)
        pStream->zalloc = miniz_def_alloc_func;
    if (!pStream->zfree)
        pStream->zfree = miniz_def_free_func;

    compressor = (tdefl_compressor *)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor));
    if (!compressor)
        return MZ_MEM_ERROR;

    pStream->state = (struct mz_internal_state *)compressor;

    if (tdefl_init(compressor, NULL, NULL, flags) == TDEFL_STATUS_OKAY)
        return MZ_OK;

    /* initialization failed: release the compressor again */
    mz_deflateEnd(pStream);
    return MZ_PARAM_ERROR;
}

/* Restarts compression on an already initialized stream, keeping the flags
 * the compressor was created with and zeroing the byte totals.
 *
 * Returns MZ_STREAM_ERROR when pStream, its state or either allocator
 * callback is missing; MZ_OK otherwise.
 */
int mz_deflateReset(mz_streamp pStream)
{
    tdefl_compressor *compressor;

    if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree))
        return MZ_STREAM_ERROR;

    compressor = (tdefl_compressor *)pStream->state;

    pStream->total_out = 0;
    pStream->total_in = 0;
    tdefl_init(compressor, NULL, NULL, compressor->m_flags);
    return MZ_OK;
}

/* Compresses as much of pStream's input into its output buffer as possible.
 *
 * pStream: initialized compression stream; next_in/avail_in describe the
 *          input, next_out/avail_out the output space
 * flush:   flush mode in 0..MZ_FINISH; MZ_PARTIAL_FLUSH is treated as
 *          MZ_SYNC_FLUSH
 *
 * Returns
 *   MZ_STREAM_ERROR  for a NULL stream/state/next_out, an out-of-range
 *                    flush value, or a failure reported by tdefl_compress()
 *   MZ_BUF_ERROR     when there is no output space on entry, when the
 *                    stream is already finished and flush != MZ_FINISH, or
 *                    when no progress is possible without more input
 *   MZ_STREAM_END    once the compressor reports completion
 *   MZ_OK            otherwise
 */
int mz_deflate(mz_streamp pStream, int flush)
{
    size_t in_bytes, out_bytes;
    mz_ulong orig_total_in, orig_total_out;
    int mz_status = MZ_OK;

    if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out))
        return MZ_STREAM_ERROR;
    if (!pStream->avail_out)
        return MZ_BUF_ERROR;

    if (flush == MZ_PARTIAL_FLUSH)
        flush = MZ_SYNC_FLUSH;

    /* The compressor already finished on an earlier call. */
    if (((tdefl_compressor *)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE)
        return (flush == MZ_FINISH) ? MZ_STREAM_END : MZ_BUF_ERROR;

    /* Remembered so the loop can tell whether this call made any progress. */
    orig_total_in = pStream->total_in;
    orig_total_out = pStream->total_out;
    for (;;)
    {
        tdefl_status defl_status;
        in_bytes = pStream->avail_in;
        out_bytes = pStream->avail_out;

        /* On return in_bytes/out_bytes hold the amounts actually consumed
         * and produced. */
        defl_status = tdefl_compress((tdefl_compressor *)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush);
        pStream->next_in += (mz_uint)in_bytes;
        pStream->avail_in -= (mz_uint)in_bytes;
        pStream->total_in += (mz_uint)in_bytes;
        pStream->adler = tdefl_get_adler32((tdefl_compressor *)pStream->state);

        pStream->next_out += (mz_uint)out_bytes;
        pStream->avail_out -= (mz_uint)out_bytes;
        pStream->total_out += (mz_uint)out_bytes;

        if (defl_status < 0)
        {
            mz_status = MZ_STREAM_ERROR;
            break;
        }
        else if (defl_status == TDEFL_STATUS_DONE)
        {
            mz_status = MZ_STREAM_END;
            break;
        }
        else if (!pStream->avail_out)
            break; /* Output buffer is full. */
        else if ((!pStream->avail_in) && (flush != MZ_FINISH))
        {
            /* Input exhausted: fine if a flush was requested or if this
             * call consumed or produced anything at all. */
            if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out))
                break;
            return MZ_BUF_ERROR; /* Can't make forward progress without some input. */
        }
    }
    return mz_status;
}

/* Frees the compressor attached to pStream, if any, through the stream's
 * zfree callback and clears the state pointer.
 *
 * Returns MZ_STREAM_ERROR when pStream is NULL, MZ_OK otherwise.
 */
int mz_deflateEnd(mz_streamp pStream)
{
    if (!pStream)
        return MZ_STREAM_ERROR;

    /* nothing attached: already ended or never initialized */
    if (!pStream->state)
        return MZ_OK;

    pStream->zfree(pStream->opaque, pStream->state);
    pStream->state = NULL;
    return MZ_OK;
}

/* Returns a conservative upper bound on the compressed size of source_len
 * input bytes.  pStream is unused.
 *
 * The bound is the larger of
 *   128 + 110% of source_len (rounded down), and
 *   128 + source_len + 5 bytes for every started block of 31 KiB.
 */
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len)
{
    (void)pStream;
    /* This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) */
    /* source_len + source_len / 10 equals (source_len * 110) / 100 for every
     * input, but does not wrap around for large source_len when mz_ulong is
     * 32 bits wide (a wrapped product would yield a bound that is too
     * small). */
    return MZ_MAX(128 + source_len + source_len / 10, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5);
}

/* Compresses a whole buffer in one call.
 *
 * pDest:      output buffer
 * pDest_len:  in: capacity of pDest; out: number of bytes written (only
 *             updated on success)
 * pSource:    input data
 * source_len: number of input bytes
 * level:      compression level passed to mz_deflateInit()
 *
 * Returns MZ_OK on success, MZ_PARAM_ERROR when either length exceeds 32
 * bits, the error of mz_deflateInit() when setup fails, MZ_BUF_ERROR when
 * the output buffer was too small, or the error reported by mz_deflate().
 */
int mz_compress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len, int level)
{
    mz_stream stream;
    int status;

    memset(&stream, 0, sizeof(stream));

    /* The stream's avail fields are 32 bits wide, mz_ulong may be wider. */
    if ((mz_uint64)(source_len | *pDest_len) > 0xFFFFFFFFU)
        return MZ_PARAM_ERROR;

    stream.next_in = pSource;
    stream.avail_in = (mz_uint32)source_len;
    stream.next_out = pDest;
    stream.avail_out = (mz_uint32)*pDest_len;

    status = mz_deflateInit(&stream, level);
    if (status != MZ_OK)
        return status;

    status = mz_deflate(&stream, MZ_FINISH);
    if (status == MZ_STREAM_END)
    {
        *pDest_len = stream.total_out;
        return mz_deflateEnd(&stream);
    }

    /* Not finished in one go: MZ_OK here means the output ran out of room. */
    mz_deflateEnd(&stream);
    if (status == MZ_OK)
        return MZ_BUF_ERROR;
    return status;
}

/* Compresses a whole buffer in one call at MZ_DEFAULT_COMPRESSION; see
 * mz_compress2() for the parameters and return values.
 */
int mz_compress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
{
    return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION);
}

/* Returns an upper bound on the compressed size of source_len input bytes,
 * as computed by mz_deflateBound() without a stream.
 */
mz_ulong mz_compressBound(mz_ulong source_len)
{
    return mz_deflateBound(NULL, source_len);
}

#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/

#ifndef MINIZ_NO_INFLATE_APIS

/* Per-stream decompression state hung off mz_stream::state by
 * mz_inflateInit2().
 */
typedef struct
{
    /* The low-level decompressor. */
    tinfl_decompressor m_decomp;
    /* m_dict_ofs:    offset in m_dict of the next byte to hand to the caller
     * m_dict_avail:  bytes in m_dict decoded but not yet copied out
     * m_first_call:  1 until mz_inflate() has been called once
     * m_has_flushed: set once mz_inflate() was called with MZ_FINISH */
    mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed;
    /* Value given at init; > 0 makes mz_inflate() parse a zlib header. */
    int m_window_bits;
    /* Output dictionary the decompressor writes into. */
    mz_uint8 m_dict[TINFL_LZ_DICT_SIZE];
    /* Status of the most recent tinfl_decompress() call. */
    tinfl_status m_last_status;
} inflate_state;

/* Initializes pStream for decompression.
 *
 * pStream:     stream to set up; zalloc/zfree are filled in with the
 *              default callbacks when unset
 * window_bits: MZ_DEFAULT_WINDOW_BITS or its negation; the value is stored
 *              in the state for later use by mz_inflate()
 *
 * Returns MZ_OK on success, MZ_STREAM_ERROR when pStream is NULL,
 * MZ_PARAM_ERROR for an unsupported window_bits and MZ_MEM_ERROR when the
 * state cannot be allocated.
 */
int mz_inflateInit2(mz_streamp pStream, int window_bits)
{
    inflate_state *state;

    if (!pStream)
        return MZ_STREAM_ERROR;
    if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))
        return MZ_PARAM_ERROR;

    /* reset the bookkeeping fields of the stream */
    pStream->total_in = 0;
    pStream->total_out = 0;
    pStream->reserved = 0;
    pStream->data_type = 0;
    pStream->adler = 0;
    pStream->msg = NULL;

    /* fall back to the default allocator callbacks */
    if (!pStream->zalloc)
        pStream->zalloc = miniz_def_alloc_func;
    if (!pStream->zfree)
        pStream->zfree = miniz_def_free_func;

    state = (inflate_state *)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state));
    if (!state)
        return MZ_MEM_ERROR;

    pStream->state = (struct mz_internal_state *)state;

    tinfl_init(&state->m_decomp);
    state->m_window_bits = window_bits;
    state->m_first_call = 1;
    state->m_has_flushed = 0;
    state->m_dict_ofs = 0;
    state->m_dict_avail = 0;
    state->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;

    return MZ_OK;
}

/* Initializes pStream for decompression with MZ_DEFAULT_WINDOW_BITS; see
 * mz_inflateInit2() for the return values.
 */
int mz_inflateInit(mz_streamp pStream)
{
    return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS);
}

/* Restarts decompression on an already initialized stream.
 *
 * The stream's bookkeeping fields and the decompressor state are returned
 * to their freshly initialized values; the window_bits chosen at
 * initialization time is kept.
 *
 * Returns MZ_STREAM_ERROR when pStream is NULL or has no state attached
 * (never initialized, or already ended with mz_inflateEnd()); MZ_OK
 * otherwise.
 */
int mz_inflateReset(mz_streamp pStream)
{
    inflate_state *pDecomp;
    /* Without a state there is nothing to reset, and dereferencing it below
     * would crash; mz_deflateReset() rejects this case the same way. */
    if ((!pStream) || (!pStream->state))
        return MZ_STREAM_ERROR;

    pStream->data_type = 0;
    pStream->adler = 0;
    pStream->msg = NULL;
    pStream->total_in = 0;
    pStream->total_out = 0;
    pStream->reserved = 0;

    pDecomp = (inflate_state *)pStream->state;

    tinfl_init(&pDecomp->m_decomp);
    pDecomp->m_dict_ofs = 0;
    pDecomp->m_dict_avail = 0;
    pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT;
    pDecomp->m_first_call = 1;
    pDecomp->m_has_flushed = 0;
    /* m_window_bits is deliberately left as set by mz_inflateInit2(). */

    return MZ_OK;
}

/* Decompresses as much of pStream's input into its output buffer as
 * possible.
 *
 * pStream: initialized decompression stream; next_in/avail_in describe the
 *          input, next_out/avail_out the output space
 * flush:   0, MZ_SYNC_FLUSH, MZ_FINISH, or MZ_PARTIAL_FLUSH (treated as
 *          MZ_SYNC_FLUSH)
 *
 * Returns
 *   MZ_STREAM_ERROR  for a NULL stream/state, an unsupported flush value,
 *                    or a non-MZ_FINISH call after an MZ_FINISH call
 *   MZ_DATA_ERROR    when the decompressor reports (or previously
 *                    reported) a failure
 *   MZ_BUF_ERROR     when no progress is possible: more input is needed
 *                    but none was supplied, or with MZ_FINISH the output
 *                    buffer is too small
 *   MZ_STREAM_END    when the stream is complete and fully delivered
 *   MZ_OK            otherwise
 */
int mz_inflate(mz_streamp pStream, int flush)
{
    inflate_state *pState;
    mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
    size_t in_bytes, out_bytes, orig_avail_in;
    tinfl_status status;

    if ((!pStream) || (!pStream->state))
        return MZ_STREAM_ERROR;
    if (flush == MZ_PARTIAL_FLUSH)
        flush = MZ_SYNC_FLUSH;
    if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH))
        return MZ_STREAM_ERROR;

    pState = (inflate_state *)pStream->state;
    /* A positive window_bits selects zlib-wrapped input. */
    if (pState->m_window_bits > 0)
        decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
    orig_avail_in = pStream->avail_in;

    first_call = pState->m_first_call;
    pState->m_first_call = 0;
    /* A failure seen on an earlier call is sticky. */
    if (pState->m_last_status < 0)
        return MZ_DATA_ERROR;

    /* Once MZ_FINISH has been used, every later call must use it too. */
    if (pState->m_has_flushed && (flush != MZ_FINISH))
        return MZ_STREAM_ERROR;
    pState->m_has_flushed |= (flush == MZ_FINISH);

    if ((flush == MZ_FINISH) && (first_call))
    {
        /* MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file. */
        /* So decompress straight into the caller's buffer, bypassing the
         * internal dictionary. */
        decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
        in_bytes = pStream->avail_in;
        out_bytes = pStream->avail_out;
        status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags);
        pState->m_last_status = status;
        pStream->next_in += (mz_uint)in_bytes;
        pStream->avail_in -= (mz_uint)in_bytes;
        pStream->total_in += (mz_uint)in_bytes;
        pStream->adler = tinfl_get_adler32(&pState->m_decomp);
        pStream->next_out += (mz_uint)out_bytes;
        pStream->avail_out -= (mz_uint)out_bytes;
        pStream->total_out += (mz_uint)out_bytes;

        if (status < 0)
            return MZ_DATA_ERROR;
        else if (status != TINFL_STATUS_DONE)
        {
            /* The one-shot attempt did not complete; mark the stream as
             * failed so later calls report MZ_DATA_ERROR. */
            pState->m_last_status = TINFL_STATUS_FAILED;
            return MZ_BUF_ERROR;
        }
        return MZ_STREAM_END;
    }
    /* If flush != MZ_FINISH then we must assume there's more input. */
    if (flush != MZ_FINISH)
        decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;

    /* Data decoded on an earlier call is still waiting in the dictionary:
     * hand over as much as fits and return without decoding anything new. */
    if (pState->m_dict_avail)
    {
        n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
        memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
        pStream->next_out += n;
        pStream->avail_out -= n;
        pStream->total_out += n;
        pState->m_dict_avail -= n;
        pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
        return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
    }

    for (;;)
    {
        in_bytes = pStream->avail_in;
        /* Room from the current offset to the end of the dictionary. */
        out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;

        status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
        pState->m_last_status = status;

        pStream->next_in += (mz_uint)in_bytes;
        pStream->avail_in -= (mz_uint)in_bytes;
        pStream->total_in += (mz_uint)in_bytes;
        pStream->adler = tinfl_get_adler32(&pState->m_decomp);

        pState->m_dict_avail = (mz_uint)out_bytes;

        /* Copy the newly decoded bytes to the caller, as far as they fit;
         * the offset wraps around the dictionary size. */
        n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
        memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
        pStream->next_out += n;
        pStream->avail_out -= n;
        pStream->total_out += n;
        pState->m_dict_avail -= n;
        pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);

        if (status < 0)
            return MZ_DATA_ERROR; /* Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well). */
        else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
            return MZ_BUF_ERROR; /* Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH. */
        else if (flush == MZ_FINISH)
        {
            /* The output buffer MUST be large enough to hold the remaining uncompressed data when flush==MZ_FINISH. */
            if (status == TINFL_STATUS_DONE)
                return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
            /* status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong. */
            else if (!pStream->avail_out)
                return MZ_BUF_ERROR;
        }
        else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail))
            break;
    }

    return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
}

/* Frees the decompression state attached to pStream, if any, through the
 * stream's zfree callback and clears the state pointer.
 *
 * Returns MZ_STREAM_ERROR when pStream is NULL, MZ_OK otherwise.
 */
int mz_inflateEnd(mz_streamp pStream)
{
    if (!pStream)
        return MZ_STREAM_ERROR;

    /* nothing attached: already ended or never initialized */
    if (!pStream->state)
        return MZ_OK;

    pStream->zfree(pStream->opaque, pStream->state);
    pStream->state = NULL;
    return MZ_OK;
}
/* Decompresses a whole buffer in one call.
 *
 * pDest:       output buffer
 * pDest_len:   in: capacity of pDest; out: number of bytes written (only
 *              updated on success)
 * pSource:     compressed input
 * pSource_len: in: number of input bytes; out: number of input bytes
 *              consumed (updated whenever decompression was attempted)
 *
 * Returns MZ_OK on success, MZ_PARAM_ERROR when either length exceeds 32
 * bits, the error of mz_inflateInit() when setup fails, MZ_DATA_ERROR when
 * the input ended prematurely, or the error reported by mz_inflate().
 */
int mz_uncompress2(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong *pSource_len)
{
    int status;
    mz_stream stream;

    memset(&stream, 0, sizeof(stream));

    /* The stream's avail fields are 32 bits wide, mz_ulong may be wider. */
    if ((mz_uint64)(*pSource_len | *pDest_len) > 0xFFFFFFFFU)
        return MZ_PARAM_ERROR;

    stream.next_in = pSource;
    stream.avail_in = (mz_uint32)*pSource_len;
    stream.next_out = pDest;
    stream.avail_out = (mz_uint32)*pDest_len;

    status = mz_inflateInit(&stream);
    if (status != MZ_OK)
        return status;

    status = mz_inflate(&stream, MZ_FINISH);
    *pSource_len -= stream.avail_in;

    if (status == MZ_STREAM_END)
    {
        *pDest_len = stream.total_out;
        return mz_inflateEnd(&stream);
    }

    mz_inflateEnd(&stream);
    /* A buffer error with all input used up means the data was truncated. */
    if ((status == MZ_BUF_ERROR) && (!stream.avail_in))
        return MZ_DATA_ERROR;
    return status;
}

/* Convenience wrapper around mz_uncompress2() for callers that do not need
   to know how many source bytes were consumed. */
int mz_uncompress(unsigned char *pDest, mz_ulong *pDest_len, const unsigned char *pSource, mz_ulong source_len)
{
    mz_ulong src_len = source_len;
    return mz_uncompress2(pDest, pDest_len, pSource, &src_len);
}

#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/

const char *mz_error(int err)
{
    static struct
    {
        int m_err;
        const char *m_pDesc;
    } s_error_descs[] =
        {
          { MZ_OK, "" }, { MZ_STREAM_END, "stream end" }, { MZ_NEED_DICT, "need dictionary" }, { MZ_ERRNO, "file error" }, { MZ_STREAM_ERROR, "stream error" }, { MZ_DATA_ERROR, "data error" }, { MZ_MEM_ERROR, "out of memory" }, { MZ_BUF_ERROR, "buf error" }, { MZ_VERSION_ERROR, "version error" }, { MZ_PARAM_ERROR, "parameter error" }
        };
    mz_uint i;
    for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i)
        if (s_error_descs[i].m_err == err)
            return s_error_descs[i].m_pDesc;
    return NULL;
}

#endif /*MINIZ_NO_ZLIB_APIS */

#ifdef __cplusplus
}
#endif

/*
  This is free and unencumbered software released into the public domain.

  Anyone is free to copy, modify, publish, use, compile, sell, or
  distribute this software, either in source code form or as a compiled
  binary, for any purpose, commercial or non-commercial, and by any
  means.

  In jurisdictions that recognize copyright laws, the author or authors
  of this software dedicate any and all copyright interest in the
  software to the public domain. We make this dedication for the benefit
  of the public at large and to the detriment of our heirs and
  successors. We intend this dedication to be an overt act of
  relinquishment in perpetuity of all present and future rights to this
  software under copyright law.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  OTHER DEALINGS IN THE SOFTWARE.

  For more information, please refer to <http://unlicense.org/>
*/
/**************************************************************************
 *
 * Copyright 2013-2014 RAD Game Tools and Valve Software
 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/



#ifndef MINIZ_NO_DEFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- Low-level Compression (independent from all decompression API's) */

/* Purposely making these tables static for faster init and thread safety. */
/* Maps the stored match-length byte (0..255) of an LZ record to the
   literal/length alphabet symbol (257..285) used to index the table-0 Huffman
   codes in tdefl_compress_lz_codes(). NOTE(review): the stored byte is
   presumably the match length minus the minimum match length - confirm at the
   site that records matches. */
static const mz_uint16 s_tdefl_len_sym[256] =
    {
      257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268, 269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272,
      273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274, 275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276,
      277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
      279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280,
      281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
      282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
      283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
      284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285
    };

/* Number of extra bits (0..5) emitted after the length symbol, indexed by the
   same stored match-length byte as s_tdefl_len_sym. tdefl_compress_lz_codes()
   uses it both as the bit count and as the index into mz_bitmasks. */
static const mz_uint8 s_tdefl_len_extra[256] =
    {
      0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0
    };

/* Distance alphabet symbol (0..17) for stored match distances below 512,
   indexed directly by the stored distance value in tdefl_compress_lz_codes(). */
static const mz_uint8 s_tdefl_small_dist_sym[512] =
    {
      0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11,
      11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
      14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
      14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
      15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
      16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
      17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
      17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
      17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17
    };

/* Number of extra bits (0..7) that follow the distance symbol for stored match
   distances below 512; indexed the same way as s_tdefl_small_dist_sym. */
static const mz_uint8 s_tdefl_small_dist_extra[512] =
    {
      0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
      5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
      6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
      7, 7, 7, 7, 7, 7, 7, 7
    };

/* Distance alphabet symbol (18..29) for stored match distances of 512 or
   more, indexed by (distance >> 8). The first two entries are placeholders:
   indices 0 and 1 correspond to distances below 512, which are resolved
   through s_tdefl_small_dist_sym instead. */
static const mz_uint8 s_tdefl_large_dist_sym[128] =
    {
      0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
      26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
      28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29
    };

/* Number of extra bits (8..13) that follow the distance symbol for stored
   match distances of 512 or more, indexed by (distance >> 8) like
   s_tdefl_large_dist_sym; entries 0 and 1 are placeholders. */
static const mz_uint8 s_tdefl_large_dist_extra[128] =
    {
      0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
      13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13
    };

/* A symbol paired with its sort key. m_key starts out as the symbol's
   frequency; m_sym_index is the symbol's position in its Huffman table. */
typedef struct
{
    mz_uint16 m_key, m_sym_index;
} tdefl_sym_freq;

/* Sorts num_syms entries into ascending m_key order with a byte-wise LSD radix
   sort, ping-ponging between the two caller-supplied arrays. The second pass
   is skipped when every key's high byte is zero. Returns whichever of
   pSyms0/pSyms1 ends up holding the sorted entries. */
static tdefl_sym_freq *tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq *pSyms0, tdefl_sym_freq *pSyms1)
{
    mz_uint32 hist[256 * 2];
    mz_uint32 num_passes = 2, shift = 0, pass, idx;
    tdefl_sym_freq *pSrc = pSyms0, *pDst = pSyms1;

    /* One histogram per key byte: low byte in [0,256), high byte in [256,512). */
    MZ_CLEAR_ARR(hist);
    for (idx = 0; idx < num_syms; idx++)
    {
        const mz_uint key = pSyms0[idx].m_key;
        hist[key & 0xFF]++;
        hist[256 + ((key >> 8) & 0xFF)]++;
    }

    /* If every key falls in bucket 0 of the top byte, that pass would be a no-op. */
    while ((num_passes > 1) && (hist[(num_passes - 1) * 256] == num_syms))
        num_passes--;

    for (pass = 0; pass < num_passes; pass++, shift += 8)
    {
        const mz_uint32 *pCounts = hist + (pass << 8);
        mz_uint bucket_start[256], running = 0;
        tdefl_sym_freq *pSwap;

        /* Exclusive prefix sum turns counts into first-slot offsets. */
        for (idx = 0; idx < 256; idx++)
        {
            bucket_start[idx] = running;
            running += pCounts[idx];
        }

        /* Stable scatter into the other array. */
        for (idx = 0; idx < num_syms; idx++)
        {
            const mz_uint bucket = (pSrc[idx].m_key >> shift) & 0xFF;
            pDst[bucket_start[bucket]++] = pSrc[idx];
        }

        pSwap = pSrc;
        pSrc = pDst;
        pDst = pSwap;
    }
    return pSrc;
}

/* tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. */
/* Computes Huffman code lengths in place.
   On entry A[0..n-1].m_key holds symbol frequencies; the caller in this file
   passes the output of tdefl_radix_sort_syms(), i.e. ascending key order.
   On return A[i].m_key holds the code length assigned to entry i (the caller
   histograms these values as code sizes). m_sym_index is never touched.
   n == 0 is a no-op; n == 1 assigns the single symbol a length of 1. */
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq *A, int n)
{
    int root, leaf, next, avbl, used, dpth;
    if (n == 0)
        return;
    else if (n == 1)
    {
        A[0].m_key = 1;
        return;
    }
    /* Phase 1: build the tree bottom-up. Slot `next` receives the combined
       weight of the two smallest remaining items (unmerged leaves starting at
       `leaf`, or internal nodes starting at `root`); a consumed internal node
       has its key overwritten with the index of its parent. */
    A[0].m_key += A[1].m_key;
    root = 0;
    leaf = 2;
    for (next = 1; next < n - 1; next++)
    {
        /* First child: take the internal node if leaves are exhausted or it is lighter. */
        if (leaf >= n || A[root].m_key < A[leaf].m_key)
        {
            A[next].m_key = A[root].m_key;
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = A[leaf++].m_key;
        /* Second child: same choice, but an internal node must already exist (root < next). */
        if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key))
        {
            A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
            A[root++].m_key = (mz_uint16)next;
        }
        else
            A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
    }
    /* Phase 2: A[n-2] is the tree root (depth 0); convert every parent index
       into a depth by walking from the root downwards. */
    A[n - 2].m_key = 0;
    for (next = n - 3; next >= 0; next--)
        A[next].m_key = A[A[next].m_key].m_key + 1;
    /* Phase 3: level by level, count the internal nodes at depth `dpth`
       (`used`); the remaining `avbl - used` slots at that depth are leaves,
       which are written from the end of the array backwards. */
    avbl = 1;
    used = dpth = 0;
    root = n - 2;
    next = n - 1;
    while (avbl > 0)
    {
        while (root >= 0 && (int)A[root].m_key == dpth)
        {
            used++;
            root--;
        }
        while (avbl > used)
        {
            A[next--].m_key = (mz_uint16)(dpth);
            avbl--;
        }
        /* Each internal node contributes two slots on the next level. */
        avbl = 2 * used;
        dpth++;
        used = 0;
    }
}

/* Largest code length the per-length histograms in this file have room for. */
enum
{
    TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32
};

/* Limits a canonical Huffman code table's maximum code size.
   pNum_codes[len] is the number of codes of length `len`
   (1..TDEFL_MAX_SUPPORTED_HUFF_CODESIZE) and is adjusted in place so that no
   code is longer than max_code_size while the lengths still describe a
   complete prefix code. Nothing is done when code_list_len <= 1. */
static void tdefl_huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size)
{
    int len;
    mz_uint32 kraft_total = 0;

    if (code_list_len <= 1)
        return;

    /* Clamp: every over-long code is counted as a max_code_size code. */
    for (len = TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; len > max_code_size; len--)
        pNum_codes[max_code_size] += pNum_codes[len];

    /* Kraft sum scaled by 2^max_code_size; a complete code totals exactly 2^max_code_size. */
    for (len = 1; len <= max_code_size; len++)
        kraft_total += (((mz_uint32)pNum_codes[len]) << (max_code_size - len));

    /* Each iteration removes one unit of excess: drop one longest code and
       split the longest shorter code into two codes that are one bit longer. */
    while (kraft_total != (1UL << max_code_size))
    {
        pNum_codes[max_code_size]--;

        len = max_code_size - 1;
        while ((len > 0) && (!pNum_codes[len]))
            len--;
        if (len > 0)
        {
            pNum_codes[len]--;
            pNum_codes[len + 1] += 2;
        }

        kraft_total--;
    }
}

/* Fills d->m_huff_codes[table_num] with canonical, bit-reversed Huffman codes.

   static_table != 0: the code sizes already stored in
   d->m_huff_code_sizes[table_num] are used as-is.
   static_table == 0: code sizes are first derived from the symbol counts in
   d->m_huff_count[table_num] (sort by frequency, compute minimum-redundancy
   lengths, clamp to code_size_limit) and both the size and code arrays of the
   table are rewritten.

   table_len is the number of symbols considered; code_size_limit is the
   longest code length allowed. */
static void tdefl_optimize_huffman_table(tdefl_compressor *d, int table_num, int table_len, int code_size_limit, int static_table)
{
    int sym, size, rep, first_code;
    int num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE];
    mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1];

    MZ_CLEAR_ARR(num_codes);

    if (!static_table)
    {
        tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSorted;
        const mz_uint16 *pFreq = &d->m_huff_count[table_num][0];
        int num_used = 0, cursor;

        /* Collect only the symbols that actually occur. */
        for (sym = 0; sym < table_len; sym++)
        {
            if (!pFreq[sym])
                continue;
            syms0[num_used].m_key = (mz_uint16)pFreq[sym];
            syms0[num_used].m_sym_index = (mz_uint16)sym;
            num_used++;
        }

        pSorted = tdefl_radix_sort_syms(num_used, syms0, syms1);
        tdefl_calculate_minimum_redundancy(pSorted, num_used);

        /* Histogram of code lengths, then clamp to the limit. */
        for (sym = 0; sym < num_used; sym++)
            num_codes[pSorted[sym].m_key]++;

        tdefl_huffman_enforce_max_code_size(num_codes, num_used, code_size_limit);

        MZ_CLEAR_ARR(d->m_huff_code_sizes[table_num]);
        MZ_CLEAR_ARR(d->m_huff_codes[table_num]);

        /* Walk the sorted list from its end: the shortest lengths go to the
           entries with the largest keys. */
        cursor = num_used;
        for (size = 1; size <= code_size_limit; size++)
        {
            for (rep = num_codes[size]; rep > 0; rep--)
            {
                cursor--;
                d->m_huff_code_sizes[table_num][pSorted[cursor].m_sym_index] = (mz_uint8)(size);
            }
        }
    }
    else
    {
        for (sym = 0; sym < table_len; sym++)
            num_codes[d->m_huff_code_sizes[table_num][sym]]++;
    }

    /* First canonical code value for each code length. */
    next_code[1] = 0;
    first_code = 0;
    for (size = 2; size <= code_size_limit; size++)
    {
        first_code = ((first_code + num_codes[size - 1]) << 1);
        next_code[size] = first_code;
    }

    /* Assign codes in symbol order and store them bit-reversed. */
    for (sym = 0; sym < table_len; sym++)
    {
        mz_uint reversed = 0, code, remaining;
        const mz_uint code_size = d->m_huff_code_sizes[table_num][sym];

        if (code_size == 0)
            continue;

        code = next_code[code_size]++;
        for (remaining = code_size; remaining > 0; remaining--)
        {
            reversed = (reversed << 1) | (code & 1);
            code >>= 1;
        }
        d->m_huff_codes[table_num][sym] = (mz_uint16)reversed;
    }
}

/* Appends the low `l` bits of `b` to the compressor's bit buffer and flushes
   every completed byte to the output buffer. Expects a `tdefl_compressor *d`
   in the enclosing scope. Bytes are silently dropped once d->m_pOutput_buf
   reaches d->m_pOutput_buf_end (the bit buffer is still drained), so callers
   detect overflow by comparing those two pointers afterwards.
   The macro declares locals named `bits` and `len`; do not pass arguments
   that refer to identifiers with those names. */
#define TDEFL_PUT_BITS(b, l)                                       \
    do                                                             \
    {                                                              \
        mz_uint bits = b;                                          \
        mz_uint len = l;                                           \
        MZ_ASSERT(bits <= ((1U << len) - 1U));                     \
        d->m_bit_buffer |= (bits << d->m_bits_in);                 \
        d->m_bits_in += len;                                       \
        while (d->m_bits_in >= 8)                                  \
        {                                                          \
            if (d->m_pOutput_buf < d->m_pOutput_buf_end)           \
                *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \
            d->m_bit_buffer >>= 8;                                 \
            d->m_bits_in -= 8;                                     \
        }                                                          \
    }                                                              \
    MZ_MACRO_END

/* Flushes a pending run of repeats of the previous non-zero code size.
   Relies on these names in the enclosing scope: d, rle_repeat_count,
   prev_code_size, packed_code_sizes, num_packed_code_sizes.
   Runs shorter than 3 are written out as literal code sizes; longer runs are
   written as symbol 16 followed by (count - 3). The usage counts in
   d->m_huff_count[2] are updated to match, and rle_repeat_count is reset.
   Expands to a bare { } block, so use it only as a full statement. */
#define TDEFL_RLE_PREV_CODE_SIZE()                                                                                       \
    {                                                                                                                    \
        if (rle_repeat_count)                                                                                            \
        {                                                                                                                \
            if (rle_repeat_count < 3)                                                                                    \
            {                                                                                                            \
                d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \
                while (rle_repeat_count--)                                                                               \
                    packed_code_sizes[num_packed_code_sizes++] = prev_code_size;                                         \
            }                                                                                                            \
            else                                                                                                         \
            {                                                                                                            \
                d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1);                                        \
                packed_code_sizes[num_packed_code_sizes++] = 16;                                                         \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3);                           \
            }                                                                                                            \
            rle_repeat_count = 0;                                                                                        \
        }                                                                                                                \
    }

/* Flushes a pending run of zero code sizes.
   Relies on these names in the enclosing scope: d, rle_z_count,
   packed_code_sizes, num_packed_code_sizes.
   Runs shorter than 3 are written as literal zeros; runs of 3..10 as symbol 17
   followed by (count - 3); longer runs as symbol 18 followed by (count - 11).
   The usage counts in d->m_huff_count[2] are updated to match, and
   rle_z_count is reset.
   Expands to a bare { } block, so use it only as a full statement. */
#define TDEFL_RLE_ZERO_CODE_SIZE()                                                         \
    {                                                                                      \
        if (rle_z_count)                                                                   \
        {                                                                                  \
            if (rle_z_count < 3)                                                           \
            {                                                                              \
                d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count);  \
                while (rle_z_count--)                                                      \
                    packed_code_sizes[num_packed_code_sizes++] = 0;                        \
            }                                                                              \
            else if (rle_z_count <= 10)                                                    \
            {                                                                              \
                d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1);          \
                packed_code_sizes[num_packed_code_sizes++] = 17;                           \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3);  \
            }                                                                              \
            else                                                                           \
            {                                                                              \
                d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1);          \
                packed_code_sizes[num_packed_code_sizes++] = 18;                           \
                packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \
            }                                                                              \
            rle_z_count = 0;                                                               \
        }                                                                                  \
    }

/* Order in which the code sizes of the code-length alphabet (table 2) are
   written to a dynamic block header. Only ever read, so it is const like the
   other lookup tables in this file. */
static const mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };

/* Builds the literal/length (table 0) and distance (table 1) Huffman tables
   from the symbol counts gathered in d->m_huff_count, then writes a dynamic
   block header through TDEFL_PUT_BITS: the 2-bit block type, the three
   table-size fields, the code sizes of the code-length alphabet (table 2) and
   finally the run-length-packed code sizes of tables 0 and 1.
   The locals rle_z_count, rle_repeat_count, prev_code_size, packed_code_sizes
   and num_packed_code_sizes are referenced by name from the TDEFL_RLE_*
   macros and must keep those names. */
static void tdefl_start_dynamic_block(tdefl_compressor *d)
{
    int num_lit_codes, num_dist_codes, num_bit_lengths;
    mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index;
    mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF;

    /* Symbol 256 is written after the LZ codes by tdefl_compress_lz_codes(), so it must receive a code. */
    d->m_huff_count[0][256] = 1;

    tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE);
    tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE);

    /* Trim trailing unused symbols, keeping at least 257 literal/length and 1 distance code. */
    for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--)
        if (d->m_huff_code_sizes[0][num_lit_codes - 1])
            break;
    for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--)
        if (d->m_huff_code_sizes[1][num_dist_codes - 1])
            break;

    /* Concatenate both code-size lists; they are run-length packed as one sequence. */
    memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes);
    memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes);
    total_code_sizes_to_pack = num_lit_codes + num_dist_codes;
    num_packed_code_sizes = 0;
    rle_z_count = 0;
    rle_repeat_count = 0;

    /* Pack the sequence into packed_code_sizes while counting how often each table-2 symbol is used. */
    memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2);
    for (i = 0; i < total_code_sizes_to_pack; i++)
    {
        mz_uint8 code_size = code_sizes_to_pack[i];
        if (!code_size)
        {
            /* A zero ends any pending repeat run; zero runs are flushed once they reach 138. */
            TDEFL_RLE_PREV_CODE_SIZE();
            if (++rle_z_count == 138)
            {
                TDEFL_RLE_ZERO_CODE_SIZE();
            }
        }
        else
        {
            TDEFL_RLE_ZERO_CODE_SIZE();
            if (code_size != prev_code_size)
            {
                /* New value: flush the old run and emit the size itself once. */
                TDEFL_RLE_PREV_CODE_SIZE();
                d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1);
                packed_code_sizes[num_packed_code_sizes++] = code_size;
            }
            else if (++rle_repeat_count == 6)
            {
                /* Repeat runs are flushed once they reach 6. */
                TDEFL_RLE_PREV_CODE_SIZE();
            }
        }
        prev_code_size = code_size;
    }
    /* Flush whichever run is still pending. */
    if (rle_repeat_count)
    {
        TDEFL_RLE_PREV_CODE_SIZE();
    }
    else
    {
        TDEFL_RLE_ZERO_CODE_SIZE();
    }

    /* Code-length alphabet: codes are limited to 7 bits (their sizes are sent in 3-bit fields below). */
    tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE);

    /* Block type 2 (dynamic Huffman). */
    TDEFL_PUT_BITS(2, 2);

    TDEFL_PUT_BITS(num_lit_codes - 257, 5);
    TDEFL_PUT_BITS(num_dist_codes - 1, 5);

    /* Find the last used entry in swizzle order; at least 4 code sizes are always sent. */
    for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--)
        if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]])
            break;
    num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1));
    TDEFL_PUT_BITS(num_bit_lengths - 4, 4);
    for (i = 0; (int)i < num_bit_lengths; i++)
        TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3);

    /* Emit the packed code sizes; symbols 16, 17 and 18 are followed by a 2-, 3- or 7-bit repeat count. */
    for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;)
    {
        mz_uint code = packed_code_sizes[packed_code_sizes_index++];
        MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2);
        TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]);
        if (code >= 16)
            TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]);
    }
}

/* Loads the fixed code sizes into tables 0 and 1, derives the matching codes
   and writes the 2-bit block type (1) for a static Huffman block. */
static void tdefl_start_static_block(tdefl_compressor *d)
{
    mz_uint8 *pLit_sizes = &d->m_huff_code_sizes[0][0];

    /* Literal/length code sizes: symbols 0-143 -> 8, 144-255 -> 9, 256-279 -> 7, 280-287 -> 8. */
    memset(pLit_sizes, 8, 144);
    memset(pLit_sizes + 144, 9, 112);
    memset(pLit_sizes + 256, 7, 24);
    memset(pLit_sizes + 280, 8, 8);

    /* All 32 distance code sizes are 5 bits. */
    memset(d->m_huff_code_sizes[1], 5, 32);

    tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE);
    tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE);

    TDEFL_PUT_BITS(1, 2);
}

/* mz_bitmasks[n] has the low n bits set (n = 0..16); used to isolate the extra bits of a length or distance. */
static const mz_uint mz_bitmasks[17] = { 0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF };

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS
/* Fast variant for little-endian targets with 64-bit registers and unaligned
   store support.

   Walks the LZ code buffer (d->m_lz_code_buf up to d->m_pLZ_code_buf) and
   writes the Huffman-coded literals and matches to the output buffer, followed
   by symbol 256. Each flag byte describes the next 8 records: a set bit means
   a 3-byte match record (length byte, 16-bit little-endian distance), a clear
   bit means a single literal byte.

   Bits are gathered in a local 64-bit accumulator; after each record group the
   whole accumulator is stored with memcpy and the output pointer advances by
   the number of complete bytes only.

   Returns MZ_FALSE as soon as the output pointer reaches
   d->m_pOutput_buf_end, otherwise whether the output pointer is still below
   that limit after the final symbol has been written. */
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    mz_uint flags;
    mz_uint8 *pLZ_codes;
    mz_uint8 *pOutput_buf = d->m_pOutput_buf;
    mz_uint8 *pLZ_code_buf_end = d->m_pLZ_code_buf;
    mz_uint64 bit_buffer = d->m_bit_buffer;
    mz_uint bits_in = d->m_bits_in;

#define TDEFL_PUT_BITS_FAST(b, l)                    \
    {                                                \
        bit_buffer |= (((mz_uint64)(b)) << bits_in); \
        bits_in += (l);                              \
    }

    flags = 1;
    for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1)
    {
        /* The 0x100 sentinel bit makes flags reach 1 again after 8 records. */
        if (flags == 1)
            flags = *pLZ_codes++ | 0x100;

        if (flags & 1)
        {
            mz_uint s0, s1, n0, n1, sym, num_extra_bits;
            mz_uint match_len = pLZ_codes[0];
            /* Byte-wise little-endian read; match_dist must be declared here,
               nothing else in this function declares it. */
            mz_uint match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8));
            pLZ_codes += 3;

            MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]);
            TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]);

            /* This sequence coaxes MSVC into using cmov's vs. jmp's. */
            s0 = s_tdefl_small_dist_sym[match_dist & 511];
            n0 = s_tdefl_small_dist_extra[match_dist & 511];
            s1 = s_tdefl_large_dist_sym[match_dist >> 8];
            n1 = s_tdefl_large_dist_extra[match_dist >> 8];
            sym = (match_dist < 512) ? s0 : s1;
            num_extra_bits = (match_dist < 512) ? n0 : n1;

            MZ_ASSERT(d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]);
            TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits);
        }
        else
        {
            mz_uint lit = *pLZ_codes++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

            /* Up to two further literals are folded into the same store when
               the next flag bits also indicate literals. */
            if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
            {
                flags >>= 1;
                lit = *pLZ_codes++;
                MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);

                if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end))
                {
                    flags >>= 1;
                    lit = *pLZ_codes++;
                    MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
                    TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
                }
            }
        }

        if (pOutput_buf >= d->m_pOutput_buf_end)
            return MZ_FALSE;

        /* Store all 8 accumulator bytes, but keep the trailing partial byte
           (bits_in & 7 bits) in the accumulator for the next round. */
        memcpy(pOutput_buf, &bit_buffer, sizeof(mz_uint64));
        pOutput_buf += (bits_in >> 3);
        bit_buffer >>= (bits_in & ~7);
        bits_in &= 7;
    }

#undef TDEFL_PUT_BITS_FAST

    d->m_pOutput_buf = pOutput_buf;
    d->m_bits_in = 0;
    d->m_bit_buffer = 0;

    /* Hand the leftover bits back to the regular bit writer, at most 16 at a time. */
    while (bits_in)
    {
        mz_uint32 n = MZ_MIN(bits_in, 16);
        TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n);
        bit_buffer >>= n;
        bits_in -= n;
    }

    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);

    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#else
/* Portable variant.

   Walks the LZ code buffer (d->m_lz_code_buf up to d->m_pLZ_code_buf) and
   writes the Huffman-coded literals and matches through TDEFL_PUT_BITS,
   followed by symbol 256. Each flag byte describes the next 8 records: a set
   bit means a 3-byte match record (length byte, 16-bit little-endian
   distance), a clear bit means a single literal byte.

   Returns whether the output pointer is still below d->m_pOutput_buf_end,
   i.e. MZ_FALSE when the output buffer filled up. */
static mz_bool tdefl_compress_lz_codes(tdefl_compressor *d)
{
    const mz_uint8 *pCur = d->m_lz_code_buf;
    mz_uint flag_bits = 1;

    while (pCur < d->m_pLZ_code_buf)
    {
        /* The 0x100 sentinel bit makes flag_bits reach 1 again after 8 records. */
        if (flag_bits == 1)
            flag_bits = *pCur++ | 0x100;

        if (!(flag_bits & 1))
        {
            const mz_uint lit = *pCur++;
            MZ_ASSERT(d->m_huff_code_sizes[0][lit]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]);
        }
        else
        {
            mz_uint dist_sym, dist_extra;
            const mz_uint match_len = pCur[0];
            const mz_uint match_dist = (pCur[1] | (pCur[2] << 8));
            const mz_uint len_sym = s_tdefl_len_sym[match_len];
            const mz_uint len_extra = s_tdefl_len_extra[match_len];
            pCur += 3;

            /* Length symbol followed by its extra bits. */
            MZ_ASSERT(d->m_huff_code_sizes[0][len_sym]);
            TDEFL_PUT_BITS(d->m_huff_codes[0][len_sym], d->m_huff_code_sizes[0][len_sym]);
            TDEFL_PUT_BITS(match_len & mz_bitmasks[len_extra], len_extra);

            /* Distances below 512 use the fine-grained table, the rest the coarse one. */
            if (match_dist >= 512)
            {
                dist_sym = s_tdefl_large_dist_sym[match_dist >> 8];
                dist_extra = s_tdefl_large_dist_extra[match_dist >> 8];
            }
            else
            {
                dist_sym = s_tdefl_small_dist_sym[match_dist];
                dist_extra = s_tdefl_small_dist_extra[match_dist];
            }

            /* Distance symbol followed by its extra bits. */
            MZ_ASSERT(d->m_huff_code_sizes[1][dist_sym]);
            TDEFL_PUT_BITS(d->m_huff_codes[1][dist_sym], d->m_huff_code_sizes[1][dist_sym]);
            TDEFL_PUT_BITS(match_dist & mz_bitmasks[dist_extra], dist_extra);
        }

        flag_bits >>= 1;
    }

    TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]);

    return (d->m_pOutput_buf < d->m_pOutput_buf_end);
}
#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS */

/* Writes one Huffman-coded block: the block header (static or dynamic,
   selected by static_block) followed by the buffered LZ codes.
   Returns the result of tdefl_compress_lz_codes(). */
static mz_bool tdefl_compress_block(tdefl_compressor *d, mz_bool static_block)
{
    if (!static_block)
        tdefl_start_dynamic_block(d);
    else
        tdefl_start_static_block(d);

    return tdefl_compress_lz_codes(d);
}

/* Forward (tentative) declaration so tdefl_flush_block() can scan the table; the initialized definition appears later in this file. */
static const mz_uint s_tdefl_num_probes[11];

/*
 * Turns the LZ codes buffered since the previous flush into one output block and delivers the bytes.
 *
 *  - On the first block (m_block_index == 0) with TDEFL_WRITE_ZLIB_HEADER set, a two byte header is emitted first.
 *  - A compressed block is attempted unless raw blocks are forced. If the compressed form is not smaller than the
 *    input and the block's source bytes are still inside the dictionary, the attempt is discarded and the bytes are
 *    written as a raw block instead. If the attempt reported failure, it is redone with static codes.
 *  - flush == TDEFL_FINISH pads to a byte boundary and, for zlib output, appends m_adler32 most significant byte first.
 *    Any other non-zero flush writes three zero bits, pads, then 0x0000 and 0xFFFF (a zero-length raw block).
 *  - Per-block state (huffman counts, LZ code buffer pointers, flag counter) is reset for the next block.
 *  - Output goes to m_pPut_buf_func when set; otherwise it is either already in the caller's buffer (direct mode)
 *    or copied there from m_output_buf, with any overflow remembered in m_output_flush_ofs/m_output_flush_remaining.
 *
 * Returns m_output_flush_remaining (bytes still waiting in m_output_buf), or TDEFL_STATUS_PUT_BUF_FAILED when the
 * put callback returns 0. Callers in this file treat a negative return as failure.
 */
static int tdefl_flush_block(tdefl_compressor *d, int flush)
{
    mz_uint saved_bit_buf, saved_bits_in;
    mz_uint8 *pSaved_output_buf;
    mz_bool comp_block_succeeded = MZ_FALSE;
    /* Raw output is only honoured while the block's bytes (lookahead_pos - lz_code_buf_dict_pos) still fit in the dictionary. */
    int n, use_raw_block = ((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size;
    /* Write straight into the caller's buffer when there is no callback and at least TDEFL_OUT_BUF_SIZE bytes of room; otherwise stage in m_output_buf. */
    mz_uint8 *pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf;

    d->m_pOutput_buf = pOutput_buf_start;
    /* The logical end is set 16 bytes short of TDEFL_OUT_BUF_SIZE. */
    d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16;

    MZ_ASSERT(!d->m_output_flush_remaining);
    d->m_output_flush_ofs = 0;
    d->m_output_flush_remaining = 0;

    /* Right-align the partially filled last flags byte; if it holds no flags at all (8 slots left), give its reserved byte back. */
    *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left);
    d->m_pLZ_code_buf -= (d->m_num_flags_left == 8);

    if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index))
    {
        const mz_uint8 cmf = 0x78;
        mz_uint8 flg, flevel = 3;
        /* Note: this 'n' (table length) shadows the outer 'int n' for the duration of this scope. */
        mz_uint header, i, n = sizeof(s_tdefl_num_probes) / sizeof(mz_uint);

        /* Determine compression level by reversing the process in tdefl_create_comp_flags_from_zip_params() */
        for (i = 0; i < n; i++)
            if (s_tdefl_num_probes[i] == (d->m_flags & 0xFFF)) break;

        /* Map the table index to the 2-bit level field; indices above 6 (or no match) keep the default of 3. */
        if (i < 2)
            flevel = 0;
        else if (i < 6)
            flevel = 1;
        else if (i == 6)
            flevel = 2;

        /* Combine cmf and the level bits, then bump the 16-bit value up to a multiple of 31. */
        header = cmf << 8 | (flevel << 6);
        header += 31 - (header % 31);
        flg = header & 0xFF;

        TDEFL_PUT_BITS(cmf, 8);
        TDEFL_PUT_BITS(flg, 8);
    }

    /* Final-block bit: 1 only when finishing the stream. */
    TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1);

    /* Snapshot the bit writer so a failed or oversized compressed attempt can be rolled back. */
    pSaved_output_buf = d->m_pOutput_buf;
    saved_bit_buf = d->m_bit_buffer;
    saved_bits_in = d->m_bits_in;

    /* Static codes are used when forced by flag or when the block covers fewer than 48 input bytes. */
    if (!use_raw_block)
        comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48));

    /* If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. */
    if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) &&
        ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size))
    {
        mz_uint i;
        d->m_pOutput_buf = pSaved_output_buf;
        d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        /* Two zero bits select the raw block type, then pad to the next byte boundary. */
        TDEFL_PUT_BITS(0, 2);
        if (d->m_bits_in)
        {
            TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
        }
        /* Emit the 16-bit length and then its complement; the two XORs leave m_total_lz_bytes unchanged afterwards. */
        for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF)
        {
            TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16);
        }
        /* Copy the block's original bytes out of the circular dictionary. */
        for (i = 0; i < d->m_total_lz_bytes; ++i)
        {
            TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8);
        }
    }
    /* Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. */
    else if (!comp_block_succeeded)
    {
        d->m_pOutput_buf = pSaved_output_buf;
        d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in;
        tdefl_compress_block(d, MZ_TRUE);
    }

    if (flush)
    {
        if (flush == TDEFL_FINISH)
        {
            /* Pad to a byte boundary before the optional trailer. */
            if (d->m_bits_in)
            {
                TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
            }
            if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER)
            {
                /* Write the 32-bit adler value one byte at a time, most significant byte first. */
                mz_uint i, a = d->m_adler32;
                for (i = 0; i < 4; i++)
                {
                    TDEFL_PUT_BITS((a >> 24) & 0xFF, 8);
                    a <<= 8;
                }
            }
        }
        else
        {
            /* Non-final flush: three zero bits, byte padding, then 0x0000 followed by 0xFFFF. */
            mz_uint i, z = 0;
            TDEFL_PUT_BITS(0, 3);
            if (d->m_bits_in)
            {
                TDEFL_PUT_BITS(0, 8 - d->m_bits_in);
            }
            for (i = 2; i; --i, z ^= 0xFFFF)
            {
                TDEFL_PUT_BITS(z & 0xFFFF, 16);
            }
        }
    }

    MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end);

    /* Reset symbol statistics for the next block. */
    memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
    memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);

    /* Restart the LZ code buffer: byte 0 is the first flags byte, codes begin at byte 1. */
    d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
    d->m_pLZ_flags = d->m_lz_code_buf;
    d->m_num_flags_left = 8;
    d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes;
    d->m_total_lz_bytes = 0;
    d->m_block_index++;

    if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0)
    {
        if (d->m_pPut_buf_func)
        {
            /* Callback mode: report consumed input, then hand the staged bytes to the callback. */
            *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;
            if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user))
                return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED);
        }
        else if (pOutput_buf_start == d->m_output_buf)
        {
            /* Staged mode: copy what fits into the caller's buffer and remember the rest for tdefl_flush_output_buffer(). */
            int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs));
            memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy);
            d->m_out_buf_ofs += bytes_to_copy;
            if ((n -= bytes_to_copy) != 0)
            {
                d->m_output_flush_ofs = bytes_to_copy;
                d->m_output_flush_remaining = n;
            }
        }
        else
        {
            /* Direct mode: the bytes were written in place, so only the offset advances. */
            d->m_out_buf_ofs += n;
        }
    }

    return d->m_output_flush_remaining;
}

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
/* Loads a 16-bit value from a byte address by copying it into a local with memcpy. */
static mz_uint16 TDEFL_READ_UNALIGNED_WORD(const mz_uint8* p)
{
    mz_uint16 word;

    memcpy(&word, p, sizeof word);
    return word;
}
/* Same as TDEFL_READ_UNALIGNED_WORD but takes a mz_uint16 pointer; the value is still fetched with memcpy. */
static mz_uint16 TDEFL_READ_UNALIGNED_WORD2(const mz_uint16* p)
{
    mz_uint16 word;

    memcpy(&word, p, sizeof word);
    return word;
}
#else
/* Cast-and-dereference variants used when MINIZ_UNALIGNED_USE_MEMCPY is not defined: the address is read directly as a mz_uint16. */
#define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16 *)(p)
#define TDEFL_READ_UNALIGNED_WORD2(p) *(const mz_uint16 *)(p)
#endif
/*
 * Match finder, 16-bit-at-a-time variant (built when MINIZ_USE_UNALIGNED_LOADS_AND_STORES is enabled).
 *
 * Walks the d->m_next chain starting from lookahead_pos looking for an earlier position whose bytes match the
 * lookahead for longer than the current best.
 *
 *   lookahead_pos  absolute position of the bytes to match (masked into the dictionary internally)
 *   max_dist       largest distance accepted; the walk stops at the first candidate farther away
 *   max_match_len  upper bound on the reported length
 *   pMatch_dist    in/out: only written when a longer match is found
 *   pMatch_len     in/out: current best length on entry; raised when a longer match is found
 *
 * The number of chain steps is bounded by d->m_max_probes[], indexed by whether the incoming length is >= 32.
 */
static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
{
    mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
    mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
    const mz_uint16 *s = (const mz_uint16 *)(d->m_dict + pos), *p, *q;
    /* c01: the two bytes straddling the end of the current best match (offsets match_len-1 and match_len); s01: the first two lookahead bytes. */
    mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD2(s);
    MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
    /* Nothing longer can be found if the current best already reaches the cap. */
    if (max_match_len <= match_len)
        return;
    for (;;)
    {
        /* Inner loop: follow the chain (three steps per iteration) until a candidate passes the c01 quick check. */
        for (;;)
        {
            if (--num_probes_left == 0)
                return;
/* One chain step: return if the chain ends or the candidate is beyond max_dist; break out when the candidate's
   bytes at offsets match_len-1/match_len equal c01 (i.e. it could beat the current best). */
#define TDEFL_PROBE                                                                             \
    next_probe_pos = d->m_next[probe_pos];                                                      \
    if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \
        return;                                                                                 \
    probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                                       \
    if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01)                \
        break;
            TDEFL_PROBE;
            TDEFL_PROBE;
            TDEFL_PROBE;
        }
        /* A zero distance means the chain led back to the lookahead position itself. */
        if (!dist)
            break;
        q = (const mz_uint16 *)(d->m_dict + probe_pos);
        /* Reject candidates whose first two bytes differ. */
        if (TDEFL_READ_UNALIGNED_WORD2(q) != s01)
            continue;
        p = s;
        /* Compare four 16-bit words per iteration, for at most 32 iterations. */
        probe_len = 32;
        do
        {
        } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) &&
                 (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0));
        if (!probe_len)
        {
            /* Every word matched for all 32 iterations: report the longest length allowed and stop. */
            *pMatch_dist = dist;
            *pMatch_len = MZ_MIN(max_match_len, (mz_uint)TDEFL_MAX_MATCH_LEN);
            break;
        }
        /* Matched length = whole words matched * 2, plus one if the first byte of the mismatching word still agrees. */
        else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q)) > match_len)
        {
            *pMatch_dist = dist;
            if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len)
                break;
            /* Refresh the quick-check word for the new, longer best length. */
            c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]);
        }
    }
}
#else
/*
 * Match finder, byte-at-a-time variant (built when MINIZ_USE_UNALIGNED_LOADS_AND_STORES is disabled).
 *
 * Walks the d->m_next chain starting from lookahead_pos looking for an earlier position whose bytes match the
 * lookahead for longer than the current best.
 *
 *   lookahead_pos  absolute position of the bytes to match (masked into the dictionary internally)
 *   max_dist       largest distance accepted; the walk stops at the first candidate farther away
 *   max_match_len  upper bound on the reported length
 *   pMatch_dist    in/out: only written when a longer match is found
 *   pMatch_len     in/out: current best length on entry; raised when a longer match is found
 *
 * The number of chain steps is bounded by d->m_max_probes[], indexed by whether the incoming length is >= 32.
 */
static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor *d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint *pMatch_dist, mz_uint *pMatch_len)
{
    mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len;
    mz_uint num_probes_left = d->m_max_probes[match_len >= 32];
    const mz_uint8 *s = d->m_dict + pos, *p, *q;
    /* c0/c1: lookahead bytes at offsets match_len and match_len-1; a candidate must agree on both to beat the current best. */
    mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1];
    MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN);
    /* Nothing longer can be found if the current best already reaches the cap. */
    if (max_match_len <= match_len)
        return;
    for (;;)
    {
        /* Inner loop: follow the chain (three steps per iteration) until a candidate passes the c0/c1 quick check. */
        for (;;)
        {
            if (--num_probes_left == 0)
                return;
/* One chain step: return if the chain ends or the candidate is beyond max_dist; break out when the candidate's
   bytes at offsets match_len and match_len-1 equal c0 and c1. */
#define TDEFL_PROBE                                                                               \
    next_probe_pos = d->m_next[probe_pos];                                                        \
    if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist))   \
        return;                                                                                   \
    probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK;                                         \
    if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \
        break;
            TDEFL_PROBE;
            TDEFL_PROBE;
            TDEFL_PROBE;
        }
        /* A zero distance means the chain led back to the lookahead position itself. */
        if (!dist)
            break;
        p = s;
        q = d->m_dict + probe_pos;
        /* Count matching bytes from the start, up to max_match_len. */
        for (probe_len = 0; probe_len < max_match_len; probe_len++)
            if (*p++ != *q++)
                break;
        if (probe_len > match_len)
        {
            *pMatch_dist = dist;
            if ((*pMatch_len = match_len = probe_len) == max_match_len)
                return;
            /* Refresh the quick-check bytes for the new, longer best length. */
            c0 = d->m_dict[pos + match_len];
            c1 = d->m_dict[pos + match_len - 1];
        }
    }
}
#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES */

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
/* Loads a 32-bit value from a byte address by copying it into a local with memcpy. */
static mz_uint32 TDEFL_READ_UNALIGNED_WORD32(const mz_uint8* p)
{
    mz_uint32 dword;

    memcpy(&dword, p, sizeof dword);
    return dword;
}
#else
/* Cast-and-dereference variant used when MINIZ_UNALIGNED_USE_MEMCPY is not defined: the address is read directly as a mz_uint32. */
#define TDEFL_READ_UNALIGNED_WORD32(p) *(const mz_uint32 *)(p)
#endif
/*
 * Fast compression path (only built with unaligned loads on little-endian targets).
 *
 * Works on local copies of the compressor's position/size/flag state and writes them back to *d before every
 * tdefl_flush_block() call and on exit. Each pass: top the lookahead up to 4096 bytes from the source, then for
 * each position probe a single hash slot for a previous occurrence of the same three bytes and record either a
 * match or a literal. Any tail shorter than 4 bytes is recorded as literals.
 *
 * Returns MZ_TRUE normally. When tdefl_flush_block() returns non-zero the function returns immediately:
 * MZ_FALSE if that value was negative, MZ_TRUE otherwise.
 */
static mz_bool tdefl_compress_fast(tdefl_compressor *d)
{
    /* Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. */
    mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left;
    mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags;
    mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;

    /* Keep going while there is source left, or while flushing with bytes still in the lookahead. */
    while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size)))
    {
        const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096;
        mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
        mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size);
        d->m_src_buf_left -= num_bytes_to_process;
        lookahead_size += num_bytes_to_process;

        /* Copy source bytes into the circular dictionary, splitting at the wrap point. */
        while (num_bytes_to_process)
        {
            mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process);
            memcpy(d->m_dict + dst_pos, d->m_pSrc, n);
            /* Bytes landing in the first TDEFL_MAX_MATCH_LEN-1 slots are mirrored past the end of the dictionary. */
            if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos));
            d->m_pSrc += n;
            dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK;
            num_bytes_to_process -= n;
        }

        /* The lookahead occupies dictionary space, so the usable history shrinks accordingly. */
        dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size);
        /* Without a flush request, wait for a full lookahead before parsing. */
        if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE))
            break;

        while (lookahead_size >= 4)
        {
            mz_uint cur_match_dist, cur_match_len = 1;
            mz_uint8 *pCur_dict = d->m_dict + cur_pos;
            /* Low 24 bits of a 32-bit load: the next three input bytes on this little-endian-only path. */
            mz_uint first_trigram = TDEFL_READ_UNALIGNED_WORD32(pCur_dict) & 0xFFFFFF;
            mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK;
            /* Single-slot hash table: fetch the previous position for this hash and replace it with the current one. */
            mz_uint probe_pos = d->m_hash[hash];
            d->m_hash[hash] = (mz_uint16)lookahead_pos;

            /* Candidate must lie within the available history and start with the same three bytes. */
            if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((TDEFL_READ_UNALIGNED_WORD32(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram))
            {
                const mz_uint16 *p = (const mz_uint16 *)pCur_dict;
                const mz_uint16 *q = (const mz_uint16 *)(d->m_dict + probe_pos);
                /* Extend the match four 16-bit words per iteration, for at most 32 iterations. */
                mz_uint32 probe_len = 32;
                do
                {
                } while ((TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) &&
                         (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (TDEFL_READ_UNALIGNED_WORD2(++p) == TDEFL_READ_UNALIGNED_WORD2(++q)) && (--probe_len > 0));
                /* Whole words matched * 2, plus one if the first byte of the mismatching word still agrees. */
                cur_match_len = ((mz_uint)(p - (const mz_uint16 *)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8 *)p == *(const mz_uint8 *)q);
                /* Loop ran to completion: maximum-length match, unless the distance is zero (matched against itself). */
                if (!probe_len)
                    cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0;

                /* Too short, or minimum-length with a distance of 8K or more: emit a literal instead. */
                if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)))
                {
                    cur_match_len = 1;
                    *pLZ_code_buf++ = (mz_uint8)first_trigram;
                    *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
                    d->m_huff_count[0][(mz_uint8)first_trigram]++;
                }
                else
                {
                    mz_uint32 s0, s1;
                    /* Never report more bytes than the lookahead actually holds. */
                    cur_match_len = MZ_MIN(cur_match_len, lookahead_size);

                    MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE));

                    /* Distances are stored biased by one. */
                    cur_match_dist--;

                    /* Match record: [length - TDEFL_MIN_MATCH_LEN][distance low byte][distance high byte]. */
                    pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN);
                    /* NOTE(review): sizeof(cur_match_dist) is sizeof(mz_uint), so this memcpy writes more bytes than the
                       two stored by the #else branch; only 3 bytes are consumed by the pointer advance below, so the
                       extra bytes are presumably overwritten or ignored later -- confirm this is intended. */
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
					memcpy(&pLZ_code_buf[1], &cur_match_dist, sizeof(cur_match_dist));
#else
                    *(mz_uint16 *)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist;
#endif
                    pLZ_code_buf += 3;
                    /* Shift a 1 bit into the flags byte to mark this entry as a match. */
                    *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80);

                    /* Count the distance symbol (small table below 512, large table otherwise) and the length symbol. */
                    s0 = s_tdefl_small_dist_sym[cur_match_dist & 511];
                    s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8];
                    d->m_huff_count[1][(cur_match_dist < 512) ? s0 : s1]++;

                    d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++;
                }
            }
            else
            {
                /* No usable candidate: emit the current byte as a literal (flag bit 0). */
                *pLZ_code_buf++ = (mz_uint8)first_trigram;
                *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
                d->m_huff_count[0][(mz_uint8)first_trigram]++;
            }

            /* After eight entries, reserve the next code-buffer byte as the new flags byte. */
            if (--num_flags_left == 0)
            {
                num_flags_left = 8;
                pLZ_flags = pLZ_code_buf++;
            }

            /* Advance past the bytes just coded; they become part of the history. */
            total_lz_bytes += cur_match_len;
            lookahead_pos += cur_match_len;
            dict_size = MZ_MIN(dict_size + cur_match_len, (mz_uint)TDEFL_LZ_DICT_SIZE);
            cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK;
            MZ_ASSERT(lookahead_size >= cur_match_len);
            lookahead_size -= cur_match_len;

            /* Code buffer nearly full (within 8 bytes of the end): publish local state and flush a block. */
            if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8])
            {
                int n;
                d->m_lookahead_pos = lookahead_pos;
                d->m_lookahead_size = lookahead_size;
                d->m_dict_size = dict_size;
                d->m_total_lz_bytes = total_lz_bytes;
                d->m_pLZ_code_buf = pLZ_code_buf;
                d->m_pLZ_flags = pLZ_flags;
                d->m_num_flags_left = num_flags_left;
                if ((n = tdefl_flush_block(d, 0)) != 0)
                    return (n < 0) ? MZ_FALSE : MZ_TRUE;
                /* Reload the fields tdefl_flush_block() resets. */
                total_lz_bytes = d->m_total_lz_bytes;
                pLZ_code_buf = d->m_pLZ_code_buf;
                pLZ_flags = d->m_pLZ_flags;
                num_flags_left = d->m_num_flags_left;
            }
        }

        /* Fewer than 4 bytes remain in the lookahead: emit them as literals. */
        while (lookahead_size)
        {
            mz_uint8 lit = d->m_dict[cur_pos];

            total_lz_bytes++;
            *pLZ_code_buf++ = lit;
            *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1);
            if (--num_flags_left == 0)
            {
                num_flags_left = 8;
                pLZ_flags = pLZ_code_buf++;
            }

            d->m_huff_count[0][lit]++;

            lookahead_pos++;
            dict_size = MZ_MIN(dict_size + 1, (mz_uint)TDEFL_LZ_DICT_SIZE);
            cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
            lookahead_size--;

            /* Same near-full check and flush sequence as in the match loop above. */
            if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8])
            {
                int n;
                d->m_lookahead_pos = lookahead_pos;
                d->m_lookahead_size = lookahead_size;
                d->m_dict_size = dict_size;
                d->m_total_lz_bytes = total_lz_bytes;
                d->m_pLZ_code_buf = pLZ_code_buf;
                d->m_pLZ_flags = pLZ_flags;
                d->m_num_flags_left = num_flags_left;
                if ((n = tdefl_flush_block(d, 0)) != 0)
                    return (n < 0) ? MZ_FALSE : MZ_TRUE;
                total_lz_bytes = d->m_total_lz_bytes;
                pLZ_code_buf = d->m_pLZ_code_buf;
                pLZ_flags = d->m_pLZ_flags;
                num_flags_left = d->m_num_flags_left;
            }
        }
    }

    /* Publish the local working state back to the compressor. */
    d->m_lookahead_pos = lookahead_pos;
    d->m_lookahead_size = lookahead_size;
    d->m_dict_size = dict_size;
    d->m_total_lz_bytes = total_lz_bytes;
    d->m_pLZ_code_buf = pLZ_code_buf;
    d->m_pLZ_flags = pLZ_flags;
    d->m_num_flags_left = num_flags_left;
    return MZ_TRUE;
}
#endif /* MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */

/* Appends one literal byte to the LZ code buffer: stores the byte, shifts a 0 bit into the
   current flags byte, bumps the literal's frequency count, and reserves a fresh flags byte
   once eight entries have been recorded against the current one. */
static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor *d, mz_uint8 lit)
{
    /* Statistics: one more input byte covered, one more occurrence of this literal. */
    d->m_huff_count[0][lit]++;
    d->m_total_lz_bytes++;

    /* Payload byte followed by the flag bit (0 = literal). */
    *d->m_pLZ_code_buf = lit;
    d->m_pLZ_code_buf++;
    *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1);

    d->m_num_flags_left--;
    if (d->m_num_flags_left == 0)
    {
        /* Flags byte is full: the next code-buffer byte becomes the new flags byte. */
        d->m_pLZ_flags = d->m_pLZ_code_buf;
        d->m_pLZ_code_buf++;
        d->m_num_flags_left = 8;
    }
}

/* Appends one match to the LZ code buffer as three bytes (length - TDEFL_MIN_MATCH_LEN, then
   distance - 1 as low byte / high byte), shifts a 1 bit into the current flags byte, and
   updates the length and distance symbol frequency counts. */
static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor *d, mz_uint match_len, mz_uint match_dist)
{
    mz_uint8 *code;
    mz_uint len_code, dist_code;
    mz_uint32 dist_sym;

    MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE));

    len_code = match_len - TDEFL_MIN_MATCH_LEN;
    dist_code = match_dist - 1;

    d->m_total_lz_bytes += match_len;

    /* Three-byte match record. */
    code = d->m_pLZ_code_buf;
    code[0] = (mz_uint8)len_code;
    code[1] = (mz_uint8)(dist_code & 0xFF);
    code[2] = (mz_uint8)(dist_code >> 8);
    d->m_pLZ_code_buf = code + 3;

    /* Flag bit 1 = match. */
    *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80);
    d->m_num_flags_left--;
    if (d->m_num_flags_left == 0)
    {
        /* Flags byte is full: the next code-buffer byte becomes the new flags byte. */
        d->m_pLZ_flags = d->m_pLZ_code_buf;
        d->m_pLZ_code_buf++;
        d->m_num_flags_left = 8;
    }

    /* Distances below 512 use the small symbol table, the rest the large one. */
    if (dist_code < 512)
        dist_sym = s_tdefl_small_dist_sym[dist_code & 511];
    else
        dist_sym = s_tdefl_large_dist_sym[(dist_code >> 8) & 127];

    d->m_huff_count[1][dist_sym]++;
    d->m_huff_count[0][s_tdefl_len_sym[len_code]]++;
}

/*
 * General compression path: feeds source bytes into the dictionary and hash chains, finds matches
 * (tdefl_find_match(), or a distance-1 run check when TDEFL_RLE_MATCHES is set) and records literals/matches,
 * optionally deferring a match by one byte to see whether the next position yields a longer one.
 *
 * Works on local copies of d->m_pSrc / d->m_src_buf_left, written back before each tdefl_flush_block() call
 * and on exit.
 *
 * Returns MZ_TRUE normally. When tdefl_flush_block() returns non-zero the function returns immediately:
 * MZ_FALSE if that value was negative, MZ_TRUE otherwise.
 */
static mz_bool tdefl_compress_normal(tdefl_compressor *d)
{
    const mz_uint8 *pSrc = d->m_pSrc;
    size_t src_buf_left = d->m_src_buf_left;
    tdefl_flush flush = d->m_flush;

    /* Keep going while there is source left, or while flushing with bytes still in the lookahead. */
    while ((src_buf_left) || ((flush) && (d->m_lookahead_size)))
    {
        mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
        /* Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN. */
        if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1))
        {
            /* Common case: at least two bytes are already present, so the rolling hash can be seeded from them. */
            mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
            mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
            mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
            /* A NULL source pointer yields a NULL end pointer, so the copy loop below does not run. */
            const mz_uint8 *pSrc_end = pSrc ? pSrc + num_bytes_to_process : NULL;
            src_buf_left -= num_bytes_to_process;
            d->m_lookahead_size += num_bytes_to_process;
            while (pSrc != pSrc_end)
            {
                mz_uint8 c = *pSrc++;
                d->m_dict[dst_pos] = c;
                /* Bytes landing in the first TDEFL_MAX_MATCH_LEN-1 slots are mirrored past the end of the dictionary. */
                if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                    d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
                /* Roll the hash forward and link ins_pos at the head of its chain. */
                hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
                d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
                d->m_hash[hash] = (mz_uint16)(ins_pos);
                dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
                ins_pos++;
            }
        }
        else
        {
            /* Start-up case: fewer than two bytes available, so add bytes one at a time and only begin hashing once three are present. */
            while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
            {
                mz_uint8 c = *pSrc++;
                mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
                src_buf_left--;
                d->m_dict[dst_pos] = c;
                if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
                    d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
                if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN)
                {
                    mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
                    mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
                    d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
                    d->m_hash[hash] = (mz_uint16)(ins_pos);
                }
            }
        }
        /* The lookahead occupies dictionary space, so the usable history shrinks accordingly. */
        d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
        /* Without a flush request, wait for a full lookahead before parsing. */
        if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
            break;

        /* Simple lazy/greedy parsing state machine. */
        len_to_move = 1;
        cur_match_dist = 0;
        /* Start from the deferred match length if there is one, so only strictly longer matches are reported. */
        cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1);
        cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
        if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS))
        {
            /* RLE mode: only look for a run of the previous byte (distance 1). With raw blocks forced, no match search happens at all. */
            if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))
            {
                mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
                cur_match_len = 0;
                while (cur_match_len < d->m_lookahead_size)
                {
                    if (d->m_dict[cur_pos + cur_match_len] != c)
                        break;
                    cur_match_len++;
                }
                if (cur_match_len < TDEFL_MIN_MATCH_LEN)
                    cur_match_len = 0;
                else
                    cur_match_dist = 1;
            }
        }
        else
        {
            tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
        }
        /* Discard the match if it is minimum-length with a distance of 8K or more, if cur_pos equals the distance, or if match filtering is on and the length is 5 or less. */
        if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5)))
        {
            cur_match_dist = cur_match_len = 0;
        }
        if (d->m_saved_match_len)
        {
            /* A match was deferred at the previous position. */
            if (cur_match_len > d->m_saved_match_len)
            {
                /* The new match is longer: the deferred position becomes a literal. */
                tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
                if (cur_match_len >= 128)
                {
                    /* Long enough to take immediately without deferring again. */
                    tdefl_record_match(d, cur_match_len, cur_match_dist);
                    d->m_saved_match_len = 0;
                    len_to_move = cur_match_len;
                }
                else
                {
                    /* Defer the new match in turn. */
                    d->m_saved_lit = d->m_dict[cur_pos];
                    d->m_saved_match_dist = cur_match_dist;
                    d->m_saved_match_len = cur_match_len;
                }
            }
            else
            {
                /* The deferred match wins; one of its bytes was already stepped over when it was saved, hence the -1. */
                tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
                len_to_move = d->m_saved_match_len - 1;
                d->m_saved_match_len = 0;
            }
        }
        else if (!cur_match_dist)
            /* No match: emit a literal (the index is clamped to the last element of m_dict). */
            tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
        else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128))
        {
            /* Greedy parsing, RLE mode, or a long match: take it right away. */
            tdefl_record_match(d, cur_match_len, cur_match_dist);
            len_to_move = cur_match_len;
        }
        else
        {
            /* Lazy parsing: remember this match and first check whether the next position does better. */
            d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)];
            d->m_saved_match_dist = cur_match_dist;
            d->m_saved_match_len = cur_match_len;
        }
        /* Move the lookahead forward by len_to_move bytes. */
        d->m_lookahead_pos += len_to_move;
        MZ_ASSERT(d->m_lookahead_size >= len_to_move);
        d->m_lookahead_size -= len_to_move;
        d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, (mz_uint)TDEFL_LZ_DICT_SIZE);
        /* Check if it's time to flush the current LZ codes to the internal output buffer. */
        /* Triggers: code buffer within 8 bytes of full, or more than 31KB coded and either (code bytes * 115 / 128) >= input bytes or raw blocks are forced. */
        if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
            ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))))
        {
            int n;
            /* tdefl_flush_block() reads d->m_pSrc, so publish the local source cursor first. */
            d->m_pSrc = pSrc;
            d->m_src_buf_left = src_buf_left;
            if ((n = tdefl_flush_block(d, 0)) != 0)
                return (n < 0) ? MZ_FALSE : MZ_TRUE;
        }
    }

    d->m_pSrc = pSrc;
    d->m_src_buf_left = src_buf_left;
    return MZ_TRUE;
}

/* Reports progress back to the caller and drains pending staged output.
   - If an input-size pointer was supplied, it receives the number of source bytes consumed.
   - If an output-size pointer was supplied, as many pending bytes as fit are copied from
     m_output_buf into the caller's buffer and *m_pOut_buf_size is set to the bytes written.
   Returns TDEFL_STATUS_DONE once the stream is finished and nothing is left pending,
   TDEFL_STATUS_OKAY otherwise. */
static tdefl_status tdefl_flush_output_buffer(tdefl_compressor *d)
{
    if (d->m_pIn_buf_size)
        *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8 *)d->m_pIn_buf;

    if (d->m_pOut_buf_size)
    {
        const size_t room = *d->m_pOut_buf_size - d->m_out_buf_ofs;
        size_t chunk = d->m_output_flush_remaining;

        /* Copy no more than the caller's buffer can still hold. */
        if (room < chunk)
            chunk = room;

        memcpy((mz_uint8 *)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, chunk);

        d->m_out_buf_ofs += chunk;
        d->m_output_flush_ofs += (mz_uint)chunk;
        d->m_output_flush_remaining -= (mz_uint)chunk;

        *d->m_pOut_buf_size = d->m_out_buf_ofs;
    }

    if (d->m_finished && !d->m_output_flush_remaining)
        return TDEFL_STATUS_DONE;

    return TDEFL_STATUS_OKAY;
}

/*
 * Main streaming entry point: consumes input from pIn_buf and produces compressed output.
 *
 *   d              compressor state; NULL yields TDEFL_STATUS_BAD_PARAM with both size outputs zeroed
 *   pIn_buf        source bytes (may be NULL only when *pIn_buf_size is 0 or pIn_buf_size is NULL)
 *   pIn_buf_size   in: bytes available; out: bytes consumed
 *   pOut_buf       destination buffer (buffer mode) or NULL (callback mode)
 *   pOut_buf_size  in: destination capacity; out: bytes written
 *   flush          flush mode; once TDEFL_FINISH has been requested every later call must also pass TDEFL_FINISH
 *
 * Exactly one output route must be in use: either the compressor has a put-buffer callback and both output
 * arguments are NULL, or it has no callback and at least one output argument is non-NULL.
 *
 * Returns the status also stored in d->m_prev_return_status. A previous status other than TDEFL_STATUS_OKAY makes
 * the call fail with TDEFL_STATUS_BAD_PARAM.
 */
tdefl_status tdefl_compress(tdefl_compressor *d, const void *pIn_buf, size_t *pIn_buf_size, void *pOut_buf, size_t *pOut_buf_size, tdefl_flush flush)
{
    if (!d)
    {
        if (pIn_buf_size)
            *pIn_buf_size = 0;
        if (pOut_buf_size)
            *pOut_buf_size = 0;
        return TDEFL_STATUS_BAD_PARAM;
    }

    /* Record this call's buffers; the helpers read them from the compressor state. */
    d->m_pIn_buf = pIn_buf;
    d->m_pIn_buf_size = pIn_buf_size;
    d->m_pOut_buf = pOut_buf;
    d->m_pOut_buf_size = pOut_buf_size;
    d->m_pSrc = (const mz_uint8 *)(pIn_buf);
    d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0;
    d->m_out_buf_ofs = 0;
    d->m_flush = flush;

    /* Reject: callback/buffer mode mismatch, a prior non-OKAY status, a non-FINISH flush after FINISH was requested, or a non-zero size paired with a NULL buffer. */
    if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) ||
        (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf))
    {
        if (pIn_buf_size)
            *pIn_buf_size = 0;
        if (pOut_buf_size)
            *pOut_buf_size = 0;
        return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM);
    }
    d->m_wants_to_finish |= (flush == TDEFL_FINISH);

    /* Output left over from an earlier call (or an already finished stream) is drained before any new input is touched. */
    if ((d->m_output_flush_remaining) || (d->m_finished))
        return (d->m_prev_return_status = tdefl_flush_output_buffer(d));

#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN
    /* The fast path is taken only for exactly one probe, greedy parsing, and none of the filter/raw/RLE flags. */
    if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) &&
        ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) &&
        ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0))
    {
        if (!tdefl_compress_fast(d))
            return d->m_prev_return_status;
    }
    else
#endif /* #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN */
    {
        if (!tdefl_compress_normal(d))
            return d->m_prev_return_status;
    }

    /* Fold the bytes consumed by this call into the running adler32 when a zlib header or checksum was requested. */
    if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf))
        d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8 *)pIn_buf, d->m_pSrc - (const mz_uint8 *)pIn_buf);

    /* A flush is honoured only once all input is consumed, the lookahead is empty and no output is pending. */
    if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining))
    {
        if (tdefl_flush_block(d, flush) < 0)
            return d->m_prev_return_status;
        d->m_finished = (flush == TDEFL_FINISH);
        if (flush == TDEFL_FULL_FLUSH)
        {
            /* A full flush discards all match history. */
            MZ_CLEAR_ARR(d->m_hash);
            MZ_CLEAR_ARR(d->m_next);
            d->m_dict_size = 0;
        }
    }

    return (d->m_prev_return_status = tdefl_flush_output_buffer(d));
}

/* Callback-mode convenience wrapper around tdefl_compress(): compresses in_buf_size bytes from
   pIn_buf with no output buffer arguments, so the compressor must have been initialised with a
   put-buffer callback (asserted). Returns the status from tdefl_compress(). */
tdefl_status tdefl_compress_buffer(tdefl_compressor *d, const void *pIn_buf, size_t in_buf_size, tdefl_flush flush)
{
    size_t in_len = in_buf_size;

    MZ_ASSERT(d->m_pPut_buf_func);

    return tdefl_compress(d, pIn_buf, &in_len, NULL, NULL, flush);
}

/* Resets a compressor to its initial state.
     d              compressor to initialise
     pPut_buf_func  output callback, or NULL when the caller will pass output buffers to tdefl_compress()
     pPut_buf_user  opaque pointer stored for the callback
     flags          low 12 bits give the probe count; the remaining bits are TDEFL_* option flags
   The hash table and dictionary are zeroed unless TDEFL_NONDETERMINISTIC_PARSING_FLAG is set.
   Always returns TDEFL_STATUS_OKAY. */
tdefl_status tdefl_init(tdefl_compressor *d, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
    const int probe_bits = flags & 0xFFF;
    const mz_bool clear_history = !(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG);

    /* Configuration supplied by the caller. */
    d->m_pPut_buf_func = pPut_buf_func;
    d->m_pPut_buf_user = pPut_buf_user;
    d->m_flags = (mz_uint)(flags);
    d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0;

    /* Probe budgets: [0] for the regular search, [1] a reduced budget derived from a quarter of the probe count. */
    d->m_max_probes[0] = 1 + (probe_bits + 2) / 3;
    d->m_max_probes[1] = 1 + ((probe_bits >> 2) + 2) / 3;

    if (clear_history)
    {
        MZ_CLEAR_ARR(d->m_hash);
        MZ_CLEAR_ARR(d->m_dict);
    }

    /* Dictionary / lookahead bookkeeping. */
    d->m_lookahead_pos = 0;
    d->m_lookahead_size = 0;
    d->m_dict_size = 0;
    d->m_total_lz_bytes = 0;
    d->m_lz_code_buf_dict_pos = 0;

    /* Bit writer and output staging. */
    d->m_bits_in = 0;
    d->m_bit_buffer = 0;
    d->m_output_flush_ofs = 0;
    d->m_output_flush_remaining = 0;
    d->m_pOutput_buf = d->m_output_buf;
    d->m_pOutput_buf_end = d->m_output_buf;

    /* Stream status. */
    d->m_finished = 0;
    d->m_block_index = 0;
    d->m_wants_to_finish = 0;
    d->m_prev_return_status = TDEFL_STATUS_OKAY;
    d->m_adler32 = 1;

    /* LZ code buffer: byte 0 is the first flags byte, codes start at byte 1. */
    d->m_pLZ_flags = d->m_lz_code_buf;
    d->m_pLZ_code_buf = d->m_lz_code_buf + 1;
    *d->m_pLZ_flags = 0;
    d->m_num_flags_left = 8;

    /* No deferred match pending. */
    d->m_saved_match_dist = 0;
    d->m_saved_match_len = 0;
    d->m_saved_lit = 0;

    /* Per-call I/O state is cleared; tdefl_compress() fills it in. */
    d->m_pIn_buf = NULL;
    d->m_pOut_buf = NULL;
    d->m_pIn_buf_size = NULL;
    d->m_pOut_buf_size = NULL;
    d->m_flush = TDEFL_NO_FLUSH;
    d->m_pSrc = NULL;
    d->m_src_buf_left = 0;
    d->m_out_buf_ofs = 0;

    /* Symbol frequency tables start at zero. */
    memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0);
    memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1);

    return TDEFL_STATUS_OKAY;
}

/* Returns the status value currently stored in the compressor's m_prev_return_status field. */
tdefl_status tdefl_get_prev_return_status(tdefl_compressor *d)
{
    const tdefl_status last_status = d->m_prev_return_status;

    return last_status;
}

/* Returns the running adler32 value stored in the compressor (initialised to 1 by tdefl_init()). */
mz_uint32 tdefl_get_adler32(tdefl_compressor *d)
{
    const mz_uint32 checksum = d->m_adler32;

    return checksum;
}

/* One-shot helper: compresses buf_len bytes at pBuf and delivers the output through pPut_buf_func.
   A temporary compressor is allocated with MZ_MALLOC and released before returning.
   Returns MZ_FALSE when pPut_buf_func is NULL, when pBuf is NULL with a non-zero length, when the
   allocation fails, or when initialisation/compression does not complete; MZ_TRUE on success. */
mz_bool tdefl_compress_mem_to_output(const void *pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
    tdefl_compressor *state;
    mz_bool ok = MZ_FALSE;

    if (!pPut_buf_func)
        return MZ_FALSE;
    if ((!pBuf) && (buf_len))
        return MZ_FALSE;

    state = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
    if (!state)
        return MZ_FALSE;

    /* Compression only runs if initialisation succeeded; the whole input is fed in a single FINISH call. */
    if (tdefl_init(state, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY)
        ok = (tdefl_compress_buffer(state, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE);

    MZ_FREE(state);
    return ok;
}

/* Byte sink passed as the user pointer to tdefl_output_buffer_putter(). */
typedef struct
{
    size_t m_size;        /* bytes written so far */
    size_t m_capacity;    /* bytes available at m_pBuf */
    mz_uint8 *m_pBuf;     /* destination storage */
    mz_bool m_expandable; /* non-zero: the putter may grow m_pBuf with MZ_REALLOC */
} tdefl_output_buffer;

/* Output callback that appends len bytes from pBuf to the tdefl_output_buffer
 * passed through pUser.
 *
 * If the data does not fit and the buffer is marked expandable, the capacity
 * is doubled (starting from at least 128 bytes) until it is large enough and
 * the storage is reallocated with MZ_REALLOC. A non-expandable buffer that is
 * too small, or a failed reallocation, yields MZ_FALSE and leaves the buffer
 * untouched. Returns MZ_TRUE once the bytes have been copied. */
static mz_bool tdefl_output_buffer_putter(const void *pBuf, int len, void *pUser)
{
    tdefl_output_buffer *out = (tdefl_output_buffer *)pUser;
    const size_t needed = out->m_size + len;

    if (needed > out->m_capacity)
    {
        size_t grown = out->m_capacity;
        mz_uint8 *resized;

        if (!out->m_expandable)
            return MZ_FALSE;

        /* Keep doubling until the pending write fits. */
        for (;;)
        {
            grown = MZ_MAX(128U, grown << 1U);
            if (needed <= grown)
                break;
        }

        resized = (mz_uint8 *)MZ_REALLOC(out->m_pBuf, grown);
        if (!resized)
            return MZ_FALSE;

        out->m_capacity = grown;
        out->m_pBuf = resized;
    }

    memcpy(out->m_pBuf + out->m_size, pBuf, len);
    out->m_size = needed;
    return MZ_TRUE;
}

/* Compresses src_buf_len bytes at pSrc_buf into a growable heap buffer.
 *
 * pOut_len must be non-NULL; it receives the number of compressed bytes on
 * success and 0 on failure.
 *
 * Returns the buffer built by tdefl_output_buffer_putter() (allocated through
 * MZ_REALLOC; ownership passes to the caller, presumably to be released with
 * the matching deallocator), or NULL on failure. */
void *tdefl_compress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
{
    tdefl_output_buffer out_buf;
    MZ_CLEAR_OBJ(out_buf);
    if (!pOut_len)
        return NULL;
    *pOut_len = 0;
    out_buf.m_expandable = MZ_TRUE;
    if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
    {
        /* The putter may have allocated (part of) the output before the failure
         * was detected; release it here because the caller only ever sees NULL. */
        MZ_FREE(out_buf.m_pBuf);
        return NULL;
    }
    *pOut_len = out_buf.m_size;
    return out_buf.m_pBuf;
}

size_t tdefl_compress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
{
    tdefl_output_buffer out_buf;
    MZ_CLEAR_OBJ(out_buf);
    if (!pOut_buf)
        return 0;
    out_buf.m_pBuf = (mz_uint8 *)pOut_buf;
    out_buf.m_capacity = out_buf_len;
    if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags))
        return 0;
    return out_buf.m_size;
}

/* Probe-count bits for each compression level; the table is indexed by level 0..10. */
static const mz_uint s_tdefl_num_probes[11] = {
    0, 1, 6, 32, 16, 32, /* levels 0-5 */
    128, 256, 512, 768,  /* levels 6-9 */
    1500                 /* level 10 */
};

/* level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput falls off a cliff on some files). */
/* Translates zlib-style parameters into tdefl compression flags.
 *
 * level:       0..10; a negative value selects MZ_DEFAULT_LEVEL and values
 *              above 10 are clamped to 10.
 * window_bits: a positive value requests a zlib header (TDEFL_WRITE_ZLIB_HEADER).
 * strategy:    one of the MZ_* strategy constants; ignored when level is 0,
 *              which forces raw (stored) blocks.
 *
 * Returns the probe count for the effective level OR'ed with the selected
 * TDEFL_* flag bits. */
mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy)
{
    mz_uint comp_flags;

    /* Resolve the effective level first so that every decision below agrees on
     * it. Testing the raw negative "default" value against the greedy-parsing
     * threshold would make the default level behave differently from an
     * explicit MZ_DEFAULT_LEVEL. */
    if (level < 0)
        level = MZ_DEFAULT_LEVEL;
    else if (level > 10)
        level = 10;

    comp_flags = s_tdefl_num_probes[level] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0);
    if (window_bits > 0)
        comp_flags |= TDEFL_WRITE_ZLIB_HEADER;

    if (!level)
        comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS;
    else if (strategy == MZ_FILTERED)
        comp_flags |= TDEFL_FILTER_MATCHES;
    else if (strategy == MZ_HUFFMAN_ONLY)
        comp_flags &= ~TDEFL_MAX_PROBES_MASK; /* zero probes: no match searching */
    else if (strategy == MZ_FIXED)
        comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS;
    else if (strategy == MZ_RLE)
        comp_flags |= TDEFL_RLE_MATCHES;

    return comp_flags;
}

#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4204) /* nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) */
#endif

/* Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at
 http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/.
 This is actually a modification of Alex's original code so PNG files generated by this function pass pngcheck. */
/* Encodes an 8-bit-per-channel image as a complete PNG file in memory.
 *
 * pImage:    pixel data, read as h rows of w * num_chans bytes each.
 * w, h:      image dimensions in pixels; negative values are rejected.
 * num_chans: index into the colour-type table below (1 -> 0x00, 2 -> 0x04,
 *            3 -> 0x02, 4 -> 0x06; 0 also maps to 0x00); values outside 0..4
 *            are rejected.
 * pLen_out:  receives the size of the PNG file in bytes; set to 0 on failure.
 * level:     compression level, clamped to 10.
 * flip:      when non-zero, rows are emitted bottom-to-top.
 *
 * Returns a buffer allocated with MZ_MALLOC/MZ_REALLOC that the caller owns,
 * or NULL on invalid arguments, allocation failure, compression failure, or
 * when the compressed data would not fit in a single IDAT chunk. */
void *tdefl_write_image_to_png_file_in_memory_ex(const void *pImage, int w, int h, int num_chans, size_t *pLen_out, mz_uint level, mz_bool flip)
{
    /* Using a local copy of this array here in case MINIZ_NO_ZLIB_APIS was defined. */
    static const mz_uint s_tdefl_png_num_probes[11] = { 0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500 };
    /* PNG colour type for each channel count (index == num_chans). */
    static const mz_uint8 chans[] = { 0x00, 0x00, 0x04, 0x02, 0x06 };
    /* Signature, IHDR chunk (sizes, colour type and CRC patched in below) and the IDAT chunk header. */
    mz_uint8 pnghdr[41] = { 0x89, 0x50, 0x4e, 0x47, 0x0d,
                            0x0a, 0x1a, 0x0a, 0x00, 0x00,
                            0x00, 0x0d, 0x49, 0x48, 0x44,
                            0x52, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x08,
                            0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x00, 0x00, 0x00,
                            0x00, 0x00, 0x49, 0x44, 0x41,
                            0x54 };
    const mz_uint8 filter_none = 0; /* per-row filter type byte */
    tdefl_compressor *pComp;
    tdefl_output_buffer out_buf;
    size_t bpl, idat_len;
    int i, y;
    mz_uint32 c;

    if (!pLen_out)
        return NULL;
    *pLen_out = 0;
    /* Reject arguments that would index outside chans[] or yield a negative row size. */
    if ((w < 0) || (h < 0) || (num_chans < 0) || (num_chans > 4))
        return NULL;
    /* Bytes per row, computed in size_t so large images do not overflow int. */
    bpl = (size_t)w * (size_t)num_chans;

    pComp = (tdefl_compressor *)MZ_MALLOC(sizeof(tdefl_compressor));
    if (!pComp)
        return NULL;
    MZ_CLEAR_OBJ(out_buf);
    out_buf.m_expandable = MZ_TRUE;
    /* Initial capacity is only a hint; the putter grows the buffer on demand. */
    out_buf.m_capacity = 57 + MZ_MAX((size_t)64, (1 + bpl) * (size_t)h);
    if (NULL == (out_buf.m_pBuf = (mz_uint8 *)MZ_MALLOC(out_buf.m_capacity)))
    {
        MZ_FREE(pComp);
        return NULL;
    }
    /* write placeholder header; it is overwritten once the IDAT length is known */
    if (!tdefl_output_buffer_putter(pnghdr, 41, &out_buf))
        goto fail;
    /* compress image data */
    tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, s_tdefl_png_num_probes[MZ_MIN(10, level)] | TDEFL_WRITE_ZLIB_HEADER);
    for (y = 0; y < h; ++y)
    {
        const size_t row = (size_t)(flip ? (h - 1 - y) : y);
        /* Errors here are sticky and get reported by the TDEFL_FINISH call below. */
        tdefl_compress_buffer(pComp, &filter_none, 1, TDEFL_NO_FLUSH);
        tdefl_compress_buffer(pComp, (const mz_uint8 *)pImage + row * bpl, bpl, TDEFL_NO_FLUSH);
    }
    if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE)
        goto fail;
    /* A PNG chunk length is a 4-byte field limited to 2^31 - 1 bytes. */
    idat_len = out_buf.m_size - 41;
    if (idat_len > 0x7FFFFFFFu)
        goto fail;
    /* write real header: width and height are 4-byte big-endian fields */
    pnghdr[16] = (mz_uint8)(w >> 24);
    pnghdr[17] = (mz_uint8)(w >> 16);
    pnghdr[18] = (mz_uint8)(w >> 8);
    pnghdr[19] = (mz_uint8)w;
    pnghdr[20] = (mz_uint8)(h >> 24);
    pnghdr[21] = (mz_uint8)(h >> 16);
    pnghdr[22] = (mz_uint8)(h >> 8);
    pnghdr[23] = (mz_uint8)h;
    pnghdr[25] = chans[num_chans];
    pnghdr[33] = (mz_uint8)(idat_len >> 24);
    pnghdr[34] = (mz_uint8)(idat_len >> 16);
    pnghdr[35] = (mz_uint8)(idat_len >> 8);
    pnghdr[36] = (mz_uint8)idat_len;
    /* IHDR CRC covers the chunk type plus its 13 data bytes. */
    c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17);
    for (i = 0; i < 4; ++i, c <<= 8)
        ((mz_uint8 *)(pnghdr + 29))[i] = (mz_uint8)(c >> 24);
    memcpy(out_buf.m_pBuf, pnghdr, 41);
    /* write footer (IDAT CRC-32, followed by IEND chunk) */
    if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf))
        goto fail;
    /* IDAT CRC covers the 4-byte chunk type plus the compressed data. */
    c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, idat_len + 4);
    for (i = 0; i < 4; ++i, c <<= 8)
        (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24);
    /* compute final size of file, grab compressed data buffer and return */
    *pLen_out = idat_len + 57;
    MZ_FREE(pComp);
    return out_buf.m_pBuf;

fail:
    *pLen_out = 0;
    MZ_FREE(pComp);
    MZ_FREE(out_buf.m_pBuf);
    return NULL;
}
void *tdefl_write_image_to_png_file_in_memory(const void *pImage, int w, int h, int num_chans, size_t *pLen_out)
{
    /* Level 6 corresponds to TDEFL_DEFAULT_MAX_PROBES or MZ_DEFAULT_LEVEL (but we can't depend on MZ_DEFAULT_LEVEL being available in case the zlib API's where #defined out) */
    return tdefl_write_image_to_png_file_in_memory_ex(pImage, w, h, num_chans, pLen_out, 6, MZ_FALSE);
}

#ifndef MINIZ_NO_MALLOC
/* Allocate the tdefl_compressor and tinfl_decompressor structures in C so that */
/* non-C language bindings to the tdefl_ and tinfl_ APIs don't need to worry about */
/* structure size and allocation mechanism. */
/* Obtains storage for one tdefl_compressor from MZ_MALLOC and returns it, or
 * whatever MZ_MALLOC returned on failure. Nothing in this function initialises
 * the structure. */
tdefl_compressor *tdefl_compressor_alloc(void)
{
    void *storage = MZ_MALLOC(sizeof(tdefl_compressor));

    return (tdefl_compressor *)storage;
}

/* Releases a compressor by passing pComp to MZ_FREE; intended as the
 * counterpart of tdefl_compressor_alloc(). */
void tdefl_compressor_free(tdefl_compressor *pComp)
{
    MZ_FREE(pComp);
}
#endif

#ifdef _MSC_VER
#pragma warning(pop)
#endif

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_DEFLATE_APIS*/
 /**************************************************************************
 *
 * Copyright 2013-2014 RAD Game Tools and Valve Software
 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/



#ifndef MINIZ_NO_INFLATE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- Low-level Decompression (completely independent from all compression API's) */

/* Memory primitives used by the inflater; both map directly onto the C library routines. */
#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
#define TINFL_MEMSET(p, c, l) memset(p, c, l)

/* Coroutine scaffolding. The expanding function must have `r` (with an
 * m_state field), a `status` variable and a `common_exit` label in scope. */
/* TINFL_CR_BEGIN opens a switch on the saved state; state 0 starts at the top. */
#define TINFL_CR_BEGIN  \
    switch (r->m_state) \
    {                   \
        case 0:
/* TINFL_CR_RETURN stores `result` in status, records state_index in r->m_state
 * and jumps to common_exit. The `case state_index:` label placed right after
 * the jump is where the switch lands when the function is entered again with
 * that saved state, so each state_index must be unique within the function. */
#define TINFL_CR_RETURN(state_index, result) \
    do                                       \
    {                                        \
        status = result;                     \
        r->m_state = state_index;            \
        goto common_exit;                    \
        case state_index:;                   \
    }                                        \
    MZ_MACRO_END
/* TINFL_CR_RETURN_FOREVER wraps the return in an endless loop, so every later
 * re-entry at this state returns the same result again. */
#define TINFL_CR_RETURN_FOREVER(state_index, result) \
    do                                               \
    {                                                \
        for (;;)                                     \
        {                                            \
            TINFL_CR_RETURN(state_index, result);    \
        }                                            \
    }                                                \
    MZ_MACRO_END
/* TINFL_CR_FINISH closes the switch opened by TINFL_CR_BEGIN. */
#define TINFL_CR_FINISH }

/* TINFL_GET_BYTE reads the next input byte into `c` and advances pIn_buf_cur.
 * While no input is left (pIn_buf_cur >= pIn_buf_end) it suspends through
 * TINFL_CR_RETURN: with TINFL_STATUS_NEEDS_MORE_INPUT when decomp_flags has
 * TINFL_FLAG_HAS_MORE_INPUT set, otherwise with
 * TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS. The availability check is repeated
 * after each resume. */
#define TINFL_GET_BYTE(state_index, c)                                                                                                                           \
    do                                                                                                                                                           \
    {                                                                                                                                                            \
        while (pIn_buf_cur >= pIn_buf_end)                                                                                                                       \
        {                                                                                                                                                        \
            TINFL_CR_RETURN(state_index, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS); \
        }                                                                                                                                                        \
        c = *pIn_buf_cur++;                                                                                                                                      \
    }                                                                                                                                                            \
    MZ_MACRO_END

/* Bit-buffer helpers. They operate on the `bit_buf` / `num_bits` locals of the
 * expanding function; new bytes are OR'ed in above the bits already held. */
/* TINFL_NEED_BITS appends input bytes until at least n bits are buffered. As a
 * do/while it always reads at least one byte, so callers test num_bits first.
 * Unlike its neighbours it does not end in MZ_MACRO_END; the loop is completed
 * by the semicolon at the point of use. */
#define TINFL_NEED_BITS(state_index, n)                \
    do                                                 \
    {                                                  \
        mz_uint c;                                     \
        TINFL_GET_BYTE(state_index, c);                \
        bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \
        num_bits += 8;                                 \
    } while (num_bits < (mz_uint)(n))
/* TINFL_SKIP_BITS ensures n bits are buffered, then discards them. */
#define TINFL_SKIP_BITS(state_index, n)      \
    do                                       \
    {                                        \
        if (num_bits < (mz_uint)(n))         \
        {                                    \
            TINFL_NEED_BITS(state_index, n); \
        }                                    \
        bit_buf >>= (n);                     \
        num_bits -= (n);                     \
    }                                        \
    MZ_MACRO_END
/* TINFL_GET_BITS ensures n bits are buffered, stores the low n bits in `b` and
 * consumes them. The mask is built with `1 << (n)`, i.e. in int arithmetic. */
#define TINFL_GET_BITS(state_index, b, n)    \
    do                                       \
    {                                        \
        if (num_bits < (mz_uint)(n))         \
        {                                    \
            TINFL_NEED_BITS(state_index, n); \
        }                                    \
        b = bit_buf & ((1 << (n)) - 1);      \
        bit_buf >>= (n);                     \
        num_bits -= (n);                     \
    }                                        \
    MZ_MACRO_END

/* TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2. */
/* It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a */
/* Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the */
/* bit buffer contains >=15 bits (deflate's max. Huffman code size). */
/* The macro uses `temp`, `code_len` and `c` without declaring them; TINFL_HUFF_DECODE declares them before expanding it. */
/* A non-negative lookup entry carries the code length in `temp >> 9`; a negative entry is resolved by walking pTree one bit at a time, */
/* which is only attempted once more than TINFL_FAST_LOOKUP_BITS bits are buffered. */
#define TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree)                    \
    do                                                                         \
    {                                                                          \
        temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)];                \
        if (temp >= 0)                                                         \
        {                                                                      \
            code_len = temp >> 9;                                              \
            if ((code_len) && (num_bits >= code_len))                          \
                break;                                                         \
        }                                                                      \
        else if (num_bits > TINFL_FAST_LOOKUP_BITS)                            \
        {                                                                      \
            code_len = TINFL_FAST_LOOKUP_BITS;                                 \
            do                                                                 \
            {                                                                  \
                temp = pTree[~temp + ((bit_buf >> code_len++) & 1)];           \
            } while ((temp < 0) && (num_bits >= (code_len + 1)));              \
            if (temp >= 0)                                                     \
                break;                                                         \
        }                                                                      \
        TINFL_GET_BYTE(state_index, c);                                        \
        bit_buf |= (((tinfl_bit_buf_t)c) << num_bits);                         \
        num_bits += 8;                                                         \
    } while (num_bits < 15);

/* TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read */
/* beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully */
/* decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32. */
/* The slow path is only executed at the very end of the input buffer. */
/* v1.16: The original macro handled the case at the very end of the passed-in input buffer, but we also need to handle the case where the user passes in 1+zillion bytes */
/* following the deflate data and our non-conservative read-ahead path won't kick in here on this code. This is much trickier. */
/* Steps: when fewer than 15 bits are buffered, either top up byte-by-byte via TINFL_HUFF_BITBUF_FILL (fewer than 2 input bytes left) or load two bytes at once. */
/* Then resolve the code through pLookUp (symbol in the low 9 bits, length in `temp >> 9`) or, for a negative entry, by walking pTree. */
/* Finally the symbol is stored in `sym` and code_len bits are consumed from the bit buffer. */
#define TINFL_HUFF_DECODE(state_index, sym, pLookUp, pTree)                                                                         \
    do                                                                                                                              \
    {                                                                                                                               \
        int temp;                                                                                                                   \
        mz_uint code_len, c;                                                                                                        \
        if (num_bits < 15)                                                                                                          \
        {                                                                                                                           \
            if ((pIn_buf_end - pIn_buf_cur) < 2)                                                                                    \
            {                                                                                                                       \
                TINFL_HUFF_BITBUF_FILL(state_index, pLookUp, pTree);                                                                \
            }                                                                                                                       \
            else                                                                                                                    \
            {                                                                                                                       \
                bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \
                pIn_buf_cur += 2;                                                                                                   \
                num_bits += 16;                                                                                                     \
            }                                                                                                                       \
        }                                                                                                                           \
        if ((temp = pLookUp[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)                                                          \
            code_len = temp >> 9, temp &= 511;                                                                                      \
        else                                                                                                                        \
        {                                                                                                                           \
            code_len = TINFL_FAST_LOOKUP_BITS;                                                                                      \
            do                                                                                                                      \
            {                                                                                                                       \
                temp = pTree[~temp + ((bit_buf >> code_len++) & 1)];                                                                \
            } while (temp < 0);                                                                                                     \
        }                                                                                                                           \
        sym = temp;                                                                                                                 \
        bit_buf >>= code_len;                                                                                                       \
        num_bits -= code_len;                                                                                                       \
    }                                                                                                                               \
    MZ_MACRO_END

/* Clears the Huffman tree array selected by r->m_type: m_tree_0 for type 0,
 * m_tree_1 for type 1 and m_tree_2 for any other value. */
static void tinfl_clear_tree(tinfl_decompressor *r)
{
    switch (r->m_type)
    {
        case 0:
            MZ_CLEAR_ARR(r->m_tree_0);
            break;
        case 1:
            MZ_CLEAR_ARR(r->m_tree_1);
            break;
        default:
            MZ_CLEAR_ARR(r->m_tree_2);
            break;
    }
}

tinfl_status tinfl_decompress(tinfl_decompressor *r, const mz_uint8 *pIn_buf_next, size_t *pIn_buf_size, mz_uint8 *pOut_buf_start, mz_uint8 *pOut_buf_next, size_t *pOut_buf_size, const mz_uint32 decomp_flags)
{
    static const mz_uint16 s_length_base[31] = { 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 };
    static const mz_uint8 s_length_extra[31] = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0 };
    static const mz_uint16 s_dist_base[32] = { 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0 };
    static const mz_uint8 s_dist_extra[32] = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 };
    static const mz_uint8 s_length_dezigzag[19] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 };
    static const mz_uint16 s_min_table_sizes[3] = { 257, 1, 4 };

    mz_int16 *pTrees[3];
    mz_uint8 *pCode_sizes[3];

    tinfl_status status = TINFL_STATUS_FAILED;
    mz_uint32 num_bits, dist, counter, num_extra;
    tinfl_bit_buf_t bit_buf;
    const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
    mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next ? pOut_buf_next + *pOut_buf_size : NULL;
    size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;

    /* Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter). */
    if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start))
    {
        *pIn_buf_size = *pOut_buf_size = 0;
        return TINFL_STATUS_BAD_PARAM;
    }

    pTrees[0] = r->m_tree_0;
    pTrees[1] = r->m_tree_1;
    pTrees[2] = r->m_tree_2;
    pCode_sizes[0] = r->m_code_size_0;
    pCode_sizes[1] = r->m_code_size_1;
    pCode_sizes[2] = r->m_code_size_2;

    num_bits = r->m_num_bits;
    bit_buf = r->m_bit_buf;
    dist = r->m_dist;
    counter = r->m_counter;
    num_extra = r->m_num_extra;
    dist_from_out_buf_start = r->m_dist_from_out_buf_start;
    TINFL_CR_BEGIN

    bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0;
    r->m_z_adler32 = r->m_check_adler32 = 1;
    if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
    {
        TINFL_GET_BYTE(1, r->m_zhdr0);
        TINFL_GET_BYTE(2, r->m_zhdr1);
        counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
        if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
            counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4)))));
        if (counter)
        {
            TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED);
        }
    }

    do
    {
        TINFL_GET_BITS(3, r->m_final, 3);
        r->m_type = r->m_final >> 1;
        if (r->m_type == 0)
        {
            TINFL_SKIP_BITS(5, num_bits & 7);
            for (counter = 0; counter < 4; ++counter)
            {
                if (num_bits)
                    TINFL_GET_BITS(6, r->m_raw_header[counter], 8);
                else
                    TINFL_GET_BYTE(7, r->m_raw_header[counter]);
            }
            if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8))))
            {
                TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED);
            }
            while ((counter) && (num_bits))
            {
                TINFL_GET_BITS(51, dist, 8);
                while (pOut_buf_cur >= pOut_buf_end)
                {
                    TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT);
                }
                *pOut_buf_cur++ = (mz_uint8)dist;
                counter--;
            }
            while (counter)
            {
                size_t n;
                while (pOut_buf_cur >= pOut_buf_end)
                {
                    TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT);
                }
                while (pIn_buf_cur >= pIn_buf_end)
                {
                    TINFL_CR_RETURN(38, (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) ? TINFL_STATUS_NEEDS_MORE_INPUT : TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS);
                }
                n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
                TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n);
                pIn_buf_cur += n;
                pOut_buf_cur += n;
                counter -= (mz_uint)n;
            }
        }
        else if (r->m_type == 3)
        {
            TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
        }
        else
        {
            if (r->m_type == 1)
            {
                mz_uint8 *p = r->m_code_size_0;
                mz_uint i;
                r->m_table_sizes[0] = 288;
                r->m_table_sizes[1] = 32;
                TINFL_MEMSET(r->m_code_size_1, 5, 32);
                for (i = 0; i <= 143; ++i)
                    *p++ = 8;
                for (; i <= 255; ++i)
                    *p++ = 9;
                for (; i <= 279; ++i)
                    *p++ = 7;
                for (; i <= 287; ++i)
                    *p++ = 8;
            }
            else
            {
                for (counter = 0; counter < 3; counter++)
                {
                    TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
                    r->m_table_sizes[counter] += s_min_table_sizes[counter];
                }
                MZ_CLEAR_ARR(r->m_code_size_2);
                for (counter = 0; counter < r->m_table_sizes[2]; counter++)
                {
                    mz_uint s;
                    TINFL_GET_BITS(14, s, 3);
                    r->m_code_size_2[s_length_dezigzag[counter]] = (mz_uint8)s;
                }
                r->m_table_sizes[2] = 19;
            }
            for (; (int)r->m_type >= 0; r->m_type--)
            {
                int tree_next, tree_cur;
                mz_int16 *pLookUp;
                mz_int16 *pTree;
                mz_uint8 *pCode_size;
                mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16];
                pLookUp = r->m_look_up[r->m_type];
                pTree = pTrees[r->m_type];
                pCode_size = pCode_sizes[r->m_type];
                MZ_CLEAR_ARR(total_syms);
                TINFL_MEMSET(pLookUp, 0, sizeof(r->m_look_up[0]));
                tinfl_clear_tree(r);
                for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
                    total_syms[pCode_size[i]]++;
                used_syms = 0, total = 0;
                next_code[0] = next_code[1] = 0;
                for (i = 1; i <= 15; ++i)
                {
                    used_syms += total_syms[i];
                    next_code[i + 1] = (total = ((total + total_syms[i]) << 1));
                }
                if ((65536 != total) && (used_syms > 1))
                {
                    TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
                }
                for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index)
                {
                    mz_uint rev_code = 0, l, cur_code, code_size = pCode_size[sym_index];
                    if (!code_size)
                        continue;
                    cur_code = next_code[code_size]++;
                    for (l = code_size; l > 0; l--, cur_code >>= 1)
                        rev_code = (rev_code << 1) | (cur_code & 1);
                    if (code_size <= TINFL_FAST_LOOKUP_BITS)
                    {
                        mz_int16 k = (mz_int16)((code_size << 9) | sym_index);
                        while (rev_code < TINFL_FAST_LOOKUP_SIZE)
                        {
                            pLookUp[rev_code] = k;
                            rev_code += (1 << code_size);
                        }
                        continue;
                    }
                    if (0 == (tree_cur = pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)]))
                    {
                        pLookUp[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next;
                        tree_cur = tree_next;
                        tree_next -= 2;
                    }
                    rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
                    for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--)
                    {
                        tree_cur -= ((rev_code >>= 1) & 1);
                        if (!pTree[-tree_cur - 1])
                        {
                            pTree[-tree_cur - 1] = (mz_int16)tree_next;
                            tree_cur = tree_next;
                            tree_next -= 2;
                        }
                        else
                            tree_cur = pTree[-tree_cur - 1];
                    }
                    tree_cur -= ((rev_code >>= 1) & 1);
                    pTree[-tree_cur - 1] = (mz_int16)sym_index;
                }
                if (r->m_type == 2)
                {
                    for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);)
                    {
                        mz_uint s;
                        TINFL_HUFF_DECODE(16, dist, r->m_look_up[2], r->m_tree_2);
                        if (dist < 16)
                        {
                            r->m_len_codes[counter++] = (mz_uint8)dist;
                            continue;
                        }
                        if ((dist == 16) && (!counter))
                        {
                            TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
                        }
                        num_extra = "\02\03\07"[dist - 16];
                        TINFL_GET_BITS(18, s, num_extra);
                        s += "\03\03\013"[dist - 16];
                        TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
                        counter += s;
                    }
                    if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter)
                    {
                        TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
                    }
                    TINFL_MEMCPY(r->m_code_size_0, r->m_len_codes, r->m_table_sizes[0]);
                    TINFL_MEMCPY(r->m_code_size_1, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
                }
            }
            for (;;)
            {
                mz_uint8 *pSrc;
                for (;;)
                {
                    if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2))
                    {
                        TINFL_HUFF_DECODE(23, counter, r->m_look_up[0], r->m_tree_0);
                        if (counter >= 256)
                            break;
                        while (pOut_buf_cur >= pOut_buf_end)
                        {
                            TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT);
                        }
                        *pOut_buf_cur++ = (mz_uint8)counter;
                    }
                    else
                    {
                        int sym2;
                        mz_uint code_len;
#if TINFL_USE_64BIT_BITBUF
                        if (num_bits < 30)
                        {
                            bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits);
                            pIn_buf_cur += 4;
                            num_bits += 32;
                        }
#else
                        if (num_bits < 15)
                        {
                            bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
                            pIn_buf_cur += 2;
                            num_bits += 16;
                        }
#endif
                        if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
                            code_len = sym2 >> 9;
                        else
                        {
                            code_len = TINFL_FAST_LOOKUP_BITS;
                            do
                            {
                                sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)];
                            } while (sym2 < 0);
                        }
                        counter = sym2;
                        bit_buf >>= code_len;
                        num_bits -= code_len;
                        if (counter & 256)
                            break;

#if !TINFL_USE_64BIT_BITBUF
                        if (num_bits < 15)
                        {
                            bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
                            pIn_buf_cur += 2;
                            num_bits += 16;
                        }
#endif
                        if ((sym2 = r->m_look_up[0][bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
                            code_len = sym2 >> 9;
                        else
                        {
                            code_len = TINFL_FAST_LOOKUP_BITS;
                            do
                            {
                                sym2 = r->m_tree_0[~sym2 + ((bit_buf >> code_len++) & 1)];
                            } while (sym2 < 0);
                        }
                        bit_buf >>= code_len;
                        num_bits -= code_len;

                        pOut_buf_cur[0] = (mz_uint8)counter;
                        if (sym2 & 256)
                        {
                            pOut_buf_cur++;
                            counter = sym2;
                            break;
                        }
                        pOut_buf_cur[1] = (mz_uint8)sym2;
                        pOut_buf_cur += 2;
                    }
                }
                if ((counter &= 511) == 256)
                    break;

                num_extra = s_length_extra[counter - 257];
                counter = s_length_base[counter - 257];
                if (num_extra)
                {
                    mz_uint extra_bits;
                    TINFL_GET_BITS(25, extra_bits, num_extra);
                    counter += extra_bits;
                }

                TINFL_HUFF_DECODE(26, dist, r->m_look_up[1], r->m_tree_1);
                num_extra = s_dist_extra[dist];
                dist = s_dist_base[dist];
                if (num_extra)
                {
                    mz_uint extra_bits;
                    TINFL_GET_BITS(27, extra_bits, num_extra);
                    dist += extra_bits;
                }

                dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
                if ((dist == 0 || dist > dist_from_out_buf_start || dist_from_out_buf_start == 0) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
                {
                    TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
                }

                pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask);

                if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end)
                {
                    while (counter--)
                    {
                        while (pOut_buf_cur >= pOut_buf_end)
                        {
                            TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT);
                        }
                        *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask];
                    }
                    continue;
                }
#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
                else if ((counter >= 9) && (counter <= dist))
                {
                    const mz_uint8 *pSrc_end = pSrc + (counter & ~7);
                    do
                    {
#ifdef MINIZ_UNALIGNED_USE_MEMCPY
						memcpy(pOut_buf_cur, pSrc, sizeof(mz_uint32)*2);
#else
                        ((mz_uint32 *)pOut_buf_cur)[0] = ((const mz_uint32 *)pSrc)[0];
                        ((mz_uint32 *)pOut_buf_cur)[1] = ((const mz_uint32 *)pSrc)[1];
#endif
                        pOut_buf_cur += 8;
                    } while ((pSrc += 8) < pSrc_end);
                    if ((counter &= 7) < 3)
                    {
                        if (counter)
                        {
                            pOut_buf_cur[0] = pSrc[0];
                            if (counter > 1)
                                pOut_buf_cur[1] = pSrc[1];
                            pOut_buf_cur += counter;
                        }
                        continue;
                    }
                }
#endif
                while(counter>2)
                {
                    pOut_buf_cur[0] = pSrc[0];
                    pOut_buf_cur[1] = pSrc[1];
                    pOut_buf_cur[2] = pSrc[2];
                    pOut_buf_cur += 3;
                    pSrc += 3;
					counter -= 3;
                }
                if (counter > 0)
                {
                    pOut_buf_cur[0] = pSrc[0];
                    if (counter > 1)
                        pOut_buf_cur[1] = pSrc[1];
                    pOut_buf_cur += counter;
                }
            }
        }
    } while (!(r->m_final & 1));

    /* Ensure byte alignment and put back any bytes from the bitbuf if we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */
    /* I'm being super conservative here. A number of simplifications can be made to the byte alignment part, and the Adler32 check shouldn't ever need to worry about reading from the bitbuf now. */
    TINFL_SKIP_BITS(32, num_bits & 7);
    while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8))
    {
        --pIn_buf_cur;
        num_bits -= 8;
    }
    bit_buf &= ~(~(tinfl_bit_buf_t)0 << num_bits);
    MZ_ASSERT(!num_bits); /* if this assert fires then we've read beyond the end of non-deflate/zlib streams with following data (such as gzip streams). */

    if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER)
    {
        for (counter = 0; counter < 4; ++counter)
        {
            mz_uint s;
            if (num_bits)
                TINFL_GET_BITS(41, s, 8);
            else
                TINFL_GET_BYTE(42, s);
            r->m_z_adler32 = (r->m_z_adler32 << 8) | s;
        }
    }
    TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);

    TINFL_CR_FINISH

common_exit:
    /* As long as we aren't telling the caller that we NEED more input to make forward progress: */
    /* Put back any bytes from the bitbuf in case we've looked ahead too far on gzip, or other Deflate streams followed by arbitrary data. */
    /* We need to be very careful here to NOT push back any bytes we definitely know we need to make forward progress, though, or we'll lock the caller up into an inf loop. */
    if ((status != TINFL_STATUS_NEEDS_MORE_INPUT) && (status != TINFL_STATUS_FAILED_CANNOT_MAKE_PROGRESS))
    {
        while ((pIn_buf_cur > pIn_buf_next) && (num_bits >= 8))
        {
            --pIn_buf_cur;
            num_bits -= 8;
        }
    }
    r->m_num_bits = num_bits;
    r->m_bit_buf = bit_buf & ~(~(tinfl_bit_buf_t)0 << num_bits);
    r->m_dist = dist;
    r->m_counter = counter;
    r->m_num_extra = num_extra;
    r->m_dist_from_out_buf_start = dist_from_out_buf_start;
    *pIn_buf_size = pIn_buf_cur - pIn_buf_next;
    *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
    if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0))
    {
        const mz_uint8 *ptr = pOut_buf_next;
        size_t buf_len = *pOut_buf_size;
        mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16;
        size_t block_len = buf_len % 5552;
        while (buf_len)
        {
            for (i = 0; i + 7 < block_len; i += 8, ptr += 8)
            {
                s1 += ptr[0], s2 += s1;
                s1 += ptr[1], s2 += s1;
                s1 += ptr[2], s2 += s1;
                s1 += ptr[3], s2 += s1;
                s1 += ptr[4], s2 += s1;
                s1 += ptr[5], s2 += s1;
                s1 += ptr[6], s2 += s1;
                s1 += ptr[7], s2 += s1;
            }
            for (; i < block_len; ++i)
                s1 += *ptr++, s2 += s1;
            s1 %= 65521U, s2 %= 65521U;
            buf_len -= block_len;
            block_len = 5552;
        }
        r->m_check_adler32 = (s2 << 16) + s1;
        if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32))
            status = TINFL_STATUS_ADLER32_MISMATCH;
    }
    return status;
}

/* Higher level helper functions. */
void *tinfl_decompress_mem_to_heap(const void *pSrc_buf, size_t src_buf_len, size_t *pOut_len, int flags)
{
    tinfl_decompressor decomp;
    void *pBuf = NULL, *pNew_buf;
    size_t src_buf_ofs = 0, out_buf_capacity = 0;
    *pOut_len = 0;
    tinfl_init(&decomp);
    for (;;)
    {
        size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
        tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8 *)pBuf, pBuf ? (mz_uint8 *)pBuf + *pOut_len : NULL, &dst_buf_size,
                                               (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
        if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT))
        {
            MZ_FREE(pBuf);
            *pOut_len = 0;
            return NULL;
        }
        src_buf_ofs += src_buf_size;
        *pOut_len += dst_buf_size;
        if (status == TINFL_STATUS_DONE)
            break;
        new_out_buf_capacity = out_buf_capacity * 2;
        if (new_out_buf_capacity < 128)
            new_out_buf_capacity = 128;
        pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
        if (!pNew_buf)
        {
            MZ_FREE(pBuf);
            *pOut_len = 0;
            return NULL;
        }
        pBuf = pNew_buf;
        out_buf_capacity = new_out_buf_capacity;
    }
    return pBuf;
}

size_t tinfl_decompress_mem_to_mem(void *pOut_buf, size_t out_buf_len, const void *pSrc_buf, size_t src_buf_len, int flags)
{
    tinfl_decompressor decomp;
    tinfl_status status;
    tinfl_init(&decomp);
    status = tinfl_decompress(&decomp, (const mz_uint8 *)pSrc_buf, &src_buf_len, (mz_uint8 *)pOut_buf, (mz_uint8 *)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
    return (status != TINFL_STATUS_DONE) ? TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len;
}

/* Inflates pIn_buf through an internal TINFL_LZ_DICT_SIZE-byte wrapping dictionary, handing every */
/* chunk of output to pPut_buf_func(chunk, length, pPut_buf_user). */
/* A zero return from the callback aborts decompression. */
/* Returns 1 when the stream finished with TINFL_STATUS_DONE and 0 on every failure, including */
/* failure to allocate the dictionary (that path previously returned TINFL_STATUS_FAILED, i.e. -1, */
/* which callers testing the result as a boolean would misread as success). */
/* On return *pIn_buf_size holds the number of input bytes consumed; it is left untouched if the */
/* dictionary could not be allocated. */
int tinfl_decompress_mem_to_callback(const void *pIn_buf, size_t *pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void *pPut_buf_user, int flags)
{
    int result = 0;
    tinfl_decompressor decomp;
    mz_uint8 *pDict = (mz_uint8 *)MZ_MALLOC(TINFL_LZ_DICT_SIZE);
    size_t in_buf_ofs = 0, dict_ofs = 0;
    if (!pDict)
        return 0;
    memset(pDict,0,TINFL_LZ_DICT_SIZE);
    tinfl_init(&decomp);
    for (;;)
    {
        size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs;
        /* The dictionary is used as a wrapping window, so the non-wrapping flag is stripped along with HAS_MORE_INPUT. */
        tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8 *)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size,
                                               (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)));
        in_buf_ofs += in_buf_size;
        /* Flush whatever was produced, even if the status is an error; a zero return from the callback stops everything. */
        if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user)))
            break;
        if (status != TINFL_STATUS_HAS_MORE_OUTPUT)
        {
            result = (status == TINFL_STATUS_DONE);
            break;
        }
        dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1);
    }
    MZ_FREE(pDict);
    *pIn_buf_size = in_buf_ofs;
    return result;
}

#ifndef MINIZ_NO_MALLOC
/* Allocates a tinfl_decompressor with MZ_MALLOC and runs tinfl_init() on it. Returns NULL if the allocation fails. */
tinfl_decompressor *tinfl_decompressor_alloc(void)
{
    tinfl_decompressor *pInflator = (tinfl_decompressor *)MZ_MALLOC(sizeof(tinfl_decompressor));
    if (!pInflator)
        return NULL;
    tinfl_init(pInflator);
    return pInflator;
}

/* Hands pDecomp to MZ_FREE; intended as the counterpart of tinfl_decompressor_alloc(). */
void tinfl_decompressor_free(tinfl_decompressor *pDecomp)
{
    MZ_FREE(pDecomp);
}
#endif

#ifdef __cplusplus
}
#endif

#endif /*#ifndef MINIZ_NO_INFLATE_APIS*/
 /**************************************************************************
 *
 * Copyright 2013-2014 RAD Game Tools and Valve Software
 * Copyright 2010-2014 Rich Geldreich and Tenacious Software LLC
 * Copyright 2016 Martin Raiber
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 **************************************************************************/


#ifndef MINIZ_NO_ARCHIVE_APIS

#ifdef __cplusplus
extern "C" {
#endif

/* ------------------- .ZIP archive reading */

#ifdef MINIZ_NO_STDIO
#define MZ_FILE void *
#else
#include <sys/stat.h>

#if defined(_MSC_VER) || defined(__MINGW64__)

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

/* Converts a NUL-terminated UTF-8 string into a freshly malloc()ed, NUL-terminated wide string. */
/* Returns NULL if the length query, the allocation, or the conversion fails; otherwise the caller */
/* releases the result with free(). */
static WCHAR* mz_utf8z_to_widechar(const char* str)
{
  WCHAR* wStr;
  /* First call only queries the required length (in WCHARs, terminator included). */
  int reqChars = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
  if (reqChars <= 0)
    return NULL;
  wStr = (WCHAR*)malloc((size_t)reqChars * sizeof(WCHAR));
  if (!wStr)
    return NULL;
  /* The last argument is a count of WCHARs, not bytes; passing a byte count overstates the buffer size. */
  if (MultiByteToWideChar(CP_UTF8, 0, str, -1, wStr, reqChars) <= 0)
  {
    free(wStr);
    return NULL;
  }
  return wStr;
}

/* fopen() replacement taking UTF-8 arguments: both strings are converted to wide strings and passed to _wfopen_s. */
/* Returns the opened stream, or NULL if a conversion fails or _wfopen_s reports an error. */
static FILE *mz_fopen(const char *pFilename, const char *pMode)
{
  WCHAR* wFilename = mz_utf8z_to_widechar(pFilename);
  WCHAR* wMode = mz_utf8z_to_widechar(pMode);
  FILE* pFile = NULL;
  /* Never hand a NULL string to _wfopen_s: the conversion helper returns NULL when it cannot allocate. */
  if (wFilename && wMode)
  {
    if (_wfopen_s(&pFile, wFilename, wMode) != 0)
      pFile = NULL;
  }
  free(wFilename);
  free(wMode);
  return pFile;
}

/* freopen() replacement taking UTF-8 arguments: both strings are converted to wide strings and passed to _wfreopen_s. */
/* Returns the reopened stream, or NULL if a conversion fails or _wfreopen_s reports an error. */
static FILE *mz_freopen(const char *pPath, const char *pMode, FILE *pStream)
{
  WCHAR* wPath = mz_utf8z_to_widechar(pPath);
  WCHAR* wMode = mz_utf8z_to_widechar(pMode);
  FILE* pFile = NULL;
  /* Never hand a NULL string to _wfreopen_s: the conversion helper returns NULL when it cannot allocate. */
  if (wPath && wMode)
  {
    if (_wfreopen_s(&pFile, wPath, wMode, pStream) != 0)
      pFile = NULL;
  }
  free(wPath);
  free(wMode);
  return pFile;
}

/* _wstat64() wrapper taking a UTF-8 path. Returns the _wstat64 result, or -1 if the path could not be converted. */
static int mz_stat64(const char *path, struct __stat64 *buffer)
{
  int res;
  WCHAR* wPath = mz_utf8z_to_widechar(path);
  /* The conversion helper returns NULL when it cannot allocate; do not pass that on to _wstat64. */
  if (!wPath)
    return -1;
  res = _wstat64(wPath, buffer);
  free(wPath);
  return res;
}

#ifndef MINIZ_NO_TIME
#include <sys/utime.h>
#endif
/* File I/O bindings for the _MSC_VER / __MINGW64__ branch: open, reopen and stat go through the */
/* UTF-8 converting mz_* wrappers defined above; tell/seek use _ftelli64/_fseeki64. */
#define MZ_FOPEN mz_fopen
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#define MZ_FTELL64 _ftelli64
#define MZ_FSEEK64 _fseeki64
#define MZ_FILE_STAT_STRUCT _stat64
#define MZ_FILE_STAT mz_stat64 
#define MZ_FFLUSH fflush
#define MZ_FREOPEN mz_freopen
#define MZ_DELETE_FILE remove

#elif defined(__MINGW32__) || defined(__WATCOMC__)
/* __MINGW32__ / __WATCOMC__ branch: plain fopen/freopen/stat, with _ftelli64/_fseeki64 for offsets. */
#ifndef MINIZ_NO_TIME
#include <sys/utime.h>
#endif
#define MZ_FOPEN(f, m) fopen(f, m)
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#define MZ_FTELL64 _ftelli64
#define MZ_FSEEK64 _fseeki64
#define MZ_FILE_STAT_STRUCT stat
#define MZ_FILE_STAT stat
#define MZ_FFLUSH fflush
#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
#define MZ_DELETE_FILE remove


#elif defined(__TINYC__)
/* __TINYC__ branch: plain stdio only; MZ_FTELL64/MZ_FSEEK64 map to ftell/fseek. */
#ifndef MINIZ_NO_TIME
#include <sys/utime.h>
#endif
#define MZ_FOPEN(f, m) fopen(f, m)
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#define MZ_FTELL64 ftell
#define MZ_FSEEK64 fseek
#define MZ_FILE_STAT_STRUCT stat
#define MZ_FILE_STAT stat
#define MZ_FFLUSH fflush
#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
#define MZ_DELETE_FILE remove


#elif defined(__USE_LARGEFILE64) /* gcc, clang */
/* __USE_LARGEFILE64 branch: binds to the explicit 64-bit entry points (fopen64, ftello64, fseeko64, stat64, freopen64). */
#ifndef MINIZ_NO_TIME
#include <utime.h>
#endif
#define MZ_FOPEN(f, m) fopen64(f, m)
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#define MZ_FTELL64 ftello64
#define MZ_FSEEK64 fseeko64
#define MZ_FILE_STAT_STRUCT stat64
#define MZ_FILE_STAT stat64
#define MZ_FFLUSH fflush
#define MZ_FREOPEN(p, m, s) freopen64(p, m, s)
#define MZ_DELETE_FILE remove


#elif defined(__APPLE__) || defined(__FreeBSD__)
/* __APPLE__ / __FreeBSD__ branch: plain fopen/stat with ftello/fseeko for offsets. */
#ifndef MINIZ_NO_TIME
#include <utime.h>
#endif
#define MZ_FOPEN(f, m) fopen(f, m)
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#define MZ_FTELL64 ftello
#define MZ_FSEEK64 fseeko
#define MZ_FILE_STAT_STRUCT stat
#define MZ_FILE_STAT stat
#define MZ_FFLUSH fflush
#define MZ_FREOPEN(p, m, s) freopen(p, m, s)
#define MZ_DELETE_FILE remove


#else
/* Fallback branch for every other toolchain: plain stdio and stat(). */
/* Offsets use ftello/fseeko, or ftell/fseek when __STRICT_ANSI__ is defined. */
//#pragma message("Using fopen, ftello, fseeko, stat() etc. path for file I/O - this path may not support large files.")
#ifndef MINIZ_NO_TIME
#include <utime.h>
#endif
#define MZ_FOPEN(f, m) fopen(f, m)
#define MZ_FCLOSE fclose
#define MZ_FREAD fread
#define MZ_FWRITE fwrite
#ifdef __STRICT_ANSI__
#define MZ_FTELL64 ftell
#define MZ_FSEEK64 fseek
#else
#define MZ_FTELL64 ftello
#define MZ_FSEEK64 fseeko
#endif
#define MZ_FILE_STAT_STRUCT stat
#define MZ_FILE_STAT stat
#define MZ_FFLUSH fflush
#define MZ_FREOPEN(f, m, s) freopen(f, m, s)
#define MZ_DELETE_FILE remove
#endif /* end of the per-compiler/platform chain that starts at #if defined(_MSC_VER) || defined(__MINGW64__) */
#endif /* #ifdef MINIZ_NO_STDIO ... #else */

/* ASCII-only lower-casing: 'A'..'Z' map to 'a'..'z', any other value is returned unchanged. */
/* NOTE: c is expanded up to three times, so do not pass an expression with side effects. */
#define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c))

/* Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. */
/* The *_SIG values are the 32-bit record signatures, the *_SIZE values are record lengths in bytes, */
/* and the *_OFS values are byte offsets of individual fields from the start of their record. */
enum
{
    /* ZIP archive identifiers and record sizes */
    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50,
    MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50,
    MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50,
    MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30,
    MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46,
    MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22,

    /* ZIP64 archive identifier and record sizes */
    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06064b50,
    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG = 0x07064b50,
    MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE = 56,
    MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE = 20,
    MZ_ZIP64_EXTENDED_INFORMATION_FIELD_HEADER_ID = 0x0001,
    MZ_ZIP_DATA_DESCRIPTOR_ID = 0x08074b50,
    MZ_ZIP_DATA_DESCRIPTER_SIZE64 = 24,
    MZ_ZIP_DATA_DESCRIPTER_SIZE32 = 16,

    /* Central directory header record offsets */
    MZ_ZIP_CDH_SIG_OFS = 0,
    MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4,
    MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6,
    MZ_ZIP_CDH_BIT_FLAG_OFS = 8,
    MZ_ZIP_CDH_METHOD_OFS = 10,
    MZ_ZIP_CDH_FILE_TIME_OFS = 12,
    MZ_ZIP_CDH_FILE_DATE_OFS = 14,
    MZ_ZIP_CDH_CRC32_OFS = 16,
    MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20,
    MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24,
    MZ_ZIP_CDH_FILENAME_LEN_OFS = 28,
    MZ_ZIP_CDH_EXTRA_LEN_OFS = 30,
    MZ_ZIP_CDH_COMMENT_LEN_OFS = 32,
    MZ_ZIP_CDH_DISK_START_OFS = 34,
    MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36,
    MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38,
    MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42,

    /* Local directory header offsets */
    MZ_ZIP_LDH_SIG_OFS = 0,
    MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4,
    MZ_ZIP_LDH_BIT_FLAG_OFS = 6,
    MZ_ZIP_LDH_METHOD_OFS = 8,
    MZ_ZIP_LDH_FILE_TIME_OFS = 10,
    MZ_ZIP_LDH_FILE_DATE_OFS = 12,
    MZ_ZIP_LDH_CRC32_OFS = 14,
    MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18,
    MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22,
    MZ_ZIP_LDH_FILENAME_LEN_OFS = 26,
    MZ_ZIP_LDH_EXTRA_LEN_OFS = 28,
    /* A bit mask (bit 3), not an offset, despite sitting among the offsets. */
    MZ_ZIP_LDH_BIT_FLAG_HAS_LOCATOR = 1 << 3,

    /* End of central directory offsets */
    MZ_ZIP_ECDH_SIG_OFS = 0,
    MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4,
    MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6,
    MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8,
    MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10,
    MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12,
    MZ_ZIP_ECDH_CDIR_OFS_OFS = 16,
    MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20,

    /* ZIP64 End of central directory locator offsets */
    MZ_ZIP64_ECDL_SIG_OFS = 0,                    /* 4 bytes */
    MZ_ZIP64_ECDL_NUM_DISK_CDIR_OFS = 4,          /* 4 bytes */
    MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS = 8,  /* 8 bytes */
    MZ_ZIP64_ECDL_TOTAL_NUMBER_OF_DISKS_OFS = 16, /* 4 bytes */

    /* ZIP64 End of central directory header offsets */
    MZ_ZIP64_ECDH_SIG_OFS = 0,                       /* 4 bytes */
    MZ_ZIP64_ECDH_SIZE_OF_RECORD_OFS = 4,            /* 8 bytes */
    MZ_ZIP64_ECDH_VERSION_MADE_BY_OFS = 12,          /* 2 bytes */
    MZ_ZIP64_ECDH_VERSION_NEEDED_OFS = 14,           /* 2 bytes */
    MZ_ZIP64_ECDH_NUM_THIS_DISK_OFS = 16,            /* 4 bytes */
    MZ_ZIP64_ECDH_NUM_DISK_CDIR_OFS = 20,            /* 4 bytes */
    MZ_ZIP64_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 24, /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_TOTAL_ENTRIES_OFS = 32,       /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_SIZE_OFS = 40,                /* 8 bytes */
    MZ_ZIP64_ECDH_CDIR_OFS_OFS = 48,                 /* 8 bytes */

    /* Identifier, attribute and general purpose bit flag values (masks, not offsets) */
    MZ_ZIP_VERSION_MADE_BY_DOS_FILESYSTEM_ID = 0,
    MZ_ZIP_DOS_DIR_ATTRIBUTE_BITFLAG = 0x10,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_IS_ENCRYPTED = 1,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_COMPRESSED_PATCH_FLAG = 32,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_USES_STRONG_ENCRYPTION = 64,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_LOCAL_DIR_IS_MASKED = 8192,
    MZ_ZIP_GENERAL_PURPOSE_BIT_FLAG_UTF8 = 1 << 11
};

/* Growable array of fixed-size elements used by the ZIP code. */
/* m_size and m_capacity are counted in elements; m_element_size is the size of one element in bytes. */
typedef struct
{
    void *m_p;                 /* element storage, resized through the archive's m_pRealloc callback */
    size_t m_size, m_capacity; /* elements in use / elements allocated */
    mz_uint m_element_size;    /* bytes per element */
} mz_zip_array;

/* Private per-archive state hung off mz_zip_archive::m_pState. */
struct mz_zip_internal_state_tag
{
    /* Central directory data, stored as an array of bytes. */
    mz_zip_array m_central_dir;
    /* One mz_uint32 per file: index into m_central_dir where that file's central directory header starts. */
    mz_zip_array m_central_dir_offsets;
    /* mz_uint32 file indices, reordered by the filename heap sort to speed up name lookups. */
    mz_zip_array m_sorted_central_dir_offsets;

    /* The flags passed in when the archive is initially opened. */
    mz_uint32 m_init_flags;

    /* MZ_TRUE if the archive has a zip64 end of central directory headers, etc. */
    mz_bool m_zip64;

    /* MZ_TRUE if we found zip64 extended info in the central directory (m_zip64 will also be slammed to true too, even if we didn't find a zip64 end of central dir header, etc.) */
    mz_bool m_zip64_has_extended_info_fields;

    /* These fields are used by the file, FILE, memory, and memory/heap read/write helpers. */
    MZ_FILE *m_pFile;
    mz_uint64 m_file_archive_start_ofs;

    void *m_pMem;
    size_t m_mem_size;
    size_t m_mem_capacity;
};

/* Records the per-element byte size on an mz_zip_array. Expands to a bare assignment (element_size is not parenthesized). */
#define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size

#if defined(DEBUG) || defined(_DEBUG)
/* Debug builds only: asserts that index is below pArray->m_size and returns it unchanged. */
static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index)
{
    MZ_ASSERT(index < pArray->m_size);
    return index;
}
/* Typed element access (an lvalue); in debug builds the index is routed through the range check above. */
#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[mz_zip_array_range_check(array_ptr, index)]
#else
/* Typed element access (an lvalue) with no bounds checking. */
#define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type *)((array_ptr)->m_p))[index]
#endif

/* Resets *pArray to an empty array (all bytes zeroed) whose elements are element_size bytes each. */
static MZ_FORCEINLINE void mz_zip_array_init(mz_zip_array *pArray, mz_uint32 element_size)
{
    memset(pArray, 0, sizeof(*pArray));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(pArray, element_size);
}

/* Releases the array's storage through the archive's m_pFree callback, then zeroes the whole */
/* descriptor (m_element_size included). */
static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive *pZip, mz_zip_array *pArray)
{
    void *pStorage = pArray->m_p;
    pZip->m_pFree(pZip->m_pAlloc_opaque, pStorage);
    memset(pArray, 0, sizeof(*pArray));
}

/* Makes sure pArray can hold at least min_new_capacity elements, reallocating through pZip->m_pRealloc if needed. */
/* When growing is non-zero the capacity is raised by repeated doubling (starting from at least 1) */
/* instead of being set to exactly min_new_capacity. */
/* Returns MZ_TRUE on success; MZ_FALSE if the request cannot be expressed in size_t bytes or the */
/* reallocation fails, in which case the array is left untouched. */
static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive *pZip, mz_zip_array *pArray, size_t min_new_capacity, mz_uint growing)
{
    void *pNew_p;
    size_t new_capacity = min_new_capacity;
    MZ_ASSERT(pArray->m_element_size);
    if (pArray->m_capacity >= min_new_capacity)
        return MZ_TRUE;
    if (growing)
    {
        new_capacity = MZ_MAX(1, pArray->m_capacity);
        while (new_capacity < min_new_capacity)
        {
            /* Doubling past half of the size_t range would wrap to 0 and loop forever; settle for the exact request instead. */
            if (new_capacity > ((size_t)-1) / 2)
            {
                new_capacity = min_new_capacity;
                break;
            }
            new_capacity *= 2;
        }
    }
    /* Reject requests whose total byte count (element size * capacity) does not fit in size_t. */
    if ((pArray->m_element_size) && (new_capacity > ((size_t)-1) / pArray->m_element_size))
        return MZ_FALSE;
    if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity)))
        return MZ_FALSE;
    pArray->m_p = pNew_p;
    pArray->m_capacity = new_capacity;
    return MZ_TRUE;
}

/* Raises the array's capacity to at least new_capacity elements without changing m_size. */
/* Returns MZ_FALSE only if a needed reallocation fails. */
static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_capacity, mz_uint growing)
{
    if (new_capacity <= pArray->m_capacity)
        return MZ_TRUE;
    return mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing) ? MZ_TRUE : MZ_FALSE;
}

/* Sets the array's element count to new_size, enlarging the storage first when it is too small. */
/* Returns MZ_FALSE (with m_size unchanged) if that enlargement fails. */
static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive *pZip, mz_zip_array *pArray, size_t new_size, mz_uint growing)
{
    if ((new_size > pArray->m_capacity) && !mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing))
        return MZ_FALSE;
    pArray->m_size = new_size;
    return MZ_TRUE;
}

/* Reserves space for n more elements beyond the current m_size, using the doubling growth policy. */
static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive *pZip, mz_zip_array *pArray, size_t n)
{
    const size_t wanted = pArray->m_size + n;
    return mz_zip_array_reserve(pZip, pArray, wanted, MZ_TRUE);
}

/* Appends n elements copied from pElements to the end of the array, growing it by doubling as needed. */
/* Returns MZ_FALSE if the array could not be enlarged. */
static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive *pZip, mz_zip_array *pArray, const void *pElements, size_t n)
{
    const size_t old_count = pArray->m_size;
    mz_uint8 *pDst;

    if (!mz_zip_array_resize(pZip, pArray, old_count + n, MZ_TRUE))
        return MZ_FALSE;
    if (n == 0)
        return MZ_TRUE;

    /* The new elements land directly after the ones that were already present. */
    pDst = (mz_uint8 *)pArray->m_p + old_count * pArray->m_element_size;
    memcpy(pDst, pElements, n * pArray->m_element_size);
    return MZ_TRUE;
}

#ifndef MINIZ_NO_TIME
/* Unpacks an MS-DOS time/date pair into a struct tm and converts it with mktime(). */
/* tm_isdst is set to -1, leaving the daylight-saving decision to mktime(). */
static MZ_TIME_T mz_zip_dos_to_time_t(int dos_time, int dos_date)
{
    struct tm fields;
    memset(&fields, 0, sizeof(fields));

    /* Date word: bits 9..15 = years since 1980, bits 5..8 = month (1-based), bits 0..4 = day of month. */
    fields.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900;
    fields.tm_mon = ((dos_date >> 5) & 15) - 1;
    fields.tm_mday = dos_date & 31;

    /* Time word: bits 11..15 = hours, bits 5..10 = minutes, bits 0..4 = seconds / 2. */
    fields.tm_hour = (dos_time >> 11) & 31;
    fields.tm_min = (dos_time >> 5) & 63;
    fields.tm_sec = (dos_time << 1) & 62;

    fields.tm_isdst = -1;
    return mktime(&fields);
}

#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
/* Converts a time_t into the packed MS-DOS time and date words, using local time. */
/* If the time cannot be broken down (localtime_s reports an error, or localtime returns NULL), */
/* both outputs are set to 0. */
static void mz_zip_time_t_to_dos_time(MZ_TIME_T time, mz_uint16 *pDOS_time, mz_uint16 *pDOS_date)
{
#ifdef _MSC_VER
    struct tm tm_struct;
    struct tm *tm = &tm_struct;
    errno_t err = localtime_s(tm, &time);
    if (err)
    {
        *pDOS_date = 0;
        *pDOS_time = 0;
        return;
    }
#else
    struct tm *tm = localtime(&time);
    /* localtime() may return NULL for an unrepresentable time; mirror the MSVC error path instead of dereferencing it. */
    if (!tm)
    {
        *pDOS_date = 0;
        *pDOS_time = 0;
        return;
    }
#endif /* #ifdef _MSC_VER */

    /* Time word: hours << 11 | minutes << 5 | seconds / 2. Date word: (year - 1980) << 9 | month << 5 | day. */
    *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1));
    *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday);
}
#endif /* MINIZ_NO_ARCHIVE_WRITING_APIS */

#ifndef MINIZ_NO_STDIO
#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS
/* Stores the st_mtime reported by MZ_FILE_STAT for pFilename in *pTime. */
/* Returns MZ_FALSE (leaving *pTime untouched) if the stat call fails. */
static mz_bool mz_zip_get_file_modified_time(const char *pFilename, MZ_TIME_T *pTime)
{
    struct MZ_FILE_STAT_STRUCT info;
    int stat_rc;

    /* On Linux with x86 glibc, this call will fail on large files (I think >= 0x80000000 bytes) unless you compiled with _LARGEFILE64_SOURCE. Argh. */
    stat_rc = MZ_FILE_STAT(pFilename, &info);
    if (stat_rc != 0)
        return MZ_FALSE;

    *pTime = info.st_mtime;
    return MZ_TRUE;
}
#endif /* #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS*/

/* Applies the given access and modification times to pFilename via utime(). */
/* Returns non-zero when utime() succeeds (returns 0) and zero otherwise. */
static mz_bool mz_zip_set_file_times(const char *pFilename, MZ_TIME_T access_time, MZ_TIME_T modified_time)
{
    struct utimbuf stamps;

    memset(&stamps, 0, sizeof(stamps));
    stamps.actime = access_time;
    stamps.modtime = modified_time;

    return utime(pFilename, &stamps) == 0;
}
#endif /* #ifndef MINIZ_NO_STDIO */
#endif /* #ifndef MINIZ_NO_TIME */

/* Records err_num as the archive's last error (skipped when pZip is NULL). */
/* Always returns MZ_FALSE, so callers can write `return mz_zip_set_error(...)` on failure paths. */
static MZ_FORCEINLINE mz_bool mz_zip_set_error(mz_zip_archive *pZip, mz_zip_error err_num)
{
    if (pZip != NULL)
    {
        pZip->m_last_error = err_num;
    }
    return MZ_FALSE;
}

/* Common first step of opening an archive for reading. */
/* Rejects a NULL archive, or one that already has state or is not in MZ_ZIP_MODE_INVALID. */
/* Otherwise it installs the default allocator callbacks where none were supplied, resets the */
/* archive bookkeeping, allocates and zeroes the internal state, and switches the archive to */
/* MZ_ZIP_MODE_READING. */
/* Returns MZ_TRUE on success; on failure records MZ_ZIP_INVALID_PARAMETER or MZ_ZIP_ALLOC_FAILED */
/* and returns MZ_FALSE. */
static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags)
{
    mz_zip_internal_state *pState;

    if (!pZip)
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);
    if ((pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID))
        return mz_zip_set_error(pZip, MZ_ZIP_INVALID_PARAMETER);

    /* Fall back to the built-in allocator for any callback the caller left unset. */
    if (!pZip->m_pAlloc)
        pZip->m_pAlloc = miniz_def_alloc_func;
    if (!pZip->m_pFree)
        pZip->m_pFree = miniz_def_free_func;
    if (!pZip->m_pRealloc)
        pZip->m_pRealloc = miniz_def_realloc_func;

    pZip->m_archive_size = 0;
    pZip->m_central_directory_file_ofs = 0;
    pZip->m_total_files = 0;
    pZip->m_last_error = MZ_ZIP_NO_ERROR;

    pState = (mz_zip_internal_state *)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state));
    pZip->m_pState = pState;
    if (NULL == pState)
        return mz_zip_set_error(pZip, MZ_ZIP_ALLOC_FAILED);

    memset(pState, 0, sizeof(mz_zip_internal_state));

    /* The central directory is kept as raw bytes; both offset tables hold 32-bit values. */
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pState->m_central_dir, sizeof(mz_uint8));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pState->m_central_dir_offsets, sizeof(mz_uint32));
    MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pState->m_sorted_central_dir_offsets, sizeof(mz_uint32));

    pState->m_init_flags = flags;
    pState->m_zip64 = MZ_FALSE;
    pState->m_zip64_has_extended_info_fields = MZ_FALSE;

    pZip->m_zip_mode = MZ_ZIP_MODE_READING;
    return MZ_TRUE;
}

/* Strict "less than" on the filenames of two central directory entries, compared byte by byte */
/* after ASCII lower-casing. If one name is a prefix of the other, the shorter one sorts first. */
/* l_index and r_index are file indices into pCentral_dir_offsets. */
static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index)
{
    const mz_uint8 *pLeft = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index));
    const mz_uint8 *pRight = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index));
    const mz_uint left_len = MZ_READ_LE16(pLeft + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    const mz_uint right_len = MZ_READ_LE16(pRight + MZ_ZIP_CDH_FILENAME_LEN_OFS);
    const mz_uint common_len = MZ_MIN(left_len, right_len);
    mz_uint i;

    /* The filename bytes start right after the fixed-size central directory header. */
    pLeft += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;
    pRight += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE;

    for (i = 0; i < common_len; i++)
    {
        const mz_uint8 left_ch = (mz_uint8)MZ_TOLOWER(pLeft[i]);
        const mz_uint8 right_ch = (mz_uint8)MZ_TOLOWER(pRight[i]);
        if (left_ch != right_ch)
            return left_ch < right_ch;
    }

    /* Equal over the common prefix: the shorter name is the smaller one. */
    return left_len < right_len;
}

/* Exchanges two mz_uint32 lvalues through a temporary named t. */
/* Each argument is expanded twice and is not parenthesized, so pass simple lvalues without side effects. */
#define MZ_SWAP_UINT32(a, b) \
    do                       \
    {                        \
        mz_uint32 t = a;     \
        a = b;               \
        b = t;               \
    }                        \
    MZ_MACRO_END

/* Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) */
static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive *pZip)
{
    mz_zip_internal_state *pState = pZip->m_pState;
    const mz_zip_array *pCentral_dir_offsets = &pState->m_central_dir_offsets;
    const mz_zip_array *pCentral_dir = &pState->m_central_dir;
    mz_uint32 *pIndices;
    mz_uint32 start, end;
    const mz_uint32 size = pZip->m_total_files;

    if (size <= 1U)
        return;

    pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0);

    start = (size - 2U) >> 1U;
    for (;;)
    {
        mz_uint64 child, root = start;
        for (;;)
        {
            if ((child = (root << 1U) + 1U) >= size)
                break;
            child += (((child + 1U) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U])));
            if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
                break;
            MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
            root = child;
        }
        if (!start)
            break;
        start--;
    }

    end = size - 1;
    while (end > 0)
    {
        mz_uint64 child, root = 0;
        MZ_SWAP_UINT32(pIndices[end], pIndices[0]);
        for (;;)
        {
            if ((child = (root << 1U) + 1U) >= end)
                break;
            child += (((child + 1U) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1U]));
            if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child]))
                break;
            MZ_SWAP_UINT32(pIndices[root], pIndices[child]);
            root = child;
        }
        end--;
    }
}

/*
 * Searches backwards from the end of the archive for a 4-byte little-endian record signature.
 *
 * pZip        - archive being read; m_archive_size, m_pRead and m_pIO_opaque are used.
 * record_sig  - signature value to look for.
 * record_size - minimum number of bytes that must remain between a candidate signature and
 *               the end of the archive for the candidate to be accepted.
 * pOfs        - receives the file offset of the accepted signature on success; left
 *               untouched on failure.
 *
 * Returns MZ_TRUE when a signature was found, MZ_FALSE when the archive is smaller than
 * record_size, a read fails, the start of the file is reached, or the search has gone back
 * roughly 64KB plus record_size from the end without a match.
 */
static mz_bool mz_zip_reader_locate_header_sig(mz_zip_archive *pZip, mz_uint32 record_sig, mz_uint32 record_size, mz_int64 *pOfs)
{
    mz_int64 cur_file_ofs;
    mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
    mz_uint8 *pBuf = (mz_uint8 *)buf_u32;

    /* Basic sanity checks - reject files which are too small */
    if (pZip->m_archive_size < record_size)
        return MZ_FALSE;

    /* Find the record by scanning the file from the end towards the beginning. */
    cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0);
    for (;;)
    {
        int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs);

        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n)
            return MZ_FALSE;

        /* Scan this window from its last possible 4-byte position down to its first, so the match closest to the end of the file wins. */
        for (i = n - 4; i >= 0; --i)
        {
            mz_uint s = MZ_READ_LE32(pBuf + i);
            if (s == record_sig)
            {
                /* Only accept the candidate if a full record fits between it and the end of the archive. */
                if ((pZip->m_archive_size - (cur_file_ofs + i)) >= record_size)
                    break;
            }
        }

        if (i >= 0)
        {
            cur_file_ofs += i;
            break;
        }

        /* Give up if we've searched the entire file, or we've gone back "too far" (~64kb) */
        if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (MZ_UINT16_MAX + record_size)))
            return MZ_FALSE;

        /* Step back by one window minus 3 bytes so a signature straddling two windows is still seen.
           The step is cast to mz_int64 to keep the subtraction signed: with a 64-bit size_t the
           uncast expression is evaluated as unsigned, wraps around when cur_file_ofs is smaller
           than the step, and the clamp to 0 never takes effect. */
        cur_file_ofs = MZ_MAX(cur_file_ofs - (mz_int64)(sizeof(buf_u32) - 3), 0);
    }

    *pOfs = cur_file_ofs;
    return MZ_TRUE;
}

static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive *pZip, mz_uint flags)
{
    mz_uint cdir_size = 0, cdir_entries_on_this_disk = 0, num_this_disk = 0, cdir_disk_index = 0;
    mz_uint64 cdir_ofs = 0;
    mz_int64 cur_file_ofs = 0;
    const mz_uint8 *p;

    mz_uint32 buf_u32[4096 / sizeof(mz_uint32)];
    mz_uint8 *pBuf = (mz_uint8 *)buf_u32;
    mz_bool sort_central_dir = ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0);
    mz_uint32 zip64_end_of_central_dir_locator_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pZip64_locator = (mz_uint8 *)zip64_end_of_central_dir_locator_u32;

    mz_uint32 zip64_end_of_central_dir_header_u32[(MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)];
    mz_uint8 *pZip64_end_of_central_dir = (mz_uint8 *)zip64_end_of_central_dir_header_u32;

    mz_uint64 zip64_end_of_central_dir_ofs = 0;

    /* Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. */
    if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);

    if (!mz_zip_reader_locate_header_sig(pZip, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE, &cur_file_ofs))
        return mz_zip_set_error(pZip, MZ_ZIP_FAILED_FINDING_CENTRAL_DIR);

    /* Read and verify the end of central directory record. */
    if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE)
        return mz_zip_set_error(pZip, MZ_ZIP_FILE_READ_FAILED);

    if (MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG)
        return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);

    if (cur_file_ofs >= (MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE + MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE))
    {
        if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs - MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE, pZip64_locator, MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIZE)
        {
            if (MZ_READ_LE32(pZip64_locator + MZ_ZIP64_ECDL_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_LOCATOR_SIG)
            {
                zip64_end_of_central_dir_ofs = MZ_READ_LE64(pZip64_locator + MZ_ZIP64_ECDL_REL_OFS_TO_ZIP64_ECDR_OFS);
                if (zip64_end_of_central_dir_ofs > (pZip->m_archive_size - MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE))
                    return mz_zip_set_error(pZip, MZ_ZIP_NOT_AN_ARCHIVE);

                if (pZip->m_pRead(pZip->m_pIO_opaque, zip64_end_of_central_dir_ofs, pZip64_end_of_central_dir, MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIZE)
                {
                    if (MZ_READ_LE32(pZip64_end_of_central_dir + MZ_ZIP64_ECDH_SIG_OFS) == MZ_ZIP64_END_OF_CENTRAL_DIR_HEADER_SIG)
                    {
                        pZip->m_pState->m_zip64 = MZ_TRUE;
                    }
                }
            }
        }
    }

    pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS);
    cdir_entries_on_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS);
    num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS);
    cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS);
    cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS);
    cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS);

    if (pZip->m_pState->m_zip64