|
View:
New views
2 Messages
—
Rating Filter:
Alert me
|
|
|
Speeding up the tessellatorCarl, just some profile driven optimisations for the tessellator for
review. The highlight is a cumulative performance boost of ~30% (as measured by the tessellate perf-case and an across the board increase of ~10% for fill). -- Chris From 556218ca7c2dc0cfdfda52df60cbd618049d35c7 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@...> Date: Sat, 4 Oct 2008 13:15:08 +0100 Subject: [PATCH] [tessellator] Simplify special cases of edges to compare. Use our prior knowledge of the inputs and trivial conditions to simplify the edge equations and in many common conditions, such as vertical edges and common points, reduce the operations down to a just returning the non-degenerate 32 bit value. This adds an overhead of a few conditionals, but on the fast paths we actually generate fewer branches and many fewer arithmetic operations such that it improves performance of the fill performance tests by around 10%. --- src/cairo-bentley-ottmann.c | 136 ++++++++++++++++++++++++++++++++++--------- 1 files changed, 108 insertions(+), 28 deletions(-) diff --git a/src/cairo-bentley-ottmann.c b/src/cairo-bentley-ottmann.c index c657dd9..93d0fbf 100644 --- a/src/cairo-bentley-ottmann.c +++ b/src/cairo-bentley-ottmann.c @@ -197,11 +197,21 @@ _slope_compare (cairo_bo_edge_t *a, int32_t bdx = b->bottom.x - b->top.x; /* Since the dy's are all positive by construction we can fast - * path the case where the two edges point in different directions - * with respect to x. */ - if ((adx ^ bdx) < 0) { - return adx < 0 ? -1 : +1; - } else { + * path several common cases. + */ + + /* First check for vertical lines. */ + if (adx == 0) + return -bdx; + if (bdx == 0) + return adx; + + /* Then where the two edges point in different directions wrt x. */ + if ((adx ^ bdx) < 0) + return adx; + + /* Finally we actually need to do the general comparison. */ + { int32_t ady = a->bottom.y - a->top.y; int32_t bdy = b->bottom.y - b->top.y; cairo_int64_t adx_bdy = _cairo_int32x32_64_mul (adx, bdy); @@ -228,7 +238,9 @@ _slope_compare (cairo_bo_edge_t *a, * - (Y - A_y) * A_dx * B_dy * * Given the assumption that all the deltas fit within 32 bits, we can compute - * this comparison directly using 128 bit arithmetic. + * this comparison directly using 128 bit arithmetic. For certain, but common, + * input we can reduce this down to a single 32 bit compare by inspecting the + * deltas. * * (And put the burden of the work on developing fast 128 bit ops, which are * required throughout the tessellator.) @@ -245,32 +257,95 @@ edges_compare_x_for_y_general (const cairo_bo_edge_t *a, * should prevent that before the tessellation algorithm * begins. */ + int32_t dx; int32_t adx, ady; int32_t bdx, bdy; - cairo_int128_t L, R; + enum { + HAVE_NONE = 0x0, + HAVE_DX = 0x1, + HAVE_ADX = 0x2, + HAVE_DX_ADX = HAVE_DX | HAVE_ADX, + HAVE_BDX = 0x4, + HAVE_DX_BDX = HAVE_DX | HAVE_BDX, + HAVE_ADX_BDX = HAVE_ADX | HAVE_BDX, + HAVE_ALL = HAVE_DX | HAVE_ADX | HAVE_BDX + } have_dx_adx_bdx = HAVE_ALL; - adx = a->bottom.x - a->top.x; ady = a->bottom.y - a->top.y; + adx = a->bottom.x - a->top.x; + if (adx == 0) + have_dx_adx_bdx &= ~HAVE_ADX; - bdx = b->bottom.x - b->top.x; bdy = b->bottom.y - b->top.y; + bdx = b->bottom.x - b->top.x; + if (bdx == 0) + have_dx_adx_bdx &= ~HAVE_BDX; - L = _cairo_int64x32_128_mul (_cairo_int32x32_64_mul (ady, bdy), - a->top.x - b->top.x); + dx = a->top.x - b->top.x; + if (dx == 0) + have_dx_adx_bdx &= ~HAVE_DX; - R = _cairo_int128_sub (_cairo_int64x32_128_mul (_cairo_int32x32_64_mul (bdx, - ady), - y - b->top.y), - _cairo_int64x32_128_mul (_cairo_int32x32_64_mul (adx, - bdy), - y - a->top.y)); +#define L _cairo_int64x32_128_mul (_cairo_int32x32_64_mul (ady, bdy), dx) +#define A _cairo_int64x32_128_mul (_cairo_int32x32_64_mul (adx, bdy), y - a->top.y) +#define B _cairo_int64x32_128_mul (_cairo_int32x32_64_mul (bdx, ady), y - b->top.y) + switch (have_dx_adx_bdx) { + default: + case HAVE_NONE: + return 0; + case HAVE_DX: + /* A_dy * B_dy * (A_x - B_x) -?- 0 */ + return dx; /* ady * bdy is positive definite */ + case HAVE_ADX: + /* 0 -?- - (Y - A_y) * A_dx * B_dy */ + return adx; /* bdy * (y - a->top.y) is positive definite */ + case HAVE_BDX: + /* 0 -?- (Y - B_y) * B_dx * A_dy */ + return -bdx; /* ady * (y - b->top.y) is positive definite */ + case HAVE_ADX_BDX: + /* 0 -?- (Y - B_y) * B_dx * A_dy - (Y - A_y) * A_dx * B_dy */ + if ((adx ^ bdx) < 0) { + return adx; + } else if (a->top.y == b->top.y) { /* common origin */ + cairo_int64_t adx_bdy, bdx_ady; + + /* => A_dx * B_dy -?- B_dx * A_dy */ + + adx_bdy = _cairo_int32x32_64_mul (adx, bdy); + bdx_ady = _cairo_int32x32_64_mul (bdx, ady); + + return _cairo_int64_cmp (adx_bdy, bdx_ady); + } else + return _cairo_int128_cmp (A, B); + case HAVE_DX_ADX: + /* A_dy * (A_x - B_x) -?- - (Y - A_y) * A_dx */ + if ((-adx ^ dx) < 0) { + return dx; + } else { + cairo_int64_t ady_dx, dy_adx; - /* return _cairo_int128_cmp (L, R); */ - if (_cairo_int128_lt (L, R)) - return -1; - if (_cairo_int128_gt (L, R)) - return 1; - return 0; + ady_dx = _cairo_int32x32_64_mul (ady, dx); + dy_adx = _cairo_int32x32_64_mul (a->top.y - y, adx); + + return _cairo_int64_cmp (ady_dx, dy_adx); + } + case HAVE_DX_BDX: + /* B_dy * (A_x - B_x) -?- (Y - B_y) * B_dx */ + if ((bdx ^ dx) < 0) { + return dx; + } else { + cairo_int64_t bdy_dx, dy_bdx; + + bdy_dx = _cairo_int32x32_64_mul (bdy, dx); + dy_bdx = _cairo_int32x32_64_mul (y - b->top.y, bdx); + + return _cairo_int64_cmp (bdy_dx, dy_bdx); + } + case HAVE_ALL: + return _cairo_int128_cmp (L, _cairo_int128_sub (B, A)); + } +#undef B +#undef A +#undef L } /* @@ -304,10 +379,15 @@ edge_compare_for_y_against_x (const cairo_bo_edge_t *a, cairo_int64_t L, R; adx = a->bottom.x - a->top.x; - ady = a->bottom.y - a->top.y; + dx = x - a->top.x; + + if (adx == 0) + return -dx; + if ((adx ^ dx) < 0) + return adx; dy = y - a->top.y; - dx = x - a->top.x; + ady = a->bottom.y - a->top.y; L = _cairo_int32x32_64_mul (dy, adx); R = _cairo_int32x32_64_mul (dx, ady); @@ -352,7 +432,7 @@ edges_compare_x_for_y (const cairo_bo_edge_t *a, case HAVE_NEITHER: return edges_compare_x_for_y_general (a, b, y); case HAVE_AX: - return - edge_compare_for_y_against_x (b, y, ax); + return -edge_compare_for_y_against_x (b, y, ax); case HAVE_BX: return edge_compare_for_y_against_x (a, y, bx); case HAVE_BOTH: -- 1.5.6.3 From c5e0808953ac9fc46cef3c67a3ec73231fc393a6 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@...> Date: Tue, 7 Oct 2008 23:33:32 +0100 Subject: [PATCH] [tessellator] Perform cheap checks before computing intersect. First check if we can reject the intersection without having to perform the full divides, based on analysing: t * (ady*bdx - bdy*adx) = bdy * (ax - bx) - bdx * (ay - by) s * (ady*bdx - bdy*adx) = ady * (ax - bx) - adx * (ay - by) and excluding the result if by inspection we know that (s, t) <= 0 || (s, t) => 1. Doing so virtually eliminates all division and speeds up the strokes perf cases by about 5%. --- src/cairo-bentley-ottmann.c | 51 ++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 50 insertions(+), 1 deletions(-) diff --git a/src/cairo-bentley-ottmann.c b/src/cairo-bentley-ottmann.c index 93d0fbf..2d8e324 100644 --- a/src/cairo-bentley-ottmann.c +++ b/src/cairo-bentley-ottmann.c @@ -704,12 +704,61 @@ intersect_lines (cairo_bo_edge_t *a, int32_t dx2 = b->top.x - b->bottom.x; int32_t dy2 = b->top.y - b->bottom.y; - cairo_int64_t den_det = det32_64 (dx1, dy1, dx2, dy2); + cairo_int64_t den_det; + cairo_int64_t R; cairo_quorem64_t qr; + den_det = det32_64 (dx1, dy1, dx2, dy2); if (_cairo_int64_is_zero (den_det)) return CAIRO_BO_STATUS_PARALLEL; + /* Q: Can we determine that the lines do not intersect (within range) + * much more cheaply than computing the intersection point i.e. by + * avoiding the division? + * + * X = ax + t * adx = bx + s * bdx; + * Y = ay + t * ady = by + s * bdy; + * => t * (ady*bdx - bdy*adx) = bdx * (by - ay) + bdy * (ax - bx) + * => t * L = R + * + * Therefore we can reject any intersection (under the criteria for + * valid intersection events) if: + * L^R < 0 => t < 0 + * L<R => t > 1 + * + * (where top/bottom must at least extend to the line endpoints). + * + * A similar substitution can be performed for s, yielding: + * s * (ady*bdx - bdy*adx) = ady * (ax - bx) - adx * (ay - by) + */ + R = det32_64 (dx2, dy2, b->top.x - a->top.x, b->top.y - a->top.y); + if (_cairo_int64_is_zero (R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + if (_cairo_int64_negative (den_det) ^ _cairo_int64_negative (R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + if (_cairo_int64_negative (den_det)) { + if (_cairo_int64_ge (den_det, R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + } else { + if (_cairo_int64_le (den_det, R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + } + + R = det32_64 (dy1, dx1, a->top.y - b->top.y, a->top.x - b->top.x); + if (_cairo_int64_is_zero (R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + if (_cairo_int64_negative (den_det) ^ _cairo_int64_negative (R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + if (_cairo_int64_negative (den_det)) { + if (_cairo_int64_ge (den_det, R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + } else { + if (_cairo_int64_le (den_det, R)) + return CAIRO_BO_STATUS_NO_INTERSECTION; + } + + /* We now know that the two lines should intersect within range. */ + a_det = det32_64 (a->top.x, a->top.y, a->bottom.x, a->bottom.y); b_det = det32_64 (b->top.x, b->top.y, -- 1.5.6.3 From 2048dc8bc59f233f3867adba13360e4415b3560a Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@...> Date: Tue, 7 Oct 2008 11:20:44 +0100 Subject: [PATCH] [wideint] Implement _cairo_int64x32_128_mul(). By implementing a 64bit by 32bit multiply special case, we can take advantage of the occasions when we can use a narrower multiply. This case is interesting due to its heavy use within intersect_lines() and edges_compare_x_for_y() causing it to appears on profiles. The implementation is about 2x faster during performance testing simply due to the fact most of the multiplicands fit within 32bits and are handled by the 32x32_64_mul(). Overall it gives a performance improvement of ~2%. --- src/cairo-wideint-private.h | 9 +++++++- src/cairo-wideint.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletions(-) diff --git a/src/cairo-wideint-private.h b/src/cairo-wideint-private.h index f5aac28..8a19e38 100644 --- a/src/cairo-wideint-private.h +++ b/src/cairo-wideint-private.h @@ -72,6 +72,7 @@ cairo_uint64_t I _cairo_uint64_not (cairo_uint64_t a); #define _cairo_int64_to_uint64(i) (i) cairo_int64_t I _cairo_int32_to_int64(int32_t i); +#define _cairo_int32_to_uint64(i) (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i))) #define _cairo_int64_to_int32(a) ((int32_t) _cairo_uint64_to_uint32(a)) #define _cairo_int64_add(a,b) _cairo_uint64_add (a,b) #define _cairo_int64_sub(a,b) _cairo_uint64_sub (a,b) @@ -111,6 +112,7 @@ int I _cairo_int64_cmp (cairo_int64_t a, cairo_int64_t b); #define _cairo_int64_to_uint64(i) ((uint64_t) (i)) #define _cairo_int32_to_int64(i) ((int64_t) (i)) +#define _cairo_int32_to_uint64(i) (_cairo_int64_to_uint64 (_cairo_int32_to_int64(i))) #define _cairo_int64_to_int32(i) ((int32_t) (i)) #define _cairo_int64_add(a,b) ((a) + (b)) #define _cairo_int64_sub(a,b) ((a) - (b)) @@ -169,6 +171,7 @@ cairo_uint128_t I _cairo_uint128_add (cairo_uint128_t a, cairo_uint128_t b); cairo_uint128_t I _cairo_uint128_sub (cairo_uint128_t a, cairo_uint128_t b); cairo_uint128_t I _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b); cairo_uint128_t I _cairo_uint64x64_128_mul (cairo_uint64_t a, cairo_uint64_t b); +cairo_int128_t I _cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b); cairo_uint128_t I _cairo_uint128_lsl (cairo_uint128_t a, int shift); cairo_uint128_t I _cairo_uint128_rsl (cairo_uint128_t a, int shift); cairo_uint128_t I _cairo_uint128_rsa (cairo_uint128_t a, int shift); @@ -184,6 +187,7 @@ cairo_uint128_t I _cairo_uint128_not (cairo_uint128_t a); #define _cairo_int128_to_uint128(i) (i) cairo_int128_t I _cairo_int32_to_int128 (int32_t i); +#define _cairo_int32_to_uint128(i) (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i))) cairo_int128_t I _cairo_int64_to_int128 (cairo_int64_t i); #define _cairo_int128_to_int64(a) ((cairo_int64_t) (a).lo) #define _cairo_int128_to_int32(a) _cairo_int64_to_int32(_cairo_int128_to_int64(a)) @@ -191,7 +195,7 @@ cairo_int128_t I _cairo_int64_to_int128 (cairo_int64_t i); #define _cairo_int128_sub(a,b) _cairo_uint128_sub(a,b) #define _cairo_int128_mul(a,b) _cairo_uint128_mul(a,b) cairo_int128_t I _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b); -#define _cairo_int64x32_128_mul(a, b) _cairo_int64x64_128_mul(a, _cairo_int32_to_int64(b)) +cairo_int128_t I _cairo_int64x32_128_mul (cairo_int64_t a, int32_t b); #define _cairo_int128_lsl(a,b) _cairo_uint128_lsl(a,b) #define _cairo_int128_rsl(a,b) _cairo_uint128_rsl(a,b) #define _cairo_int128_rsa(a,b) _cairo_uint128_rsa(a,b) @@ -213,6 +217,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b); #define _cairo_uint128_sub(a,b) ((a) - (b)) #define _cairo_uint128_mul(a,b) ((a) * (b)) #define _cairo_uint64x64_128_mul(a,b) ((uint128_t) (a) * (b)) +#define _cairo_uint64x32_128_mul(a,b) ((uint128_t) (a) * (b)) #define _cairo_uint128_lsl(a,b) ((a) << (b)) #define _cairo_uint128_rsl(a,b) ((uint128_t) (a) >> (b)) #define _cairo_uint128_rsa(a,b) ((uint128_t) ((int128_t) (a) >> (b))) @@ -228,6 +233,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b); #define _cairo_int128_to_uint128(i) ((uint128_t) (i)) #define _cairo_int32_to_int128(i) ((int128_t) (i)) +#define _cairo_int32_to_uint128(i) (_cairo_int128_to_uint128 (_cairo_int32_to_int128 (i))) #define _cairo_int64_to_int128(i) ((int128_t) (i)) #define _cairo_int128_to_int64(i) ((int64_t) (i)) #define _cairo_int128_to_int32(i) ((int32_t) (i)) @@ -235,6 +241,7 @@ int I _cairo_int128_cmp (cairo_int128_t a, cairo_int128_t b); #define _cairo_int128_sub(a,b) ((a) - (b)) #define _cairo_int128_mul(a,b) ((a) * (b)) #define _cairo_int64x64_128_mul(a,b) ((int128_t) (a) * (b)) +#define _cairo_int64x32_128_mul(a,b) ((int128_t) (a) * (b)) #define _cairo_int128_lt(a,b) ((a) < (b)) #define _cairo_int128_cmp(a,b) ((a) == (b) ? 0 : (a) < (b) ? -1 : 1) #define _cairo_int128_is_zero(a) ((a) == 0) diff --git a/src/cairo-wideint.c b/src/cairo-wideint.c index 4565593..78d2c6d 100644 --- a/src/cairo-wideint.c +++ b/src/cairo-wideint.c @@ -511,6 +511,52 @@ _cairo_int64x64_128_mul (cairo_int64_t a, cairo_int64_t b) } cairo_uint128_t +_cairo_uint64x32_128_mul (cairo_uint64_t a, uint32_t b) +{ + cairo_uint128_t s; + cairo_uint64_t r0, r1, r2; + + r0 = _cairo_uint32x32_64_mul (uint64_lo32 (a), b); + r2 = _cairo_uint32x32_64_mul (uint64_hi32 (a), b); + + r1 = _cairo_uint64_add (uint64_hi (r0), r2); /* this can carry */ + s.hi = uint64_hi (r1); + if (_cairo_uint64_lt (r1, r2)) /* check */ + s.hi = _cairo_uint64_add (s.hi, uint64_carry32); + + s.lo = _cairo_uint64_add (uint64_shift32 (r1), + uint64_lo (r0)); + + return s; +} + +cairo_int128_t +_cairo_int64x32_128_mul (cairo_int64_t a, int32_t b) +{ + cairo_int128_t s; + + if (uint64_hi (a) == 0 || uint64_hi (a) == -1U) { + return _cairo_int64_to_int128 ( + _cairo_int32x32_64_mul (uint64_lo (a), b)); + } + + if (b > 0) { + s = _cairo_uint64x32_128_mul (_cairo_int64_to_uint64 (a), + (uint32_t) b); + } else if (b < 0) { + s = _cairo_uint64x64_128_mul (_cairo_int64_to_uint64 (a), + _cairo_int32_to_uint64 (b)); + s.hi = _cairo_uint64_sub (s.hi, _cairo_int64_to_uint64 (a)); + } else + return _cairo_int32_to_int128 (0); + + if (_cairo_int64_negative (a)) + s.hi = _cairo_uint64_sub (s.hi, _cairo_int32_to_uint64 (b)); + + return s; +} + +cairo_uint128_t _cairo_uint128_mul (cairo_uint128_t a, cairo_uint128_t b) { cairo_uint128_t s; -- 1.5.6.3 From 16325cc872f51241812ef421202d0d8b2a30a7d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@...> Date: Tue, 7 Oct 2008 22:09:37 +0100 Subject: [PATCH] [tessellator] Use a combsort for sorting the event queue. In my experiments using cairo-perf, the inlined combsort is ~20% quicker than the library qsort. --- src/Makefile.am | 3 ++ src/cairo-bentley-ottmann.c | 42 +++++++++++++----------- src/cairo-combsort-fragment.c | 72 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 19 deletions(-) create mode 100644 src/cairo-combsort-fragment.c diff --git a/src/Makefile.am b/src/Makefile.am index bf87efb..5d1065d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -41,6 +41,9 @@ BUILT_SOURCES += cairo-features.h cairo-supported-features.h cairo-features.h cairo-supported-features.h: cd $(top_builddir) && ./config.status src/$@ +# Fragments +EXTRA_DIST += cairo-combsort-fragment.c + pkgconfigdir = $(libdir)/pkgconfig pkgconfig_DATA = $(enabled_cairo_pkgconf) diff --git a/src/cairo-bentley-ottmann.c b/src/cairo-bentley-ottmann.c index 2d8e324..17b3599 100644 --- a/src/cairo-bentley-ottmann.c +++ b/src/cairo-bentley-ottmann.c @@ -630,21 +630,18 @@ cairo_bo_event_compare_abstract (void *list, } static int -cairo_bo_event_compare_pointers (void const *voida, - void const *voidb) +cairo_bo_event_compare_pointers (const cairo_bo_event_t *a, + const cairo_bo_event_t *b) { - cairo_bo_event_t const * const *a = voida; - cairo_bo_event_t const * const *b = voidb; - if (*a != *b) { - int cmp = cairo_bo_event_compare (*a, *b); - if (cmp) - return cmp; - if (*a < *b) - return -1; - if (*a > *b) - return +1; - } - return 0; + int cmp; + + if (a == b) + return 0; + cmp = cairo_bo_event_compare (a, b); + if (cmp) + return cmp; + + return a - b; } static inline cairo_int64_t @@ -957,6 +954,14 @@ _cairo_bo_event_dequeue (cairo_bo_event_queue_t *event_queue) return intersection; } +#define _CAIRO_QS_NAME _cairo_bo_event_queue_sort +#define _CAIRO_QS_TYPE cairo_bo_event_t * +#define _CAIRO_QS_COMPARE cairo_bo_event_compare_pointers +#include "cairo-combsort-fragment.c" +#undef _CAIRO_QS_NAME +#undef _CAIRO_QS_TYPE +#undef _CAIRO_QS_COMPARE + static cairo_status_t _cairo_bo_event_queue_init (cairo_bo_event_queue_t *event_queue, cairo_bo_edge_t *edges, @@ -989,8 +994,8 @@ _cairo_bo_event_queue_init (cairo_bo_event_queue_t *event_queue, event_queue->next_startstop_event_index = 0; for (i = 0; i < num_edges; i++) { - sorted_event_ptrs[2*i] = &events[2*i]; - sorted_event_ptrs[2*i+1] = &events[2*i+1]; + sorted_event_ptrs[i] = &events[2*i]; + sorted_event_ptrs[i+num_edges] = &events[2*i+1]; /* Initialize "middle" to top */ edges[i].middle = edges[i].top; @@ -1006,9 +1011,8 @@ _cairo_bo_event_queue_init (cairo_bo_event_queue_t *event_queue, edges[i].bottom); } - qsort (sorted_event_ptrs, num_events, - sizeof(cairo_bo_event_t *), - cairo_bo_event_compare_pointers); + _cairo_bo_event_queue_sort (sorted_event_ptrs, num_events); + return CAIRO_STATUS_SUCCESS; } diff --git a/src/cairo-combsort-fragment.c b/src/cairo-combsort-fragment.c new file mode 100644 index 0000000..10b63c1 --- /dev/null +++ b/src/cairo-combsort-fragment.c @@ -0,0 +1,72 @@ +/* + * Copyright © 2008 Chris Wilson + * + * This library is free software; you can redistribute it and/or + * modify it either under the terms of the GNU Lesser General Public + * License version 2.1 as published by the Free Software Foundation + * (the "LGPL") or, at your option, under the terms of the Mozilla + * Public License Version 1.1 (the "MPL"). If you do not alter this + * notice, a recipient may use your version of this file under either + * the MPL or the LGPL. + * + * You should have received a copy of the LGPL along with this library + * in the file COPYING-LGPL-2.1; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * You should have received a copy of the MPL along with this library + * in the file COPYING-MPL-1.1 + * + * The contents of this file are subject to the Mozilla Public License + * Version 1.1 (the "License"); you may not use this file except in + * compliance with the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY + * OF ANY KIND, either express or implied. See the LGPL or the MPL for + * the specific language governing rights and limitations. + * + * The Original Code is the cairo graphics library. + * + * The Initial Developer of the Original Code is Chris Wilson + * + * Contributor(s): + * Chris Wilson <chris@...> + */ + +/* This fragment implements a comb sort (specifically combsort11) */ + +#ifndef _HAVE_CAIRO_COMBSORT_NEWGAP +#define _HAVE_CAIRO_COMBSORT_NEWGAP +static inline unsigned int +_cairo_combsort_newgap (unsigned int gap) +{ + gap = 10 * gap / 13; + if (gap == 9 || gap == 10) + gap = 11; + if (gap < 1) + gap = 1; + return gap; +} +#endif + +static void +_CAIRO_QS_NAME (_CAIRO_QS_TYPE *base, unsigned int nmemb) +{ + unsigned int gap = nmemb; + unsigned int i, j; + int swapped; + + do { + gap = _cairo_combsort_newgap (gap); + swapped = 0; + for (i = 0; i < nmemb-gap ; i++) { + j = i + gap; + if (_CAIRO_QS_COMPARE (base[i], base[j]) > 0 ) { + _CAIRO_QS_TYPE tmp; + tmp = base[i]; + base[i] = base[j]; + base[j] = tmp; + swapped = 1; + } + } + } while (gap > 1 || swapped); +} -- 1.5.6.3 From 49fecdf9577011afefa478487542cdfe4fbc3a8a Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@...> Date: Wed, 8 Oct 2008 13:04:37 +0100 Subject: [PATCH] [tessellator] Simplify dequeuing by using a sentinel value. Instead of maintaining an index and comparing it to the count, just mark the last startstop event with NULL and stop dequeuing events upon seeing that sentinel value. (Removes an unreadable line, and cachegrind hints that it may be a tiny bit faster.) --- src/cairo-bentley-ottmann.c | 32 ++++++++++++++++---------------- 1 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/cairo-bentley-ottmann.c b/src/cairo-bentley-ottmann.c index 17b3599..680a607 100644 --- a/src/cairo-bentley-ottmann.c +++ b/src/cairo-bentley-ottmann.c @@ -128,8 +128,6 @@ typedef struct _cairo_bo_event_queue { cairo_bo_event_t *startstop_events; cairo_bo_event_t **sorted_startstop_event_ptrs; - unsigned next_startstop_event_index; - unsigned num_startstop_events; } cairo_bo_event_queue_t; /* This structure extends #cairo_skip_list_t, which must come first. */ @@ -941,16 +939,17 @@ _cairo_bo_event_dequeue (cairo_bo_event_queue_t *event_queue) cairo_bo_event_t *intersection = elt ? SKIP_ELT_TO_EVENT (elt) : NULL; cairo_bo_event_t *startstop; - if (event_queue->next_startstop_event_index == event_queue->num_startstop_events) + startstop = *event_queue->sorted_startstop_event_ptrs; + if (startstop == NULL) return intersection; - startstop = event_queue->sorted_startstop_event_ptrs[ - event_queue->next_startstop_event_index]; - if (!intersection || cairo_bo_event_compare (startstop, intersection) <= 0) + if (intersection == NULL || + cairo_bo_event_compare (startstop, intersection) <= 0) { - event_queue->next_startstop_event_index++; + event_queue->sorted_startstop_event_ptrs++; return startstop; } + return intersection; } @@ -971,27 +970,24 @@ _cairo_bo_event_queue_init (cairo_bo_event_queue_t *event_queue, cairo_bo_event_t *events, **sorted_event_ptrs; unsigned num_events = 2*num_edges; - memset (event_queue, 0, sizeof(*event_queue)); - _cairo_skip_list_init (&event_queue->intersection_queue, - cairo_bo_event_compare_abstract, - sizeof (cairo_bo_event_t)); - if (0 == num_edges) - return CAIRO_STATUS_SUCCESS; + cairo_bo_event_compare_abstract, + sizeof (cairo_bo_event_t)); /* The skip_elt_t field of a cairo_bo_event_t isn't used for start * or stop events, so this allocation is safe. XXX: make the * event type a union so it doesn't always contain the skip * elt? */ - events = _cairo_malloc_ab (num_events, sizeof (cairo_bo_event_t) + sizeof(cairo_bo_event_t*)); + events = _cairo_malloc_ab_plus_c (num_events, + sizeof (cairo_bo_event_t) + + sizeof (cairo_bo_event_t *), + sizeof (cairo_bo_event_t *)); if (events == NULL) return _cairo_error (CAIRO_STATUS_NO_MEMORY); sorted_event_ptrs = (cairo_bo_event_t **) (events + num_events); event_queue->startstop_events = events; event_queue->sorted_startstop_event_ptrs = sorted_event_ptrs; - event_queue->num_startstop_events = num_events; - event_queue->next_startstop_event_index = 0; for (i = 0; i < num_edges; i++) { sorted_event_ptrs[i] = &events[2*i]; @@ -1012,6 +1008,7 @@ _cairo_bo_event_queue_init (cairo_bo_event_queue_t *event_queue, } _cairo_bo_event_queue_sort (sorted_event_ptrs, num_events); + event_queue->sorted_startstop_event_ptrs[num_events] = NULL; return CAIRO_STATUS_SUCCESS; } @@ -1497,6 +1494,9 @@ _cairo_bentley_ottmann_tessellate_bo_edges (cairo_bo_edge_t *edges, cairo_bo_edge_t *left, *right; cairo_bo_edge_t *edge1, *edge2; + if (num_edges == 0) + return CAIRO_STATUS_SUCCESS; + status = _cairo_bo_event_queue_init (&event_queue, edges, num_edges); if (status) return status; -- 1.5.6.3 _______________________________________________ cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
|
|
Re: Speeding up the tessellatorChris,
Looks good! I'd just rather you rename the comb sort file to a header file that takes care to not make 'make check' fail. Take a look at cairo-mutex-list-private.h for example. behdad Chris Wilson wrote: > Carl, just some profile driven optimisations for the tessellator for > review. The highlight is a cumulative performance boost of ~30% (as > measured by the tessellate perf-case and an across the board increase of > ~10% for fill). > > > ------------------------------------------------------------------------ > > _______________________________________________ > cairo mailing list > cairo@... > http://lists.cairographics.org/mailman/listinfo/cairo cairo mailing list cairo@... http://lists.cairographics.org/mailman/listinfo/cairo |
| Free Forum Powered by Nabble | Forum Help |