[packages/xorg-driver-video-intel] - bring in sync with current git to fix crashes with xorg 1.18 - rel 4

baggins baggins at pld-linux.org
Mon Dec 28 00:22:30 CET 2015


commit 96bae86b9a37f6ed2340946a458a2ee5909ce60e
Author: Jan Rękorajski <baggins at pld-linux.org>
Date:   Mon Dec 28 00:21:45 2015 +0100

    - bring in sync with current git to fix crashes with xorg 1.18
    - rel 4

 git.patch                    | 29487 +++++++++++++++++++++++++++++++++++++++++
 xorg-driver-video-intel.spec |     8 +-
 2 files changed, 29494 insertions(+), 1 deletion(-)
---
diff --git a/xorg-driver-video-intel.spec b/xorg-driver-video-intel.spec
index 189f299..a6b6fc0 100644
--- a/xorg-driver-video-intel.spec
+++ b/xorg-driver-video-intel.spec
@@ -14,13 +14,14 @@ Summary:	X.org video driver for Intel integrated graphics chipsets
 Summary(pl.UTF-8):	Sterownik obrazu X.org dla zintegrowanych układów graficznych Intela
 Name:		xorg-driver-video-intel
 Version:	2.99.917
-Release:	3
+Release:	4
 License:	MIT
 Group:		X11/Applications
 Source0:	http://xorg.freedesktop.org/releases/individual/driver/xf86-video-intel-%{version}.tar.bz2
 # Source0-md5:	fa196a66e52c0c624fe5d350af7a5e7b
 URL:		http://xorg.freedesktop.org/
 Patch0:		%{name}-xserver_1_8_0.patch
+Patch1:		git.patch
 BuildRequires:	Mesa-libGL-devel
 #BuildRequires:	autoconf >= 2.63
 #BuildRequires:	automake >= 1:1.10.2-2
@@ -92,8 +93,13 @@ Wymaga aktywnego Kernel Mode Setting (KMS).
 %prep
 %setup -q -n xf86-video-intel-%{version}
 %patch0 -p1
+%patch1 -p1
 
 %build
+%{__aclocal}
+%{__autoconf}
+%{__autoheader}
+%{__automake}
 %configure \
 	--disable-silent-rules \
 	%{?with_glamor:--enable-glamor} \
diff --git a/git.patch b/git.patch
new file mode 100644
index 0000000..ec8ea66
--- /dev/null
+++ b/git.patch
@@ -0,0 +1,29487 @@
+diff --git a/Makefile.am b/Makefile.am
+index 418fdc9..853e622 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -25,7 +25,7 @@ SUBDIRS = man libobj xvmc src tools
+ MAINTAINERCLEANFILES = ChangeLog INSTALL
+ 
+ if HAVE_X11
+-SUBDIRS += test
++SUBDIRS += test benchmarks
+ endif
+ 
+ .PHONY: ChangeLog INSTALL
+diff --git a/NEWS b/NEWS
+index 604b9cc..0e20033 100644
+--- a/NEWS
++++ b/NEWS
+@@ -21,7 +21,7 @@ should make one more snapshot before an imminent release.
+    Before kernel 3.19, O_NONBLOCK support is broken and so we must avoid
+    reading if we are not expecting an event.
+ 
+- * Backwards compatibilty fix for fake triple buffering with PRIME and
++ * Backwards compatibility fix for fake triple buffering with PRIME and
+    Xorg-1.15
+    https://bugs.freedesktop.org/show_bug.cgi?id=85144#c12
+ 
+@@ -51,7 +51,7 @@ should make one more snapshot before an imminent release.
+ Snapshot 2.99.916 (2014-09-08)
+ ==============================
+ Quick update for MST in UXA - we need to hook up the RandR outputs for
+-dynamicaly added connectors.
++dynamically added connectors.
+ 
+ 
+ Snapshot 2.99.915 (2014-09-08)
+@@ -503,7 +503,7 @@ release.
+    backlight property is queried whilst the connector is disabled
+    https://bugs.freedesktop.org/show_bug.cgi?id=70406
+ 
+- * Pad GETCONNECTOR ioctl for compatability between 32/64-bit userspace
++ * Pad GETCONNECTOR ioctl for compatibility between 32/64-bit userspace
+    and kernel
+ 
+  * Handle long glyph runs correctly
+@@ -523,7 +523,7 @@ snapshot beforehand to push out the bug fixes from the last week.
+ 
+  * Fix video output using sprites when changing the image size
+ 
+- * Apply more restrictive tile constaints for 915g class devices
++ * Apply more restrictive tile constraints for 915g class devices
+    https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1232546
+ 
+  * Ensure all overlapping rectangles are drawn for XRenderFillRectangles
+@@ -1132,7 +1132,7 @@ operation.
+  * Explicitly prevent ring-switching for synchronized rendering to
+    scanouts (for vsync).
+ 
+- * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusuable)
++ * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable)
+    https://bugs.freedesktop.org/show_bug.cgi?id=59539
+ 
+ 
+@@ -1226,7 +1226,7 @@ Release 2.20.15 (2012-12-03)
+ ============================
+ And lo, enabling more of the common acceleration paths for gen4 revealed
+ another lurking bug - something is wrong with how we prepare Y-tiling
+-surfaces for rendering. For the time being, we can surreptiously disable
++surfaces for rendering. For the time being, we can surreptitiously disable
+ them for gen4 and avoid hitting GPU hangs.
+ 
+  * Avoid clobbering the render state after failing to convert the
+@@ -1515,7 +1515,7 @@ Release 2.20.5 (2012-08-26)
+ Another silly bug found, another small bugfix release. The goal was for
+ the driver to bind to all Intel devices supported by the kernel.
+ Unfortunately we were too successful and started claiming Pouslbo,
+-Medfield and Cedarview devices which are still encumbered by propietary
++Medfield and Cedarview devices which are still encumbered by proprietary
+ IP and not supported by this driver.
+ 
+ Bugs fixed since 2.20.4:
+diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
+new file mode 100644
+index 0000000..301c012
+--- /dev/null
++++ b/benchmarks/.gitignore
+@@ -0,0 +1,2 @@
++dri2-swap
++dri3-swap
+diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
+new file mode 100644
+index 0000000..4976e8a
+--- /dev/null
++++ b/benchmarks/Makefile.am
+@@ -0,0 +1,14 @@
++AM_CFLAGS = @CWARNFLAGS@ $(X11_CFLAGS) $(DRM_CFLAGS)
++LDADD = $(X11_LIBS) $(DRM_LIBS) $(CLOCK_GETTIME_LIBS)
++
++check_PROGRAMS =
++
++if DRI2
++check_PROGRAMS += dri2-swap
++endif
++
++if DRI3
++check_PROGRAMS += dri3-swap
++AM_CFLAGS += $(X11_DRI3_CFLAGS)
++LDADD += $(X11_DRI3_LIBS)
++endif
+diff --git a/benchmarks/dri2-swap.c b/benchmarks/dri2-swap.c
+new file mode 100644
+index 0000000..3d9d30a
+--- /dev/null
++++ b/benchmarks/dri2-swap.c
+@@ -0,0 +1,588 @@
++/*
++ * Copyright (c) 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <X11/Xlib.h>
++#include <X11/Xatom.h>
++#include <X11/Xlib-xcb.h>
++#include <X11/Xutil.h>
++#include <X11/Xlibint.h>
++#include <X11/extensions/dpms.h>
++#include <X11/extensions/randr.h>
++#include <X11/extensions/Xcomposite.h>
++#include <X11/extensions/Xdamage.h>
++#include <X11/extensions/Xrandr.h>
++#include <xcb/xcb.h>
++#include <xcb/dri2.h>
++#include <xf86drm.h>
++
++#include <stdio.h>
++#include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <assert.h>
++#include <errno.h>
++#include <setjmp.h>
++#include <signal.h>
++
++#include <X11/Xlibint.h>
++#include <X11/extensions/Xext.h>
++#include <X11/extensions/extutil.h>
++#include <X11/extensions/dri2proto.h>
++#include <X11/extensions/dri2tokens.h>
++#include <X11/extensions/Xfixes.h>
++
++static char dri2ExtensionName[] = DRI2_NAME;
++static XExtensionInfo *dri2Info;
++static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info)
++
++static Bool
++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire);
++static Status
++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire);
++static int
++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code);
++
++static /* const */ XExtensionHooks dri2ExtensionHooks = {
++  NULL,                   /* create_gc */
++  NULL,                   /* copy_gc */
++  NULL,                   /* flush_gc */
++  NULL,                   /* free_gc */
++  NULL,                   /* create_font */
++  NULL,                   /* free_font */
++  DRI2CloseDisplay,       /* close_display */
++  DRI2WireToEvent,        /* wire_to_event */
++  DRI2EventToWire,        /* event_to_wire */
++  DRI2Error,              /* error */
++  NULL,                   /* error_string */
++};
++
++static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay,
++                                   dri2Info,
++                                   dri2ExtensionName,
++                                   &dri2ExtensionHooks,
++                                   0, NULL)
++
++static Bool
++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire)
++{
++   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++   XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++   switch ((wire->u.u.type & 0x7f) - info->codes->first_event) {
++#ifdef X_DRI2SwapBuffers
++   case DRI2_BufferSwapComplete:
++      return False;
++#endif
++#ifdef DRI2_InvalidateBuffers
++   case DRI2_InvalidateBuffers:
++      return False;
++#endif
++   default:
++      /* client doesn't support server event */
++      break;
++   }
++
++   return False;
++}
++
++/* We don't actually support this.  It doesn't make sense for clients to
++ * send each other DRI2 events.
++ */
++static Status
++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire)
++{
++   XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++   XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++   switch (event->type) {
++   default:
++      /* client doesn't support server event */
++      break;
++   }
++
++   return Success;
++}
++
++static int
++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code)
++{
++	if (err->majorCode == codes->major_opcode &&
++	    err->errorCode == BadDrawable &&
++	    err->minorCode == X_DRI2CopyRegion)
++		return True;
++
++	/* If the X drawable was destroyed before the GLX drawable, the
++	 * DRI2 drawable will be gone by the time we call
++	 * DRI2DestroyDrawable.  So just ignore BadDrawable here. */
++	if (err->majorCode == codes->major_opcode &&
++	    err->errorCode == BadDrawable &&
++	    err->minorCode == X_DRI2DestroyDrawable)
++		return True;
++
++	/* If the server is non-local DRI2Connect will raise BadRequest.
++	 * Swallow this so that DRI2Connect can signal this in its return code */
++	if (err->majorCode == codes->major_opcode &&
++	    err->minorCode == X_DRI2Connect &&
++	    err->errorCode == BadRequest) {
++		*ret_code = False;
++		return True;
++	}
++
++	return False;
++}
++
++static Bool
++DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase)
++{
++	XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++	if (XextHasExtension(info)) {
++		*eventBase = info->codes->first_event;
++		*errorBase = info->codes->first_error;
++		return True;
++	}
++
++	return False;
++}
++
++static Bool
++DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
++{
++	XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++	xDRI2ConnectReply rep;
++	xDRI2ConnectReq *req;
++
++	XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++	LockDisplay(dpy);
++	GetReq(DRI2Connect, req);
++	req->reqType = info->codes->major_opcode;
++	req->dri2ReqType = X_DRI2Connect;
++	req->window = window;
++	req->driverType = DRI2DriverDRI;
++	if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
++		UnlockDisplay(dpy);
++		SyncHandle();
++		return False;
++	}
++
++	if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) {
++		UnlockDisplay(dpy);
++		SyncHandle();
++		return False;
++	}
++
++	*driverName = Xmalloc(rep.driverNameLength + 1);
++	if (*driverName == NULL) {
++		_XEatData(dpy,
++			  ((rep.driverNameLength + 3) & ~3) +
++			  ((rep.deviceNameLength + 3) & ~3));
++		UnlockDisplay(dpy);
++		SyncHandle();
++		return False;
++	}
++	_XReadPad(dpy, *driverName, rep.driverNameLength);
++	(*driverName)[rep.driverNameLength] = '\0';
++
++	*deviceName = Xmalloc(rep.deviceNameLength + 1);
++	if (*deviceName == NULL) {
++		Xfree(*driverName);
++		_XEatData(dpy, ((rep.deviceNameLength + 3) & ~3));
++		UnlockDisplay(dpy);
++		SyncHandle();
++		return False;
++	}
++	_XReadPad(dpy, *deviceName, rep.deviceNameLength);
++	(*deviceName)[rep.deviceNameLength] = '\0';
++
++	UnlockDisplay(dpy);
++	SyncHandle();
++
++	return True;
++}
++
++static Bool
++DRI2Authenticate(Display * dpy, XID window, unsigned int magic)
++{
++	XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++	xDRI2AuthenticateReq *req;
++	xDRI2AuthenticateReply rep;
++
++	XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++	LockDisplay(dpy);
++	GetReq(DRI2Authenticate, req);
++	req->reqType = info->codes->major_opcode;
++	req->dri2ReqType = X_DRI2Authenticate;
++	req->window = window;
++	req->magic = magic;
++
++	if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
++		UnlockDisplay(dpy);
++		SyncHandle();
++		return False;
++	}
++
++	UnlockDisplay(dpy);
++	SyncHandle();
++
++	return rep.authenticated;
++}
++
++static void
++DRI2CreateDrawable(Display * dpy, XID drawable)
++{
++	XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++	xDRI2CreateDrawableReq *req;
++
++	XextSimpleCheckExtension(dpy, info, dri2ExtensionName);
++
++	LockDisplay(dpy);
++	GetReq(DRI2CreateDrawable, req);
++	req->reqType = info->codes->major_opcode;
++	req->dri2ReqType = X_DRI2CreateDrawable;
++	req->drawable = drawable;
++	UnlockDisplay(dpy);
++	SyncHandle();
++}
++
++static void DRI2SwapInterval(Display *dpy, XID drawable, int interval)
++{
++    XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++    xDRI2SwapIntervalReq *req;
++
++    XextSimpleCheckExtension (dpy, info, dri2ExtensionName);
++
++    LockDisplay(dpy);
++    GetReq(DRI2SwapInterval, req);
++    req->reqType = info->codes->major_opcode;
++    req->dri2ReqType = X_DRI2SwapInterval;
++    req->drawable = drawable;
++    req->interval = interval;
++    UnlockDisplay(dpy);
++    SyncHandle();
++}
++
++static int _x_error_occurred;
++
++/* Xlib error handler: report the error on stderr and count it. */
++static int
++_check_error_handler(Display *display, XErrorEvent *event)
++{
++	/* XErrorEvent.serial is an unsigned long, so it needs %lu. */
++	fprintf(stderr,
++		"X11 error from display %s, serial=%lu, error=%d, req=%d.%d\n",
++		DisplayString(display),
++		event->serial,
++		event->error_code,
++		event->request_code, event->minor_code);
++	_x_error_occurred++;
++	return False; /* ignored */
++}
++
++static double elapsed(const struct timespec *start,
++		      const struct timespec *end)
++{
++	return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000;
++}
++
++static void run(Display *dpy, Window win)
++{
++	xcb_connection_t *c = XGetXCBConnection(dpy);
++	struct timespec start, end;
++	int n, completed = 0;
++
++	clock_gettime(CLOCK_MONOTONIC, &start);
++	do {
++		for (n = 0; n < 1000; n++) {
++			unsigned int attachments[] = { DRI2BufferBackLeft };
++			unsigned int seq[2];
++
++			seq[0] = xcb_dri2_swap_buffers_unchecked(c, win,
++								 0, 0, 0, 0, 0, 0).sequence;
++
++
++			seq[1] = xcb_dri2_get_buffers_unchecked(c, win,
++								1, 1, attachments).sequence;
++
++			xcb_flush(c);
++			xcb_discard_reply(c, seq[0]);
++			xcb_discard_reply(c, seq[1]);
++			completed++;
++		}
++		clock_gettime(CLOCK_MONOTONIC, &end);
++	} while (end.tv_sec < start.tv_sec + 10);
++
++	printf("%f\n", completed / (elapsed(&start, &end) / 1000000));
++}
++
++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window)
++{
++	XRRScreenResources *res;
++
++	res = XRRGetScreenResourcesCurrent(dpy, window);
++	if (res == NULL)
++		res = XRRGetScreenResources(dpy, window);
++
++	return res;
++}
++
++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id)
++{
++	int i;
++
++	for (i = 0; i < res->nmode; i++) {
++		if (res->modes[i].id == id)
++			return &res->modes[i];
++	}
++
++	return NULL;
++}
++
++/* Open and authenticate the DRM device named by DRI2Connect; returns the fd or -1. */
++static int dri2_open(Display *dpy)
++{
++	drm_auth_t auth;
++	char *driver, *device;
++	int fd;
++
++	if (!DRI2QueryExtension(dpy, &fd, &fd)) /* fd is only scratch space here */
++		return -1;
++	if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device))
++		return -1;
++	fd = open(device, O_RDWR);
++	Xfree(driver);	/* both names were Xmalloc'ed by DRI2Connect */
++	Xfree(device);
++	if (fd < 0)
++		return -1;
++
++	if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth) ||
++	    !DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) {
++		close(fd);	/* do not leak the descriptor on failure */
++		return -1;
++	}
++	return fd;
++}
++
++static void fullscreen(Display *dpy, Window win)
++{
++	Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False);
++	XChangeProperty(dpy, win,
++			XInternAtom(dpy, "_NET_WM_STATE", False),
++			XA_ATOM, 32, PropModeReplace,
++			(unsigned char *)&atom, 1);
++}
++
++static int has_composite(Display *dpy)
++{
++	int event, error;
++	int major, minor;
++
++	if (!XDamageQueryExtension (dpy, &event, &error))
++		return 0;
++
++	if (!XCompositeQueryExtension(dpy, &event, &error))
++		return 0;
++
++	XCompositeQueryVersion(dpy, &major, &minor);
++
++	return major > 0 || minor >= 4;
++}
++
++/* DRI2 swap benchmark; options -d on|off, -v redirected|normal, -w fullscreen|window|root. Returns 77 if a prerequisite is missing. */
++int main(int argc, char **argv)
++{
++	Display *dpy;
++	Window root, win;
++	XRRScreenResources *res;
++	XRRCrtcInfo **original_crtc;
++	XSetWindowAttributes attr;
++	enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN;
++	enum visible {REDIRECTED, NORMAL } v = NORMAL;
++	enum display { OFF, ON } d = OFF;
++	int width, height;
++	int i, fd, c;
++
++	while ((c = getopt(argc, argv, "d:v:w:")) != -1) {
++		switch (c) {
++		case 'd':
++			if (strcmp(optarg, "off") == 0)
++				d = OFF;
++			else if (strcmp(optarg, "on") == 0)
++				d = ON;
++			else
++				abort();
++			break;
++
++		case 'v':
++			if (strcmp(optarg, "redirected") == 0)
++				v = REDIRECTED;
++			else if (strcmp(optarg, "normal") == 0)
++				v = NORMAL;
++			else
++				abort();
++			break;
++
++		case 'w':
++			if (strcmp(optarg, "fullscreen") == 0)
++				w = FULLSCREEN;
++			else if (strcmp(optarg, "window") == 0)
++				w = WINDOW;
++			else if (strcmp(optarg, "root") == 0)
++				w = ROOT;
++			else
++				abort();
++			break;
++		}
++	}
++
++	attr.override_redirect = 1;
++
++	dpy = XOpenDisplay(NULL);
++	if (dpy == NULL)
++		return 77;
++
++	width = DisplayWidth(dpy, DefaultScreen(dpy));
++	height = DisplayHeight(dpy, DefaultScreen(dpy));
++
++	fd = dri2_open(dpy);
++	if (fd < 0)
++		return 77;
++
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSDisable(dpy);
++
++	root = DefaultRootWindow(dpy);
++
++	signal(SIGALRM, SIG_IGN);
++	XSetErrorHandler(_check_error_handler);
++
++	res = NULL;
++	if (XRRQueryVersion(dpy, &i, &i))
++		res = _XRRGetScreenResourcesCurrent(dpy, root);
++	if (res == NULL)
++		return 77;
++
++	if (v == REDIRECTED && !has_composite(dpy))
++		return 77;
++
++	original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc);
++	for (i = 0; i < res->ncrtc; i++) /* remember the configuration to restore */
++		original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]);
++
++	for (i = 0; i < res->ncrtc; i++) /* disable every CRTC */
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	DRI2CreateDrawable(dpy, root);
++	DRI2SwapInterval(dpy, root, 0);
++
++	if (d != OFF) {
++		for (i = 0; i < res->noutput; i++) {
++			XRROutputInfo *output;
++			XRRModeInfo *mode;
++
++			output = XRRGetOutputInfo(dpy, res, res->outputs[i]);
++			if (output == NULL)
++				continue;
++
++			mode = NULL;
++			if (output->nmode && output->ncrtc) /* modes[0] and crtcs[0] must exist */
++				mode = lookup_mode(res, output->modes[0]);
++			if (mode == NULL)
++				continue;
++
++			XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime,
++					 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1);
++			width = mode->width;
++			height = mode->height;
++			break;
++		}
++		if (i == res->noutput) { /* no output could be enabled */
++			_x_error_occurred = 77;
++			goto restore;
++		}
++	}
++
++	if (w == ROOT) {
++		run(dpy, root);
++	} else if (w == FULLSCREEN) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width, height, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		DRI2CreateDrawable(dpy, win);
++		DRI2SwapInterval(dpy, win, 0);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		} else
++			fullscreen(dpy, win);
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	} else if (w == WINDOW) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width/2, height/2, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		DRI2CreateDrawable(dpy, win);
++		DRI2SwapInterval(dpy, win, 0);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		}
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	}
++
++restore:
++	for (i = 0; i < res->ncrtc; i++) /* disable everything before restoring */
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	for (i = 0; i < res->ncrtc; i++)
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 original_crtc[i]->x,
++				 original_crtc[i]->y,
++				 original_crtc[i]->mode,
++				 original_crtc[i]->rotation,
++				 original_crtc[i]->outputs,
++				 original_crtc[i]->noutput);
++
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSEnable(dpy);
++
++	XSync(dpy, True);
++	return _x_error_occurred;
++}
+diff --git a/benchmarks/dri3-swap.c b/benchmarks/dri3-swap.c
+new file mode 100644
+index 0000000..4dd423b
+--- /dev/null
++++ b/benchmarks/dri3-swap.c
+@@ -0,0 +1,595 @@
++/*
++ * Copyright (c) 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <X11/Xlib.h>
++#include <X11/Xatom.h>
++#include <X11/Xlib-xcb.h>
++#include <X11/xshmfence.h>
++#include <X11/Xutil.h>
++#include <X11/Xlibint.h>
++#include <X11/extensions/Xcomposite.h>
++#include <X11/extensions/Xdamage.h>
++#include <X11/extensions/dpms.h>
++#include <X11/extensions/randr.h>
++#include <X11/extensions/Xrandr.h>
++#include <xcb/xcb.h>
++#include <xcb/present.h>
++#include <xcb/dri3.h>
++#include <xcb/xfixes.h>
++#include <xf86drm.h>
++#include <i915_drm.h>
++
++#include <stdio.h>
++#include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <assert.h>
++#include <errno.h>
++#include <setjmp.h>
++#include <signal.h>
++
++struct dri3_fence {
++	XID xid;
++	void *addr;
++};
++
++static int _x_error_occurred;
++static uint32_t stamp;
++
++struct list {
++    struct list *next, *prev;
++};
++
++static void
++list_init(struct list *list)
++{
++    list->next = list->prev = list;
++}
++
++static inline void
++__list_add(struct list *entry,
++	    struct list *prev,
++	    struct list *next)
++{
++    next->prev = entry;
++    entry->next = next;
++    entry->prev = prev;
++    prev->next = entry;
++}
++
++static inline void
++list_add(struct list *entry, struct list *head)
++{
++    __list_add(entry, head, head->next);
++}
++
++static inline void
++__list_del(struct list *prev, struct list *next)
++{
++	next->prev = prev;
++	prev->next = next;
++}
++
++static inline void
++_list_del(struct list *entry)
++{
++    __list_del(entry->prev, entry->next);
++}
++
++static inline void
++list_move(struct list *list, struct list *head)
++{
++	if (list->prev != head) {
++		_list_del(list);
++		list_add(list, head);
++	}
++}
++
++#define __container_of(ptr, sample, member)				\
++    (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample)))
++
++#define list_for_each_entry(pos, head, member)				\
++    for (pos = __container_of((head)->next, pos, member);		\
++	 &pos->member != (head);					\
++	 pos = __container_of(pos->member.next, pos, member))
++
++/* Xlib error handler: report the error on stdout and count it. */
++static int
++_check_error_handler(Display *display, XErrorEvent *event)
++{
++	/* XErrorEvent.serial is an unsigned long, so it needs %lu. */
++	printf("X11 error from display %s, serial=%lu, error=%d, req=%d.%d\n",
++	       DisplayString(display),
++	       event->serial,
++	       event->error_code,
++	       event->request_code, event->minor_code);
++	_x_error_occurred++;
++	return False; /* ignored */
++}
++
++static int dri3_create_fence(Display *dpy,
++			     Pixmap pixmap,
++			     struct dri3_fence *fence)
++{
++	xcb_connection_t *c = XGetXCBConnection(dpy);
++	struct dri3_fence f;
++	int fd;
++
++	fd = xshmfence_alloc_shm();
++	if (fd < 0)
++		return -1;
++
++	f.addr = xshmfence_map_shm(fd);
++	if (f.addr == NULL) {
++		close(fd);
++		return -1;
++	}
++
++	f.xid = xcb_generate_id(c);
++	xcb_dri3_fence_from_fd(c, pixmap, f.xid, 0, fd);
++
++	*fence = f;
++	return 0;
++}
++
++static double elapsed(const struct timespec *start,
++		      const struct timespec *end)
++{
++	return 1e6*(end->tv_sec - start->tv_sec) + (end->tv_nsec - start->tv_nsec)/1000;
++}
++
++struct buffer {
++	struct list link;
++	Pixmap pixmap;
++	struct dri3_fence fence;
++	int fd;
++	int busy;
++};
++
++static void run(Display *dpy, Window win)
++{
++	xcb_connection_t *c = XGetXCBConnection(dpy);
++	struct timespec start, end;
++#define N_BACK 8
++	struct buffer buffer[N_BACK];
++	struct list mru;
++	Window root;
++	unsigned int width, height;
++	unsigned border, depth;
++	unsigned present_flags = XCB_PRESENT_OPTION_ASYNC;
++	xcb_xfixes_region_t update = 0;
++	int completed = 0;
++	int queued = 0;
++	uint32_t eid;
++	void *Q;
++	int i, n;
++
++	list_init(&mru);
++
++	XGetGeometry(dpy, win,
++		     &root, &i, &n, &width, &height, &border, &depth);
++
++	_x_error_occurred = 0;
++
++	for (n = 0; n < N_BACK; n++) {
++		xcb_dri3_buffer_from_pixmap_reply_t *reply;
++		int *fds;
++
++		buffer[n].pixmap =
++			XCreatePixmap(dpy, win, width, height, depth);
++		buffer[n].fence.xid = 0;
++		buffer[n].fd = -1;
++
++		if (dri3_create_fence(dpy, win, &buffer[n].fence))
++			return;
++
++		reply = xcb_dri3_buffer_from_pixmap_reply (c,
++							   xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap),
++							   NULL);
++		if (reply == NULL)
++			return;
++
++		fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply);
++		buffer[n].fd = fds[0];
++		free(reply);
++
++		/* start idle */
++		xshmfence_trigger(buffer[n].fence.addr);
++		buffer[n].busy = 0;
++		list_add(&buffer[n].link, &mru);
++	}
++
++	eid = xcb_generate_id(c);
++	xcb_present_select_input(c, eid, win,
++                                 XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY |
++                                 XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY);
++	Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp);
++
++	clock_gettime(CLOCK_MONOTONIC, &start);
++	do {
++		for (n = 0; n < 1000; n++) {
++			struct buffer *tmp, *b = NULL;
++			list_for_each_entry(tmp, &mru, link) {
++				if (!tmp->busy) {
++					b = tmp;
++					break;
++				}
++			}
++			while (b == NULL) {
++				xcb_present_generic_event_t *ev;
++
++				ev = (xcb_present_generic_event_t *)
++					xcb_wait_for_special_event(c, Q);
++				if (ev == NULL)
++					abort();
++
++				do {
++					switch (ev->evtype) {
++					case XCB_PRESENT_COMPLETE_NOTIFY:
++						completed++;
++						queued--;
++						break;
++
++					case XCB_PRESENT_EVENT_IDLE_NOTIFY:
++						{
++							xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev;
++							assert(ie->serial < N_BACK);
++							buffer[ie->serial].busy = 0;
++							if (b == NULL)
++								b = &buffer[ie->serial];
++							break;
++						}
++					}
++					free(ev);
++				} while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q)));
++			}
++
++			b->busy = 1;
++			if (b->fence.xid) {
++				xshmfence_await(b->fence.addr);
++				xshmfence_reset(b->fence.addr);
++			}
++			xcb_present_pixmap(c, win, b->pixmap, b - buffer,
++					   0, /* valid */
++					   update, /* update */
++					   0, /* x_off */
++					   0, /* y_off */
++					   None,
++					   None, /* wait fence */
++					   b->fence.xid,
++					   present_flags,
++					   0, /* target msc */
++					   0, /* divisor */
++					   0, /* remainder */
++					   0, NULL);
++			list_move(&b->link, &mru);
++			queued++;
++			xcb_flush(c);
++		}
++		clock_gettime(CLOCK_MONOTONIC, &end);
++	} while (end.tv_sec < start.tv_sec + 10);
++
++	while (queued) {
++		xcb_present_generic_event_t *ev;
++
++		ev = (xcb_present_generic_event_t *)
++			xcb_wait_for_special_event(c, Q);
++		if (ev == NULL)
++			abort();
++
++		do {
++			switch (ev->evtype) {
++			case XCB_PRESENT_COMPLETE_NOTIFY:
++				completed++;
++				queued--;
++				break;
++
++			case XCB_PRESENT_EVENT_IDLE_NOTIFY:
++				break;
++			}
++			free(ev);
++		} while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q)));
++	}
++	clock_gettime(CLOCK_MONOTONIC, &end);
++
++	printf("%f\n", completed / (elapsed(&start, &end) / 1000000));
++}
++
++/* Returns 1 if the server replies to a Present QueryVersion request, else 0. */
++static int has_present(Display *dpy)
++{
++	xcb_connection_t *c = XGetXCBConnection(dpy);
++	xcb_generic_error_t *error = NULL;
++	void *reply;
++
++	reply = xcb_present_query_version_reply(c,
++			xcb_present_query_version(c, XCB_PRESENT_MAJOR_VERSION,
++						  XCB_PRESENT_MINOR_VERSION),
++			&error);
++	free(error);
++
++	/* Test the pointer before releasing it: its value is indeterminate after free(). */
++	if (reply == NULL) {
++		fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy));
++		return 0;
++	}
++	free(reply);
++	return 1;
++}
++
++static int has_composite(Display *dpy)
++{
++	int event, error;
++	int major, minor;
++
++	if (!XDamageQueryExtension (dpy, &event, &error))
++		return 0;
++
++	if (!XCompositeQueryExtension(dpy, &event, &error))
++		return 0;
++
++	XCompositeQueryVersion(dpy, &major, &minor);
++
++	return major > 0 || minor >= 4;
++}
++
++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window)
++{
++	XRRScreenResources *res;
++
++	res = XRRGetScreenResourcesCurrent(dpy, window);
++	if (res == NULL)
++		res = XRRGetScreenResources(dpy, window);
++
++	return res;
++}
++
++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id)
++{
++	int i;
++
++	for (i = 0; i < res->nmode; i++) {
++		if (res->modes[i].id == id)
++			return &res->modes[i];
++	}
++
++	return NULL;
++}
++
++static void fullscreen(Display *dpy, Window win)
++{
++	Atom atom = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False);
++	XChangeProperty(dpy, win,
++			XInternAtom(dpy, "_NET_WM_STATE", False),
++			XA_ATOM, 32, PropModeReplace,
++			(unsigned char *)&atom, 1);
++}
++
++static int dri3_query_version(Display *dpy, int *major, int *minor)
++{
++	xcb_connection_t *conn = XGetXCBConnection(dpy);
++	xcb_dri3_query_version_reply_t *ver;
++	xcb_generic_error_t *err;
++
++	/* Report -1/-1 and return -1 unless the server answers the query. */
++	*major = -1;
++	*minor = -1;
++
++	ver = xcb_dri3_query_version_reply(conn,
++					   xcb_dri3_query_version(conn,
++								  XCB_DRI3_MAJOR_VERSION,
++								  XCB_DRI3_MINOR_VERSION),
++					   &err);
++	free(err);
++	if (ver == NULL)
++		return -1;
++	*major = ver->major_version;
++	*minor = ver->minor_version;
++	free(ver);
++	return 0;
++}
++
++static int has_dri3(Display *dpy)
++{
++	const xcb_query_extension_reply_t *info;
++	int major, minor;
++
++	/* Usable only when the extension is present and it answers
++	 * the version query; either failure reports "no DRI3". */
++	info = xcb_get_extension_data(XGetXCBConnection(dpy), &xcb_dri3_id);
++	if (info != NULL && info->present &&
++	    dri3_query_version(dpy, &major, &minor) >= 0)
++		return major >= 0;
++
++	return 0;
++}
++
++int main(int argc, char **argv)
++{
++	Display *dpy;
++	Window root, win;
++	XRRScreenResources *res;
++	XRRCrtcInfo **original_crtc;
++	XSetWindowAttributes attr;
++	enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN;
++	enum visible {REDIRECTED, NORMAL } v = NORMAL;
++	enum display { OFF, ON } d = OFF;
++	int width, height;
++	int i;
++
++	while ((i = getopt(argc, argv, "d:v:w:")) != -1) {
++		switch (i) {
++		case 'd':
++			if (strcmp(optarg, "off") == 0)
++				d = OFF;
++			else if (strcmp(optarg, "on") == 0)
++				d = ON;
++			else
++				abort();
++			break;
++
++		case 'v':
++			if (strcmp(optarg, "redirected") == 0)
++				v = REDIRECTED;
++			else if (strcmp(optarg, "normal") == 0)
++				v = NORMAL;
++			else
++				abort();
++			break;
++
++		case 'w':
++			if (strcmp(optarg, "fullscreen") == 0)
++				w = FULLSCREEN;
++			else if (strcmp(optarg, "window") == 0)
++				w = WINDOW;
++			else if (strcmp(optarg, "root") == 0)
++				w = ROOT;
++			else
++				abort();
++			break;
++		}
++	}
++
++	attr.override_redirect = 1;
++
++	dpy = XOpenDisplay(NULL);
++	if (dpy == NULL)
++		return 77;
++
++	width = DisplayWidth(dpy, DefaultScreen(dpy));
++	height = DisplayHeight(dpy, DefaultScreen(dpy));
++
++	if (!has_present(dpy))
++		return 77;
++
++	if (!has_dri3(dpy))
++		return 77;
++
++	root = DefaultRootWindow(dpy);
++	signal(SIGALRM, SIG_IGN);
++	XSetErrorHandler(_check_error_handler);
++
++	res = NULL;
++	if (XRRQueryVersion(dpy, &i, &i))
++		res = _XRRGetScreenResourcesCurrent(dpy, root);
++	if (res == NULL)
++		return 77;
++
++	if (v == REDIRECTED && !has_composite(dpy))
++		return 77;
++
++	original_crtc = calloc(res->ncrtc, sizeof(*original_crtc));
++	if (original_crtc == NULL && res->ncrtc > 0)
++		return 1;
++	for (i = 0; i < res->ncrtc; i++)
++		original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]);
++	/* No early return remains below, so the DPMSEnable at restore: always pairs with this. */
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSDisable(dpy);
++	for (i = 0; i < res->ncrtc; i++)
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	if (d != OFF) {
++		for (i = 0; i < res->noutput; i++) {
++			XRROutputInfo *output;
++			XRRModeInfo *mode;
++
++			output = XRRGetOutputInfo(dpy, res, res->outputs[i]);
++			if (output == NULL)
++				continue;
++
++			mode = NULL;
++			if (output->nmode > 0 && output->ncrtc > 0) /* modes[0] and crtcs[0] must exist */
++				mode = lookup_mode(res, output->modes[0]);
++			if (mode == NULL)
++				continue;
++
++			XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime,
++					 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1);
++			width = mode->width;
++			height = mode->height;
++			break;
++		}
++		if (i == res->noutput) {
++			_x_error_occurred = 77;
++			goto restore;
++		}
++	}
++
++	if (w == ROOT) {
++		run(dpy, root);
++	} else if (w == FULLSCREEN) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width, height, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		} else
++			fullscreen(dpy, win);
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	} else if (w == WINDOW) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width/2, height/2, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		}
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	}
++
++restore:
++	for (i = 0; i < res->ncrtc; i++)
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	for (i = 0; i < res->ncrtc; i++) {
++		XRRCrtcInfo *o = original_crtc[i];
++		if (o == NULL) /* the startup query failed; nothing saved to put back */
++			continue;
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 o->x, o->y, o->mode, o->rotation,
++				 o->outputs, o->noutput);
++	}
++
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSEnable(dpy);
++
++	XSync(dpy, True);
++	return _x_error_occurred;
++}
+diff --git a/configure.ac b/configure.ac
+index 61bea43..9aa7d97 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -195,18 +195,24 @@ AC_ARG_ENABLE(udev,
+               [UDEV="$enableval"],
+               [UDEV=auto])
+ 
++udev_msg=" disabled"
+ if test "x$UDEV" != "xno"; then
+ 	PKG_CHECK_MODULES(UDEV, [libudev], [udev="yes"], [udev="no"])
++	AC_CHECK_HEADERS([sys/stat.h], [], [udev="no"])
+ 	if test "x$UDEV" = "xyes" -a "x$udev" != "xyes"; then
+ 		AC_MSG_ERROR([udev support requested but not found (libudev)])
+ 	fi
+ 	if test "x$udev" = "xyes"; then
+ 		AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection])
++		udev_msg=" yes"
++	else
++		udev_msg=" no"
+ 	fi
+ fi
+ 
+-PKG_CHECK_MODULES(X11, [x11 xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"])
++PKG_CHECK_MODULES(X11, [x11 x11-xcb xcb-dri2 xcomposite xdamage xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"])
+ AM_CONDITIONAL(HAVE_X11, test "x$x11" = "xyes")
++echo X11_CFLAGS="$X11_CFLAGS" X11_LIBS="$X11_LIBS"
+ 
+ cpuid="yes"
+ AC_TRY_LINK([
+@@ -270,7 +276,7 @@ if test "x$shm" = "xyes"; then
+ 	AC_DEFINE([HAVE_MIT_SHM], 1, [Define to 1 if MIT-SHM is available])
+ fi
+ 
+-PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-present x11-xcb xshmfence x11 xrender xext libdrm], [x11_dri3="yes"], [x11_dri3="no"])
++PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-xfixes xcb-present x11-xcb xshmfence x11 xcomposite xdamage xrender xrandr xxf86vm xext libdrm], [x11_dri3="yes"], [x11_dri3="no"])
+ AM_CONDITIONAL(X11_DRI3, test "x$x11_dri3" = "xyes" -a "x$shm" = "xyes")
+ AM_CONDITIONAL(X11_SHM, test "x$shm" = "xyes")
+ 
+@@ -307,6 +313,8 @@ if test "x$tools" != "xno"; then
+ 		tools="no"
+ 	fi
+ 
++	PKG_CHECK_MODULES(TOOL_CURSOR, [xfixes x11 libpng], [cursor="yes"], [cursor="no"])
++
+ 	IVO_CFLAGS="$IVO_CFLAGS $extra_cflags"
+ fi
+ if test "x$tools" != "xno"; then
+@@ -315,6 +323,7 @@ fi
+ AC_MSG_CHECKING([whether to build additional tools])
+ AC_MSG_RESULT([$tools])
+ AM_CONDITIONAL(BUILD_TOOLS, test "x$tools" != "xno")
++AM_CONDITIONAL(BUILD_TOOL_CURSOR, test "x$cursor" = "xyes")
+ 
+ # Define a configure option for an alternate module directory
+ AC_ARG_WITH(xorg-module-dir,
+@@ -339,10 +348,20 @@ AC_ARG_ENABLE(dri2,
+ 	      [DRI2=$enableval],
+ 	      [DRI2=yes])
+ AC_ARG_ENABLE(dri3,
+-	      AS_HELP_STRING([--enable-dri3],
+-			     [Enable DRI3 support [[default=no]]]),
++	      AS_HELP_STRING([--disable-dri3],
++			     [Disable DRI3 support [[default=yes]]]),
+ 	      [DRI3=$enableval],
+-	      [DRI3=no])
++	      [DRI3=yes])
++AC_ARG_WITH(default-dri,
++	    AS_HELP_STRING([--with-default-dri],
++			   [Select the default maximum DRI level [default 2]]),
++	      [DRI_DEFAULT=$withval],
++	      [DRI_DEFAULT=2])
++if test "x$DRI_DEFAULT" = "x0"; then
++	AC_DEFINE(DEFAULT_DRI_LEVEL, 0,[Default DRI level])
++else
++	AC_DEFINE(DEFAULT_DRI_LEVEL, ~0, [Default DRI level])
++fi
+ 
+ AC_ARG_ENABLE(xvmc, AS_HELP_STRING([--disable-xvmc],
+                                   [Disable XvMC support [[default=yes]]]),
+@@ -375,14 +394,12 @@ AC_ARG_ENABLE(ums-only,
+ required_xorg_server_version=1.6
+ required_pixman_version=0.16
+ 
+-if pkg-config --exists 'pixman-1 >= 0.27.1'; then
+-	AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])
+-fi
+-
+-if pkg-config --exists 'pixman-1 >= 0.24.0'; then
+-	AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation])
+-fi
+-
++PKG_CHECK_EXISTS([pixman-1 >= 0.24.0],
++		 [AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation])],
++		 [])
++PKG_CHECK_EXISTS([pixman-1 >= 0.27.1],
++		 [AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])],
++		 [])
+ # Store the list of server defined optional extensions in REQUIRED_MODULES
+ XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
+ XORG_DRIVER_CHECK_EXT(RENDER, renderproto)
+@@ -398,24 +415,25 @@ AC_ARG_ENABLE(sna,
+ 	      [SNA="$enableval"],
+ 	      [SNA=auto])
+ 
++AC_CHECK_HEADERS([dev/wscons/wsconsio.h])
++AC_FUNC_ALLOCA
++AC_HEADER_MAJOR
++
+ if test "x$SNA" != "xno"; then
+ 	AC_DEFINE(USE_SNA, 1, [Enable SNA support])
+ 	AC_CHECK_HEADERS([sys/sysinfo.h], AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [[#include <sys/sysinfo.h>]]))
+ fi
+ 
+ uxa_requires_libdrm=2.4.52
++uxa_requires_pixman=0.24.0
++
+ AC_ARG_ENABLE(uxa,
+ 	      AS_HELP_STRING([--enable-uxa],
+ 			     [Enable Unified Acceleration Architecture (UXA) [default=auto]]),
+ 	      [UXA="$enableval"],
+ 	      [UXA=auto])
+ if test "x$UXA" = "xauto"; then
+-	if ! pkg-config --exists "libdrm_intel >= $uxa_requires_libdrm"; then
+-		UXA=no
+-	fi
+-	if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then
+-		UXA=no
+-	fi
++	PKG_CHECK_EXISTS([libdrm_intel >= $uxa_requires_libdrm pixman-1 >= $uxa_requires_pixman], [], [UXA=no])
+ fi
+ if test "x$UXA" != "xno"; then
+ 	AC_DEFINE(USE_UXA, 1, [Enable UXA support])
+@@ -426,6 +444,8 @@ fi
+ 
+ PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES])
+ ABI_VERSION=`$PKG_CONFIG --variable=abi_videodrv xorg-server`
++XSERVER_VERSION=`$PKG_CONFIG --modversion xorg-server`
++PIXMAN_VERSION=`$PKG_CONFIG --modversion pixman-1`
+ 
+ if test "x$ONLY_UMS" = "xyes"; then
+ 	UMS="yes"
+@@ -519,7 +539,12 @@ AC_MSG_RESULT([$have_dri1])
+ AM_CONDITIONAL(DRI1, test "x$have_dri1" != "xno")
+ if test "x$have_dri1" != "xno"; then
+         AC_DEFINE(HAVE_DRI1,1,[Enable DRI1 driver support])
+-	dri_msg="$dri_msg DRI1"
++	str="DRI1"
++	if test "x$DRI_DEFAULT" = "x1"; then
++		AC_DEFINE(DEFAULT_DRI_LEVEL,1,[Default DRI level])
++		str="*$str"
++	fi
++	dri_msg="$dri_msg $str"
+ else
+         DRI1_CFLAGS=""
+         DRI1_LIBS=""
+@@ -576,7 +601,12 @@ AM_CONDITIONAL(DRI2, test "x$have_dri2" != "xno")
+ AC_MSG_RESULT([$have_dri2])
+ if test "x$have_dri2" != "xno"; then
+         AC_DEFINE(HAVE_DRI2,1,[Enable DRI2 driver support])
+-	dri_msg="$dri_msg DRI2"
++	str="DRI2"
++	if test "x$DRI_DEFAULT" = "x2"; then
++		AC_DEFINE(DEFAULT_DRI_LEVEL,2,[Default DRI level])
++		str="*$str"
++	fi
++	dri_msg="$dri_msg $str"
+ else
+ 	if test "x$DRI" = "xyes" -a "x$DRI2" != "xno" -a "x$KMS" = "xyes"; then
+ 		AC_MSG_ERROR([DRI2 requested but prerequisites not found])
+@@ -591,13 +621,21 @@ AM_CONDITIONAL(DRI3, test "x$have_dri3" != "xno")
+ AC_MSG_RESULT([$have_dri3])
+ if test "x$have_dri3" != "xno"; then
+         AC_DEFINE(HAVE_DRI3,1,[Enable DRI3 driver support])
+-	dri_msg="$dri_msg DRI3"
++	str="DRI3"
++	if test "x$DRI_DEFAULT" = "x3"; then
++		AC_DEFINE(DEFAULT_DRI_LEVEL,3,[Default DRI level])
++		str="*$str"
++	fi
++	dri_msg="$dri_msg $str"
+ else
+ 	if test "x$DRI" = "xyes" -a "x$DRI3" != "xno" -a "x$KMS" = "xyes"; then
+ 		AC_MSG_ERROR([DRI3 requested but prerequisites not found])
+ 	fi
+ fi
+ 
++AC_MSG_CHECKING([default DRI support])
++AC_MSG_RESULT([$DRI_DEFAULT])
++
+ AC_CHECK_HEADERS([X11/extensions/dpmsconst.h])
+ 
+ PRESENT="no"
+@@ -711,27 +749,6 @@ if test "x$TEARFREE" = "xyes"; then
+ 	xp_msg="$xp_msg TearFree"
+ fi
+ 
+-AC_ARG_ENABLE(rendernode,
+-	      AS_HELP_STRING([--enable-rendernode],
+-			     [Enable use of render nodes (experimental) [default=no]]),
+-	      [RENDERNODE="$enableval"],
+-	      [RENDERNODE="no"])
+-AM_CONDITIONAL(USE_RENDERNODE, test "x$RENDERNODE" = "xyes")
+-if test "x$RENDERNODE" = "xyes"; then
+-	AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support])
+-	xp_msg="$xp_msg rendernode"
+-fi
+-
+-AC_ARG_ENABLE(wc-mmap,
+-	      AS_HELP_STRING([--enable-wc-mmap],
+-			     [Enable use of WriteCombining mmaps [default=no]]),
+-	      [WC_MMAP="$enableval"],
+-	      [WC_MMAP="no"])
+-if test "x$WC_MMAP" = "xyes"; then
+-	AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps])
+-	xp_msg="$xp_msg mmap(wc)"
+-fi
+-
+ AC_ARG_ENABLE(create2,
+ 	      AS_HELP_STRING([--enable-create2],
+ 			     [Enable use of create2 ioctl (experimental) [default=no]]),
+@@ -848,6 +865,7 @@ AC_CONFIG_FILES([
+                 xvmc/shader/mc/Makefile
+                 xvmc/shader/vld/Makefile
+ 		test/Makefile
++		benchmarks/Makefile
+ 		tools/Makefile
+ 		tools/org.x.xf86-video-intel.backlight-helper.policy
+ ])
+@@ -855,7 +873,7 @@ AC_OUTPUT
+ 
+ echo ""
+ echo ""
+-test -e `pwd $0`/README && cat `pwd $0`/README
++cat $srcdir/README
+ 
+ accel_msg=""
+ if test "x$SNA" != "xno"; then
+@@ -895,13 +913,15 @@ fi
+ 
+ echo ""
+ echo "AC_PACKAGE_STRING will be compiled with:"
+-echo "  Xorg Video ABI version: $ABI_VERSION"
++echo "  Xorg Video ABI version: $ABI_VERSION (xorg-server-$XSERVER_VERSION)"
++echo "  pixman version: pixman-1-$PIXMAN_VERSION"
+ echo "  Acceleration backends:$accel_msg"
+ echo "  Additional debugging support?$debug_msg"
+ echo "  Support for Kernel Mode Setting? $KMS"
+ echo "  Support for legacy User Mode Setting (for i810)? $UMS"
+ echo "  Support for Direct Rendering Infrastructure:$dri_msg"
+ echo "  Support for Xv motion compensation (XvMC and libXvMC):$xvmc_msg"
++echo "  Support for display hotplug notifications (udev):$udev_msg"
+ echo "  Build additional tools and utilities?$tools_msg"
+ if test -n "$xp_msg"; then
+ echo "  Experimental support:$xp_msg"
+diff --git a/libobj/alloca.c b/libobj/alloca.c
+new file mode 100644
+index 0000000..883e1e9
+--- /dev/null
++++ b/libobj/alloca.c
+@@ -0,0 +1,4 @@
++void *alloca(size_t sz)
++{
++	return NULL;
++}
+diff --git a/man/intel.man b/man/intel.man
+index 1751520..8da496e 100644
+--- a/man/intel.man
++++ b/man/intel.man
+@@ -112,8 +112,8 @@ The default is 8192 if AGP allocable memory is < 128 MB, 16384 if < 192 MB,
+ 24576 if higher. DRI require at least a value of 16384. Higher values may give
+ better 3D performance, at expense of available system memory.
+ .TP
+-.BI "Option \*qNoAccel\*q \*q" boolean \*q
+-Disable or enable acceleration.
++.BI "Option \*qAccel\*q \*q" boolean \*q
++Enable or disable acceleration.
+ .IP
+ Default: acceleration is enabled.
+ 
+@@ -122,8 +122,8 @@ The following driver
+ .B Options
+ are supported for the 830M and later chipsets:
+ .TP
+-.BI "Option \*qNoAccel\*q \*q" boolean \*q
+-Disable or enable acceleration.
++.BI "Option \*qAccel\*q \*q" boolean \*q
++Enable or disable acceleration.
+ .IP
+ Default: acceleration is enabled.
+ .TP
+@@ -201,6 +201,16 @@ that choice by specifying the entry under /sys/class/backlight to use.
+ .IP
+ Default: Automatic selection.
+ .TP
++.BI "Option \*qCustomEDID\*q \*q" string \*q
++Override the probed EDID on particular outputs. Sometimes the manufacturer
++supplied EDID is corrupt or lacking a few usable modes and supplying a
++corrected EDID may be easier than specifying every modeline. This option
++lets you give, per output, the path of a file to load the EDID from. The
++format is a comma-separated string of output:path pairs, e.g.
++DP1:/path/to/dp1.edid,DP2:/path/to/dp2.edid
++.IP
++Default: No override, use manufacturer supplied EDIDs.
++.TP
+ .BI "Option \*qFallbackDebug\*q \*q" boolean \*q
+ Enable printing of debugging information on acceleration fallbacks to the
+ server log.
+@@ -225,6 +235,15 @@ i.e. perform synchronous rendering.
+ .IP
+ Default: Disabled
+ .TP
++.BI "Option \*qHWRotation\*q \*q" boolean \*q
++Override the use of native hardware rotation and force the use of software,
++but GPU accelerated where possible, rotation. On some platforms the hardware
++can scanout directly into a rotated output bypassing the intermediate rendering
++and extra allocations required for software implemented rotation (i.e. native
++rotation uses less resources, is quicker and uses less power). This allows you
++to disable the native rotation in case of errors.
++.IP
++Default: Enabled (use hardware rotation)
+ .TP
+ .BI "Option \*qVSync\*q \*q" boolean \*q
+ This option controls the use of commands to synchronise rendering with the
+@@ -324,13 +343,29 @@ Default: 0
+ .BI "Option \*qZaphodHeads\*q \*q" string \*q
+ .IP
+ Specify the randr output(s) to use with zaphod mode for a particular driver
+-instance.  If you this option you must use it with all instances of the
+-driver
++instance.  If you set this option you must use it with all instances of the
++driver. By default, each head is assigned only one CRTC (which limits
++using multiple outputs with that head to cloned mode). CRTC can be manually
++assigned to individual heads by preceding the output names with a comma
++delimited list of pipe numbers followed by a colon. Note that different pipes
++may be limited in their functionality and some outputs may only work with
++different pipes.
+ .br
+ For example:
++
++.RS
+ .B
+ Option \*qZaphodHeads\*q \*qLVDS1,VGA1\*q
+-will assign xrandr outputs LVDS1 and VGA0 to this instance of the driver.
++
++will assign xrandr outputs LVDS1 and VGA1 to this instance of the driver.
++.RE
++
++.RS
++.B
++Option \*qZaphodHeads\*q \*q0,2:HDMI1,DP2\*q
++
++will assign xrandr outputs HDMI1 and DP2 and CRTCs 0 and 2 to this instance of the driver.
++.RE
+ 
+ .SH OUTPUT CONFIGURATION
+ On 830M and better chipsets, the driver supports runtime configuration of
+@@ -431,11 +466,11 @@ First DVI SDVO output
+ Second DVI SDVO output
+ 
+ .SS "TMDS-1", "TMDS-2", "HDMI-1", "HDMI-2"
+-DVI/HDMI outputs. Avaliable common properties include:
++DVI/HDMI outputs. Available common properties include:
+ .TP
+ \fBBROADCAST_RGB\fP - method used to set RGB color range
+ Adjusting this property allows you to set RGB color range on each
+-channel in order to match HDTV requirment(default 0 for full
++channel in order to match HDTV requirement (default 0 for full
+ range). Setting 1 means RGB color range is 16-235, 0 means RGB color
+ range is 0-255 on each channel.  (Full range is 0-255, not 16-235)
+ 
+diff --git a/src/backlight.c b/src/backlight.c
+index 9f23986..d020a7c 100644
+--- a/src/backlight.c
++++ b/src/backlight.c
+@@ -34,6 +34,12 @@
+ #include <sys/stat.h>
+ #include <sys/ioctl.h>
+ 
++#if MAJOR_IN_MKDEV
++#include <sys/mkdev.h>
++#elif MAJOR_IN_SYSMACROS
++#include <sys/sysmacros.h>
++#endif
++
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+@@ -84,7 +90,7 @@ void backlight_init(struct backlight *b)
+ 	b->has_power = 0;
+ }
+ 
+-#ifdef __OpenBSD__
++#ifdef HAVE_DEV_WSCONS_WSCONSIO_H
+ 
+ #include <dev/wscons/wsconsio.h>
+ #include <xf86Priv.h>
+@@ -146,12 +152,9 @@ int backlight_open(struct backlight *b, char *iface)
+ 	return param.curval;
+ }
+ 
+-enum backlight_type backlight_exists(const char *iface)
++int backlight_exists(const char *iface)
+ {
+-	if (iface != NULL)
+-		return BL_NONE;
+-
+-	return BL_PLATFORM;
++	return iface == NULL;
+ }
+ 
+ int backlight_on(struct backlight *b)
+@@ -244,10 +247,10 @@ static const char *known_interfaces[] = {
+ 	"intel_backlight",
+ };
+ 
+-static enum backlight_type __backlight_type(const char *iface)
++static int __backlight_type(const char *iface)
+ {
+ 	char buf[1024];
+-	int fd, v;
++	int fd, v, i;
+ 
+ 	v = -1;
+ 	fd = __backlight_open(iface, "type", O_RDONLY);
+@@ -261,39 +264,41 @@ static enum backlight_type __backlight_type(const char *iface)
+ 		buf[v] = '\0';
+ 
+ 		if (strcmp(buf, "raw") == 0)
+-			v = BL_RAW;
++			v = BL_RAW << 8;
+ 		else if (strcmp(buf, "platform") == 0)
+-			v = BL_PLATFORM;
++			v = BL_PLATFORM << 8;
+ 		else if (strcmp(buf, "firmware") == 0)
+-			v = BL_FIRMWARE;
++			v = BL_FIRMWARE << 8;
+ 		else
+-			v = BL_NAMED;
++			v = BL_NAMED << 8;
+ 	} else
+-		v = BL_NAMED;
++		v = BL_NAMED << 8;
+ 
+-	if (v == BL_NAMED) {
+-		int i;
+-		for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) {
+-			if (strcmp(iface, known_interfaces[i]) == 0)
+-				break;
+-		}
+-		v += i;
++	for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) {
++		if (strcmp(iface, known_interfaces[i]) == 0)
++			break;
+ 	}
++	v += i;
+ 
+ 	return v;
+ }
+ 
+-enum backlight_type backlight_exists(const char *iface)
++static int __backlight_exists(const char *iface)
+ {
+ 	if (__backlight_read(iface, "brightness") < 0)
+-		return BL_NONE;
++		return -1;
+ 
+ 	if (__backlight_read(iface, "max_brightness") <= 0)
+-		return BL_NONE;
++		return -1;
+ 
+ 	return __backlight_type(iface);
+ }
+ 
++int backlight_exists(const char *iface)
++{
++	return __backlight_exists(iface) != -1;
++}
++
+ static int __backlight_init(struct backlight *b, char *iface, int fd)
+ {
+ 	b->fd = fd_move_cloexec(fd_set_nonblock(fd));
+@@ -399,7 +404,10 @@ __backlight_find(void)
+ 			continue;
+ 
+ 		/* Fallback to priority list of known iface for old kernels */
+-		v = backlight_exists(de->d_name);
++		v = __backlight_exists(de->d_name);
++		if (v < 0)
++			continue;
++
+ 		if (v < best_type) {
+ 			char *copy = strdup(de->d_name);
+ 			if (copy) {
+@@ -416,14 +424,17 @@ __backlight_find(void)
+ 
+ int backlight_open(struct backlight *b, char *iface)
+ {
+-	int level;
++	int level, type;
+ 
+ 	if (iface == NULL)
+ 		iface = __backlight_find();
+ 	if (iface == NULL)
+ 		goto err;
+ 
+-	b->type = __backlight_type(iface);
++	type = __backlight_type(iface);
++	if (type < 0)
++		goto err;
++	b->type = type >> 8;
+ 
+ 	b->max = __backlight_read(iface, "max_brightness");
+ 	if (b->max <= 0)
+@@ -517,7 +528,7 @@ void backlight_disable(struct backlight *b)
+ void backlight_close(struct backlight *b)
+ {
+ 	backlight_disable(b);
+-	if (b->pid)
++	if (b->pid > 0)
+ 		waitpid(b->pid, NULL, 0);
+ }
+ 
+@@ -543,7 +554,10 @@ char *backlight_find_for_device(struct pci_device *pci)
+ 		if (*de->d_name == '.')
+ 			continue;
+ 
+-		v = backlight_exists(de->d_name);
++		v = __backlight_exists(de->d_name);
++		if (v < 0)
++			continue;
++
+ 		if (v < best_type) {
+ 			char *copy = strdup(de->d_name);
+ 			if (copy) {
+diff --git a/src/backlight.h b/src/backlight.h
+index bb0e28b..ba17755 100644
+--- a/src/backlight.h
++++ b/src/backlight.h
+@@ -43,7 +43,7 @@ struct backlight {
+ 	int pid, fd;
+ };
+ 
+-enum backlight_type backlight_exists(const char *iface);
++int backlight_exists(const char *iface);
+ 
+ void backlight_init(struct backlight *backlight);
+ int backlight_open(struct backlight *backlight, char *iface);
+diff --git a/src/compat-api.h b/src/compat-api.h
+index d09e1fb..293e9d7 100644
+--- a/src/compat-api.h
++++ b/src/compat-api.h
+@@ -39,7 +39,13 @@
+ 
+ #ifndef XF86_HAS_SCRN_CONV
+ #define xf86ScreenToScrn(s) xf86Screens[(s)->myNum]
++#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,1,0,0,0)
+ #define xf86ScrnToScreen(s) screenInfo.screens[(s)->scrnIndex]
++#else
++#define xf86ScrnToScreen(s) ((s)->pScreen)
++#endif
++#else
++#define xf86ScrnToScreen(s) ((s)->pScreen)
+ #endif
+ 
+ #ifndef XF86_SCRN_INTERFACE
+@@ -131,6 +137,17 @@ region_rects(const RegionRec *r)
+ 	return r->data ? (const BoxRec *)(r->data + 1) :  &r->extents;
+ }
+ 
++inline static void
++region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e)
++{
++	/* Without a data block the region is the single box r->extents. */
++	const BoxRec *first = r->data ? region_boxptr(r) : &r->extents;
++	int count = r->data ? r->data->numRects : 1;
++
++	*s = first;
++	*e = first + count;
++}
++
+ #ifndef INCLUDE_LEGACY_REGION_DEFINES
+ #define RegionCreate(r, s) REGION_CREATE(NULL, r, s)
+ #define RegionBreak(r) REGION_BREAK(NULL, r)
+@@ -223,4 +240,14 @@ static inline void FreePixmap(PixmapPtr pixmap)
+ 			  dstx, dsty)
+ #endif
+ 
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0)
++#define isGPU(S) (S)->is_gpu
++#else
++#define isGPU(S) 0
++#endif
++
++#endif
++
++#if HAS_DIRTYTRACKING_ROTATION
++#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d)
+ #endif
+diff --git a/src/i915_pciids.h b/src/i915_pciids.h
+index 180ad0e..f1a113e 100644
+--- a/src/i915_pciids.h
++++ b/src/i915_pciids.h
+@@ -208,40 +208,41 @@
+ #define INTEL_VLV_D_IDS(info) \
+ 	INTEL_VGA_DEVICE(0x0155, info)
+ 
+-#define _INTEL_BDW_M(gt, id, info) \
+-	INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info)
+-#define _INTEL_BDW_D(gt, id, info) \
+-	INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info)
+-
+-#define _INTEL_BDW_M_IDS(gt, info) \
+-	_INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \
+-	_INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \
+-	_INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \
+-	_INTEL_BDW_M(gt, 0x160E, info) /* ULX */
+-
+-#define _INTEL_BDW_D_IDS(gt, info) \
+-	_INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
+-	_INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
+-
+-#define INTEL_BDW_GT12M_IDS(info) \
+-	_INTEL_BDW_M_IDS(1, info), \
+-	_INTEL_BDW_M_IDS(2, info)
++#define INTEL_BDW_GT12M_IDS(info)  \
++	INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \
++	INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \
++	INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \
++	INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \
++	INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \
++	INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \
++	INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \
++	INTEL_VGA_DEVICE(0x161E, info)  /* GT2 ULX */
+ 
+ #define INTEL_BDW_GT12D_IDS(info) \
+-	_INTEL_BDW_D_IDS(1, info), \
+-	_INTEL_BDW_D_IDS(2, info)
++	INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \
++	INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \
++	INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \
++	INTEL_VGA_DEVICE(0x161D, info)  /* GT2 Workstation */
+ 
+ #define INTEL_BDW_GT3M_IDS(info) \
+-	_INTEL_BDW_M_IDS(3, info)
++	INTEL_VGA_DEVICE(0x1622, info), /* ULT */ \
++	INTEL_VGA_DEVICE(0x1626, info), /* ULT */ \
++	INTEL_VGA_DEVICE(0x162B, info), /* Iris */ \
++	INTEL_VGA_DEVICE(0x162E, info)  /* ULX */
+ 
+ #define INTEL_BDW_GT3D_IDS(info) \
+-	_INTEL_BDW_D_IDS(3, info)
++	INTEL_VGA_DEVICE(0x162A, info), /* Server */ \
++	INTEL_VGA_DEVICE(0x162D, info)  /* Workstation */
+ 
+ #define INTEL_BDW_RSVDM_IDS(info) \
+-	_INTEL_BDW_M_IDS(4, info)
++	INTEL_VGA_DEVICE(0x1632, info), /* ULT */ \
++	INTEL_VGA_DEVICE(0x1636, info), /* ULT */ \
++	INTEL_VGA_DEVICE(0x163B, info), /* Iris */ \
++	INTEL_VGA_DEVICE(0x163E, info)  /* ULX */
+ 
+ #define INTEL_BDW_RSVDD_IDS(info) \
+-	_INTEL_BDW_D_IDS(4, info)
++	INTEL_VGA_DEVICE(0x163A, info), /* Server */ \
++	INTEL_VGA_DEVICE(0x163D, info)  /* Workstation */
+ 
+ #define INTEL_BDW_M_IDS(info) \
+ 	INTEL_BDW_GT12M_IDS(info), \
+@@ -259,21 +260,71 @@
+ 	INTEL_VGA_DEVICE(0x22b2, info), \
+ 	INTEL_VGA_DEVICE(0x22b3, info)
+ 
+-#define INTEL_SKL_IDS(info) \
+-	INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \
++#define INTEL_SKL_GT1_IDS(info)	\
+ 	INTEL_VGA_DEVICE(0x1906, info), /* ULT GT1 */ \
+-	INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \
+-	INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \
+ 	INTEL_VGA_DEVICE(0x190E, info), /* ULX GT1 */ \
++	INTEL_VGA_DEVICE(0x1902, info), /* DT  GT1 */ \
++	INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \
++	INTEL_VGA_DEVICE(0x190A, info) /* SRV GT1 */
++
++#define INTEL_SKL_GT2_IDS(info)	\
++	INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \
++	INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \
+ 	INTEL_VGA_DEVICE(0x191E, info), /* ULX GT2 */ \
+ 	INTEL_VGA_DEVICE(0x1912, info), /* DT  GT2 */ \
+-	INTEL_VGA_DEVICE(0x1902, info), /* DT  GT1 */ \
+ 	INTEL_VGA_DEVICE(0x191B, info), /* Halo GT2 */ \
+-	INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \
+-	INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \
+ 	INTEL_VGA_DEVICE(0x191A, info), /* SRV GT2 */ \
+-	INTEL_VGA_DEVICE(0x192A, info), /* SRV GT3 */ \
+-	INTEL_VGA_DEVICE(0x190A, info), /* SRV GT1 */ \
+ 	INTEL_VGA_DEVICE(0x191D, info)  /* WKS GT2 */
+ 
++#define INTEL_SKL_GT3_IDS(info) \
++	INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \
++	INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \
++	INTEL_VGA_DEVICE(0x192A, info)  /* SRV GT3 */
++
++#define INTEL_SKL_IDS(info) \
++	INTEL_SKL_GT1_IDS(info), \
++	INTEL_SKL_GT2_IDS(info), \
++	INTEL_SKL_GT3_IDS(info)
++
++#define INTEL_BXT_IDS(info) \
++	INTEL_VGA_DEVICE(0x0A84, info), \
++	INTEL_VGA_DEVICE(0x1A84, info), \
++	INTEL_VGA_DEVICE(0x5A84, info)
++
++#define INTEL_KBL_GT1_IDS(info)	\
++	INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \
++	INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \
++	INTEL_VGA_DEVICE(0x5917, info), /* DT  GT1.5 */ \
++	INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
++	INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
++	INTEL_VGA_DEVICE(0x5902, info), /* DT  GT1 */ \
++	INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
++	INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
++
++#define INTEL_KBL_GT2_IDS(info)	\
++	INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
++	INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
++	INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
++	INTEL_VGA_DEVICE(0x5912, info), /* DT  GT2 */ \
++	INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \
++	INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \
++	INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
++
++#define INTEL_KBL_GT3_IDS(info) \
++	INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
++	INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
++	INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
++
++#define INTEL_KBL_GT4_IDS(info) \
++	INTEL_VGA_DEVICE(0x5932, info), /* DT  GT4 */ \
++	INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
++	INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
++	INTEL_VGA_DEVICE(0x593D, info)  /* WKS GT4 */
++
++#define INTEL_KBL_IDS(info) \
++	INTEL_KBL_GT1_IDS(info), \
++	INTEL_KBL_GT2_IDS(info), \
++	INTEL_KBL_GT3_IDS(info), \
++	INTEL_KBL_GT4_IDS(info)
++
+ #endif /* _I915_PCIIDS_H */
+diff --git a/src/intel_device.c b/src/intel_device.c
+index 140e153..54c1443 100644
+--- a/src/intel_device.c
++++ b/src/intel_device.c
+@@ -38,6 +38,12 @@
+ #include <dirent.h>
+ #include <errno.h>
+ 
++#if MAJOR_IN_MKDEV
++#include <sys/mkdev.h>
++#elif MAJOR_IN_SYSMACROS
++#include <sys/sysmacros.h>
++#endif
++
+ #include <pciaccess.h>
+ 
+ #include <xorg-server.h>
+@@ -197,9 +203,15 @@ static inline struct intel_device *intel_device(ScrnInfoPtr scrn)
+ 	return xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr;
+ }
+ 
++static const char *kernel_module_names[] ={
++	"i915",
++	NULL,
++};
++
+ static int is_i915_device(int fd)
+ {
+ 	drm_version_t version;
++	const char **kn;
+ 	char name[5] = "";
+ 
+ 	memset(&version, 0, sizeof(version));
+@@ -209,7 +221,22 @@ static int is_i915_device(int fd)
+ 	if (drmIoctl(fd, DRM_IOCTL_VERSION, &version))
+ 		return 0;
+ 
+-	return strcmp("i915", name) == 0;
++	for (kn = kernel_module_names; *kn; kn++)
++		if (strcmp(*kn, name) == 0)
++			return 1;
++
++	return 0;
++}
++
++static int load_i915_kernel_module(void)
++{
++	const char **kn;
++
++	/* Returns 0 once any listed module loads, -1 if none did. */
++	for (kn = kernel_module_names; *kn; kn++)
++		if (xf86LoadKernelModule(*kn)) /* non-zero means the module loaded */
++			return 0;
++	return -1;
++}
+ 
+ static int is_i915_gem(int fd)
+@@ -336,7 +363,7 @@ static int __intel_open_device__pci(const struct pci_device *pci)
+ 
+ 		sprintf(path + base, "driver");
+ 		if (stat(path, &st)) {
+-			if (xf86LoadKernelModule("i915"))
++			if (load_i915_kernel_module())
+ 				return -1;
+ 			(void)xf86LoadKernelModule("fbcon");
+ 		}
+@@ -399,7 +426,7 @@ static int __intel_open_device__legacy(const struct pci_device *pci)
+ 
+ 	ret = drmCheckModesettingSupported(id);
+ 	if (ret) {
+-		if (xf86LoadKernelModule("i915"))
++		if (load_i915_kernel_module() == 0)
+ 			ret = drmCheckModesettingSupported(id);
+ 		if (ret)
+ 			return -1;
+@@ -461,9 +488,9 @@ static int is_render_node(int fd, struct stat *st)
+ 
+ static char *find_render_node(int fd)
+ {
+-#if defined(USE_RENDERNODE)
+ 	struct stat master, render;
+ 	char buf[128];
++	int i;
+ 
+ 	/* Are we a render-node ourselves? */
+ 	if (is_render_node(fd, &master))
+@@ -472,9 +499,17 @@ static char *find_render_node(int fd)
+ 	sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xbf));
+ 	if (stat(buf, &render) == 0 &&
+ 	    master.st_mode == render.st_mode &&
+-	    render.st_rdev == ((master.st_rdev | 0x80) & 0xbf))
++	    render.st_rdev == (master.st_rdev | 0x80))
+ 		return strdup(buf);
+-#endif
++
++	/* Misaligned card <-> renderD, do a full search */
++	for (i = 0; i < 16; i++) {
++		sprintf(buf, "/dev/dri/renderD%d", i + 128);
++		if (stat(buf, &render) == 0 &&
++		    master.st_mode == render.st_mode &&
++		    render.st_rdev == (master.st_rdev | 0x80))
++			return strdup(buf);
++	}
+ 
+ 	return NULL;
+ }
+@@ -672,6 +707,12 @@ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd)
+ 	return dev;
+ }
+ 
++const char *intel_get_master_name(struct intel_device *dev)
++{
++	assert(dev && dev->master_node);
++	return dev->master_node;
++}
++
+ const char *intel_get_client_name(struct intel_device *dev)
+ {
+ 	assert(dev && dev->render_node);
+diff --git a/src/intel_driver.h b/src/intel_driver.h
+index 28ed1a0..fc9beaf 100644
+--- a/src/intel_driver.h
++++ b/src/intel_driver.h
+@@ -127,6 +127,7 @@ int intel_open_device(int entity_num,
+ int __intel_peek_fd(ScrnInfoPtr scrn);
+ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd);
+ int intel_has_render_node(struct intel_device *dev);
++const char *intel_get_master_name(struct intel_device *dev);
+ const char *intel_get_client_name(struct intel_device *dev);
+ int intel_get_client_fd(struct intel_device *dev);
+ int intel_get_device_id(struct intel_device *dev);
+diff --git a/src/intel_list.h b/src/intel_list.h
+index 51af825..c8a3187 100644
+--- a/src/intel_list.h
++++ b/src/intel_list.h
+@@ -306,8 +306,7 @@ list_is_empty(const struct list *head)
+     list_entry((ptr)->prev, type, member)
+ 
+ #define __container_of(ptr, sample, member)				\
+-    (void *)((char *)(ptr)						\
+-	     - ((char *)&(sample)->member - (char *)(sample)))
++    (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample)))
+ /**
+  * Loop through the list given by head and set pos to struct in the list.
+  *
+@@ -392,17 +391,50 @@ static inline void list_move_tail(struct list *list, struct list *head)
+ #define list_last_entry(ptr, type, member) \
+     list_entry((ptr)->prev, type, member)
+ 
+-#define list_for_each_entry_reverse(pos, head, member)				\
++#define list_for_each_entry_reverse(pos, head, member)			\
+     for (pos = __container_of((head)->prev, pos, member);		\
+ 	 &pos->member != (head);					\
+ 	 pos = __container_of(pos->member.prev, pos, member))
+ 
+ #endif
+ 
++#define list_for_each_entry_safe_from(pos, tmp, head, member)		\
++    for (tmp = __container_of(pos->member.next, pos, member);		\
++	 &pos->member != (head);					\
++	 pos = tmp, tmp = __container_of(tmp->member.next, tmp, member))
++
+ #undef container_of
+ #define container_of(ptr, type, member) \
+ 	((type *)((char *)(ptr) - (char *) &((type *)0)->member))
+ 
++static inline void __list_splice(const struct list *list,
++				 struct list *prev,
++				 struct list *next)
++{
++	struct list *first = list->next;
++	struct list *last = list->prev;
++
++	first->prev = prev;
++	prev->next = first;
++
++	last->next = next;
++	next->prev = last;
++}
++
++static inline void list_splice(const struct list *list,
++			       struct list *head)
++{
++	if (!list_is_empty(list))
++		__list_splice(list, head, head->next);
++}
++
++static inline void list_splice_tail(const struct list *list,
++				    struct list *head)
++{
++	if (!list_is_empty(list))
++		__list_splice(list, head->prev, head);
++}
++
+ static inline int list_is_singular(const struct list *list)
+ {
+ 	return list->next == list->prev;
+diff --git a/src/intel_module.c b/src/intel_module.c
+index 102d52a..60835b9 100644
+--- a/src/intel_module.c
++++ b/src/intel_module.c
+@@ -126,6 +126,13 @@ static const struct intel_device_info intel_skylake_info = {
+ 	.gen = 0110,
+ };
+ 
++static const struct intel_device_info intel_broxton_info = {
++	.gen = 0111,
++};
++
++static const struct intel_device_info intel_kabylake_info = {
++	.gen = 0112,
++};
+ 
+ static const SymTabRec intel_chipsets[] = {
+ 	{PCI_CHIP_I810,				"i810"},
+@@ -234,30 +241,36 @@ static const SymTabRec intel_chipsets[] = {
+ 	{0x0157, "HD Graphics"},
+ 
+ 	/* Broadwell Marketing names */
+-	{0x1602, "HD graphics"},
+-	{0x1606, "HD graphics"},
+-	{0x160B, "HD graphics"},
+-	{0x160A, "HD graphics"},
+-	{0x160D, "HD graphics"},
+-	{0x160E, "HD graphics"},
+-	{0x1612, "HD graphics 5600"},
+-	{0x1616, "HD graphics 5500"},
+-	{0x161B, "HD graphics"},
+-	{0x161A, "HD graphics"},
+-	{0x161D, "HD graphics"},
+-	{0x161E, "HD graphics 5300"},
+-	{0x1622, "Iris Pro graphics 6200"},
+-	{0x1626, "HD graphics 6000"},
+-	{0x162B, "Iris graphics 6100"},
+-	{0x162A, "Iris Pro graphics P6300"},
+-	{0x162D, "HD graphics"},
+-	{0x162E, "HD graphics"},
+-	{0x1632, "HD graphics"},
+-	{0x1636, "HD graphics"},
+-	{0x163B, "HD graphics"},
+-	{0x163A, "HD graphics"},
+-	{0x163D, "HD graphics"},
+-	{0x163E, "HD graphics"},
++	{0x1602, "HD Graphics"},
++	{0x1606, "HD Graphics"},
++	{0x160B, "HD Graphics"},
++	{0x160A, "HD Graphics"},
++	{0x160D, "HD Graphics"},
++	{0x160E, "HD Graphics"},
++	{0x1612, "HD Graphics 5600"},
++	{0x1616, "HD Graphics 5500"},
++	{0x161B, "HD Graphics"},
++	{0x161A, "HD Graphics"},
++	{0x161D, "HD Graphics"},
++	{0x161E, "HD Graphics 5300"},
++	{0x1622, "Iris Pro Graphics 6200"},
++	{0x1626, "HD Graphics 6000"},
++	{0x162B, "Iris Graphics 6100"},
++	{0x162A, "Iris Pro Graphics P6300"},
++	{0x162D, "HD Graphics"},
++	{0x162E, "HD Graphics"},
++	{0x1632, "HD Graphics"},
++	{0x1636, "HD Graphics"},
++	{0x163B, "HD Graphics"},
++	{0x163A, "HD Graphics"},
++	{0x163D, "HD Graphics"},
++	{0x163E, "HD Graphics"},
++
++	/* Cherryview (Cherrytrail/Braswell) */
++	{0x22b0, "HD Graphics"},
++	{0x22b1, "HD Graphics"},
++	{0x22b2, "HD Graphics"},
++	{0x22b3, "HD Graphics"},
+ 
+ 	/* When adding new identifiers, also update:
+ 	 * 1. intel_identify()
+@@ -318,6 +331,10 @@ static const struct pci_id_match intel_device_match[] = {
+ 
+ 	INTEL_SKL_IDS(&intel_skylake_info),
+ 
++	INTEL_BXT_IDS(&intel_broxton_info),
++
++	INTEL_KBL_IDS(&intel_kabylake_info),
++
+ 	INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),
+ #endif
+ 
+@@ -508,6 +525,9 @@ static enum accel_method { NOACCEL, SNA, UXA } get_accel_method(void)
+ 	if (hosted())
+ 		return SNA;
+ 
++	if (xf86configptr == NULL) /* X -configure */
++		return SNA;
++
+ 	dev = _xf86findDriver("intel", xf86configptr->conf_device_lst);
+ 	if (dev && dev->dev_option_lst) {
+ 		const char *s;
+@@ -582,10 +602,17 @@ intel_scrn_create(DriverPtr		driver,
+ 	case NOACCEL:
+ #endif
+ 	case UXA:
+-		  return intel_init_scrn(scrn);
++		return intel_init_scrn(scrn);
+ #endif
+ 
+-	default: break;
++	default:
++#if USE_SNA
++		return sna_init_scrn(scrn, entity_num);
++#elif USE_UXA
++		return intel_init_scrn(scrn);
++#else
++		break;
++#endif
+ 	}
+ #endif
+ 
+diff --git a/src/intel_options.c b/src/intel_options.c
+index ff8541a..7f253ac 100644
+--- a/src/intel_options.c
++++ b/src/intel_options.c
+@@ -2,18 +2,24 @@
+ #include "config.h"
+ #endif
+ 
++#include <xorg-server.h>
++#include <xorgVersion.h>
++#include <xf86Parser.h>
++
+ #include "intel_options.h"
+ 
+ const OptionInfoRec intel_options[] = {
+-	{OPTION_ACCEL_DISABLE,	"NoAccel",	OPTV_BOOLEAN,	{0},	0},
++	{OPTION_ACCEL_ENABLE,	"Accel",	OPTV_BOOLEAN,	{0},	0},
+ 	{OPTION_ACCEL_METHOD,	"AccelMethod",	OPTV_STRING,	{0},	0},
+ 	{OPTION_BACKLIGHT,	"Backlight",	OPTV_STRING,	{0},	0},
++	{OPTION_EDID,		"CustomEDID",	OPTV_STRING,	{0},	0},
+ 	{OPTION_DRI,		"DRI",		OPTV_STRING,	{0},	0},
+ 	{OPTION_PRESENT,	"Present",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_COLOR_KEY,	"ColorKey",	OPTV_INTEGER,	{0},	0},
+ 	{OPTION_VIDEO_KEY,	"VideoKey",	OPTV_INTEGER,	{0},	0},
+ 	{OPTION_TILING_2D,	"Tiling",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_TILING_FB,	"LinearFramebuffer",	OPTV_BOOLEAN,	{0},	0},
++	{OPTION_ROTATION,	"HWRotation",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_VSYNC,		"VSync",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_PAGEFLIP,	"PageFlip",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN,	{0},	1},
+@@ -21,7 +27,6 @@ const OptionInfoRec intel_options[] = {
+ 	{OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0},
+ 	{OPTION_HOTPLUG,	"HotPlug",	OPTV_BOOLEAN,	{0},	1},
+ 	{OPTION_REPROBE,	"ReprobeOutputs", OPTV_BOOLEAN,	{0},	0},
+-	{OPTION_DELETE_DP12,	"DeleteUnusedDP12Displays", OPTV_BOOLEAN,	{0},	0},
+ #ifdef INTEL_XVMC
+ 	{OPTION_XVMC,		"XvMC",		OPTV_BOOLEAN,	{0},	1},
+ #endif
+@@ -54,3 +59,85 @@ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn)
+ 
+ 	return options;
+ }
++
++Bool intel_option_cast_to_bool(OptionInfoPtr options, int id, Bool val)
++{
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0)
++	xf86getBoolValue(&val, xf86GetOptValString(options, id));
++#endif
++	return val;
++}
++
++static int
++namecmp(const char *s1, const char *s2)
++{
++	/* Compare ignoring case, '_', ' ' and '\t'; returns 0 when equal. */
++	char c1, c2;
++
++	if (!s1 || *s1 == 0)
++		return (!s2 || *s2 == 0) ? 0 : 1;
++	if (!s2) /* treat a NULL s2 as empty instead of dereferencing it */
++		s2 = "";
++
++	while (*s1 == '_' || *s1 == ' ' || *s1 == '\t')
++		s1++;
++
++	while (*s2 == '_' || *s2 == ' ' || *s2 == '\t')
++		s2++;
++
++	/* <ctype.h> needs unsigned char values; plain char may be negative */
++	c1 = tolower((unsigned char)*s1);
++	c2 = tolower((unsigned char)*s2);
++	while (c1 == c2) {
++		if (c1 == '\0')
++			return 0;
++
++		s1++;
++		while (*s1 == '_' || *s1 == ' ' || *s1 == '\t')
++			s1++;
++
++		s2++;
++		while (*s2 == '_' || *s2 == ' ' || *s2 == '\t')
++			s2++;
++
++		c1 = tolower((unsigned char)*s1);
++		c2 = tolower((unsigned char)*s2);
++	}
++
++	return c1 - c2;
++}
++
++unsigned intel_option_cast_to_unsigned(OptionInfoPtr options, int id, unsigned val)
++{
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0)
++	const char *str = xf86GetOptValString(options, id);
++#else
++	const char *str = NULL;
++#endif
++	unsigned v;
++
++	if (str == NULL || *str == '\0')
++		return val;
++
++	if (namecmp(str, "on") == 0)
++		return val;
++	if (namecmp(str, "true") == 0)
++		return val;
++	if (namecmp(str, "yes") == 0)
++		return val;
++
++	if (namecmp(str, "0") == 0)
++		return 0;
++	if (namecmp(str, "off") == 0)
++		return 0;
++	if (namecmp(str, "false") == 0)
++		return 0;
++	if (namecmp(str, "no") == 0)
++		return 0;
++
++	v = atoi(str);
++	if (v)
++		return v;
++
++	return val;
++}
+diff --git a/src/intel_options.h b/src/intel_options.h
+index 7e2cbd9..43635f1 100644
+--- a/src/intel_options.h
++++ b/src/intel_options.h
+@@ -12,15 +12,17 @@
+  */
+ 
+ enum intel_options {
+-	OPTION_ACCEL_DISABLE,
++	OPTION_ACCEL_ENABLE,
+ 	OPTION_ACCEL_METHOD,
+ 	OPTION_BACKLIGHT,
++	OPTION_EDID,
+ 	OPTION_DRI,
+ 	OPTION_PRESENT,
+ 	OPTION_VIDEO_KEY,
+ 	OPTION_COLOR_KEY,
+ 	OPTION_TILING_2D,
+ 	OPTION_TILING_FB,
++	OPTION_ROTATION,
+ 	OPTION_VSYNC,
+ 	OPTION_PAGEFLIP,
+ 	OPTION_SWAPBUFFERS_WAIT,
+@@ -28,7 +30,6 @@ enum intel_options {
+ 	OPTION_PREFER_OVERLAY,
+ 	OPTION_HOTPLUG,
+ 	OPTION_REPROBE,
+-	OPTION_DELETE_DP12,
+ #if defined(XvMCExtension) && defined(ENABLE_XVMC)
+ 	OPTION_XVMC,
+ #define INTEL_XVMC 1
+@@ -51,5 +52,7 @@ enum intel_options {
+ 
+ extern const OptionInfoRec intel_options[];
+ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn);
++unsigned intel_option_cast_to_unsigned(OptionInfoPtr, int id, unsigned val);
++Bool intel_option_cast_to_bool(OptionInfoPtr, int id, Bool val);
+ 
+ #endif /* INTEL_OPTIONS_H */
+diff --git a/src/legacy/i810/i810_common.h b/src/legacy/i810/i810_common.h
+index 4cc10e8..8355708 100644
+--- a/src/legacy/i810/i810_common.h
++++ b/src/legacy/i810/i810_common.h
+@@ -52,7 +52,7 @@
+ 
+ #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+ 
+-/* Using usleep() makes things noticably slow. */
++/* Using usleep() makes things noticeably slow. */
+ #if 0
+ #define DELAY(x) usleep(x)
+ #else
+@@ -185,7 +185,7 @@ enum {
+  *    - zbuffer linear offset and pitch -- also invarient
+  *    - drawing origin in back and depth buffers.
+  *
+- * Keep the depth/back buffer state here to acommodate private buffers
++ * Keep the depth/back buffer state here to accommodate private buffers
+  * in the future.
+  */
+ #define I810_DESTREG_DI0  0		/* CMD_OP_DESTBUFFER_INFO (2 dwords) */
+diff --git a/src/legacy/i810/i810_hwmc.c b/src/legacy/i810/i810_hwmc.c
+index 7cb9c1a..58661b0 100644
+--- a/src/legacy/i810/i810_hwmc.c
++++ b/src/legacy/i810/i810_hwmc.c
+@@ -171,7 +171,7 @@ static XF86MCAdaptorPtr ppAdapt[1] =
+  *
+  *  I810InitMC
+  *
+- *  Initialize the hardware motion compenstation extention for this 
++ *  Initialize the hardware motion compensation extension for this
+  *  hardware. The initialization routines want the address of the pointers
+  *  to the structures, not the address of the structures. This means we
+  *  allocate (or create static?) the pointer memory and pass that 
+diff --git a/src/legacy/i810/i810_memory.c b/src/legacy/i810/i810_memory.c
+index c3de277..6f27483 100644
+--- a/src/legacy/i810/i810_memory.c
++++ b/src/legacy/i810/i810_memory.c
+@@ -76,7 +76,7 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn)
+    unsigned long size = pScrn->videoRam * 1024UL;
+    I810Ptr pI810 = I810PTR(pScrn);
+    int key;
+-   long tom = 0;
++   unsigned long tom = 0;
+    unsigned long physical;
+ 
+    if (!xf86AgpGARTSupported() || !xf86AcquireGART(pScrn->scrnIndex)) {
+@@ -132,8 +132,8 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn)
+     * Keep it 512K aligned for the sake of tiled regions.
+     */
+ 
+-   tom += 0x7ffff;
+-   tom &= ~0x7ffff;
++   tom += 0x7ffffUL;
++   tom &= ~0x7ffffUL;
+ 
+    if ((key = xf86AllocateGARTMemory(pScrn->scrnIndex, size, 1, NULL)) != -1) {
+       pI810->DcacheOffset = tom;
+diff --git a/src/legacy/i810/i810_reg.h b/src/legacy/i810/i810_reg.h
+index 54faeb3..fa091c5 100644
+--- a/src/legacy/i810/i810_reg.h
++++ b/src/legacy/i810/i810_reg.h
+@@ -245,7 +245,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+  * not sure they refer to local (graphics) memory.
+  *
+  * These details are for the local memory control registers,
+- * (pp301-310).  The test machines are not equiped with local memory,
++ * (pp301-310).  The test machines are not equipped with local memory,
+  * so nothing is tested.  Only a single row seems to be supported.
+  */
+ #define DRAM_ROW_TYPE      0x3000
+diff --git a/src/legacy/i810/xvmc/I810XvMC.c b/src/legacy/i810/xvmc/I810XvMC.c
+index e6b63d3..a538e99 100644
+--- a/src/legacy/i810/xvmc/I810XvMC.c
++++ b/src/legacy/i810/xvmc/I810XvMC.c
+@@ -61,7 +61,7 @@ static int event_base;
+ // Arguments: pI810XvMC private data structure from the current context.
+ // Notes: We faked the drmMapBufs for the i810's security so now we have
+ //   to insert an allocated page into the correct spot in the faked
+-//   list to keep up appearences.
++//   list to keep up appearances.
+ //   Concept for this function was taken from Mesa sources.
+ // Returns: drmBufPtr containing the information about the allocated page.
+ ***************************************************************************/
+@@ -188,7 +188,7 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port,
+ 
+   /* Check for drm */
+   if(! drmAvailable()) {
+-    printf("Direct Rendering is not avilable on this system!\n");
++    printf("Direct Rendering is not available on this system!\n");
+     return BadAlloc;
+   }
+ 
+@@ -3279,7 +3279,7 @@ _X_EXPORT Status XvMCSyncSurface(Display *display,XvMCSurface *surface) {
+ //   display - Connection to X server
+ //   surface - Surface to flush
+ // Info:
+-//   This command is a noop for i810 becuase we always dispatch buffers in
++//   This command is a noop for i810 because we always dispatch buffers in
+ //   render. There is little gain to be had with 4k buffers.
+ // Returns: Status
+ ***************************************************************************/
+diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i
+index 5d3d45b..587b581 100644
+--- a/src/render_program/exa_wm.g4i
++++ b/src/render_program/exa_wm.g4i
+@@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F')
+ define(`mask_wo',    `g6.12<0,1,0>F')
+ 
+ /*
+- * Local variables. Pairs must be aligned on even reg boundry
++ * Local variables. Pairs must be aligned on even reg boundary
+  */
+ 
+ /* this holds the X dest coordinates */
+diff --git a/src/render_program/exa_wm_yuv_rgb.g8a b/src/render_program/exa_wm_yuv_rgb.g8a
+index 7def093..34973ba 100644
+--- a/src/render_program/exa_wm_yuv_rgb.g8a
++++ b/src/render_program/exa_wm_yuv_rgb.g8a
+@@ -76,7 +76,7 @@ add (16)    Cbn<1>F		Cb<8,8,1>F	-0.501961F  { compr align1 };
+     /* 
+      * R = Y + Cr * 1.596
+      */
+-mov (8)    acc0<1>F		Yn<8,8,1>F		    { compr align1 };
++mov (8)    acc0<1>F		Yn_01<8,8,1>F		    { compr align1 };
+ mac.sat(8) src_sample_r_01<1>F	Crn_01<8,8,1>F	1.596F	    { compr align1 };
+      
+ mov (8)    acc0<1>F		Yn_23<8,8,1>F		    { compr align1 };
+@@ -84,7 +84,7 @@ mac.sat(8) src_sample_r_23<1>F	Crn_23<8,8,1>F	1.596F	    { compr align1 };
+     /*
+      * G = Crn * -0.813 + Cbn * -0.392 + Y
+      */
+-mov (8)    acc0<1>F		Yn_23<8,8,1>F		    { compr align1 };
++mov (8)    acc0<1>F		Yn_01<8,8,1>F		    { compr align1 };
+ mac (8)    acc0<1>F		Crn_01<8,8,1>F    	-0.813F	    { compr align1 };
+ mac.sat(8) src_sample_g_01<1>F	Cbn_01<8,8,1>F    	-0.392F	    { compr align1 };
+ 
+diff --git a/src/render_program/exa_wm_yuv_rgb.g8b b/src/render_program/exa_wm_yuv_rgb.g8b
+index 4494953..2cd6fc4 100644
+--- a/src/render_program/exa_wm_yuv_rgb.g8b
++++ b/src/render_program/exa_wm_yuv_rgb.g8b
+@@ -6,7 +6,7 @@
+    { 0x80600048, 0x21c03ae8, 0x3e8d02c0, 0x3fcc49ba },
+    { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
+    { 0x80600048, 0x21e03ae8, 0x3e8d02e0, 0x3fcc49ba },
+-   { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
++   { 0x00600001, 0x24003ae0, 0x008d0300, 0x00000000 },
+    { 0x00600048, 0x24003ae0, 0x3e8d02c0, 0xbf5020c5 },
+    { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 },
+    { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
+diff --git a/src/sna/blt.c b/src/sna/blt.c
+index b5bfee6..2dae9c2 100644
+--- a/src/sna/blt.c
++++ b/src/sna/blt.c
+@@ -30,6 +30,7 @@
+ #endif
+ 
+ #include "sna.h"
++#include <pixman.h>
+ 
+ #if __x86_64__
+ #define USE_SSE2 1
+@@ -333,420 +334,270 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+ 	}
+ }
+ 
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+-			     int32_t src_stride, int32_t dst_stride,
+-			     int16_t src_x, int16_t src_y,
+-			     int16_t dst_x, int16_t dst_y,
+-			     uint16_t width, uint16_t height)
+-{
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
+-
+-	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = dst_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+-	unsigned x, y;
+-
+-	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+-	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+-
+-	src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t dy = y + dst_y;
+-		const uint32_t tile_row =
+-			(dy / tile_height * stride_tiles * tile_size +
+-			 (dy & (tile_height-1)) * tile_width);
+-		const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+-		uint32_t dx = dst_x, offset;
+-
+-		x = width * cpp;
+-		if (dx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+-			const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
+-
+-			memcpy((char *)dst + offset, src_row, length * cpp);
+-
+-			src_row += length * cpp;
+-			x -= length * cpp;
+-			dx += length;
+-		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
+-
+-			memcpy((char *)dst + offset, src_row, 64);
+-
+-			src_row += 64;
+-			x -= 64;
+-			dx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
+-			memcpy((char *)dst + offset, src_row, x);
+-		}
+-	}
++#define memcpy_to_tiled_x(swizzle) \
++fast_memcpy static void \
++memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
++			      int32_t src_stride, int32_t dst_stride, \
++			      int16_t src_x, int16_t src_y, \
++			      int16_t dst_x, int16_t dst_y, \
++			      uint16_t width, uint16_t height) \
++{ \
++	const unsigned tile_width = 512; \
++	const unsigned tile_height = 8; \
++	const unsigned tile_size = 4096; \
++	const unsigned cpp = bpp / 8; \
++	const unsigned stride_tiles = dst_stride / tile_width; \
++	const unsigned swizzle_pixels = 64 / cpp; \
++	const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \
++	const unsigned tile_mask = (1 << tile_pixels) - 1; \
++	unsigned x, y; \
++	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \
++	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \
++	src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; \
++	for (y = 0; y < height; ++y) { \
++		const uint32_t dy = y + dst_y; \
++		const uint32_t tile_row = \
++			(dy / tile_height * stride_tiles * tile_size + \
++			 (dy & (tile_height-1)) * tile_width); \
++		const uint8_t *src_row = (const uint8_t *)src + src_stride * y; \
++		uint32_t dx = dst_x; \
++		x = width * cpp; \
++		if (dx & (swizzle_pixels - 1)) { \
++			const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); \
++			const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; \
++			uint32_t offset = \
++				tile_row + \
++				(dx >> tile_pixels) * tile_size + \
++				(dx & tile_mask) * cpp; \
++			memcpy((char *)dst + swizzle(offset), src_row, length * cpp); \
++			src_row += length * cpp; \
++			x -= length * cpp; \
++			dx += length; \
++		} \
++		while (x >= 64) { \
++			uint32_t offset = \
++				tile_row + \
++				(dx >> tile_pixels) * tile_size + \
++				(dx & tile_mask) * cpp; \
++			memcpy((char *)dst + swizzle(offset), src_row, 64); \
++			src_row += 64; \
++			x -= 64; \
++			dx += swizzle_pixels; \
++		} \
++		if (x) { \
++			uint32_t offset = \
++				tile_row + \
++				(dx >> tile_pixels) * tile_size + \
++				(dx & tile_mask) * cpp; \
++			memcpy((char *)dst + swizzle(offset), src_row, x); \
++		} \
++	} \
+ }
+ 
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+-			       int32_t src_stride, int32_t dst_stride,
+-			       int16_t src_x, int16_t src_y,
+-			       int16_t dst_x, int16_t dst_y,
+-			       uint16_t width, uint16_t height)
+-{
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
+-
+-	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = src_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
++#define memcpy_from_tiled_x(swizzle) \
++fast_memcpy static void \
++memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
++				int32_t src_stride, int32_t dst_stride, \
++				int16_t src_x, int16_t src_y, \
++				int16_t dst_x, int16_t dst_y, \
++				uint16_t width, uint16_t height) \
++{ \
++	const unsigned tile_width = 512; \
++	const unsigned tile_height = 8; \
++	const unsigned tile_size = 4096; \
++	const unsigned cpp = bpp / 8; \
++	const unsigned stride_tiles = src_stride / tile_width; \
++	const unsigned swizzle_pixels = 64 / cpp; \
++	const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \
++	const unsigned tile_mask = (1 << tile_pixels) - 1; \
++	unsigned x, y; \
++	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \
++	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \
++	dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; \
++	for (y = 0; y < height; ++y) { \
++		const uint32_t sy = y + src_y; \
++		const uint32_t tile_row = \
++			(sy / tile_height * stride_tiles * tile_size + \
++			 (sy & (tile_height-1)) * tile_width); \
++		uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; \
++		uint32_t sx = src_x; \
++		x = width * cpp; \
++		if (sx & (swizzle_pixels - 1)) { \
++			const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); \
++			const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; \
++			uint32_t offset = \
++				tile_row + \
++				(sx >> tile_pixels) * tile_size + \
++				(sx & tile_mask) * cpp; \
++			memcpy(dst_row, (const char *)src + swizzle(offset), length * cpp); \
++			dst_row += length * cpp; \
++			x -= length * cpp; \
++			sx += length; \
++		} \
++		while (x >= 64) { \
++			uint32_t offset = \
++				tile_row + \
++				(sx >> tile_pixels) * tile_size + \
++				(sx & tile_mask) * cpp; \
++			memcpy(dst_row, (const char *)src + swizzle(offset), 64); \
++			dst_row += 64; \
++			x -= 64; \
++			sx += swizzle_pixels; \
++		} \
++		if (x) { \
++			uint32_t offset = \
++				tile_row + \
++				(sx >> tile_pixels) * tile_size + \
++				(sx & tile_mask) * cpp; \
++			memcpy(dst_row, (const char *)src + swizzle(offset), x); \
++		} \
++	} \
++}
+ 
+-	unsigned x, y;
++#define swizzle_9(X) ((X) ^ (((X) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9)
++memcpy_from_tiled_x(swizzle_9)
++#undef swizzle_9
+ 
+-	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+-	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+-
+-	dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t sy = y + src_y;
+-		const uint32_t tile_row =
+-			(sy / tile_height * stride_tiles * tile_size +
+-			 (sy & (tile_height-1)) * tile_width);
+-		uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+-		uint32_t sx = src_x, offset;
+-
+-		x = width * cpp;
+-		if (sx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+-			const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
+-
+-			memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+-			dst_row += length * cpp;
+-			x -= length * cpp;
+-			sx += length;
+-		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
++#define swizzle_9_10(X) ((X) ^ ((((X) ^ ((X) >> 1)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_10)
++memcpy_from_tiled_x(swizzle_9_10)
++#undef swizzle_9_10
+ 
+-			memcpy(dst_row, (const char *)src + offset, 64);
++#define swizzle_9_11(X) ((X) ^ ((((X) ^ ((X) >> 2)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_11)
++memcpy_from_tiled_x(swizzle_9_11)
++#undef swizzle_9_11
+ 
+-			dst_row += 64;
+-			x -= 64;
+-			sx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= (offset >> 3) & 64;
+-			memcpy(dst_row, (const char *)src + offset, x);
+-		}
+-	}
+-}
++#define swizzle_9_10_11(X) ((X) ^ ((((X) ^ ((X) >> 1) ^ ((X) >> 2)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_10_11)
++memcpy_from_tiled_x(swizzle_9_10_11)
++#undef swizzle_9_10_11
+ 
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+-				int32_t src_stride, int32_t dst_stride,
+-				int16_t src_x, int16_t src_y,
+-				int16_t dst_x, int16_t dst_y,
+-				uint16_t width, uint16_t height)
++static fast_memcpy void
++memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
++			int32_t src_stride, int32_t dst_stride,
++			int16_t src_x, int16_t src_y,
++			int16_t dst_x, int16_t dst_y,
++			uint16_t width, uint16_t height)
+ {
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
++	const unsigned tile_width = 128;
++	const unsigned tile_height = 16;
++	const unsigned tile_size = 2048;
+ 
+ 	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = dst_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+-	unsigned x, y;
++	const unsigned tile_pixels = tile_width / cpp;
++	const unsigned tile_shift = ffs(tile_pixels) - 1;
++	const unsigned tile_mask = tile_pixels - 1;
+ 
+ 	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ 	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++	assert(src != dst);
+ 
+-	src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t dy = y + dst_y;
+-		const uint32_t tile_row =
+-			(dy / tile_height * stride_tiles * tile_size +
+-			 (dy & (tile_height-1)) * tile_width);
+-		const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+-		uint32_t dx = dst_x, offset;
+-
+-		x = width * cpp;
+-		if (dx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+-			const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+-			memcpy((char *)dst + offset, src_row, length * cpp);
+-
+-			src_row += length * cpp;
+-			x -= length * cpp;
+-			dx += length;
+-		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+-			memcpy((char *)dst + offset, src_row, 64);
+-
+-			src_row += 64;
+-			x -= 64;
+-			dx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-			memcpy((char *)dst + offset, src_row, x);
+-		}
+-	}
+-}
+-
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+-				  int32_t src_stride, int32_t dst_stride,
+-				  int16_t src_x, int16_t src_y,
+-				  int16_t dst_x, int16_t dst_y,
+-				  uint16_t width, uint16_t height)
+-{
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
+-
+-	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = src_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
++	if (src_x | src_y)
++		src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
++	assert(src_stride >= width * cpp);
++	src_stride -= width * cpp;
+ 
+-	unsigned x, y;
++	while (height--) {
++		unsigned w = width * cpp;
++		uint8_t *tile_row = dst;
+ 
+-	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+-	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++		tile_row += dst_y / tile_height * dst_stride * tile_height;
++		tile_row += (dst_y & (tile_height-1)) * tile_width;
++		if (dst_x) {
++			tile_row += (dst_x >> tile_shift) * tile_size;
++			if (dst_x & tile_mask) {
++				const unsigned x = (dst_x & tile_mask) * cpp;
++				const unsigned len = min(tile_width - x, w);
++				memcpy(tile_row + x, src, len);
+ 
+-	dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t sy = y + src_y;
+-		const uint32_t tile_row =
+-			(sy / tile_height * stride_tiles * tile_size +
+-			 (sy & (tile_height-1)) * tile_width);
+-		uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+-		uint32_t sx = src_x, offset;
+-
+-		x = width * cpp;
+-		if (sx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+-			const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+-			memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+-			dst_row += length * cpp;
+-			x -= length * cpp;
+-			sx += length;
++				tile_row += tile_size;
++				src = (const uint8_t *)src + len;
++				w -= len;
++			}
+ 		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+-			memcpy(dst_row, (const char *)src + offset, 64);
++		while (w >= tile_width) {
++			memcpy(tile_row, src, tile_width);
+ 
+-			dst_row += 64;
+-			x -= 64;
+-			sx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-			memcpy(dst_row, (const char *)src + offset, x);
++			tile_row += tile_size;
++			src = (const uint8_t *)src + tile_width;
++			w -= tile_width;
+ 		}
++		memcpy(tile_row, src, w);
++		src = (const uint8_t *)src + src_stride + w;
++		dst_y++;
+ 	}
+ }
+ 
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+-				int32_t src_stride, int32_t dst_stride,
+-				int16_t src_x, int16_t src_y,
+-				int16_t dst_x, int16_t dst_y,
+-				uint16_t width, uint16_t height)
++static fast_memcpy void
++memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
++			  int32_t src_stride, int32_t dst_stride,
++			  int16_t src_x, int16_t src_y,
++			  int16_t dst_x, int16_t dst_y,
++			  uint16_t width, uint16_t height)
+ {
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
++	const unsigned tile_width = 128;
++	const unsigned tile_height = 16;
++	const unsigned tile_size = 2048;
+ 
+ 	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = dst_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+-	unsigned x, y;
++	const unsigned tile_pixels = tile_width / cpp;
++	const unsigned tile_shift = ffs(tile_pixels) - 1;
++	const unsigned tile_mask = tile_pixels - 1;
+ 
+ 	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ 	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++	assert(src != dst);
+ 
+-	src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t dy = y + dst_y;
+-		const uint32_t tile_row =
+-			(dy / tile_height * stride_tiles * tile_size +
+-			 (dy & (tile_height-1)) * tile_width);
+-		const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+-		uint32_t dx = dst_x, offset;
+-
+-		x = width * cpp;
+-		if (dx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+-			const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-			memcpy((char *)dst + offset, src_row, length * cpp);
+-
+-			src_row += length * cpp;
+-			x -= length * cpp;
+-			dx += length;
+-		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-
+-			memcpy((char *)dst + offset, src_row, 64);
+-
+-			src_row += 64;
+-			x -= 64;
+-			dx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(dx >> tile_pixels) * tile_size +
+-				(dx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-			memcpy((char *)dst + offset, src_row, x);
+-		}
+-	}
+-}
+-
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+-				  int32_t src_stride, int32_t dst_stride,
+-				  int16_t src_x, int16_t src_y,
+-				  int16_t dst_x, int16_t dst_y,
+-				  uint16_t width, uint16_t height)
+-{
+-	const unsigned tile_width = 512;
+-	const unsigned tile_height = 8;
+-	const unsigned tile_size = 4096;
+-
+-	const unsigned cpp = bpp / 8;
+-	const unsigned stride_tiles = src_stride / tile_width;
+-	const unsigned swizzle_pixels = 64 / cpp;
+-	const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+-	const unsigned tile_mask = (1 << tile_pixels) - 1;
++	if (dst_x | dst_y)
++		dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
++	assert(dst_stride >= width * cpp);
++	dst_stride -= width * cpp;
+ 
+-	unsigned x, y;
++	while (height--) {
++		unsigned w = width * cpp;
++		const uint8_t *tile_row = src;
+ 
+-	DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+-	     __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++		tile_row += src_y / tile_height * src_stride * tile_height;
++		tile_row += (src_y & (tile_height-1)) * tile_width;
++		if (src_x) {
++			tile_row += (src_x >> tile_shift) * tile_size;
++			if (src_x & tile_mask) {
++				const unsigned x = (src_x & tile_mask) * cpp;
++				const unsigned len = min(tile_width - x, w);
++				memcpy(dst, tile_row + x, len);
+ 
+-	dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+-	for (y = 0; y < height; ++y) {
+-		const uint32_t sy = y + src_y;
+-		const uint32_t tile_row =
+-			(sy / tile_height * stride_tiles * tile_size +
+-			 (sy & (tile_height-1)) * tile_width);
+-		uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+-		uint32_t sx = src_x, offset;
+-
+-		x = width * cpp;
+-		if (sx & (swizzle_pixels - 1)) {
+-			const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+-			const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-			memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+-			dst_row += length * cpp;
+-			x -= length * cpp;
+-			sx += length;
++				tile_row += tile_size;
++				dst = (uint8_t *)dst + len;
++				w -= len;
++			}
+ 		}
+-		while (x >= 64) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-
+-			memcpy(dst_row, (const char *)src + offset, 64);
++		while (w >= tile_width) {
++			memcpy(dst, tile_row, tile_width);
+ 
+-			dst_row += 64;
+-			x -= 64;
+-			sx += swizzle_pixels;
+-		}
+-		if (x) {
+-			offset = tile_row +
+-				(sx >> tile_pixels) * tile_size +
+-				(sx & tile_mask) * cpp;
+-			offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-			memcpy(dst_row, (const char *)src + offset, x);
++			tile_row += tile_size;
++			dst = (uint8_t *)dst + tile_width;
++			w -= tile_width;
+ 		}
++		memcpy(dst, tile_row, w);
++		dst = (uint8_t *)dst + dst_stride + w;
++		src_y++;
+ 	}
+ }
+ 
+ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
+ {
++	if (kgem->gen < 030) {
++		if (swizzling == I915_BIT_6_SWIZZLE_NONE) {
++			DBG(("%s: gen2, no swizzling\n", __FUNCTION__));
++			kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__gen2;
++			kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__gen2;
++		} else
++			DBG(("%s: no detiling with swizzle functions for gen2\n", __FUNCTION__));
++		return;
++	}
++
+ 	switch (swizzling) {
+ 	default:
+ 		DBG(("%s: unknown swizzling, %d\n", __FUNCTION__, swizzling));
+@@ -771,6 +622,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
+ 		kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
+ 		kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11;
+ 		break;
++	case I915_BIT_6_SWIZZLE_9_10_11:
++		DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__));
++		kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11;
++		kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11;
++		break;
+ 	}
+ }
+ 
+@@ -1118,3 +974,241 @@ memcpy_xor(const void *src, void *dst, int bpp,
+ 		}
+ 	}
+ }
++
++#define BILINEAR_INTERPOLATION_BITS 4
++static inline int
++bilinear_weight(pixman_fixed_t x)
++{
++	return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
++		((1 << BILINEAR_INTERPOLATION_BITS) - 1);
++}
++
++#if BILINEAR_INTERPOLATION_BITS <= 4
++/* Inspired by Filter_32_opaque from Skia */
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++		       uint32_t bl, uint32_t br,
++		       int distx, int disty)
++{
++	int distxy, distxiy, distixy, distixiy;
++	uint32_t lo, hi;
++
++	distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
++	disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
++
++	distxy = distx * disty;
++	distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
++	distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
++	distixiy =
++		16 * 16 - (disty << 4) -
++		(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
++
++	lo = (tl & 0xff00ff) * distixiy;
++	hi = ((tl >> 8) & 0xff00ff) * distixiy;
++
++	lo += (tr & 0xff00ff) * distxiy;
++	hi += ((tr >> 8) & 0xff00ff) * distxiy;
++
++	lo += (bl & 0xff00ff) * distixy;
++	hi += ((bl >> 8) & 0xff00ff) * distixy;
++
++	lo += (br & 0xff00ff) * distxy;
++	hi += ((br >> 8) & 0xff00ff) * distxy;
++
++	return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
++}
++#elif SIZEOF_LONG > 4
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++		       uint32_t bl, uint32_t br,
++		       int distx, int disty)
++{
++	uint64_t distxy, distxiy, distixy, distixiy;
++	uint64_t tl64, tr64, bl64, br64;
++	uint64_t f, r;
++
++	distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
++	disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
++
++	distxy = distx * disty;
++	distxiy = distx * (256 - disty);
++	distixy = (256 - distx) * disty;
++	distixiy = (256 - distx) * (256 - disty);
++
++	/* Alpha and Blue */
++	tl64 = tl & 0xff0000ff;
++	tr64 = tr & 0xff0000ff;
++	bl64 = bl & 0xff0000ff;
++	br64 = br & 0xff0000ff;
++
++	f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
++	r = f & 0x0000ff0000ff0000ull;
++
++	/* Red and Green */
++	tl64 = tl;
++	tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
++
++	tr64 = tr;
++	tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
++
++	bl64 = bl;
++	bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
++
++	br64 = br;
++	br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
++
++	f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
++	r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
++
++	return (uint32_t)(r >> 16);
++}
++#else
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++		       uint32_t bl, uint32_t br,
++		       int distx, int disty)
++{
++	int distxy, distxiy, distixy, distixiy;
++	uint32_t f, r;
++
++	distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
++	disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
++
++	distxy = distx * disty;
++	distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
++	distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
++	distixiy =
++		256 * 256 - (disty << 8) -
++		(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
++
++	/* Blue */
++	r = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy +
++	     (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy);
++
++	/* Green */
++	f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy +
++	     (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy);
++	r |= f & 0xff000000;
++
++	tl >>= 16;
++	tr >>= 16;
++	bl >>= 16;
++	br >>= 16;
++	r >>= 16;
++
++	/* Red */
++	f = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy +
++	     (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy);
++	r |= f & 0x00ff0000;
++
++	/* Alpha */
++	f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy +
++	     (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy);
++	r |= f & 0xff000000;
++
++	return r;
++}
++#endif
++
++static inline uint32_t convert_pixel(const uint8_t *p, int x)
++{
++	return ((uint32_t *)p)[x];
++}
++
++fast void
++affine_blt(const void *src, void *dst, int bpp,
++	   int16_t src_x, int16_t src_y,
++	   int16_t src_width, int16_t src_height,
++	   int32_t src_stride,
++	   int16_t dst_x, int16_t dst_y,
++	   uint16_t dst_width, uint16_t dst_height,
++	   int32_t dst_stride,
++	   const struct pixman_f_transform *t)
++{
++	static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
++	const pixman_fixed_t ux = pixman_double_to_fixed(t->m[0][0]);
++	const pixman_fixed_t uy = pixman_double_to_fixed(t->m[1][0]);
++	int i, j;
++
++	assert(bpp == 32);
++
++	for (j = 0; j < dst_height; j++) {
++		pixman_fixed_t x, y;
++		struct pixman_f_vector v;
++		uint32_t *b;
++
++		/* reference point is the center of the pixel */
++		v.v[0] = dst_x + 0.5;
++		v.v[1] = dst_y + j + 0.5;
++		v.v[2] = 1.0;
++
++		pixman_f_transform_point_3d(t, &v);
++
++		x = pixman_double_to_fixed(v.v[0]);
++		x += pixman_int_to_fixed(src_x - dst_x);
++		y = pixman_double_to_fixed(v.v[1]);
++		y +=  pixman_int_to_fixed(src_y - dst_y);
++
++		b = (uint32_t*)((uint8_t *)dst + (dst_y + j) * dst_stride + dst_x * bpp / 8);
++		for (i = 0; i < dst_width; i++) {
++			const uint8_t *row1;
++			const uint8_t *row2;
++			int x1, y1, x2, y2;
++			uint32_t tl, tr, bl, br;
++			int32_t fx, fy;
++
++			x1 = x - pixman_fixed_1/2;
++			y1 = y - pixman_fixed_1/2;
++
++			fx = bilinear_weight(x1);
++			fy = bilinear_weight(y1);
++
++			x1 = pixman_fixed_to_int(x1);
++			x2 = x1 + 1;
++			y1 = pixman_fixed_to_int(y1);
++			y2 = y1 + 1;
++
++			if (x1 >= src_width  || x2 < 0 ||
++			    y1 >= src_height || y2 < 0) {
++				b[i] = 0;
++				goto next;
++			}
++
++			if (y2 == 0) {
++				row1 = zero;
++			} else {
++				row1 = (uint8_t *)src + src_stride * y1;
++				row1 += bpp / 8 * x1;
++			}
++
++			if (y1 == src_height - 1) {
++				row2 = zero;
++			} else {
++				row2 = (uint8_t *)src + src_stride * y2;
++				row2 += bpp / 8 * x1;
++			}
++
++			if (x2 == 0) {
++				tl = 0;
++				bl = 0;
++			} else {
++				tl = convert_pixel(row1, 0);
++				bl = convert_pixel(row2, 0);
++			}
++
++			if (x1 == src_width - 1) {
++				tr = 0;
++				br = 0;
++			} else {
++				tr = convert_pixel(row1, 1);
++				br = convert_pixel(row2, 1);
++			}
++
++			b[i] = bilinear_interpolation(tl, tr, bl, br, fx, fy);
++
++next:
++			x += ux;
++			y += uy;
++		}
++	}
++}
+diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c
+index 00c984d..2f33022 100644
+--- a/src/sna/brw/brw_eu_emit.c
++++ b/src/sna/brw/brw_eu_emit.c
+@@ -700,7 +700,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+  *
+  * When the matching 'else' instruction is reached (presumably by
+  * countdown of the instruction count patched in by our ELSE/ENDIF
+- * functions), the relevent flags are inverted.
++ * functions), the relevant flags are inverted.
+  *
+  * When the matching 'endif' instruction is reached, the flags are
+  * popped off.  If the stack is now empty, normal execution resumes.
+diff --git a/src/sna/compiler.h b/src/sna/compiler.h
+index ff41217..c723137 100644
+--- a/src/sna/compiler.h
++++ b/src/sna/compiler.h
+@@ -39,6 +39,7 @@
+ #define pure __attribute__((pure))
+ #define tightly_packed __attribute__((__packed__))
+ #define flatten __attribute__((flatten))
++#define nonnull __attribute__((nonnull))
+ #define page_aligned __attribute__((aligned(4096)))
+ #else
+ #define likely(expr) (expr)
+@@ -51,6 +52,7 @@
+ #define pure
+ #define tighly_packed
+ #define flatten
++#define nonnull
+ #define page_aligned
+ #endif
+ 
+@@ -61,20 +63,18 @@
+ #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
+ #endif
+ 
+-#if HAS_GCC(4, 7)
+-#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
+-#endif
+-
+ #if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
+ #define fast __attribute__((optimize("Ofast")))
+ #else
+ #define fast
+ #endif
+ 
+-#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
+-#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
+-#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
+-#define fast_memcpy __attribute__((target("inline-all-stringops")))
++#if HAS_GCC(4, 7)
++#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse")))
++#endif
++
++#if HAS_GCC(4, 5) && defined(__OPTIMIZE__)
++#define fast_memcpy fast __attribute__((target("inline-all-stringops")))
+ #else
+ #define fast_memcpy
+ #endif
+diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h
+index 8bf9008..9043174 100644
+--- a/src/sna/fb/fb.h
++++ b/src/sna/fb/fb.h
+@@ -24,10 +24,6 @@
+ #ifndef FB_H
+ #define FB_H
+ 
+-#ifdef HAVE_CONFIG_H
+-#include "config.h"
+-#endif
+-
+ #include <xorg-server.h>
+ #include <servermd.h>
+ #include <gcstruct.h>
+diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h
+index 932032f..2087777 100644
+--- a/src/sna/fb/fbpict.h
++++ b/src/sna/fb/fbpict.h
+@@ -24,10 +24,6 @@
+ #ifndef FBPICT_H
+ #define FBPICT_H
+ 
+-#ifdef HAVE_CONFIG_H
+-#include "config.h"
+-#endif
+-
+ #include <xorg-server.h>
+ #include <picturestr.h>
+ 
+diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
+index 1104f46..12b741c 100644
+--- a/src/sna/gen2_render.c
++++ b/src/sna/gen2_render.c
+@@ -1572,12 +1572,12 @@ gen2_composite_picture(struct sna *sna,
+ 		if (channel->repeat &&
+ 		    (x >= 0 &&
+ 		     y >= 0 &&
+-		     x + w < pixmap->drawable.width &&
+-		     y + h < pixmap->drawable.height)) {
++		     x + w <= pixmap->drawable.width &&
++		     y + h <= pixmap->drawable.height)) {
+ 			struct sna_pixmap *priv = sna_pixmap(pixmap);
+ 			if (priv && priv->clear) {
+ 				DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
+-				return gen2_composite_solid_init(sna, channel, priv->clear_color);
++				return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color));
+ 			}
+ 		}
+ 	} else
+diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
+index 78289f0..2d3fb1e 100644
+--- a/src/sna/gen3_render.c
++++ b/src/sna/gen3_render.c
+@@ -531,6 +531,7 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + r->width;
+ 	v[1] = dst_y + r->height;
+@@ -596,6 +597,7 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x + op->dst.x;
+ 	v[0] = v[4] + w;
+@@ -643,6 +645,7 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x;
+ 	v[9] = r->dst.y;
+@@ -693,6 +696,7 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + r->width;
+ 	v[5] = v[1] = dst_y + r->height;
+@@ -756,6 +760,7 @@ gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x + op->dst.x;
+ 	v[0] = v[4] + w;
+@@ -781,6 +786,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x;
+ 	v[9] = r->dst.y;
+@@ -817,6 +823,7 @@ gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 18;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + w;
+ 	v[1] = dst_y + h;
+@@ -862,6 +869,7 @@ gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 18;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + w;
+ 	v[1] = dst_y + h;
+@@ -978,6 +986,7 @@ gen3_emit_composite_primitive_constant__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 6;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[4] = v[2] = r->dst.x + op->dst.x;
+ 	v[5] = r->dst.y + op->dst.y;
+@@ -1013,6 +1022,7 @@ gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	x = r->dst.x + op->dst.x;
+ 	y = r->dst.y + op->dst.y;
+@@ -1067,6 +1077,7 @@ gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + r->width;
+ 	v[1] = dst_y + r->height;
+@@ -1132,6 +1143,7 @@ gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x + op->dst.x;
+ 	v[0] = v[4] + w;
+@@ -1179,6 +1191,7 @@ gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x;
+ 	v[9] = r->dst.y;
+@@ -1229,6 +1242,7 @@ gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + r->width;
+ 	v[5] = v[1] = dst_y + r->height;
+@@ -1292,6 +1306,7 @@ gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x + op->dst.x;
+ 	v[0] = v[4] + w;
+@@ -1317,6 +1332,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 12;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[8] = v[4] = r->dst.x;
+ 	v[9] = r->dst.y;
+@@ -1353,6 +1369,7 @@ gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 18;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + w;
+ 	v[1] = dst_y + h;
+@@ -1398,6 +1415,7 @@ gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
+ 
+ 	v = sna->render.vertices + sna->render.vertex_used;
+ 	sna->render.vertex_used += 18;
++	assert(sna->render.vertex_used <= sna->render.vertex_size);
+ 
+ 	v[0] = dst_x + w;
+ 	v[1] = dst_y + h;
+@@ -2233,6 +2251,7 @@ static void gen3_vertex_flush(struct sna *sna)
+ static int gen3_vertex_finish(struct sna *sna)
+ {
+ 	struct kgem_bo *bo;
++	unsigned hint, size;
+ 
+ 	DBG(("%s: used=%d/%d, vbo active? %d\n",
+ 	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+@@ -2243,6 +2262,7 @@ static int gen3_vertex_finish(struct sna *sna)
+ 
+ 	sna_vertex_wait__locked(&sna->render);
+ 
++	hint = CREATE_GTT_MAP;
+ 	bo = sna->render.vbo;
+ 	if (bo) {
+ 		DBG(("%s: reloc = %d\n", __FUNCTION__,
+@@ -2251,7 +2271,7 @@ static int gen3_vertex_finish(struct sna *sna)
+ 		if (sna->render.vertex_reloc[0]) {
+ 			sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ 				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+-					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
++					       bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0);
+ 
+ 			sna->render.vertex_reloc[0] = 0;
+ 		}
+@@ -2260,17 +2280,29 @@ static int gen3_vertex_finish(struct sna *sna)
+ 		sna->render.vbo = NULL;
+ 
+ 		kgem_bo_destroy(&sna->kgem, bo);
++		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
+ 	}
+ 
++	size = 256*1024;
+ 	sna->render.vertices = NULL;
+-	sna->render.vbo = kgem_create_linear(&sna->kgem,
+-					     256*1024, CREATE_GTT_MAP);
+-	if (sna->render.vbo)
++	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
++	while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) {
++		size /= 2;
++		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
++	}
++	if (sna->render.vbo == NULL)
++		sna->render.vbo = kgem_create_linear(&sna->kgem,
++						     256*1024, CREATE_GTT_MAP);
++	if (sna->render.vbo &&
++	    kgem_check_bo(&sna->kgem, sna->render.vbo, NULL))
+ 		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+ 	if (sna->render.vertices == NULL) {
+-		if (sna->render.vbo)
++		if (sna->render.vbo) {
+ 			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+-		sna->render.vbo = NULL;
++			sna->render.vbo = NULL;
++		}
++		sna->render.vertices = sna->render.vertex_data;
++		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ 		return 0;
+ 	}
+ 	assert(sna->render.vbo->snoop == false);
+@@ -2280,8 +2312,14 @@ static int gen3_vertex_finish(struct sna *sna)
+ 		       sna->render.vertex_data,
+ 		       sizeof(float)*sna->render.vertex_used);
+ 	}
+-	sna->render.vertex_size = 64 * 1024 - 1;
+-	return sna->render.vertex_size - sna->render.vertex_used;
++
++	size = __kgem_bo_size(sna->render.vbo)/4;
++	if (size >= UINT16_MAX)
++		size = UINT16_MAX - 1;
++	assert(size > sna->render.vertex_used);
++
++	sna->render.vertex_size = size;
++	return size - sna->render.vertex_used;
+ }
+ 
+ static void gen3_vertex_close(struct sna *sna)
+@@ -2345,7 +2383,7 @@ static void gen3_vertex_close(struct sna *sna)
+ 	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
+ 	sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ 		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
<Skipped 25553 lines>
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/xorg-driver-video-intel.git/commitdiff/96bae86b9a37f6ed2340946a458a2ee5909ce60e



More information about the pld-cvs-commit mailing list