[packages/xorg-driver-video-intel] - bring in sync with current git to fix crashes with xorg 1.18 - rel 4
baggins
baggins at pld-linux.org
Mon Dec 28 00:22:30 CET 2015
commit 96bae86b9a37f6ed2340946a458a2ee5909ce60e
Author: Jan Rękorajski <baggins at pld-linux.org>
Date: Mon Dec 28 00:21:45 2015 +0100
- bring in sync with current git to fix crashes with xorg 1.18
- rel 4
git.patch | 29487 +++++++++++++++++++++++++++++++++++++++++
xorg-driver-video-intel.spec | 8 +-
2 files changed, 29494 insertions(+), 1 deletion(-)
---
diff --git a/xorg-driver-video-intel.spec b/xorg-driver-video-intel.spec
index 189f299..a6b6fc0 100644
--- a/xorg-driver-video-intel.spec
+++ b/xorg-driver-video-intel.spec
@@ -14,13 +14,14 @@ Summary: X.org video driver for Intel integrated graphics chipsets
Summary(pl.UTF-8): Sterownik obrazu X.org dla zintegrowanych układów graficznych Intela
Name: xorg-driver-video-intel
Version: 2.99.917
-Release: 3
+Release: 4
License: MIT
Group: X11/Applications
Source0: http://xorg.freedesktop.org/releases/individual/driver/xf86-video-intel-%{version}.tar.bz2
# Source0-md5: fa196a66e52c0c624fe5d350af7a5e7b
URL: http://xorg.freedesktop.org/
Patch0: %{name}-xserver_1_8_0.patch
+Patch1: git.patch
BuildRequires: Mesa-libGL-devel
#BuildRequires: autoconf >= 2.63
#BuildRequires: automake >= 1:1.10.2-2
@@ -92,8 +93,13 @@ Wymaga aktywnego Kernel Mode Setting (KMS).
%prep
%setup -q -n xf86-video-intel-%{version}
%patch0 -p1
+%patch1 -p1
%build
+%{__aclocal}
+%{__autoconf}
+%{__autoheader}
+%{__automake}
%configure \
--disable-silent-rules \
%{?with_glamor:--enable-glamor} \
diff --git a/git.patch b/git.patch
new file mode 100644
index 0000000..ec8ea66
--- /dev/null
+++ b/git.patch
@@ -0,0 +1,29487 @@
+diff --git a/Makefile.am b/Makefile.am
+index 418fdc9..853e622 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -25,7 +25,7 @@ SUBDIRS = man libobj xvmc src tools
+ MAINTAINERCLEANFILES = ChangeLog INSTALL
+
+ if HAVE_X11
+-SUBDIRS += test
++SUBDIRS += test benchmarks
+ endif
+
+ .PHONY: ChangeLog INSTALL
+diff --git a/NEWS b/NEWS
+index 604b9cc..0e20033 100644
+--- a/NEWS
++++ b/NEWS
+@@ -21,7 +21,7 @@ should make one more snapshot before an imminent release.
+ Before kernel 3.19, O_NONBLOCK support is broken and so we must avoid
+ reading if we are not expecting an event.
+
+- * Backwards compatibilty fix for fake triple buffering with PRIME and
++ * Backwards compatibility fix for fake triple buffering with PRIME and
+ Xorg-1.15
+ https://bugs.freedesktop.org/show_bug.cgi?id=85144#c12
+
+@@ -51,7 +51,7 @@ should make one more snapshot before an imminent release.
+ Snapshot 2.99.916 (2014-09-08)
+ ==============================
+ Quick update for MST in UXA - we need to hook up the RandR outputs for
+-dynamicaly added connectors.
++dynamically added connectors.
+
+
+ Snapshot 2.99.915 (2014-09-08)
+@@ -503,7 +503,7 @@ release.
+ backlight property is queried whilst the connector is disabled
+ https://bugs.freedesktop.org/show_bug.cgi?id=70406
+
+- * Pad GETCONNECTOR ioctl for compatability between 32/64-bit userspace
++ * Pad GETCONNECTOR ioctl for compatibility between 32/64-bit userspace
+ and kernel
+
+ * Handle long glyph runs correctly
+@@ -523,7 +523,7 @@ snapshot beforehand to push out the bug fixes from the last week.
+
+ * Fix video output using sprites when changing the image size
+
+- * Apply more restrictive tile constaints for 915g class devices
++ * Apply more restrictive tile constraints for 915g class devices
+ https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1232546
+
+ * Ensure all overlapping rectangles are drawn for XRenderFillRectangles
+@@ -1132,7 +1132,7 @@ operation.
+ * Explicitly prevent ring-switching for synchronized rendering to
+ scanouts (for vsync).
+
+- * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusuable)
++ * Clip dirty region to slave pixmaps (otherwise UDL is nigh unusable)
+ https://bugs.freedesktop.org/show_bug.cgi?id=59539
+
+
+@@ -1226,7 +1226,7 @@ Release 2.20.15 (2012-12-03)
+ ============================
+ And lo, enabling more of the common acceleration paths for gen4 revealed
+ another lurking bug - something is wrong with how we prepare Y-tiling
+-surfaces for rendering. For the time being, we can surreptiously disable
++surfaces for rendering. For the time being, we can surreptitiously disable
+ them for gen4 and avoid hitting GPU hangs.
+
+ * Avoid clobbering the render state after failing to convert the
+@@ -1515,7 +1515,7 @@ Release 2.20.5 (2012-08-26)
+ Another silly bug found, another small bugfix release. The goal was for
+ the driver to bind to all Intel devices supported by the kernel.
+ Unfortunately we were too successful and started claiming Pouslbo,
+-Medfield and Cedarview devices which are still encumbered by propietary
++Medfield and Cedarview devices which are still encumbered by proprietary
+ IP and not supported by this driver.
+
+ Bugs fixed since 2.20.4:
+diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
+new file mode 100644
+index 0000000..301c012
+--- /dev/null
++++ b/benchmarks/.gitignore
+@@ -0,0 +1,2 @@
++dri2-swap
++dri3-swap
+diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
+new file mode 100644
+index 0000000..4976e8a
+--- /dev/null
++++ b/benchmarks/Makefile.am
+@@ -0,0 +1,14 @@
++AM_CFLAGS = @CWARNFLAGS@ $(X11_CFLAGS) $(DRM_CFLAGS)
++LDADD = $(X11_LIBS) $(DRM_LIBS) $(CLOCK_GETTIME_LIBS)
++
++check_PROGRAMS =
++
++if DRI2
++check_PROGRAMS += dri2-swap
++endif
++
++if DRI3
++check_PROGRAMS += dri3-swap
++AM_CFLAGS += $(X11_DRI3_CFLAGS)
++LDADD += $(X11_DRI3_LIBS)
++endif
+diff --git a/benchmarks/dri2-swap.c b/benchmarks/dri2-swap.c
+new file mode 100644
+index 0000000..3d9d30a
+--- /dev/null
++++ b/benchmarks/dri2-swap.c
+@@ -0,0 +1,588 @@
++/*
++ * Copyright (c) 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <X11/Xlib.h>
++#include <X11/Xatom.h>
++#include <X11/Xlib-xcb.h>
++#include <X11/Xutil.h>
++#include <X11/Xlibint.h>
++#include <X11/extensions/dpms.h>
++#include <X11/extensions/randr.h>
++#include <X11/extensions/Xcomposite.h>
++#include <X11/extensions/Xdamage.h>
++#include <X11/extensions/Xrandr.h>
++#include <xcb/xcb.h>
++#include <xcb/dri2.h>
++#include <xf86drm.h>
++
++#include <stdio.h>
++#include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <assert.h>
++#include <errno.h>
++#include <setjmp.h>
++#include <signal.h>
++
++#include <X11/Xlibint.h>
++#include <X11/extensions/Xext.h>
++#include <X11/extensions/extutil.h>
++#include <X11/extensions/dri2proto.h>
++#include <X11/extensions/dri2tokens.h>
++#include <X11/extensions/Xfixes.h>
++/* Xext glue for DRI2: extension name, XExtensionInfo handle and the close-display hook. */
++static char dri2ExtensionName[] = DRI2_NAME;
++static XExtensionInfo *dri2Info;
++static XEXT_GENERATE_CLOSE_DISPLAY (DRI2CloseDisplay, dri2Info)
++/* Hook functions are defined further down; declared here so the hook table can name them. */
++static Bool
++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire);
++static Status
++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire);
++static int
++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code);
++/* Only close_display, the two event converters and the error hook are populated. */
++static /* const */ XExtensionHooks dri2ExtensionHooks = {
++ NULL, /* create_gc */
++ NULL, /* copy_gc */
++ NULL, /* flush_gc */
++ NULL, /* free_gc */
++ NULL, /* create_font */
++ NULL, /* free_font */
++ DRI2CloseDisplay, /* close_display */
++ DRI2WireToEvent, /* wire_to_event */
++ DRI2EventToWire, /* event_to_wire */
++ DRI2Error, /* error */
++ NULL, /* error_string */
++};
++/* Defines DRI2FindDisplay(), which the request wrappers below call to get the extension info. */
++static XEXT_GENERATE_FIND_DISPLAY (DRI2FindDisplay,
++ dri2Info,
++ dri2ExtensionName,
++ &dri2ExtensionHooks,
++ 0, NULL)
++/* wire_to_event hook: never converts anything -- every path below returns False. */
++static Bool
++DRI2WireToEvent(Display *dpy, XEvent *event, xEvent *wire)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++ XextCheckExtension(dpy, info, dri2ExtensionName, False);
++ /* Low 7 bits of the wire type, rebased on the extension's first event code. */
++ switch ((wire->u.u.type & 0x7f) - info->codes->first_event) {
++#ifdef X_DRI2SwapBuffers
++ case DRI2_BufferSwapComplete:
++ return False;
++#endif
++#ifdef DRI2_InvalidateBuffers
++ case DRI2_InvalidateBuffers:
++ return False;
++#endif
++ default:
++ /* client doesn't support server event */
++ break;
++ }
++
++ return False;
++}
++
++/* event_to_wire hook. We don't actually support this: it doesn't make sense
++ * for clients to send each other DRI2 events, so `wire` is never written.
++ */
++static Status
++DRI2EventToWire(Display *dpy, XEvent *event, xEvent *wire)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++ XextCheckExtension(dpy, info, dri2ExtensionName, False);
++ /* No event type has a case: everything falls through to the empty default. */
++ switch (event->type) {
++ default:
++ /* client doesn't support server event */
++ break;
++ }
++
++ return Success;
++}
++/* error hook: returns True for the three expected DRI2 errors below, False for anything else. */
++static int
++DRI2Error(Display *display, xError *err, XExtCodes *codes, int *ret_code)
++{
++ if (err->majorCode == codes->major_opcode &&
++ err->errorCode == BadDrawable &&
++ err->minorCode == X_DRI2CopyRegion)
++ return True; /* BadDrawable raised by DRI2CopyRegion */
++
++ /* If the X drawable was destroyed before the GLX drawable, the
++ * DRI2 drawable will be gone by the time we call
++ * DRI2DestroyDrawable. So just ignore BadDrawable here. */
++ if (err->majorCode == codes->major_opcode &&
++ err->errorCode == BadDrawable &&
++ err->minorCode == X_DRI2DestroyDrawable)
++ return True;
++
++ /* If the server is non-local DRI2Connect will raise BadRequest.
++ * Swallow this so that DRI2Connect can signal this in its return code */
++ if (err->majorCode == codes->major_opcode &&
++ err->minorCode == X_DRI2Connect &&
++ err->errorCode == BadRequest) {
++ *ret_code = False; /* the only path that writes *ret_code */
++ return True;
++ }
++
++ return False;
++}
++
++static Bool
++DRI2QueryExtension(Display * dpy, int *eventBase, int *errorBase)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++
++ if (XextHasExtension(info)) {
++ *eventBase = info->codes->first_event;
++ *errorBase = info->codes->first_error;
++ return True;
++ }
++
++ return False;
++}
++/* Send DRI2Connect for `window`; on True, *driverName and *deviceName are NUL-terminated Xmalloc'd strings. */
++static Bool
++DRI2Connect(Display * dpy, XID window, char **driverName, char **deviceName)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++ xDRI2ConnectReply rep;
++ xDRI2ConnectReq *req;
++
++ XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++ LockDisplay(dpy);
++ GetReq(DRI2Connect, req);
++ req->reqType = info->codes->major_opcode;
++ req->dri2ReqType = X_DRI2Connect;
++ req->window = window;
++ req->driverType = DRI2DriverDRI;
++ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
++ UnlockDisplay(dpy);
++ SyncHandle();
++ return False;
++ }
++ /* A reply in which both names are empty is treated as failure. */
++ if (rep.driverNameLength == 0 && rep.deviceNameLength == 0) {
++ UnlockDisplay(dpy);
++ SyncHandle();
++ return False;
++ }
++ /* Driver name: on allocation failure, skip both 4-byte-padded strings still in the reply. */
++ *driverName = Xmalloc(rep.driverNameLength + 1);
++ if (*driverName == NULL) {
++ _XEatData(dpy,
++ ((rep.driverNameLength + 3) & ~3) +
++ ((rep.deviceNameLength + 3) & ~3));
++ UnlockDisplay(dpy);
++ SyncHandle();
++ return False;
++ }
++ _XReadPad(dpy, *driverName, rep.driverNameLength);
++ (*driverName)[rep.driverNameLength] = '\0';
++ /* Device name: on failure the driver name is freed, but *driverName is not reset. */
++ *deviceName = Xmalloc(rep.deviceNameLength + 1);
++ if (*deviceName == NULL) {
++ Xfree(*driverName);
++ _XEatData(dpy, ((rep.deviceNameLength + 3) & ~3));
++ UnlockDisplay(dpy);
++ SyncHandle();
++ return False;
++ }
++ _XReadPad(dpy, *deviceName, rep.deviceNameLength);
++ (*deviceName)[rep.deviceNameLength] = '\0';
++
++ UnlockDisplay(dpy);
++ SyncHandle();
++
++ return True;
++}
++/* Send DRI2Authenticate with `magic`; returns the reply's `authenticated` field, False if no reply. */
++static Bool
++DRI2Authenticate(Display * dpy, XID window, unsigned int magic)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++ xDRI2AuthenticateReq *req;
++ xDRI2AuthenticateReply rep;
++
++ XextCheckExtension(dpy, info, dri2ExtensionName, False);
++
++ LockDisplay(dpy);
++ GetReq(DRI2Authenticate, req);
++ req->reqType = info->codes->major_opcode;
++ req->dri2ReqType = X_DRI2Authenticate;
++ req->window = window;
++ req->magic = magic;
++ /* The display is unlocked on both the failure and the success path. */
++ if (!_XReply(dpy, (xReply *) & rep, 0, xFalse)) {
++ UnlockDisplay(dpy);
++ SyncHandle();
++ return False;
++ }
++
++ UnlockDisplay(dpy);
++ SyncHandle();
++
++ return rep.authenticated;
++}
++/* Queue a DRI2CreateDrawable request for `drawable`; no reply is read. */
++static void
++DRI2CreateDrawable(Display * dpy, XID drawable)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++ xDRI2CreateDrawableReq *req;
++
++ XextSimpleCheckExtension(dpy, info, dri2ExtensionName);
++
++ LockDisplay(dpy);
++ GetReq(DRI2CreateDrawable, req);
++ req->reqType = info->codes->major_opcode;
++ req->dri2ReqType = X_DRI2CreateDrawable;
++ req->drawable = drawable;
++ UnlockDisplay(dpy);
++ SyncHandle();
++}
++/* Queue a DRI2SwapInterval request setting `interval` on `drawable`; no reply is read. */
++static void DRI2SwapInterval(Display *dpy, XID drawable, int interval)
++{
++ XExtDisplayInfo *info = DRI2FindDisplay(dpy);
++ xDRI2SwapIntervalReq *req;
++
++ XextSimpleCheckExtension (dpy, info, dri2ExtensionName);
++
++ LockDisplay(dpy);
++ GetReq(DRI2SwapInterval, req);
++ req->reqType = info->codes->major_opcode;
++ req->dri2ReqType = X_DRI2SwapInterval;
++ req->drawable = drawable;
++ req->interval = interval;
++ UnlockDisplay(dpy);
++ SyncHandle();
++}
++
++static int _x_error_occurred;
++
++static int
++_check_error_handler(Display *display,
++ XErrorEvent *event)
++{
++ fprintf(stderr,
++ "X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n",
++ DisplayString(display),
++ event->serial,
++ event->error_code,
++ event->request_code,
++ event->minor_code);
++ _x_error_occurred++;
++ return False; /* ignored */
++}
++/* Microseconds from *start to *end; the nanosecond difference is truncated to whole microseconds. */
++static double elapsed(const struct timespec *start, const struct timespec *end)
++{
++ const long usec = (end->tv_nsec - start->tv_nsec) / 1000;
++ return 1e6 * (end->tv_sec - start->tv_sec) + usec;
++}
++/* Benchmark: issue swap + get-buffers request pairs on `win` for about 10 seconds, print pairs/second. */
++static void run(Display *dpy, Window win)
++{
++ xcb_connection_t *c = XGetXCBConnection(dpy);
++ struct timespec start, end;
++ int n, completed = 0;
++
++ clock_gettime(CLOCK_MONOTONIC, &start);
++ do {
++ for (n = 0; n < 1000; n++) { /* the clock is only sampled once per 1000 pairs */
++ unsigned int attachments[] = { DRI2BufferBackLeft };
++ unsigned int seq[2];
++
++ seq[0] = xcb_dri2_swap_buffers_unchecked(c, win,
++ 0, 0, 0, 0, 0, 0).sequence;
++
++
++ seq[1] = xcb_dri2_get_buffers_unchecked(c, win,
++ 1, 1, attachments).sequence;
++ /* Neither reply is read: both are discarded by sequence number after the flush. */
++ xcb_flush(c);
++ xcb_discard_reply(c, seq[0]);
++ xcb_discard_reply(c, seq[1]);
++ completed++;
++ }
++ clock_gettime(CLOCK_MONOTONIC, &end);
++ } while (end.tv_sec < start.tv_sec + 10);
++ /* elapsed() is in microseconds, so dividing by 1000000 gives a per-second rate. */
++ printf("%f\n", completed / (elapsed(&start, &end) / 1000000));
++}
++/* Fetch the RandR screen resources for `window`, trying the "Current" query first. */
++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window)
++{
++ XRRScreenResources *current = XRRGetScreenResourcesCurrent(dpy, window);
++
++ if (current != NULL)
++ return current;
++
++ /* The "Current" query returned NULL: fall back to the plain query. */
++ return XRRGetScreenResources(dpy, window);
++}
++/* Return the entry of res->modes whose id equals `id`, or NULL when none matches. */
++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id)
++{
++ XRRModeInfo *mode = res->modes;
++ int left;
++
++ for (left = res->nmode; left > 0; left--, mode++) {
++ if (mode->id == id)
++ return mode;
++ }
++ return NULL;
++}
++/* Open and authenticate the DRM node named by DRI2Connect; returns its fd, or -1 on any failure. */
++static int dri2_open(Display *dpy)
++{
++ drm_auth_t auth;
++ char *driver, *device;
++ int fd;
++
++ if (!DRI2QueryExtension(dpy, &fd, &fd)) /* bases are not needed; fd is just scratch */
++ return -1;
++
++ if (!DRI2Connect(dpy, DefaultRootWindow(dpy), &driver, &device))
++ return -1;
++
++ fd = open(device, O_RDWR);
++ Xfree(driver); /* both names were Xmalloc'd by DRI2Connect and are not needed again */
++ Xfree(device);
++ if (fd < 0)
++ return -1;
++ if (drmIoctl(fd, DRM_IOCTL_GET_MAGIC, &auth) ||
++ !DRI2Authenticate(dpy, DefaultRootWindow(dpy), auth.magic)) {
++ close(fd); /* do not leak the node when the magic handshake fails */
++ return -1;
++ }
++ return fd;
++}
++/* Replace the _NET_WM_STATE property of `win` with the single atom _NET_WM_STATE_FULLSCREEN. */
++static void fullscreen(Display *dpy, Window win)
++{
++ Atom state = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False);
++ Atom property = XInternAtom(dpy, "_NET_WM_STATE", False);
++
++ XChangeProperty(dpy, win, property, XA_ATOM, 32, PropModeReplace,
++ (unsigned char *)&state, 1);
++}
++/* Non-zero when Damage and Composite are both present and Composite reports major > 0 or minor >= 4. */
++static int has_composite(Display *dpy)
++{
++ int event, error;
++ int major, minor;
++
++ if (!XDamageQueryExtension (dpy, &event, &error) ||
++ !XCompositeQueryExtension(dpy, &event, &error))
++ return 0;
++
++ XCompositeQueryVersion(dpy, &major, &minor);
++
++ if (major > 0)
++ return 1;
++ return minor >= 4;
++}
++/* Entry point: parse -d/-v/-w, clear every CRTC, run the swap benchmark, then restore the CRTCs. */
++int main(int argc, char **argv)
++{
++ Display *dpy;
++ Window root, win;
++ XRRScreenResources *res;
++ XRRCrtcInfo **original_crtc;
++ XSetWindowAttributes attr;
++ enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN;
++ enum visible {REDIRECTED, NORMAL } v = NORMAL;
++ enum display { OFF, ON } d = OFF;
++ int width, height;
++ int i, fd;
++ int c;
++ /* -d off|on, -v redirected|normal, -w fullscreen|window|root; any other value aborts. */
++ while ((c = getopt(argc, argv, "d:v:w:")) != -1) {
++ switch (c) {
++ case 'd':
++ if (strcmp(optarg, "off") == 0)
++ d = OFF;
++ else if (strcmp(optarg, "on") == 0)
++ d = ON;
++ else
++ abort();
++ break;
++
++ case 'v':
++ if (strcmp(optarg, "redirected") == 0)
++ v = REDIRECTED;
++ else if (strcmp(optarg, "normal") == 0)
++ v = NORMAL;
++ else
++ abort();
++ break;
++
++ case 'w':
++ if (strcmp(optarg, "fullscreen") == 0)
++ w = FULLSCREEN;
++ else if (strcmp(optarg, "window") == 0)
++ w = WINDOW;
++ else if (strcmp(optarg, "root") == 0)
++ w = ROOT;
++ else
++ abort();
++ break;
++ }
++ }
++ /* Only override_redirect is set; it is the sole field selected by CWOverrideRedirect below. */
++ attr.override_redirect = 1;
++ /* NOTE(review): 77 is presumably the automake "test skipped" exit status -- confirm. */
++ dpy = XOpenDisplay(NULL);
++ if (dpy == NULL)
++ return 77;
++
++ width = DisplayWidth(dpy, DefaultScreen(dpy));
++ height = DisplayHeight(dpy, DefaultScreen(dpy));
++
++ fd = dri2_open(dpy);
++ if (fd < 0)
++ return 77;
++
++ if (DPMSQueryExtension(dpy, &i, &i))
++ DPMSDisable(dpy);
++
++ root = DefaultRootWindow(dpy);
++
++ signal(SIGALRM, SIG_IGN);
++ XSetErrorHandler(_check_error_handler);
++
++ res = NULL;
++ if (XRRQueryVersion(dpy, &i, &i))
++ res = _XRRGetScreenResourcesCurrent(dpy, root);
++ if (res == NULL)
++ return 77;
++
++ if (v == REDIRECTED && !has_composite(dpy))
++ return 77;
++ /* Save every CRTC's configuration for `restore`. NOTE(review): malloc/XRRGetCrtcInfo are unchecked. */
++ original_crtc = malloc(sizeof(XRRCrtcInfo *)*res->ncrtc);
++ for (i = 0; i < res->ncrtc; i++)
++ original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]);
++ /* Set every CRTC to mode None with no outputs. */
++ for (i = 0; i < res->ncrtc; i++)
++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++ 0, 0, None, RR_Rotate_0, NULL, 0);
++
++ DRI2CreateDrawable(dpy, root);
++ DRI2SwapInterval(dpy, root, 0);
++ /* With -d on: use the first output whose first mode is found in `res`, and size windows to it. */
++ if (d != OFF) {
++ for (i = 0; i < res->noutput; i++) {
++ XRROutputInfo *output;
++ XRRModeInfo *mode;
++
++ output = XRRGetOutputInfo(dpy, res, res->outputs[i]);
++ if (output == NULL)
++ continue;
++
++ mode = NULL;
++ if (res->nmode)
++ mode = lookup_mode(res, output->modes[0]);
++ if (mode == NULL)
++ continue;
++
++ XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime,
++ 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1);
++ width = mode->width;
++ height = mode->height;
++ break;
++ }
++ if (i == res->noutput) {
++ _x_error_occurred = 77;
++ goto restore;
++ }
++ }
++ /* Benchmark the root window, a display-sized window, or a half-size window. */
++ if (w == ROOT) {
++ run(dpy, root);
++ } else if (w == FULLSCREEN) {
++ win = XCreateWindow(dpy, root,
++ 0, 0, width, height, 0,
++ DefaultDepth(dpy, DefaultScreen(dpy)),
++ InputOutput,
++ DefaultVisual(dpy, DefaultScreen(dpy)),
++ CWOverrideRedirect, &attr);
++ DRI2CreateDrawable(dpy, win);
++ DRI2SwapInterval(dpy, win, 0);
++ if (v == REDIRECTED) {
++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++ XDamageCreate(dpy, win, XDamageReportRawRectangles);
++ } else
++ fullscreen(dpy, win);
++ XMapWindow(dpy, win);
++ run(dpy, win);
++ } else if (w == WINDOW) {
++ win = XCreateWindow(dpy, root,
++ 0, 0, width/2, height/2, 0,
++ DefaultDepth(dpy, DefaultScreen(dpy)),
++ InputOutput,
++ DefaultVisual(dpy, DefaultScreen(dpy)),
++ CWOverrideRedirect, &attr);
++ DRI2CreateDrawable(dpy, win);
++ DRI2SwapInterval(dpy, win, 0);
++ if (v == REDIRECTED) {
++ XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++ XDamageCreate(dpy, win, XDamageReportRawRectangles);
++ }
++ XMapWindow(dpy, win);
++ run(dpy, win);
++ }
++ /* Clear the CRTCs once more, re-apply the saved configurations, then call DPMSEnable. */
++restore:
++ for (i = 0; i < res->ncrtc; i++)
++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++ 0, 0, None, RR_Rotate_0, NULL, 0);
++
++ for (i = 0; i < res->ncrtc; i++)
++ XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++ original_crtc[i]->x,
++ original_crtc[i]->y,
++ original_crtc[i]->mode,
++ original_crtc[i]->rotation,
++ original_crtc[i]->outputs,
++ original_crtc[i]->noutput);
++
++ if (DPMSQueryExtension(dpy, &i, &i))
++ DPMSEnable(dpy);
++ /* Exit status is the X error count, or the 77 stored above when no output could be used. */
++ XSync(dpy, True);
++ return _x_error_occurred;
++}
+diff --git a/benchmarks/dri3-swap.c b/benchmarks/dri3-swap.c
+new file mode 100644
+index 0000000..4dd423b
+--- /dev/null
++++ b/benchmarks/dri3-swap.c
+@@ -0,0 +1,595 @@
++/*
++ * Copyright (c) 2015 Intel Corporation
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the next
++ * paragraph) shall be included in all copies or substantial portions of the
++ * Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
++ * SOFTWARE.
++ *
++ */
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <X11/Xlib.h>
++#include <X11/Xatom.h>
++#include <X11/Xlib-xcb.h>
++#include <X11/xshmfence.h>
++#include <X11/Xutil.h>
++#include <X11/Xlibint.h>
++#include <X11/extensions/Xcomposite.h>
++#include <X11/extensions/Xdamage.h>
++#include <X11/extensions/dpms.h>
++#include <X11/extensions/randr.h>
++#include <X11/extensions/Xrandr.h>
++#include <xcb/xcb.h>
++#include <xcb/present.h>
++#include <xcb/dri3.h>
++#include <xcb/xfixes.h>
++#include <xf86drm.h>
++#include <i915_drm.h>
++
++#include <stdio.h>
++#include <string.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <assert.h>
++#include <errno.h>
++#include <setjmp.h>
++#include <signal.h>
++/* A fence as filled in by dri3_create_fence(): its X id plus the local xshmfence mapping. */
++struct dri3_fence {
++ XID xid;
++ void *addr;
++};
++/* Error counter bumped by _check_error_handler(); `stamp` is passed to xcb_register_for_special_xge(). */
++static int _x_error_occurred;
++static uint32_t stamp;
++/* Doubly-linked list node, embedded in the objects it links (see struct buffer's `link`). */
++struct list {
++ struct list *next, *prev;
++};
++/* Turn `list` into an empty list: a node whose links both point back at itself. */
++static void list_init(struct list *list)
++{
++ list->prev = list;
++ list->next = list;
++}
++/* Link `entry` in between `prev` and `next`; list_add() passes a head and its current successor. */
++static inline void
++__list_add(struct list *entry,
++ struct list *prev,
++ struct list *next)
++{
++ next->prev = entry;
++ entry->next = next;
++ entry->prev = prev;
++ prev->next = entry;
++}
++/* Insert `entry` directly after `head`, i.e. as the first element of the list. */
++static inline void
++list_add(struct list *entry, struct list *head)
++{
++ __list_add(entry, head, head->next);
++}
++/* Make `prev` and `next` point at each other, bypassing whatever was linked between them. */
++static inline void
++__list_del(struct list *prev, struct list *next)
++{
++ next->prev = prev;
++ prev->next = next;
++}
++/* Unlink `entry` from its neighbours; entry's own next/prev pointers are left untouched. */
++static inline void
++_list_del(struct list *entry)
++{
++ __list_del(entry->prev, entry->next);
++}
++/* Move `list` to the front of `head`, unless it already is the first element. */
++static inline void list_move(struct list *list, struct list *head)
++{
++ if (list->prev == head)
++ return; /* already first */
++
++ _list_del(list);
++ list_add(list, head);
++}
++/* Recover the enclosing object from a pointer to its `member`; `sample` only supplies the offset. */
++#define __container_of(ptr, sample, member) \
++ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample)))
++/* Visit each object linked on `head` through its embedded `member`; ends on returning to `head`. */
++#define list_for_each_entry(pos, head, member) \
++ for (pos = __container_of((head)->next, pos, member); \
++ &pos->member != (head); \
++ pos = __container_of(pos->member.next, pos, member))
++
++static int
++_check_error_handler(Display *display,
++ XErrorEvent *event)
++{
++ printf("X11 error from display %s, serial=%ld, error=%d, req=%d.%d\n",
++ DisplayString(display),
++ event->serial,
++ event->error_code,
++ event->request_code,
++ event->minor_code);
++ _x_error_occurred++;
++ return False; /* ignored */
++}
++/* Create an xshmfence, map it, and register it with the server; 0 on success, -1 on failure. */
++static int dri3_create_fence(Display *dpy, Pixmap pixmap, struct dri3_fence *fence)
++{
++ xcb_connection_t *conn = XGetXCBConnection(dpy);
++ void *mapping;
++ XID id;
++ int shm_fd = xshmfence_alloc_shm();
++
++ if (shm_fd < 0)
++ return -1;
++
++ mapping = xshmfence_map_shm(shm_fd);
++ if (mapping == NULL) {
++ close(shm_fd);
++ return -1;
++ }
++
++ id = xcb_generate_id(conn);
++ xcb_dri3_fence_from_fd(conn, pixmap, id, 0, shm_fd);
++
++ /* *fence is written only once everything above has succeeded. */
++ fence->xid = id;
++ fence->addr = mapping;
++ return 0;
++}
++/* Microseconds from *start to *end; the nanosecond difference is truncated to whole microseconds. */
++static double elapsed(const struct timespec *start, const struct timespec *end)
++{
++ const long usec = (end->tv_nsec - start->tv_nsec) / 1000;
++ return 1e6 * (end->tv_sec - start->tv_sec) + usec;
++}
++/* One entry of the pixmap pool used by run(). */
++struct buffer {
++ struct list link; /* position in run()'s `mru` list */
++ Pixmap pixmap; /* pixmap handed to xcb_present_pixmap() */
++ struct dri3_fence fence; /* awaited and reset before each present */
++ int fd; /* fd from xcb_dri3_buffer_from_pixmap; -1 until set */
++ int busy; /* set when presented, cleared on idle-notify */
++};
++/* Benchmark: present pixmaps from a pool of N_BACK buffers to `win` for ~10 s, print completions/second. */
++static void run(Display *dpy, Window win)
++{
++ xcb_connection_t *c = XGetXCBConnection(dpy);
++ struct timespec start, end;
++#define N_BACK 8
++ struct buffer buffer[N_BACK];
++ struct list mru;
++ Window root;
++ unsigned int width, height;
++ unsigned border, depth;
++ unsigned present_flags = XCB_PRESENT_OPTION_ASYNC;
++ xcb_xfixes_region_t update = 0;
++ int completed = 0;
++ int queued = 0;
++ uint32_t eid;
++ void *Q;
++ int i, n;
++
++ list_init(&mru);
++
++ XGetGeometry(dpy, win,
++ &root, &i, &n, &width, &height, &border, &depth);
++
++ _x_error_occurred = 0;
++ /* Build the pool: one pixmap, one fence and one exported buffer fd per entry. */
++ for (n = 0; n < N_BACK; n++) {
++ xcb_dri3_buffer_from_pixmap_reply_t *reply;
++ int *fds;
++
++ buffer[n].pixmap =
++ XCreatePixmap(dpy, win, width, height, depth);
++ buffer[n].fence.xid = 0;
++ buffer[n].fd = -1;
++ /* NOTE(review): the early returns below do not release what was already created. */
++ if (dri3_create_fence(dpy, win, &buffer[n].fence))
++ return;
++
++ reply = xcb_dri3_buffer_from_pixmap_reply (c,
++ xcb_dri3_buffer_from_pixmap(c, buffer[n].pixmap),
++ NULL);
++ if (reply == NULL)
++ return;
++
++ fds = xcb_dri3_buffer_from_pixmap_reply_fds (c, reply);
++ buffer[n].fd = fds[0];
++ free(reply);
++
++ /* start idle */
++ xshmfence_trigger(buffer[n].fence.addr);
++ buffer[n].busy = 0;
++ list_add(&buffer[n].link, &mru);
++ }
++ /* Select Present idle/complete notifications for `win` and register a special-event queue. */
++ eid = xcb_generate_id(c);
++ xcb_present_select_input(c, eid, win,
++ XCB_PRESENT_EVENT_MASK_IDLE_NOTIFY |
++ XCB_PRESENT_EVENT_MASK_COMPLETE_NOTIFY);
++ Q = xcb_register_for_special_xge(c, &xcb_present_id, eid, &stamp);
++
++ clock_gettime(CLOCK_MONOTONIC, &start);
++ do {
++ for (n = 0; n < 1000; n++) {
++ struct buffer *tmp, *b = NULL;
++ list_for_each_entry(tmp, &mru, link) {
++ if (!tmp->busy) {
++ b = tmp;
++ break;
++ }
++ }
++ while (b == NULL) { /* no idle buffer: wait for events until one is released */
++ xcb_present_generic_event_t *ev;
++
++ ev = (xcb_present_generic_event_t *)
++ xcb_wait_for_special_event(c, Q);
++ if (ev == NULL)
++ abort();
++
++ do {
++ switch (ev->evtype) {
++ case XCB_PRESENT_COMPLETE_NOTIFY:
++ completed++;
++ queued--;
++ break;
++
++ case XCB_PRESENT_EVENT_IDLE_NOTIFY:
++ {
++ xcb_present_idle_notify_event_t *ie = (xcb_present_idle_notify_event_t *)ev;
++ assert(ie->serial < N_BACK);
++ buffer[ie->serial].busy = 0;
++ if (b == NULL)
++ b = &buffer[ie->serial];
++ break;
++ }
++ }
++ free(ev);
++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q)));
++ }
++ /* Claim the buffer, wait on its fence, then present it; the serial sent is the pool index. */
++ b->busy = 1;
++ if (b->fence.xid) {
++ xshmfence_await(b->fence.addr);
++ xshmfence_reset(b->fence.addr);
++ }
++ xcb_present_pixmap(c, win, b->pixmap, b - buffer,
++ 0, /* valid */
++ update, /* update */
++ 0, /* x_off */
++ 0, /* y_off */
++ None,
++ None, /* wait fence */
++ b->fence.xid,
++ present_flags,
++ 0, /* target msc */
++ 0, /* divisor */
++ 0, /* remainder */
++ 0, NULL);
++ list_move(&b->link, &mru);
++ queued++;
++ xcb_flush(c);
++ }
++ clock_gettime(CLOCK_MONOTONIC, &end);
++ } while (end.tv_sec < start.tv_sec + 10);
++ /* Drain: keep reading events until every queued present has reported completion. */
++ while (queued) {
++ xcb_present_generic_event_t *ev;
++
++ ev = (xcb_present_generic_event_t *)
++ xcb_wait_for_special_event(c, Q);
++ if (ev == NULL)
++ abort();
++
++ do {
++ switch (ev->evtype) {
++ case XCB_PRESENT_COMPLETE_NOTIFY:
++ completed++;
++ queued--;
++ break;
++
++ case XCB_PRESENT_EVENT_IDLE_NOTIFY:
++ break;
++ }
++ free(ev);
++ } while ((ev = (xcb_present_generic_event_t *)xcb_poll_for_special_event(c, Q)));
++ }
++ clock_gettime(CLOCK_MONOTONIC, &end);
++ /* `end` was re-sampled after the drain, so the rate covers the drain time too. */
++ printf("%f\n", completed / (elapsed(&start, &end) / 1000000));
++}
++/* Query the Present version; returns 1 if a reply arrived, else prints a note on stderr and returns 0. */
++static int has_present(Display *dpy)
++{
++ xcb_connection_t *c = XGetXCBConnection(dpy);
++ xcb_generic_error_t *error = NULL;
++ void *reply;
++ int supported;
++
++ reply = xcb_present_query_version_reply(c,
++ xcb_present_query_version(c,
++ XCB_PRESENT_MAJOR_VERSION,
++ XCB_PRESENT_MINOR_VERSION),
++ &error);
++ /* Record the outcome first: a pointer's value must not be inspected once it is freed. */
++ supported = reply != NULL;
++ free(reply);
++ free(error);
++
++ if (!supported)
++ fprintf(stderr, "Present not supported on %s\n", DisplayString(dpy));
++ return supported;
++}
++/* Non-zero when Damage and Composite are both present and Composite reports major > 0 or minor >= 4. */
++static int has_composite(Display *dpy)
++{
++ int event, error;
++ int major, minor;
++
++ if (!XDamageQueryExtension (dpy, &event, &error) ||
++ !XCompositeQueryExtension(dpy, &event, &error))
++ return 0;
++
++ XCompositeQueryVersion(dpy, &major, &minor);
++
++ if (major > 0)
++ return 1;
++ return minor >= 4;
++}
++/* Fetch the RandR screen resources for `window`, trying the "Current" query first. */
++static inline XRRScreenResources *_XRRGetScreenResourcesCurrent(Display *dpy, Window window)
++{
++ XRRScreenResources *current = XRRGetScreenResourcesCurrent(dpy, window);
++
++ if (current != NULL)
++ return current;
++
++ /* The "Current" query returned NULL: fall back to the plain query. */
++ return XRRGetScreenResources(dpy, window);
++}
++/* Return the entry of res->modes whose id equals `id`, or NULL when none matches. */
++static XRRModeInfo *lookup_mode(XRRScreenResources *res, int id)
++{
++ XRRModeInfo *mode = res->modes;
++ int left;
++
++ for (left = res->nmode; left > 0; left--, mode++) {
++ if (mode->id == id)
++ return mode;
++ }
++ return NULL;
++}
++/* Replace the _NET_WM_STATE property of `win` with the single atom _NET_WM_STATE_FULLSCREEN. */
++static void fullscreen(Display *dpy, Window win)
++{
++ Atom state = XInternAtom(dpy, "_NET_WM_STATE_FULLSCREEN", False);
++ Atom property = XInternAtom(dpy, "_NET_WM_STATE", False);
++
++ XChangeProperty(dpy, win, property, XA_ATOM, 32, PropModeReplace,
++ (unsigned char *)&state, 1);
++}
++/* Query the DRI3 version: 0 with *major/*minor filled in, or -1 with both left at -1. */
++static int dri3_query_version(Display *dpy, int *major, int *minor)
++{
++ xcb_connection_t *conn = XGetXCBConnection(dpy);
++ xcb_dri3_query_version_cookie_t cookie;
++ xcb_dri3_query_version_reply_t *version;
++ xcb_generic_error_t *error;
++
++ *minor = -1;
++ *major = -1;
++
++ cookie = xcb_dri3_query_version(conn,
++ XCB_DRI3_MAJOR_VERSION,
++ XCB_DRI3_MINOR_VERSION);
++ version = xcb_dri3_query_version_reply(conn, cookie, &error);
++ free(error);
++ if (version == NULL)
++ return -1;
++
++ *major = version->major_version;
++ *minor = version->minor_version;
++ free(version);
++ return 0;
++}
++/* Non-zero when the DRI3 extension is present and its version query yields a major >= 0. */
++static int has_dri3(Display *dpy)
++{
++ xcb_connection_t *conn = XGetXCBConnection(dpy);
++ const xcb_query_extension_reply_t *ext;
++ int major, minor;
++
++ ext = xcb_get_extension_data(conn, &xcb_dri3_id);
++ if (!(ext != NULL && ext->present))
++ return 0;
++
++ if (dri3_query_version(dpy, &major, &minor) < 0)
++ return 0;
++ return major >= 0;
++}
++
++/*
++ * Benchmark entry point.
++ *
++ * Options (an unrecognised value aborts):
++ *   -d off|on                  keep every CRTC disabled, or enable the first usable output
++ *   -v redirected|normal       composite-redirect the test window, or leave it alone
++ *   -w fullscreen|window|root  which drawable is handed to run()
++ *
++ * Returns 77 when a prerequisite is missing (display, Present, DRI3, RandR
++ * screen resources, Composite for "-v redirected"), 1 if the CRTC bookkeeping
++ * cannot be allocated, and otherwise the value of _x_error_occurred once the
++ * original CRTC configuration has been restored.
++ */
++int main(int argc, char **argv)
++{
++	Display *dpy;
++	Window root, win;
++	XRRScreenResources *res;
++	XRRCrtcInfo **original_crtc;
++	XSetWindowAttributes attr;
++	enum window { ROOT, FULLSCREEN, WINDOW } w = FULLSCREEN;
++	enum visible {REDIRECTED, NORMAL } v = NORMAL;
++	enum display { OFF, ON } d = OFF;
++	int width, height;
++	int i;
++
++	while ((i = getopt(argc, argv, "d:v:w:")) != -1) {
++		switch (i) {
++		case 'd':
++			if (strcmp(optarg, "off") == 0)
++				d = OFF;
++			else if (strcmp(optarg, "on") == 0)
++				d = ON;
++			else
++				abort();
++			break;
++
++		case 'v':
++			if (strcmp(optarg, "redirected") == 0)
++				v = REDIRECTED;
++			else if (strcmp(optarg, "normal") == 0)
++				v = NORMAL;
++			else
++				abort();
++			break;
++
++		case 'w':
++			if (strcmp(optarg, "fullscreen") == 0)
++				w = FULLSCREEN;
++			else if (strcmp(optarg, "window") == 0)
++				w = WINDOW;
++			else if (strcmp(optarg, "root") == 0)
++				w = ROOT;
++			else
++				abort();
++			break;
++		}
++	}
++
++	attr.override_redirect = 1;
++
++	dpy = XOpenDisplay(NULL);
++	if (dpy == NULL)
++		return 77;
++
++	width = DisplayWidth(dpy, DefaultScreen(dpy));
++	height = DisplayHeight(dpy, DefaultScreen(dpy));
++
++	if (!has_present(dpy))
++		return 77;
++
++	if (!has_dri3(dpy))
++		return 77;
++
++	/* Keep DPMS from interfering while the benchmark runs; re-enabled below. */
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSDisable(dpy);
++
++	root = DefaultRootWindow(dpy);
++
++	signal(SIGALRM, SIG_IGN);
++	XSetErrorHandler(_check_error_handler);
++
++	res = NULL;
++	if (XRRQueryVersion(dpy, &i, &i))
++		res = _XRRGetScreenResourcesCurrent(dpy, root);
++	if (res == NULL)
++		return 77;
++
++	if (v == REDIRECTED && !has_composite(dpy))
++		return 77;
++
++	/* Remember the current configuration of every CRTC so it can be restored. */
++	original_crtc = malloc(sizeof(*original_crtc) * res->ncrtc);
++	if (original_crtc == NULL && res->ncrtc)
++		return 1;
++	for (i = 0; i < res->ncrtc; i++)
++		original_crtc[i] = XRRGetCrtcInfo(dpy, res, res->crtcs[i]);
++
++	/* Start from a known state: everything off. */
++	for (i = 0; i < res->ncrtc; i++)
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	if (d != OFF) {
++		for (i = 0; i < res->noutput; i++) {
++			XRROutputInfo *output;
++			XRRModeInfo *mode;
++
++			output = XRRGetOutputInfo(dpy, res, res->outputs[i]);
++			if (output == NULL)
++				continue;
++
++			/* Only index modes[0]/crtcs[0] when the output really has them. */
++			mode = NULL;
++			if (res->nmode && output->nmode && output->ncrtc)
++				mode = lookup_mode(res, output->modes[0]);
++			if (mode == NULL) {
++				XRRFreeOutputInfo(output);
++				continue;
++			}
++
++			XRRSetCrtcConfig(dpy, res, output->crtcs[0], CurrentTime,
++					 0, 0, output->modes[0], RR_Rotate_0, &res->outputs[i], 1);
++			width = mode->width;
++			height = mode->height;
++			XRRFreeOutputInfo(output);
++			break;
++		}
++		/* No output could be enabled: report 77 and put things back. */
++		if (i == res->noutput) {
++			_x_error_occurred = 77;
++			goto restore;
++		}
++	}
++
++	if (w == ROOT) {
++		run(dpy, root);
++	} else if (w == FULLSCREEN) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width, height, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		} else
++			fullscreen(dpy, win);
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	} else if (w == WINDOW) {
++		win = XCreateWindow(dpy, root,
++				    0, 0, width/2, height/2, 0,
++				    DefaultDepth(dpy, DefaultScreen(dpy)),
++				    InputOutput,
++				    DefaultVisual(dpy, DefaultScreen(dpy)),
++				    CWOverrideRedirect, &attr);
++		if (v == REDIRECTED) {
++			XCompositeRedirectWindow(dpy, win, CompositeRedirectManual);
++			XDamageCreate(dpy, win, XDamageReportRawRectangles);
++		}
++		XMapWindow(dpy, win);
++		run(dpy, win);
++	}
++
++restore:
++	for (i = 0; i < res->ncrtc; i++)
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 0, 0, None, RR_Rotate_0, NULL, 0);
++
++	for (i = 0; i < res->ncrtc; i++) {
++		/* XRRGetCrtcInfo() may have failed for this CRTC; nothing to restore. */
++		if (original_crtc[i] == NULL)
++			continue;
++
++		XRRSetCrtcConfig(dpy, res, res->crtcs[i], CurrentTime,
++				 original_crtc[i]->x,
++				 original_crtc[i]->y,
++				 original_crtc[i]->mode,
++				 original_crtc[i]->rotation,
++				 original_crtc[i]->outputs,
++				 original_crtc[i]->noutput);
++	}
++
++	if (DPMSQueryExtension(dpy, &i, &i))
++		DPMSEnable(dpy);
++
++	XSync(dpy, True);
++	return _x_error_occurred;
++}
+diff --git a/configure.ac b/configure.ac
+index 61bea43..9aa7d97 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -195,18 +195,24 @@ AC_ARG_ENABLE(udev,
+ [UDEV="$enableval"],
+ [UDEV=auto])
+
++udev_msg=" disabled"
+ if test "x$UDEV" != "xno"; then
+ PKG_CHECK_MODULES(UDEV, [libudev], [udev="yes"], [udev="no"])
++ AC_CHECK_HEADERS([sys/stat.h], [], [udev="no"])
+ if test "x$UDEV" = "xyes" -a "x$udev" != "xyes"; then
+ AC_MSG_ERROR([udev support requested but not found (libudev)])
+ fi
+ if test "x$udev" = "xyes"; then
+ AC_DEFINE(HAVE_UDEV,1,[Enable udev-based monitor hotplug detection])
++ udev_msg=" yes"
++ else
++ udev_msg=" no"
+ fi
+ fi
+
+-PKG_CHECK_MODULES(X11, [x11 xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"])
++PKG_CHECK_MODULES(X11, [x11 x11-xcb xcb-dri2 xcomposite xdamage xrender xrandr xext xfixes cairo cairo-xlib-xrender pixman-1 libpng], [x11="yes"], [x11="no"])
+ AM_CONDITIONAL(HAVE_X11, test "x$x11" = "xyes")
++echo X11_CFLAGS="$X11_CFLAGS" X11_LIBS="$X11_LIBS"
+
+ cpuid="yes"
+ AC_TRY_LINK([
+@@ -270,7 +276,7 @@ if test "x$shm" = "xyes"; then
+ AC_DEFINE([HAVE_MIT_SHM], 1, [Define to 1 if MIT-SHM is available])
+ fi
+
+-PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-present x11-xcb xshmfence x11 xrender xext libdrm], [x11_dri3="yes"], [x11_dri3="no"])
++PKG_CHECK_MODULES(X11_DRI3, [xcb-dri3 xcb-sync xcb-xfixes xcb-present x11-xcb xshmfence x11 xcomposite xdamage xrender xrandr xxf86vm xext libdrm], [x11_dri3="yes"], [x11_dri3="no"])
+ AM_CONDITIONAL(X11_DRI3, test "x$x11_dri3" = "xyes" -a "x$shm" = "xyes")
+ AM_CONDITIONAL(X11_SHM, test "x$shm" = "xyes")
+
+@@ -307,6 +313,8 @@ if test "x$tools" != "xno"; then
+ tools="no"
+ fi
+
++	PKG_CHECK_MODULES(TOOL_CURSOR, [xfixes x11 libpng], [cursor="yes"], [cursor="no"])
++
+ IVO_CFLAGS="$IVO_CFLAGS $extra_cflags"
+ fi
+ if test "x$tools" != "xno"; then
+@@ -315,6 +323,7 @@ fi
+ AC_MSG_CHECKING([whether to build additional tools])
+ AC_MSG_RESULT([$tools])
+ AM_CONDITIONAL(BUILD_TOOLS, test "x$tools" != "xno")
++AM_CONDITIONAL(BUILD_TOOL_CURSOR, test "x$cursor" = "xyes")
+
+ # Define a configure option for an alternate module directory
+ AC_ARG_WITH(xorg-module-dir,
+@@ -339,10 +348,20 @@ AC_ARG_ENABLE(dri2,
+ [DRI2=$enableval],
+ [DRI2=yes])
+ AC_ARG_ENABLE(dri3,
+- AS_HELP_STRING([--enable-dri3],
+- [Enable DRI3 support [[default=no]]]),
++ AS_HELP_STRING([--disable-dri3],
++ [Disable DRI3 support [[default=yes]]]),
+ [DRI3=$enableval],
+- [DRI3=no])
++ [DRI3=yes])
++AC_ARG_WITH(default-dri,
++ AS_HELP_STRING([--with-default-dri],
++ [Select the default maximum DRI level [default 2]]),
++ [DRI_DEFAULT=$withval],
++ [DRI_DEFAULT=2])
++if test "x$DRI_DEFAULT" = "x0"; then
++ AC_DEFINE(DEFAULT_DRI_LEVEL, 0,[Default DRI level])
++else
++ AC_DEFINE(DEFAULT_DRI_LEVEL, ~0, [Default DRI level])
++fi
+
+ AC_ARG_ENABLE(xvmc, AS_HELP_STRING([--disable-xvmc],
+ [Disable XvMC support [[default=yes]]]),
+@@ -375,14 +394,12 @@ AC_ARG_ENABLE(ums-only,
+ required_xorg_server_version=1.6
+ required_pixman_version=0.16
+
+-if pkg-config --exists 'pixman-1 >= 0.27.1'; then
+- AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])
+-fi
+-
+-if pkg-config --exists 'pixman-1 >= 0.24.0'; then
+- AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation])
+-fi
+-
++PKG_CHECK_EXISTS([pixman-1 >= 0.24.0],
++		  [AC_DEFINE([HAS_PIXMAN_TRIANGLES], 1, [Enable pixman triangle rasterisation])],
++		  [])
++PKG_CHECK_EXISTS([pixman-1 >= 0.27.1],
++ [AC_DEFINE([HAS_PIXMAN_GLYPHS], 1, [Enable pixman glyph cache])],
++ [])
+ # Store the list of server defined optional extensions in REQUIRED_MODULES
+ XORG_DRIVER_CHECK_EXT(RANDR, randrproto)
+ XORG_DRIVER_CHECK_EXT(RENDER, renderproto)
+@@ -398,24 +415,25 @@ AC_ARG_ENABLE(sna,
+ [SNA="$enableval"],
+ [SNA=auto])
+
++AC_CHECK_HEADERS([dev/wscons/wsconsio.h])
++AC_FUNC_ALLOCA
++AC_HEADER_MAJOR
++
+ if test "x$SNA" != "xno"; then
+ AC_DEFINE(USE_SNA, 1, [Enable SNA support])
+ AC_CHECK_HEADERS([sys/sysinfo.h], AC_CHECK_MEMBERS([struct sysinfo.totalram], [], [], [[#include <sys/sysinfo.h>]]))
+ fi
+
+ uxa_requires_libdrm=2.4.52
++uxa_requires_pixman=0.24.0
++
+ AC_ARG_ENABLE(uxa,
+ AS_HELP_STRING([--enable-uxa],
+ [Enable Unified Acceleration Architecture (UXA) [default=auto]]),
+ [UXA="$enableval"],
+ [UXA=auto])
+ if test "x$UXA" = "xauto"; then
+- if ! pkg-config --exists "libdrm_intel >= $uxa_requires_libdrm"; then
+- UXA=no
+- fi
+- if ! pkg-config --exists 'pixman-1 >= 0.24.0'; then
+- UXA=no
+- fi
++ PKG_CHECK_EXISTS([libdrm_intel >= $uxa_requires_libdrm pixman-1 >= $uxa_requires_pixman], [], [UXA=no])
+ fi
+ if test "x$UXA" != "xno"; then
+ AC_DEFINE(USE_UXA, 1, [Enable UXA support])
+@@ -426,6 +444,8 @@ fi
+
+ PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_server_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES])
+ ABI_VERSION=`$PKG_CONFIG --variable=abi_videodrv xorg-server`
++XSERVER_VERSION=`$PKG_CONFIG --modversion xorg-server`
++PIXMAN_VERSION=`$PKG_CONFIG --modversion pixman-1`
+
+ if test "x$ONLY_UMS" = "xyes"; then
+ UMS="yes"
+@@ -519,7 +539,12 @@ AC_MSG_RESULT([$have_dri1])
+ AM_CONDITIONAL(DRI1, test "x$have_dri1" != "xno")
+ if test "x$have_dri1" != "xno"; then
+ AC_DEFINE(HAVE_DRI1,1,[Enable DRI1 driver support])
+- dri_msg="$dri_msg DRI1"
++ str="DRI1"
++ if test "x$DRI_DEFAULT" = "x1"; then
++ AC_DEFINE(DEFAULT_DRI_LEVEL,1,[Default DRI level])
++ str="*$str"
++ fi
++ dri_msg="$dri_msg $str"
+ else
+ DRI1_CFLAGS=""
+ DRI1_LIBS=""
+@@ -576,7 +601,12 @@ AM_CONDITIONAL(DRI2, test "x$have_dri2" != "xno")
+ AC_MSG_RESULT([$have_dri2])
+ if test "x$have_dri2" != "xno"; then
+ AC_DEFINE(HAVE_DRI2,1,[Enable DRI2 driver support])
+- dri_msg="$dri_msg DRI2"
++ str="DRI2"
++ if test "x$DRI_DEFAULT" = "x2"; then
++ AC_DEFINE(DEFAULT_DRI_LEVEL,2,[Default DRI level])
++ str="*$str"
++ fi
++ dri_msg="$dri_msg $str"
+ else
+ if test "x$DRI" = "xyes" -a "x$DRI2" != "xno" -a "x$KMS" = "xyes"; then
+ AC_MSG_ERROR([DRI2 requested but prerequisites not found])
+@@ -591,13 +621,21 @@ AM_CONDITIONAL(DRI3, test "x$have_dri3" != "xno")
+ AC_MSG_RESULT([$have_dri3])
+ if test "x$have_dri3" != "xno"; then
+ AC_DEFINE(HAVE_DRI3,1,[Enable DRI3 driver support])
+- dri_msg="$dri_msg DRI3"
++ str="DRI3"
++ if test "x$DRI_DEFAULT" = "x3"; then
++ AC_DEFINE(DEFAULT_DRI_LEVEL,3,[Default DRI level])
++ str="*$str"
++ fi
++ dri_msg="$dri_msg $str"
+ else
+ if test "x$DRI" = "xyes" -a "x$DRI3" != "xno" -a "x$KMS" = "xyes"; then
+ AC_MSG_ERROR([DRI3 requested but prerequisites not found])
+ fi
+ fi
+
++AC_MSG_CHECKING([default DRI support])
++AC_MSG_RESULT([$DRI_DEFAULT])
++
+ AC_CHECK_HEADERS([X11/extensions/dpmsconst.h])
+
+ PRESENT="no"
+@@ -711,27 +749,6 @@ if test "x$TEARFREE" = "xyes"; then
+ xp_msg="$xp_msg TearFree"
+ fi
+
+-AC_ARG_ENABLE(rendernode,
+- AS_HELP_STRING([--enable-rendernode],
+- [Enable use of render nodes (experimental) [default=no]]),
+- [RENDERNODE="$enableval"],
+- [RENDERNODE="no"])
+-AM_CONDITIONAL(USE_RENDERNODE, test "x$RENDERNODE" = "xyes")
+-if test "x$RENDERNODE" = "xyes"; then
+- AC_DEFINE(USE_RENDERNODE,1,[Assume "rendernode" support])
+- xp_msg="$xp_msg rendernode"
+-fi
+-
+-AC_ARG_ENABLE(wc-mmap,
+- AS_HELP_STRING([--enable-wc-mmap],
+- [Enable use of WriteCombining mmaps [default=no]]),
+- [WC_MMAP="$enableval"],
+- [WC_MMAP="no"])
+-if test "x$WC_MMAP" = "xyes"; then
+- AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps])
+- xp_msg="$xp_msg mmap(wc)"
+-fi
+-
+ AC_ARG_ENABLE(create2,
+ AS_HELP_STRING([--enable-create2],
+ [Enable use of create2 ioctl (experimental) [default=no]]),
+@@ -848,6 +865,7 @@ AC_CONFIG_FILES([
+ xvmc/shader/mc/Makefile
+ xvmc/shader/vld/Makefile
+ test/Makefile
++ benchmarks/Makefile
+ tools/Makefile
+ tools/org.x.xf86-video-intel.backlight-helper.policy
+ ])
+@@ -855,7 +873,7 @@ AC_OUTPUT
+
+ echo ""
+ echo ""
+-test -e `pwd $0`/README && cat `pwd $0`/README
++cat $srcdir/README
+
+ accel_msg=""
+ if test "x$SNA" != "xno"; then
+@@ -895,13 +913,15 @@ fi
+
+ echo ""
+ echo "AC_PACKAGE_STRING will be compiled with:"
+-echo " Xorg Video ABI version: $ABI_VERSION"
++echo " Xorg Video ABI version: $ABI_VERSION (xorg-server-$XSERVER_VERSION)"
++echo " pixman version: pixman-1-$PIXMAN_VERSION"
+ echo " Acceleration backends:$accel_msg"
+ echo " Additional debugging support?$debug_msg"
+ echo " Support for Kernel Mode Setting? $KMS"
+ echo " Support for legacy User Mode Setting (for i810)? $UMS"
+ echo " Support for Direct Rendering Infrastructure:$dri_msg"
+ echo " Support for Xv motion compensation (XvMC and libXvMC):$xvmc_msg"
++echo " Support for display hotplug notifications (udev):$udev_msg"
+ echo " Build additional tools and utilities?$tools_msg"
+ if test -n "$xp_msg"; then
+ echo " Experimental support:$xp_msg"
+diff --git a/libobj/alloca.c b/libobj/alloca.c
+new file mode 100644
+index 0000000..883e1e9
+--- /dev/null
++++ b/libobj/alloca.c
+@@ -0,0 +1,4 @@
++/*
++ * Stub alloca(): ignores sz and always returns NULL.
++ * NOTE(review): no header is included in this file as shown, so size_t is
++ * undeclared here; presumably this is only the AC_FUNC_ALLOCA fallback
++ * object -- confirm it is ever compiled and that callers tolerate NULL.
++ */
++void *alloca(size_t sz)
++{
++ return NULL;
++}
+diff --git a/man/intel.man b/man/intel.man
+index 1751520..8da496e 100644
+--- a/man/intel.man
++++ b/man/intel.man
+@@ -112,8 +112,8 @@ The default is 8192 if AGP allocable memory is < 128 MB, 16384 if < 192 MB,
+ 24576 if higher. DRI require at least a value of 16384. Higher values may give
+ better 3D performance, at expense of available system memory.
+ .TP
+-.BI "Option \*qNoAccel\*q \*q" boolean \*q
+-Disable or enable acceleration.
++.BI "Option \*qAccel\*q \*q" boolean \*q
++Enable or disable acceleration.
+ .IP
+ Default: acceleration is enabled.
+
+@@ -122,8 +122,8 @@ The following driver
+ .B Options
+ are supported for the 830M and later chipsets:
+ .TP
+-.BI "Option \*qNoAccel\*q \*q" boolean \*q
+-Disable or enable acceleration.
++.BI "Option \*qAccel\*q \*q" boolean \*q
++Enable or disable acceleration.
+ .IP
+ Default: acceleration is enabled.
+ .TP
+@@ -201,6 +201,16 @@ that choice by specifying the entry under /sys/class/backlight to use.
+ .IP
+ Default: Automatic selection.
+ .TP
++.BI "Option \*qCustomEDID\*q \*q" string \*q
++Override the probed EDID on particular outputs. Sometimes the manufacturer
++supplied EDID is corrupt or lacking a few usable modes and supplying a
++corrected EDID may be easier than specifying every modeline. This option
++allows you to specify, per output, a file to load the EDID from. The format
++is a comma separated string of output:path pairs, e.g.
++DP1:/path/to/dp1.edid,DP2:/path/to/dp2.edid
++.IP
++Default: No override, use manufacturer supplied EDIDs.
++.TP
+ .BI "Option \*qFallbackDebug\*q \*q" boolean \*q
+ Enable printing of debugging information on acceleration fallbacks to the
+ server log.
+@@ -225,6 +235,15 @@ i.e. perform synchronous rendering.
+ .IP
+ Default: Disabled
+ .TP
++.BI "Option \*qHWRotation\*q \*q" boolean \*q
++Override the use of native hardware rotation and force the use of software
++rotation (GPU accelerated where possible). On some platforms the hardware
++can scanout directly into a rotated output bypassing the intermediate rendering
++and extra allocations required for software implemented rotation (i.e. native
++rotation uses less resources, is quicker and uses less power). This allows you
++to disable the native rotation in case of errors.
++.IP
++Default: Enabled (use hardware rotation)
+ .TP
+ .BI "Option \*qVSync\*q \*q" boolean \*q
+ This option controls the use of commands to synchronise rendering with the
+@@ -324,13 +343,29 @@ Default: 0
+ .BI "Option \*qZaphodHeads\*q \*q" string \*q
+ .IP
+ Specify the randr output(s) to use with zaphod mode for a particular driver
+-instance. If you this option you must use it with all instances of the
+-driver
++instance. If you set this option you must use it with all instances of the
++driver. By default, each head is assigned only one CRTC (which limits
++using multiple outputs with that head to cloned mode). CRTCs can be manually
++assigned to individual heads by preceding the output names with a comma
++delimited list of pipe numbers followed by a colon. Note that different pipes
++may be limited in their functionality and some outputs may only work with
++different pipes.
+ .br
+ For example:
++
++.RS
+ .B
+ Option \*qZaphodHeads\*q \*qLVDS1,VGA1\*q
+-will assign xrandr outputs LVDS1 and VGA0 to this instance of the driver.
++
++will assign xrandr outputs LVDS1 and VGA1 to this instance of the driver.
++.RE
++
++.RS
++.B
++Option \*qZaphodHeads\*q \*q0,2:HDMI1,DP2\*q
++
++will assign xrandr outputs HDMI1 and DP2 and CRTCs 0 and 2 to this instance of the driver.
++.RE
+
+ .SH OUTPUT CONFIGURATION
+ On 830M and better chipsets, the driver supports runtime configuration of
+@@ -431,11 +466,11 @@ First DVI SDVO output
+ Second DVI SDVO output
+
+ .SS "TMDS-1", "TMDS-2", "HDMI-1", "HDMI-2"
+-DVI/HDMI outputs. Avaliable common properties include:
++DVI/HDMI outputs. Available common properties include:
+ .TP
+ \fBBROADCAST_RGB\fP - method used to set RGB color range
+ Adjusting this property allows you to set RGB color range on each
+-channel in order to match HDTV requirment(default 0 for full
++channel in order to match HDTV requirement (default 0 for full
+ range). Setting 1 means RGB color range is 16-235, 0 means RGB color
+ range is 0-255 on each channel. (Full range is 0-255, not 16-235)
+
+diff --git a/src/backlight.c b/src/backlight.c
+index 9f23986..d020a7c 100644
+--- a/src/backlight.c
++++ b/src/backlight.c
+@@ -34,6 +34,12 @@
+ #include <sys/stat.h>
+ #include <sys/ioctl.h>
+
++#if MAJOR_IN_MKDEV
++#include <sys/mkdev.h>
++#elif MAJOR_IN_SYSMACROS
++#include <sys/sysmacros.h>
++#endif
++
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+@@ -84,7 +90,7 @@ void backlight_init(struct backlight *b)
+ b->has_power = 0;
+ }
+
+-#ifdef __OpenBSD__
++#ifdef HAVE_DEV_WSCONS_WSCONSIO_H
+
+ #include <dev/wscons/wsconsio.h>
+ #include <xf86Priv.h>
+@@ -146,12 +152,9 @@ int backlight_open(struct backlight *b, char *iface)
+ return param.curval;
+ }
+
+-enum backlight_type backlight_exists(const char *iface)
++int backlight_exists(const char *iface)
+ {
+- if (iface != NULL)
+- return BL_NONE;
+-
+- return BL_PLATFORM;
++ return iface == NULL;
+ }
+
+ int backlight_on(struct backlight *b)
+@@ -244,10 +247,10 @@ static const char *known_interfaces[] = {
+ "intel_backlight",
+ };
+
+-static enum backlight_type __backlight_type(const char *iface)
++static int __backlight_type(const char *iface)
+ {
+ char buf[1024];
+- int fd, v;
++ int fd, v, i;
+
+ v = -1;
+ fd = __backlight_open(iface, "type", O_RDONLY);
+@@ -261,39 +264,41 @@ static enum backlight_type __backlight_type(const char *iface)
+ buf[v] = '\0';
+
+ if (strcmp(buf, "raw") == 0)
+- v = BL_RAW;
++ v = BL_RAW << 8;
+ else if (strcmp(buf, "platform") == 0)
+- v = BL_PLATFORM;
++ v = BL_PLATFORM << 8;
+ else if (strcmp(buf, "firmware") == 0)
+- v = BL_FIRMWARE;
++ v = BL_FIRMWARE << 8;
+ else
+- v = BL_NAMED;
++ v = BL_NAMED << 8;
+ } else
+- v = BL_NAMED;
++ v = BL_NAMED << 8;
+
+- if (v == BL_NAMED) {
+- int i;
+- for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) {
+- if (strcmp(iface, known_interfaces[i]) == 0)
+- break;
+- }
+- v += i;
++ for (i = 0; i < ARRAY_SIZE(known_interfaces); i++) {
++ if (strcmp(iface, known_interfaces[i]) == 0)
++ break;
+ }
++ v += i;
+
+ return v;
+ }
+
+-enum backlight_type backlight_exists(const char *iface)
++static int __backlight_exists(const char *iface)
+ {
+ if (__backlight_read(iface, "brightness") < 0)
+- return BL_NONE;
++ return -1;
+
+ if (__backlight_read(iface, "max_brightness") <= 0)
+- return BL_NONE;
++ return -1;
+
+ return __backlight_type(iface);
+ }
+
++/* Boolean wrapper: 1 unless __backlight_exists() reports -1 for iface. */
++int backlight_exists(const char *iface)
++{
++	const int probe = __backlight_exists(iface);
++
++	return probe == -1 ? 0 : 1;
++}
++
+ static int __backlight_init(struct backlight *b, char *iface, int fd)
+ {
+ b->fd = fd_move_cloexec(fd_set_nonblock(fd));
+@@ -399,7 +404,10 @@ __backlight_find(void)
+ continue;
+
+ /* Fallback to priority list of known iface for old kernels */
+- v = backlight_exists(de->d_name);
++ v = __backlight_exists(de->d_name);
++ if (v < 0)
++ continue;
++
+ if (v < best_type) {
+ char *copy = strdup(de->d_name);
+ if (copy) {
+@@ -416,14 +424,17 @@ __backlight_find(void)
+
+ int backlight_open(struct backlight *b, char *iface)
+ {
+- int level;
++ int level, type;
+
+ if (iface == NULL)
+ iface = __backlight_find();
+ if (iface == NULL)
+ goto err;
+
+- b->type = __backlight_type(iface);
++ type = __backlight_type(iface);
++ if (type < 0)
++ goto err;
++ b->type = type >> 8;
+
+ b->max = __backlight_read(iface, "max_brightness");
+ if (b->max <= 0)
+@@ -517,7 +528,7 @@ void backlight_disable(struct backlight *b)
+ void backlight_close(struct backlight *b)
+ {
+ backlight_disable(b);
+- if (b->pid)
++ if (b->pid > 0)
+ waitpid(b->pid, NULL, 0);
+ }
+
+@@ -543,7 +554,10 @@ char *backlight_find_for_device(struct pci_device *pci)
+ if (*de->d_name == '.')
+ continue;
+
+- v = backlight_exists(de->d_name);
++ v = __backlight_exists(de->d_name);
++ if (v < 0)
++ continue;
++
+ if (v < best_type) {
+ char *copy = strdup(de->d_name);
+ if (copy) {
+diff --git a/src/backlight.h b/src/backlight.h
+index bb0e28b..ba17755 100644
+--- a/src/backlight.h
++++ b/src/backlight.h
+@@ -43,7 +43,7 @@ struct backlight {
+ int pid, fd;
+ };
+
+-enum backlight_type backlight_exists(const char *iface);
++int backlight_exists(const char *iface);
+
+ void backlight_init(struct backlight *backlight);
+ int backlight_open(struct backlight *backlight, char *iface);
+diff --git a/src/compat-api.h b/src/compat-api.h
+index d09e1fb..293e9d7 100644
+--- a/src/compat-api.h
++++ b/src/compat-api.h
+@@ -39,7 +39,13 @@
+
+ #ifndef XF86_HAS_SCRN_CONV
+ #define xf86ScreenToScrn(s) xf86Screens[(s)->myNum]
++#if XORG_VERSION_CURRENT < XORG_VERSION_NUMERIC(1,1,0,0,0)
+ #define xf86ScrnToScreen(s) screenInfo.screens[(s)->scrnIndex]
++#else
++#define xf86ScrnToScreen(s) ((s)->pScreen)
++#endif
++#else
++#define xf86ScrnToScreen(s) ((s)->pScreen)
+ #endif
+
+ #ifndef XF86_SCRN_INTERFACE
+@@ -131,6 +137,17 @@ region_rects(const RegionRec *r)
+ return r->data ? (const BoxRec *)(r->data + 1) : &r->extents;
+ }
+
++/*
++ * Store in *s the first box of region r and in *e the position one past the
++ * last: the region's box array when r->data is set, otherwise the single
++ * extents box.
++ */
++inline static void
++region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e)
++{
++	int count;
++
++	if (r->data) {
++		*s = region_boxptr(r);
++		count = r->data->numRects;
++	} else {
++		*s = &r->extents;
++		count = 1;
++	}
++
++	*e = *s + count;
++}
++
+ #ifndef INCLUDE_LEGACY_REGION_DEFINES
+ #define RegionCreate(r, s) REGION_CREATE(NULL, r, s)
+ #define RegionBreak(r) REGION_BREAK(NULL, r)
+@@ -223,4 +240,14 @@ static inline void FreePixmap(PixmapPtr pixmap)
+ dstx, dsty)
+ #endif
+
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,12,99,901,0)
++#define isGPU(S) (S)->is_gpu
++#else
++#define isGPU(S) 0
++#endif
++
++#endif
++
++#if HAS_DIRTYTRACKING_ROTATION
++#define PixmapSyncDirtyHelper(d, dd) PixmapSyncDirtyHelper(d)
+ #endif
+diff --git a/src/i915_pciids.h b/src/i915_pciids.h
+index 180ad0e..f1a113e 100644
+--- a/src/i915_pciids.h
++++ b/src/i915_pciids.h
+@@ -208,40 +208,41 @@
+ #define INTEL_VLV_D_IDS(info) \
+ INTEL_VGA_DEVICE(0x0155, info)
+
+-#define _INTEL_BDW_M(gt, id, info) \
+- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info)
+-#define _INTEL_BDW_D(gt, id, info) \
+- INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info)
+-
+-#define _INTEL_BDW_M_IDS(gt, info) \
+- _INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \
+- _INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \
+- _INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \
+- _INTEL_BDW_M(gt, 0x160E, info) /* ULX */
+-
+-#define _INTEL_BDW_D_IDS(gt, info) \
+- _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \
+- _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */
+-
+-#define INTEL_BDW_GT12M_IDS(info) \
+- _INTEL_BDW_M_IDS(1, info), \
+- _INTEL_BDW_M_IDS(2, info)
++#define INTEL_BDW_GT12M_IDS(info) \
++ INTEL_VGA_DEVICE(0x1602, info), /* GT1 ULT */ \
++ INTEL_VGA_DEVICE(0x1606, info), /* GT1 ULT */ \
++ INTEL_VGA_DEVICE(0x160B, info), /* GT1 Iris */ \
++ INTEL_VGA_DEVICE(0x160E, info), /* GT1 ULX */ \
++ INTEL_VGA_DEVICE(0x1612, info), /* GT2 Halo */ \
++ INTEL_VGA_DEVICE(0x1616, info), /* GT2 ULT */ \
++ INTEL_VGA_DEVICE(0x161B, info), /* GT2 ULT */ \
++ INTEL_VGA_DEVICE(0x161E, info) /* GT2 ULX */
+
+ #define INTEL_BDW_GT12D_IDS(info) \
+- _INTEL_BDW_D_IDS(1, info), \
+- _INTEL_BDW_D_IDS(2, info)
++ INTEL_VGA_DEVICE(0x160A, info), /* GT1 Server */ \
++ INTEL_VGA_DEVICE(0x160D, info), /* GT1 Workstation */ \
++ INTEL_VGA_DEVICE(0x161A, info), /* GT2 Server */ \
++ INTEL_VGA_DEVICE(0x161D, info) /* GT2 Workstation */
+
+ #define INTEL_BDW_GT3M_IDS(info) \
+- _INTEL_BDW_M_IDS(3, info)
++ INTEL_VGA_DEVICE(0x1622, info), /* ULT */ \
++ INTEL_VGA_DEVICE(0x1626, info), /* ULT */ \
++ INTEL_VGA_DEVICE(0x162B, info), /* Iris */ \
++ INTEL_VGA_DEVICE(0x162E, info) /* ULX */
+
+ #define INTEL_BDW_GT3D_IDS(info) \
+- _INTEL_BDW_D_IDS(3, info)
++ INTEL_VGA_DEVICE(0x162A, info), /* Server */ \
++ INTEL_VGA_DEVICE(0x162D, info) /* Workstation */
+
+ #define INTEL_BDW_RSVDM_IDS(info) \
+- _INTEL_BDW_M_IDS(4, info)
++ INTEL_VGA_DEVICE(0x1632, info), /* ULT */ \
++ INTEL_VGA_DEVICE(0x1636, info), /* ULT */ \
++ INTEL_VGA_DEVICE(0x163B, info), /* Iris */ \
++ INTEL_VGA_DEVICE(0x163E, info) /* ULX */
+
+ #define INTEL_BDW_RSVDD_IDS(info) \
+- _INTEL_BDW_D_IDS(4, info)
++ INTEL_VGA_DEVICE(0x163A, info), /* Server */ \
++ INTEL_VGA_DEVICE(0x163D, info) /* Workstation */
+
+ #define INTEL_BDW_M_IDS(info) \
+ INTEL_BDW_GT12M_IDS(info), \
+@@ -259,21 +260,71 @@
+ INTEL_VGA_DEVICE(0x22b2, info), \
+ INTEL_VGA_DEVICE(0x22b3, info)
+
+-#define INTEL_SKL_IDS(info) \
+- INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \
++#define INTEL_SKL_GT1_IDS(info) \
+ INTEL_VGA_DEVICE(0x1906, info), /* ULT GT1 */ \
+- INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \
+- INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \
+ INTEL_VGA_DEVICE(0x190E, info), /* ULX GT1 */ \
++ INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \
++ INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \
++ INTEL_VGA_DEVICE(0x190A, info) /* SRV GT1 */
++
++#define INTEL_SKL_GT2_IDS(info) \
++ INTEL_VGA_DEVICE(0x1916, info), /* ULT GT2 */ \
++ INTEL_VGA_DEVICE(0x1921, info), /* ULT GT2F */ \
+ INTEL_VGA_DEVICE(0x191E, info), /* ULX GT2 */ \
+ INTEL_VGA_DEVICE(0x1912, info), /* DT GT2 */ \
+- INTEL_VGA_DEVICE(0x1902, info), /* DT GT1 */ \
+ INTEL_VGA_DEVICE(0x191B, info), /* Halo GT2 */ \
+- INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \
+- INTEL_VGA_DEVICE(0x190B, info), /* Halo GT1 */ \
+ INTEL_VGA_DEVICE(0x191A, info), /* SRV GT2 */ \
+- INTEL_VGA_DEVICE(0x192A, info), /* SRV GT3 */ \
+- INTEL_VGA_DEVICE(0x190A, info), /* SRV GT1 */ \
+ INTEL_VGA_DEVICE(0x191D, info) /* WKS GT2 */
+
++#define INTEL_SKL_GT3_IDS(info) \
++	INTEL_VGA_DEVICE(0x1926, info), /* ULT GT3 */ \
++	INTEL_VGA_DEVICE(0x192B, info), /* Halo GT3 */ \
++	INTEL_VGA_DEVICE(0x192A, info)  /* SRV GT3 */
++
++#define INTEL_SKL_IDS(info) \
++ INTEL_SKL_GT1_IDS(info), \
++ INTEL_SKL_GT2_IDS(info), \
++ INTEL_SKL_GT3_IDS(info)
++
++#define INTEL_BXT_IDS(info) \
++ INTEL_VGA_DEVICE(0x0A84, info), \
++ INTEL_VGA_DEVICE(0x1A84, info), \
++ INTEL_VGA_DEVICE(0x5A84, info)
++
++#define INTEL_KBL_GT1_IDS(info) \
++ INTEL_VGA_DEVICE(0x5913, info), /* ULT GT1.5 */ \
++ INTEL_VGA_DEVICE(0x5915, info), /* ULX GT1.5 */ \
++ INTEL_VGA_DEVICE(0x5917, info), /* DT GT1.5 */ \
++ INTEL_VGA_DEVICE(0x5906, info), /* ULT GT1 */ \
++ INTEL_VGA_DEVICE(0x590E, info), /* ULX GT1 */ \
++ INTEL_VGA_DEVICE(0x5902, info), /* DT GT1 */ \
++ INTEL_VGA_DEVICE(0x590B, info), /* Halo GT1 */ \
++ INTEL_VGA_DEVICE(0x590A, info) /* SRV GT1 */
++
++#define INTEL_KBL_GT2_IDS(info) \
++ INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
++ INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
++ INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
++ INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \
++ INTEL_VGA_DEVICE(0x591B, info), /* Halo GT2 */ \
++ INTEL_VGA_DEVICE(0x591A, info), /* SRV GT2 */ \
++ INTEL_VGA_DEVICE(0x591D, info) /* WKS GT2 */
++
++#define INTEL_KBL_GT3_IDS(info) \
++ INTEL_VGA_DEVICE(0x5926, info), /* ULT GT3 */ \
++ INTEL_VGA_DEVICE(0x592B, info), /* Halo GT3 */ \
++ INTEL_VGA_DEVICE(0x592A, info) /* SRV GT3 */
++
++#define INTEL_KBL_GT4_IDS(info) \
++ INTEL_VGA_DEVICE(0x5932, info), /* DT GT4 */ \
++ INTEL_VGA_DEVICE(0x593B, info), /* Halo GT4 */ \
++ INTEL_VGA_DEVICE(0x593A, info), /* SRV GT4 */ \
++ INTEL_VGA_DEVICE(0x593D, info) /* WKS GT4 */
++
++#define INTEL_KBL_IDS(info) \
++ INTEL_KBL_GT1_IDS(info), \
++ INTEL_KBL_GT2_IDS(info), \
++ INTEL_KBL_GT3_IDS(info), \
++ INTEL_KBL_GT4_IDS(info)
++
+ #endif /* _I915_PCIIDS_H */
+diff --git a/src/intel_device.c b/src/intel_device.c
+index 140e153..54c1443 100644
+--- a/src/intel_device.c
++++ b/src/intel_device.c
+@@ -38,6 +38,12 @@
+ #include <dirent.h>
+ #include <errno.h>
+
++#if MAJOR_IN_MKDEV
++#include <sys/mkdev.h>
++#elif MAJOR_IN_SYSMACROS
++#include <sys/sysmacros.h>
++#endif
++
+ #include <pciaccess.h>
+
+ #include <xorg-server.h>
+@@ -197,9 +203,15 @@ static inline struct intel_device *intel_device(ScrnInfoPtr scrn)
+ return xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr;
+ }
+
++/* NULL-terminated list of kernel module names, walked until the NULL sentinel. */
++static const char *kernel_module_names[] ={
++ "i915",
++ NULL,
++};
++
+ static int is_i915_device(int fd)
+ {
+ drm_version_t version;
++ const char **kn;
+ char name[5] = "";
+
+ memset(&version, 0, sizeof(version));
+@@ -209,7 +221,22 @@ static int is_i915_device(int fd)
+ if (drmIoctl(fd, DRM_IOCTL_VERSION, &version))
+ return 0;
+
+- return strcmp("i915", name) == 0;
++ for (kn = kernel_module_names; *kn; kn++)
++ if (strcmp(*kn, name) == 0)
++ return 1;
++
++ return 0;
++}
++
++/*
++ * Try each name in kernel_module_names until one of them loads.
++ * Returns 0 as soon as a module has been loaded, -1 if none could be.
++ */
++static int load_i915_kernel_module(void)
++{
++	const char **kn;
++
++	/* xf86LoadKernelModule() signals success with a non-zero return. */
++	for (kn = kernel_module_names; *kn; kn++)
++		if (xf86LoadKernelModule(*kn))
++			return 0;
++
++	return -1;
++}
+
+ static int is_i915_gem(int fd)
+@@ -336,7 +363,7 @@ static int __intel_open_device__pci(const struct pci_device *pci)
+
+ sprintf(path + base, "driver");
+ if (stat(path, &st)) {
+- if (xf86LoadKernelModule("i915"))
++ if (load_i915_kernel_module())
+ return -1;
+ (void)xf86LoadKernelModule("fbcon");
+ }
+@@ -399,7 +426,7 @@ static int __intel_open_device__legacy(const struct pci_device *pci)
+
+ ret = drmCheckModesettingSupported(id);
+ if (ret) {
+- if (xf86LoadKernelModule("i915"))
++ if (load_i915_kernel_module() == 0)
+ ret = drmCheckModesettingSupported(id);
+ if (ret)
+ return -1;
+@@ -461,9 +488,9 @@ static int is_render_node(int fd, struct stat *st)
+
+ static char *find_render_node(int fd)
+ {
+-#if defined(USE_RENDERNODE)
+ struct stat master, render;
+ char buf[128];
++ int i;
+
+ /* Are we a render-node ourselves? */
+ if (is_render_node(fd, &master))
+@@ -472,9 +499,17 @@ static char *find_render_node(int fd)
+ sprintf(buf, "/dev/dri/renderD%d", (int)((master.st_rdev | 0x80) & 0xbf));
+ if (stat(buf, &render) == 0 &&
+ master.st_mode == render.st_mode &&
+- render.st_rdev == ((master.st_rdev | 0x80) & 0xbf))
++ render.st_rdev == (master.st_rdev | 0x80))
+ return strdup(buf);
+-#endif
++
++ /* Misaligned card <-> renderD, do a full search */
++ for (i = 0; i < 16; i++) {
++ sprintf(buf, "/dev/dri/renderD%d", i + 128);
++ if (stat(buf, &render) == 0 &&
++ master.st_mode == render.st_mode &&
++ render.st_rdev == (master.st_rdev | 0x80))
++ return strdup(buf);
++ }
+
+ return NULL;
+ }
+@@ -672,6 +707,12 @@ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd)
+ return dev;
+ }
+
++/* Return dev->master_node; dev and its master_node must be non-NULL (asserted). */
++const char *intel_get_master_name(struct intel_device *dev)
++{
++ assert(dev && dev->master_node);
++ return dev->master_node;
++}
++
+ const char *intel_get_client_name(struct intel_device *dev)
+ {
+ assert(dev && dev->render_node);
+diff --git a/src/intel_driver.h b/src/intel_driver.h
+index 28ed1a0..fc9beaf 100644
+--- a/src/intel_driver.h
++++ b/src/intel_driver.h
+@@ -127,6 +127,7 @@ int intel_open_device(int entity_num,
+ int __intel_peek_fd(ScrnInfoPtr scrn);
+ struct intel_device *intel_get_device(ScrnInfoPtr scrn, int *fd);
+ int intel_has_render_node(struct intel_device *dev);
++const char *intel_get_master_name(struct intel_device *dev);
+ const char *intel_get_client_name(struct intel_device *dev);
+ int intel_get_client_fd(struct intel_device *dev);
+ int intel_get_device_id(struct intel_device *dev);
+diff --git a/src/intel_list.h b/src/intel_list.h
+index 51af825..c8a3187 100644
+--- a/src/intel_list.h
++++ b/src/intel_list.h
+@@ -306,8 +306,7 @@ list_is_empty(const struct list *head)
+ list_entry((ptr)->prev, type, member)
+
+ #define __container_of(ptr, sample, member) \
+- (void *)((char *)(ptr) \
+- - ((char *)&(sample)->member - (char *)(sample)))
++ (void *)((char *)(ptr) - ((char *)&(sample)->member - (char *)(sample)))
+ /**
+ * Loop through the list given by head and set pos to struct in the list.
+ *
+@@ -392,17 +391,50 @@ static inline void list_move_tail(struct list *list, struct list *head)
+ #define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+-#define list_for_each_entry_reverse(pos, head, member) \
++#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = __container_of((head)->prev, pos, member); \
+ &pos->member != (head); \
+ pos = __container_of(pos->member.prev, pos, member))
+
+ #endif
+
++#define list_for_each_entry_safe_from(pos, tmp, head, member) \
++ for (tmp = __container_of(pos->member.next, pos, member); \
++ &pos->member != (head); \
++ pos = tmp, tmp = __container_of(tmp->member.next, tmp, member))
++
+ #undef container_of
+ #define container_of(ptr, type, member) \
+ ((type *)((char *)(ptr) - (char *) &((type *)0)->member))
+
++static inline void __list_splice(const struct list *list,
++				 struct list *prev, struct list *next)
++{
++	/* Link the run list->next .. list->prev in between prev and next. */
++	struct list *head_entry = list->next;
++	struct list *tail_entry = list->prev;
++
++	prev->next = head_entry;
++	head_entry->prev = prev;
++
++	next->prev = tail_entry;
++	tail_entry->next = next;
++}
++
++static inline void list_splice(const struct list *list, struct list *head)
++{
++	if (list_is_empty(list))
++		return; /* no entries to link in after head */
++	__list_splice(list, head, head->next);
++}
++
++static inline void list_splice_tail(const struct list *list, struct list *head)
++{
++	if (list_is_empty(list))
++		return; /* no entries to link in before head */
++	__list_splice(list, head->prev, head);
++}
++
+ static inline int list_is_singular(const struct list *list)
+ {
+ return list->next == list->prev;
+diff --git a/src/intel_module.c b/src/intel_module.c
+index 102d52a..60835b9 100644
+--- a/src/intel_module.c
++++ b/src/intel_module.c
+@@ -126,6 +126,13 @@ static const struct intel_device_info intel_skylake_info = {
+ .gen = 0110,
+ };
+
++static const struct intel_device_info intel_broxton_info = {
++ .gen = 0111,
++};
++
++static const struct intel_device_info intel_kabylake_info = {
++ .gen = 0112,
++};
+
+ static const SymTabRec intel_chipsets[] = {
+ {PCI_CHIP_I810, "i810"},
+@@ -234,30 +241,36 @@ static const SymTabRec intel_chipsets[] = {
+ {0x0157, "HD Graphics"},
+
+ /* Broadwell Marketing names */
+- {0x1602, "HD graphics"},
+- {0x1606, "HD graphics"},
+- {0x160B, "HD graphics"},
+- {0x160A, "HD graphics"},
+- {0x160D, "HD graphics"},
+- {0x160E, "HD graphics"},
+- {0x1612, "HD graphics 5600"},
+- {0x1616, "HD graphics 5500"},
+- {0x161B, "HD graphics"},
+- {0x161A, "HD graphics"},
+- {0x161D, "HD graphics"},
+- {0x161E, "HD graphics 5300"},
+- {0x1622, "Iris Pro graphics 6200"},
+- {0x1626, "HD graphics 6000"},
+- {0x162B, "Iris graphics 6100"},
+- {0x162A, "Iris Pro graphics P6300"},
+- {0x162D, "HD graphics"},
+- {0x162E, "HD graphics"},
+- {0x1632, "HD graphics"},
+- {0x1636, "HD graphics"},
+- {0x163B, "HD graphics"},
+- {0x163A, "HD graphics"},
+- {0x163D, "HD graphics"},
+- {0x163E, "HD graphics"},
++ {0x1602, "HD Graphics"},
++ {0x1606, "HD Graphics"},
++ {0x160B, "HD Graphics"},
++ {0x160A, "HD Graphics"},
++ {0x160D, "HD Graphics"},
++ {0x160E, "HD Graphics"},
++ {0x1612, "HD Graphics 5600"},
++ {0x1616, "HD Graphics 5500"},
++ {0x161B, "HD Graphics"},
++ {0x161A, "HD Graphics"},
++ {0x161D, "HD Graphics"},
++ {0x161E, "HD Graphics 5300"},
++ {0x1622, "Iris Pro Graphics 6200"},
++ {0x1626, "HD Graphics 6000"},
++ {0x162B, "Iris Graphics 6100"},
++ {0x162A, "Iris Pro Graphics P6300"},
++ {0x162D, "HD Graphics"},
++ {0x162E, "HD Graphics"},
++ {0x1632, "HD Graphics"},
++ {0x1636, "HD Graphics"},
++ {0x163B, "HD Graphics"},
++ {0x163A, "HD Graphics"},
++ {0x163D, "HD Graphics"},
++ {0x163E, "HD Graphics"},
++
++ /* Cherryview (Cherrytrail/Braswell) */
++ {0x22b0, "HD Graphics"},
++ {0x22b1, "HD Graphics"},
++ {0x22b2, "HD Graphics"},
++ {0x22b3, "HD Graphics"},
+
+ /* When adding new identifiers, also update:
+ * 1. intel_identify()
+@@ -318,6 +331,10 @@ static const struct pci_id_match intel_device_match[] = {
+
+ INTEL_SKL_IDS(&intel_skylake_info),
+
++ INTEL_BXT_IDS(&intel_broxton_info),
++
++ INTEL_KBL_IDS(&intel_kabylake_info),
++
+ INTEL_VGA_DEVICE(PCI_MATCH_ANY, &intel_generic_info),
+ #endif
+
+@@ -508,6 +525,9 @@ static enum accel_method { NOACCEL, SNA, UXA } get_accel_method(void)
+ if (hosted())
+ return SNA;
+
++ if (xf86configptr == NULL) /* X -configure */
++ return SNA;
++
+ dev = _xf86findDriver("intel", xf86configptr->conf_device_lst);
+ if (dev && dev->dev_option_lst) {
+ const char *s;
+@@ -582,10 +602,17 @@ intel_scrn_create(DriverPtr driver,
+ case NOACCEL:
+ #endif
+ case UXA:
+- return intel_init_scrn(scrn);
++ return intel_init_scrn(scrn);
+ #endif
+
+- default: break;
++ default:
++#if USE_SNA
++ return sna_init_scrn(scrn, entity_num);
++#elif USE_UXA
++ return intel_init_scrn(scrn);
++#else
++ break;
++#endif
+ }
+ #endif
+
+diff --git a/src/intel_options.c b/src/intel_options.c
+index ff8541a..7f253ac 100644
+--- a/src/intel_options.c
++++ b/src/intel_options.c
+@@ -2,18 +2,24 @@
+ #include "config.h"
+ #endif
+
++#include <xorg-server.h>
++#include <xorgVersion.h>
++#include <xf86Parser.h>
++
+ #include "intel_options.h"
+
+ const OptionInfoRec intel_options[] = {
+- {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0},
++ {OPTION_ACCEL_ENABLE, "Accel", OPTV_BOOLEAN, {0}, 0},
+ {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0},
+ {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0},
++ {OPTION_EDID, "CustomEDID", OPTV_STRING, {0}, 0},
+ {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0},
+ {OPTION_PRESENT, "Present", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0},
+ {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0},
+ {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, 0},
++ {OPTION_ROTATION, "HWRotation", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_VSYNC, "VSync", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_PAGEFLIP, "PageFlip", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, 1},
+@@ -21,7 +27,6 @@ const OptionInfoRec intel_options[] = {
+ {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, 0},
+ {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, 1},
+ {OPTION_REPROBE, "ReprobeOutputs", OPTV_BOOLEAN, {0}, 0},
+- {OPTION_DELETE_DP12, "DeleteUnusedDP12Displays", OPTV_BOOLEAN, {0}, 0},
+ #ifdef INTEL_XVMC
+ {OPTION_XVMC, "XvMC", OPTV_BOOLEAN, {0}, 1},
+ #endif
+@@ -54,3 +59,85 @@ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn)
+
+ return options;
+ }
++
++Bool intel_option_cast_to_bool(OptionInfoPtr options, int id, Bool val)
++{
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0)
++	xf86getBoolValue(&val, xf86GetOptValString(options, id)); /* return value ignored; val starts as the caller's default */
++#endif
++	return val; /* on servers older than 1.7.99.901 this is always the caller's default */
++}
++
++/*
++ * Compare two option strings, ignoring letter case and any '_', ' ' or
++ * '\t' characters.  Returns 0 when they match, non-zero otherwise.
++ * A NULL or empty s1 matches only a NULL or empty s2 (otherwise 1).
++ */
++static int
++namecmp(const char *s1, const char *s2)
++{
++	char c1, c2;
++
++	if (!s1 || *s1 == 0)
++		return (s2 && *s2) ? 1 : 0;
++	if (!s2) /* treat NULL like "" instead of dereferencing it below */
++		s2 = "";
++
++	for (;;) {
++		while (*s1 == '_' || *s1 == ' ' || *s1 == '\t')
++			s1++;
++		while (*s2 == '_' || *s2 == ' ' || *s2 == '\t')
++			s2++;
++
++		/*
++		 * <ctype.h> functions take an unsigned char value (or EOF);
++		 * a plain char may be negative for bytes >= 0x80.
++		 */
++		c1 = isupper((unsigned char)*s1) ? tolower((unsigned char)*s1) : *s1;
++		c2 = isupper((unsigned char)*s2) ? tolower((unsigned char)*s2) : *s2;
++		if (c1 != c2)
++			break;
++		if (c1 == '\0')
++			return 0;
++
++		s1++;
++		s2++;
++	}
++
++	return c1 - c2;
++}
++
++/* Read option @id as an unsigned: unset/empty and on/true/yes keep @val,
++ * 0/off/false/no give 0, any other text is parsed as a decimal number and
++ * returned if non-zero, else @val is returned.
++ */
++unsigned intel_option_cast_to_unsigned(OptionInfoPtr options, int id, unsigned val)
++{
++	/* Spellings that keep the caller's default vs. those that force 0. */
++	static const char * const keep[] = { "on", "true", "yes" };
++	static const char * const zero[] = { "0", "off", "false", "no" };
++#if XORG_VERSION_CURRENT >= XORG_VERSION_NUMERIC(1,7,99,901,0)
++	const char *str = xf86GetOptValString(options, id);
++#else
++	const char *str = NULL;
++#endif
++	unsigned v, i;
++
++	if (str == NULL || *str == '\0')
++		return val;
++
++	for (i = 0; i < sizeof(keep) / sizeof(keep[0]); i++)
++		if (namecmp(str, keep[i]) == 0)
++			return val;
++
++	for (i = 0; i < sizeof(zero) / sizeof(zero[0]); i++)
++		if (namecmp(str, zero[i]) == 0)
++			return 0;
++
++	/* strtoul() rather than atoi(): atoi is undefined on out-of-range input. */
++	v = strtoul(str, NULL, 10);
++	if (v)
++		return v;
++
++	return val;
++}
+diff --git a/src/intel_options.h b/src/intel_options.h
+index 7e2cbd9..43635f1 100644
+--- a/src/intel_options.h
++++ b/src/intel_options.h
+@@ -12,15 +12,17 @@
+ */
+
+ enum intel_options {
+- OPTION_ACCEL_DISABLE,
++ OPTION_ACCEL_ENABLE,
+ OPTION_ACCEL_METHOD,
+ OPTION_BACKLIGHT,
++ OPTION_EDID,
+ OPTION_DRI,
+ OPTION_PRESENT,
+ OPTION_VIDEO_KEY,
+ OPTION_COLOR_KEY,
+ OPTION_TILING_2D,
+ OPTION_TILING_FB,
++ OPTION_ROTATION,
+ OPTION_VSYNC,
+ OPTION_PAGEFLIP,
+ OPTION_SWAPBUFFERS_WAIT,
+@@ -28,7 +30,6 @@ enum intel_options {
+ OPTION_PREFER_OVERLAY,
+ OPTION_HOTPLUG,
+ OPTION_REPROBE,
+- OPTION_DELETE_DP12,
+ #if defined(XvMCExtension) && defined(ENABLE_XVMC)
+ OPTION_XVMC,
+ #define INTEL_XVMC 1
+@@ -51,5 +52,7 @@ enum intel_options {
+
+ extern const OptionInfoRec intel_options[];
+ OptionInfoPtr intel_options_get(ScrnInfoPtr scrn);
++unsigned intel_option_cast_to_unsigned(OptionInfoPtr, int id, unsigned val);
++Bool intel_option_cast_to_bool(OptionInfoPtr, int id, Bool val);
+
+ #endif /* INTEL_OPTIONS_H */
+diff --git a/src/legacy/i810/i810_common.h b/src/legacy/i810/i810_common.h
+index 4cc10e8..8355708 100644
+--- a/src/legacy/i810/i810_common.h
++++ b/src/legacy/i810/i810_common.h
+@@ -52,7 +52,7 @@
+
+ #define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1))
+
+-/* Using usleep() makes things noticably slow. */
++/* Using usleep() makes things noticeably slow. */
+ #if 0
+ #define DELAY(x) usleep(x)
+ #else
+@@ -185,7 +185,7 @@ enum {
+ * - zbuffer linear offset and pitch -- also invarient
+ * - drawing origin in back and depth buffers.
+ *
+- * Keep the depth/back buffer state here to acommodate private buffers
++ * Keep the depth/back buffer state here to accommodate private buffers
+ * in the future.
+ */
+ #define I810_DESTREG_DI0 0 /* CMD_OP_DESTBUFFER_INFO (2 dwords) */
+diff --git a/src/legacy/i810/i810_hwmc.c b/src/legacy/i810/i810_hwmc.c
+index 7cb9c1a..58661b0 100644
+--- a/src/legacy/i810/i810_hwmc.c
++++ b/src/legacy/i810/i810_hwmc.c
+@@ -171,7 +171,7 @@ static XF86MCAdaptorPtr ppAdapt[1] =
+ *
+ * I810InitMC
+ *
+- * Initialize the hardware motion compenstation extention for this
++ * Initialize the hardware motion compensation extension for this
+ * hardware. The initialization routines want the address of the pointers
+ * to the structures, not the address of the structures. This means we
+ * allocate (or create static?) the pointer memory and pass that
+diff --git a/src/legacy/i810/i810_memory.c b/src/legacy/i810/i810_memory.c
+index c3de277..6f27483 100644
+--- a/src/legacy/i810/i810_memory.c
++++ b/src/legacy/i810/i810_memory.c
+@@ -76,7 +76,7 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn)
+ unsigned long size = pScrn->videoRam * 1024UL;
+ I810Ptr pI810 = I810PTR(pScrn);
+ int key;
+- long tom = 0;
++ unsigned long tom = 0;
+ unsigned long physical;
+
+ if (!xf86AgpGARTSupported() || !xf86AcquireGART(pScrn->scrnIndex)) {
+@@ -132,8 +132,8 @@ I810AllocateGARTMemory(ScrnInfoPtr pScrn)
+ * Keep it 512K aligned for the sake of tiled regions.
+ */
+
+- tom += 0x7ffff;
+- tom &= ~0x7ffff;
++ tom += 0x7ffffUL;
++ tom &= ~0x7ffffUL;
+
+ if ((key = xf86AllocateGARTMemory(pScrn->scrnIndex, size, 1, NULL)) != -1) {
+ pI810->DcacheOffset = tom;
+diff --git a/src/legacy/i810/i810_reg.h b/src/legacy/i810/i810_reg.h
+index 54faeb3..fa091c5 100644
+--- a/src/legacy/i810/i810_reg.h
++++ b/src/legacy/i810/i810_reg.h
+@@ -245,7 +245,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * not sure they refer to local (graphics) memory.
+ *
+ * These details are for the local memory control registers,
+- * (pp301-310). The test machines are not equiped with local memory,
++ * (pp301-310). The test machines are not equipped with local memory,
+ * so nothing is tested. Only a single row seems to be supported.
+ */
+ #define DRAM_ROW_TYPE 0x3000
+diff --git a/src/legacy/i810/xvmc/I810XvMC.c b/src/legacy/i810/xvmc/I810XvMC.c
+index e6b63d3..a538e99 100644
+--- a/src/legacy/i810/xvmc/I810XvMC.c
++++ b/src/legacy/i810/xvmc/I810XvMC.c
+@@ -61,7 +61,7 @@ static int event_base;
+ // Arguments: pI810XvMC private data structure from the current context.
+ // Notes: We faked the drmMapBufs for the i810's security so now we have
+ // to insert an allocated page into the correct spot in the faked
+-// list to keep up appearences.
++// list to keep up appearances.
+ // Concept for this function was taken from Mesa sources.
+ // Returns: drmBufPtr containing the information about the allocated page.
+ ***************************************************************************/
+@@ -188,7 +188,7 @@ _X_EXPORT Status XvMCCreateContext(Display *display, XvPortID port,
+
+ /* Check for drm */
+ if(! drmAvailable()) {
+- printf("Direct Rendering is not avilable on this system!\n");
++ printf("Direct Rendering is not available on this system!\n");
+ return BadAlloc;
+ }
+
+@@ -3279,7 +3279,7 @@ _X_EXPORT Status XvMCSyncSurface(Display *display,XvMCSurface *surface) {
+ // display - Connection to X server
+ // surface - Surface to flush
+ // Info:
+-// This command is a noop for i810 becuase we always dispatch buffers in
++// This command is a noop for i810 because we always dispatch buffers in
+ // render. There is little gain to be had with 4k buffers.
+ // Returns: Status
+ ***************************************************************************/
+diff --git a/src/render_program/exa_wm.g4i b/src/render_program/exa_wm.g4i
+index 5d3d45b..587b581 100644
+--- a/src/render_program/exa_wm.g4i
++++ b/src/render_program/exa_wm.g4i
+@@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F')
+ define(`mask_wo', `g6.12<0,1,0>F')
+
+ /*
+- * Local variables. Pairs must be aligned on even reg boundry
++ * Local variables. Pairs must be aligned on even reg boundary
+ */
+
+ /* this holds the X dest coordinates */
+diff --git a/src/render_program/exa_wm_yuv_rgb.g8a b/src/render_program/exa_wm_yuv_rgb.g8a
+index 7def093..34973ba 100644
+--- a/src/render_program/exa_wm_yuv_rgb.g8a
++++ b/src/render_program/exa_wm_yuv_rgb.g8a
+@@ -76,7 +76,7 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 };
+ /*
+ * R = Y + Cr * 1.596
+ */
+-mov (8) acc0<1>F Yn<8,8,1>F { compr align1 };
++mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 };
+ mac.sat(8) src_sample_r_01<1>F Crn_01<8,8,1>F 1.596F { compr align1 };
+
+ mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 };
+@@ -84,7 +84,7 @@ mac.sat(8) src_sample_r_23<1>F Crn_23<8,8,1>F 1.596F { compr align1 };
+ /*
+ * G = Crn * -0.813 + Cbn * -0.392 + Y
+ */
+-mov (8) acc0<1>F Yn_23<8,8,1>F { compr align1 };
++mov (8) acc0<1>F Yn_01<8,8,1>F { compr align1 };
+ mac (8) acc0<1>F Crn_01<8,8,1>F -0.813F { compr align1 };
+ mac.sat(8) src_sample_g_01<1>F Cbn_01<8,8,1>F -0.392F { compr align1 };
+
+diff --git a/src/render_program/exa_wm_yuv_rgb.g8b b/src/render_program/exa_wm_yuv_rgb.g8b
+index 4494953..2cd6fc4 100644
+--- a/src/render_program/exa_wm_yuv_rgb.g8b
++++ b/src/render_program/exa_wm_yuv_rgb.g8b
+@@ -6,7 +6,7 @@
+ { 0x80600048, 0x21c03ae8, 0x3e8d02c0, 0x3fcc49ba },
+ { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
+ { 0x80600048, 0x21e03ae8, 0x3e8d02e0, 0x3fcc49ba },
+- { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
++ { 0x00600001, 0x24003ae0, 0x008d0300, 0x00000000 },
+ { 0x00600048, 0x24003ae0, 0x3e8d02c0, 0xbf5020c5 },
+ { 0x80600048, 0x22003ae8, 0x3e8d0340, 0xbec8b439 },
+ { 0x00600001, 0x24003ae0, 0x008d0320, 0x00000000 },
+diff --git a/src/sna/blt.c b/src/sna/blt.c
+index b5bfee6..2dae9c2 100644
+--- a/src/sna/blt.c
++++ b/src/sna/blt.c
+@@ -30,6 +30,7 @@
+ #endif
+
+ #include "sna.h"
++#include <pixman.h>
+
+ #if __x86_64__
+ #define USE_SSE2 1
+@@ -333,420 +334,270 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+ }
+ }
+
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
+-{
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
+-
+- const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = dst_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+- unsigned x, y;
+-
+- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+-
+- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t dy = y + dst_y;
+- const uint32_t tile_row =
+- (dy / tile_height * stride_tiles * tile_size +
+- (dy & (tile_height-1)) * tile_width);
+- const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+- uint32_t dx = dst_x, offset;
+-
+- x = width * cpp;
+- if (dx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
+-
+- memcpy((char *)dst + offset, src_row, length * cpp);
+-
+- src_row += length * cpp;
+- x -= length * cpp;
+- dx += length;
+- }
+- while (x >= 64) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
+-
+- memcpy((char *)dst + offset, src_row, 64);
+-
+- src_row += 64;
+- x -= 64;
+- dx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
+- memcpy((char *)dst + offset, src_row, x);
+- }
+- }
++#define memcpy_to_tiled_x(swizzle) \
++fast_memcpy static void \
++memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
++ int32_t src_stride, int32_t dst_stride, \
++ int16_t src_x, int16_t src_y, \
++ int16_t dst_x, int16_t dst_y, \
++ uint16_t width, uint16_t height) \
++{ \
++ const unsigned tile_width = 512; \
++ const unsigned tile_height = 8; \
++ const unsigned tile_size = 4096; \
++ const unsigned cpp = bpp / 8; \
++ const unsigned stride_tiles = dst_stride / tile_width; \
++ const unsigned swizzle_pixels = 64 / cpp; \
++ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \
++ const unsigned tile_mask = (1 << tile_pixels) - 1; \
++ unsigned x, y; \
++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \
++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \
++ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp; \
++ for (y = 0; y < height; ++y) { \
++ const uint32_t dy = y + dst_y; \
++ const uint32_t tile_row = \
++ (dy / tile_height * stride_tiles * tile_size + \
++ (dy & (tile_height-1)) * tile_width); \
++ const uint8_t *src_row = (const uint8_t *)src + src_stride * y; \
++ uint32_t dx = dst_x; \
++ x = width * cpp; \
++ if (dx & (swizzle_pixels - 1)) { \
++ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels); \
++ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx; \
++ uint32_t offset = \
++ tile_row + \
++ (dx >> tile_pixels) * tile_size + \
++ (dx & tile_mask) * cpp; \
++ memcpy((char *)dst + swizzle(offset), src_row, length * cpp); \
++ src_row += length * cpp; \
++ x -= length * cpp; \
++ dx += length; \
++ } \
++ while (x >= 64) { \
++ uint32_t offset = \
++ tile_row + \
++ (dx >> tile_pixels) * tile_size + \
++ (dx & tile_mask) * cpp; \
++ memcpy((char *)dst + swizzle(offset), src_row, 64); \
++ src_row += 64; \
++ x -= 64; \
++ dx += swizzle_pixels; \
++ } \
++ if (x) { \
++ uint32_t offset = \
++ tile_row + \
++ (dx >> tile_pixels) * tile_size + \
++ (dx & tile_mask) * cpp; \
++ memcpy((char *)dst + swizzle(offset), src_row, x); \
++ } \
++ } \
+ }
+
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
+-{
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
+-
+- const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = src_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
++#define memcpy_from_tiled_x(swizzle) \
++fast_memcpy static void \
++memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
++ int32_t src_stride, int32_t dst_stride, \
++ int16_t src_x, int16_t src_y, \
++ int16_t dst_x, int16_t dst_y, \
++ uint16_t width, uint16_t height) \
++{ \
++ const unsigned tile_width = 512; \
++ const unsigned tile_height = 8; \
++ const unsigned tile_size = 4096; \
++ const unsigned cpp = bpp / 8; \
++ const unsigned stride_tiles = src_stride / tile_width; \
++ const unsigned swizzle_pixels = 64 / cpp; \
++ const unsigned tile_pixels = ffs(tile_width / cpp) - 1; \
++ const unsigned tile_mask = (1 << tile_pixels) - 1; \
++ unsigned x, y; \
++ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", \
++ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); \
++ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; \
++ for (y = 0; y < height; ++y) { \
++ const uint32_t sy = y + src_y; \
++ const uint32_t tile_row = \
++ (sy / tile_height * stride_tiles * tile_size + \
++ (sy & (tile_height-1)) * tile_width); \
++ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; \
++ uint32_t sx = src_x; \
++ x = width * cpp; \
++ if (sx & (swizzle_pixels - 1)) { \
++ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); \
++ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; \
++ uint32_t offset = \
++ tile_row + \
++ (sx >> tile_pixels) * tile_size + \
++ (sx & tile_mask) * cpp; \
++ memcpy(dst_row, (const char *)src + swizzle(offset), length * cpp); \
++ dst_row += length * cpp; \
++ x -= length * cpp; \
++ sx += length; \
++ } \
++ while (x >= 64) { \
++ uint32_t offset = \
++ tile_row + \
++ (sx >> tile_pixels) * tile_size + \
++ (sx & tile_mask) * cpp; \
++ memcpy(dst_row, (const char *)src + swizzle(offset), 64); \
++ dst_row += 64; \
++ x -= 64; \
++ sx += swizzle_pixels; \
++ } \
++ if (x) { \
++ uint32_t offset = \
++ tile_row + \
++ (sx >> tile_pixels) * tile_size + \
++ (sx & tile_mask) * cpp; \
++ memcpy(dst_row, (const char *)src + swizzle(offset), x); \
++ } \
++ } \
++}
+
+- unsigned x, y;
++#define swizzle_9(X) ((X) ^ (((X) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9)
++memcpy_from_tiled_x(swizzle_9)
++#undef swizzle_9
+
+- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+-
+- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t sy = y + src_y;
+- const uint32_t tile_row =
+- (sy / tile_height * stride_tiles * tile_size +
+- (sy & (tile_height-1)) * tile_width);
+- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+- uint32_t sx = src_x, offset;
+-
+- x = width * cpp;
+- if (sx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
+-
+- memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+- dst_row += length * cpp;
+- x -= length * cpp;
+- sx += length;
+- }
+- while (x >= 64) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
++#define swizzle_9_10(X) ((X) ^ ((((X) ^ ((X) >> 1)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_10)
++memcpy_from_tiled_x(swizzle_9_10)
++#undef swizzle_9_10
+
+- memcpy(dst_row, (const char *)src + offset, 64);
++#define swizzle_9_11(X) ((X) ^ ((((X) ^ ((X) >> 2)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_11)
++memcpy_from_tiled_x(swizzle_9_11)
++#undef swizzle_9_11
+
+- dst_row += 64;
+- x -= 64;
+- sx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= (offset >> 3) & 64;
+- memcpy(dst_row, (const char *)src + offset, x);
+- }
+- }
+-}
++#define swizzle_9_10_11(X) ((X) ^ ((((X) ^ ((X) >> 1) ^ ((X) >> 2)) >> 3) & 64))
++memcpy_to_tiled_x(swizzle_9_10_11)
++memcpy_from_tiled_x(swizzle_9_10_11)
++#undef swizzle_9_10_11
+
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
++static fast_memcpy void
++memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
++ int32_t src_stride, int32_t dst_stride,
++ int16_t src_x, int16_t src_y,
++ int16_t dst_x, int16_t dst_y,
++ uint16_t width, uint16_t height)
+ {
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
++ const unsigned tile_width = 128;
++ const unsigned tile_height = 16;
++ const unsigned tile_size = 2048;
+
+ const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = dst_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+- unsigned x, y;
++ const unsigned tile_pixels = tile_width / cpp;
++ const unsigned tile_shift = ffs(tile_pixels) - 1;
++ const unsigned tile_mask = tile_pixels - 1;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++ assert(src != dst);
+
+- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t dy = y + dst_y;
+- const uint32_t tile_row =
+- (dy / tile_height * stride_tiles * tile_size +
+- (dy & (tile_height-1)) * tile_width);
+- const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+- uint32_t dx = dst_x, offset;
+-
+- x = width * cpp;
+- if (dx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+- memcpy((char *)dst + offset, src_row, length * cpp);
+-
+- src_row += length * cpp;
+- x -= length * cpp;
+- dx += length;
+- }
+- while (x >= 64) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+- memcpy((char *)dst + offset, src_row, 64);
+-
+- src_row += 64;
+- x -= 64;
+- dx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+- memcpy((char *)dst + offset, src_row, x);
+- }
+- }
+-}
+-
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
+-{
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
+-
+- const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = src_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
++ if (src_x | src_y)
++ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
++ assert(src_stride >= width * cpp);
++ src_stride -= width * cpp;
+
+- unsigned x, y;
++ while (height--) {
++ unsigned w = width * cpp;
++ uint8_t *tile_row = dst;
+
+- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++ tile_row += dst_y / tile_height * dst_stride * tile_height;
++ tile_row += (dst_y & (tile_height-1)) * tile_width;
++ if (dst_x) {
++ tile_row += (dst_x >> tile_shift) * tile_size;
++ if (dst_x & tile_mask) {
++ const unsigned x = (dst_x & tile_mask) * cpp;
++ const unsigned len = min(tile_width - x, w);
++ memcpy(tile_row + x, src, len);
+
+- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t sy = y + src_y;
+- const uint32_t tile_row =
+- (sy / tile_height * stride_tiles * tile_size +
+- (sy & (tile_height-1)) * tile_width);
+- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+- uint32_t sx = src_x, offset;
+-
+- x = width * cpp;
+- if (sx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+- memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+- dst_row += length * cpp;
+- x -= length * cpp;
+- sx += length;
++ tile_row += tile_size;
++ src = (const uint8_t *)src + len;
++ w -= len;
++ }
+ }
+- while (x >= 64) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+-
+- memcpy(dst_row, (const char *)src + offset, 64);
++ while (w >= tile_width) {
++ memcpy(tile_row, src, tile_width);
+
+- dst_row += 64;
+- x -= 64;
+- sx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+- memcpy(dst_row, (const char *)src + offset, x);
++ tile_row += tile_size;
++ src = (const uint8_t *)src + tile_width;
++ w -= tile_width;
+ }
++ memcpy(tile_row, src, w);
++ src = (const uint8_t *)src + src_stride + w;
++ dst_y++;
+ }
+ }
+
+-fast_memcpy static void
+-memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
++static fast_memcpy void
++memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
++ int32_t src_stride, int32_t dst_stride,
++ int16_t src_x, int16_t src_y,
++ int16_t dst_x, int16_t dst_y,
++ uint16_t width, uint16_t height)
+ {
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
++ const unsigned tile_width = 128;
++ const unsigned tile_height = 16;
++ const unsigned tile_size = 2048;
+
+ const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = dst_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
+-
+- unsigned x, y;
++ const unsigned tile_pixels = tile_width / cpp;
++ const unsigned tile_shift = ffs(tile_pixels) - 1;
++ const unsigned tile_mask = tile_pixels - 1;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++ assert(src != dst);
+
+- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t dy = y + dst_y;
+- const uint32_t tile_row =
+- (dy / tile_height * stride_tiles * tile_size +
+- (dy & (tile_height-1)) * tile_width);
+- const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+- uint32_t dx = dst_x, offset;
+-
+- x = width * cpp;
+- if (dx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+- memcpy((char *)dst + offset, src_row, length * cpp);
+-
+- src_row += length * cpp;
+- x -= length * cpp;
+- dx += length;
+- }
+- while (x >= 64) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-
+- memcpy((char *)dst + offset, src_row, 64);
+-
+- src_row += 64;
+- x -= 64;
+- dx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (dx >> tile_pixels) * tile_size +
+- (dx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+- memcpy((char *)dst + offset, src_row, x);
+- }
+- }
+-}
+-
+-fast_memcpy static void
+-memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+- int32_t src_stride, int32_t dst_stride,
+- int16_t src_x, int16_t src_y,
+- int16_t dst_x, int16_t dst_y,
+- uint16_t width, uint16_t height)
+-{
+- const unsigned tile_width = 512;
+- const unsigned tile_height = 8;
+- const unsigned tile_size = 4096;
+-
+- const unsigned cpp = bpp / 8;
+- const unsigned stride_tiles = src_stride / tile_width;
+- const unsigned swizzle_pixels = 64 / cpp;
+- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+- const unsigned tile_mask = (1 << tile_pixels) - 1;
++ if (dst_x | dst_y)
++ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
++ assert(dst_stride >= width * cpp);
++ dst_stride -= width * cpp;
+
+- unsigned x, y;
++ while (height--) {
++ unsigned w = width * cpp;
++ const uint8_t *tile_row = src;
+
+- DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+- __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
++ tile_row += src_y / tile_height * src_stride * tile_height;
++ tile_row += (src_y & (tile_height-1)) * tile_width;
++ if (src_x) {
++ tile_row += (src_x >> tile_shift) * tile_size;
++ if (src_x & tile_mask) {
++ const unsigned x = (src_x & tile_mask) * cpp;
++ const unsigned len = min(tile_width - x, w);
++ memcpy(dst, tile_row + x, len);
+
+- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+-
+- for (y = 0; y < height; ++y) {
+- const uint32_t sy = y + src_y;
+- const uint32_t tile_row =
+- (sy / tile_height * stride_tiles * tile_size +
+- (sy & (tile_height-1)) * tile_width);
+- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+- uint32_t sx = src_x, offset;
+-
+- x = width * cpp;
+- if (sx & (swizzle_pixels - 1)) {
+- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+- memcpy(dst_row, (const char *)src + offset, length * cpp);
+-
+- dst_row += length * cpp;
+- x -= length * cpp;
+- sx += length;
++ tile_row += tile_size;
++ dst = (uint8_t *)dst + len;
++ w -= len;
++ }
+ }
+- while (x >= 64) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+-
+- memcpy(dst_row, (const char *)src + offset, 64);
++ while (w >= tile_width) {
++ memcpy(dst, tile_row, tile_width);
+
+- dst_row += 64;
+- x -= 64;
+- sx += swizzle_pixels;
+- }
+- if (x) {
+- offset = tile_row +
+- (sx >> tile_pixels) * tile_size +
+- (sx & tile_mask) * cpp;
+- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+- memcpy(dst_row, (const char *)src + offset, x);
++ tile_row += tile_size;
++ dst = (uint8_t *)dst + tile_width;
++ w -= tile_width;
+ }
++ memcpy(dst, tile_row, w);
++ dst = (uint8_t *)dst + dst_stride + w;
++ src_y++;
+ }
+ }
+
+ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
+ {
++ if (kgem->gen < 030) {
++ if (swizzling == I915_BIT_6_SWIZZLE_NONE) {
++ DBG(("%s: gen2, no swizzling\n", __FUNCTION__));
++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__gen2;
++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__gen2;
++ } else
++ DBG(("%s: no detiling with swizzle functions for gen2\n", __FUNCTION__));
++ return;
++ }
++
+ switch (swizzling) {
+ default:
+ DBG(("%s: unknown swizzling, %d\n", __FUNCTION__, swizzling));
+@@ -771,6 +622,11 @@ void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11;
+ break;
++ case I915_BIT_6_SWIZZLE_9_10_11:
++ DBG(("%s: 6^9^10^11 swizzling\n", __FUNCTION__));
++ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10_11;
++ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10_11;
++ break;
+ }
+ }
+
+@@ -1118,3 +974,241 @@ memcpy_xor(const void *src, void *dst, int bpp,
+ }
+ }
+ }
++
++#define BILINEAR_INTERPOLATION_BITS 4
++static inline int
++bilinear_weight(pixman_fixed_t x)
++{
++ return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
++ ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
++}
++
++#if BILINEAR_INTERPOLATION_BITS <= 4
++/* Inspired by Filter_32_opaque from Skia */
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++ uint32_t bl, uint32_t br,
++ int distx, int disty)
++{
++ int distxy, distxiy, distixy, distixiy;
++ uint32_t lo, hi;
++
++ distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
++ disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
++
++ distxy = distx * disty;
++ distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */
++ distixy = (disty << 4) - distxy; /* disty * (16 - distx) */
++ distixiy =
++ 16 * 16 - (disty << 4) -
++ (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
++
++ lo = (tl & 0xff00ff) * distixiy;
++ hi = ((tl >> 8) & 0xff00ff) * distixiy;
++
++ lo += (tr & 0xff00ff) * distxiy;
++ hi += ((tr >> 8) & 0xff00ff) * distxiy;
++
++ lo += (bl & 0xff00ff) * distixy;
++ hi += ((bl >> 8) & 0xff00ff) * distixy;
++
++ lo += (br & 0xff00ff) * distxy;
++ hi += ((br >> 8) & 0xff00ff) * distxy;
++
++ return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
++}
++#elif SIZEOF_LONG > 4
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++ uint32_t bl, uint32_t br,
++ int distx, int disty)
++{
++ uint64_t distxy, distxiy, distixy, distixiy;
++ uint64_t tl64, tr64, bl64, br64;
++ uint64_t f, r;
++
++ distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
++ disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
++
++ distxy = distx * disty;
++ distxiy = distx * (256 - disty);
++ distixy = (256 - distx) * disty;
++ distixiy = (256 - distx) * (256 - disty);
++
++ /* Alpha and Blue */
++ tl64 = tl & 0xff0000ff;
++ tr64 = tr & 0xff0000ff;
++ bl64 = bl & 0xff0000ff;
++ br64 = br & 0xff0000ff;
++
++ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
++ r = f & 0x0000ff0000ff0000ull;
++
++ /* Red and Green */
++ tl64 = tl;
++ tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
++
++ tr64 = tr;
++ tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
++
++ bl64 = bl;
++ bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
++
++ br64 = br;
++ br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
++
++ f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
++ r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
++
++ return (uint32_t)(r >> 16);
++}
++#else
++static inline uint32_t
++bilinear_interpolation(uint32_t tl, uint32_t tr,
++ uint32_t bl, uint32_t br,
++ int distx, int disty)
++{
++ int distxy, distxiy, distixy, distixiy;
++ uint32_t f, r;
++
++ distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
++ disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
++
++ distxy = distx * disty;
++ distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */
++ distixy = (disty << 8) - distxy; /* disty * (256 - distx) */
++ distixiy =
++ 256 * 256 - (disty << 8) -
++ (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */
++
++ /* Blue */
++ r = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy +
++ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy);
++
++ /* Green */
++ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy +
++ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy);
++ r |= f & 0xff000000;
++
++ tl >>= 16;
++ tr >>= 16;
++ bl >>= 16;
++ br >>= 16;
++ r >>= 16;
++
++ /* Red */
++ f = ((tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy +
++ (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy);
++ r |= f & 0x00ff0000;
++
++ /* Alpha */
++ f = ((tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy +
++ (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy);
++ r |= f & 0xff000000;
++
++ return r;
++}
++#endif
++
++static inline uint32_t convert_pixel(const uint8_t *p, int x)
++{
++ return ((uint32_t *)p)[x];
++}
++
++fast void
++affine_blt(const void *src, void *dst, int bpp,
++ int16_t src_x, int16_t src_y,
++ int16_t src_width, int16_t src_height,
++ int32_t src_stride,
++ int16_t dst_x, int16_t dst_y,
++ uint16_t dst_width, uint16_t dst_height,
++ int32_t dst_stride,
++ const struct pixman_f_transform *t)
++{
++ static const uint8_t zero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
++ const pixman_fixed_t ux = pixman_double_to_fixed(t->m[0][0]);
++ const pixman_fixed_t uy = pixman_double_to_fixed(t->m[1][0]);
++ int i, j;
++
++ assert(bpp == 32);
++
++ for (j = 0; j < dst_height; j++) {
++ pixman_fixed_t x, y;
++ struct pixman_f_vector v;
++ uint32_t *b;
++
++ /* reference point is the center of the pixel */
++ v.v[0] = dst_x + 0.5;
++ v.v[1] = dst_y + j + 0.5;
++ v.v[2] = 1.0;
++
++ pixman_f_transform_point_3d(t, &v);
++
++ x = pixman_double_to_fixed(v.v[0]);
++ x += pixman_int_to_fixed(src_x - dst_x);
++ y = pixman_double_to_fixed(v.v[1]);
++ y += pixman_int_to_fixed(src_y - dst_y);
++
++ b = (uint32_t*)((uint8_t *)dst + (dst_y + j) * dst_stride + dst_x * bpp / 8);
++ for (i = 0; i < dst_width; i++) {
++ const uint8_t *row1;
++ const uint8_t *row2;
++ int x1, y1, x2, y2;
++ uint32_t tl, tr, bl, br;
++ int32_t fx, fy;
++
++ x1 = x - pixman_fixed_1/2;
++ y1 = y - pixman_fixed_1/2;
++
++ fx = bilinear_weight(x1);
++ fy = bilinear_weight(y1);
++
++ x1 = pixman_fixed_to_int(x1);
++ x2 = x1 + 1;
++ y1 = pixman_fixed_to_int(y1);
++ y2 = y1 + 1;
++
++ if (x1 >= src_width || x2 < 0 ||
++ y1 >= src_height || y2 < 0) {
++ b[i] = 0;
++ goto next;
++ }
++
++ if (y2 == 0) {
++ row1 = zero;
++ } else {
++ row1 = (uint8_t *)src + src_stride * y1;
++ row1 += bpp / 8 * x1;
++ }
++
++ if (y1 == src_height - 1) {
++ row2 = zero;
++ } else {
++ row2 = (uint8_t *)src + src_stride * y2;
++ row2 += bpp / 8 * x1;
++ }
++
++ if (x2 == 0) {
++ tl = 0;
++ bl = 0;
++ } else {
++ tl = convert_pixel(row1, 0);
++ bl = convert_pixel(row2, 0);
++ }
++
++ if (x1 == src_width - 1) {
++ tr = 0;
++ br = 0;
++ } else {
++ tr = convert_pixel(row1, 1);
++ br = convert_pixel(row2, 1);
++ }
++
++ b[i] = bilinear_interpolation(tl, tr, bl, br, fx, fy);
++
++next:
++ x += ux;
++ y += uy;
++ }
++ }
++}
+diff --git a/src/sna/brw/brw_eu_emit.c b/src/sna/brw/brw_eu_emit.c
+index 00c984d..2f33022 100644
+--- a/src/sna/brw/brw_eu_emit.c
++++ b/src/sna/brw/brw_eu_emit.c
+@@ -700,7 +700,7 @@ push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+ *
+ * When the matching 'else' instruction is reached (presumably by
+ * countdown of the instruction count patched in by our ELSE/ENDIF
+- * functions), the relevent flags are inverted.
++ * functions), the relevant flags are inverted.
+ *
+ * When the matching 'endif' instruction is reached, the flags are
+ * popped off. If the stack is now empty, normal execution resumes.
+diff --git a/src/sna/compiler.h b/src/sna/compiler.h
+index ff41217..c723137 100644
+--- a/src/sna/compiler.h
++++ b/src/sna/compiler.h
+@@ -39,6 +39,7 @@
+ #define pure __attribute__((pure))
+ #define tightly_packed __attribute__((__packed__))
+ #define flatten __attribute__((flatten))
++#define nonnull __attribute__((nonnull))
+ #define page_aligned __attribute__((aligned(4096)))
+ #else
+ #define likely(expr) (expr)
+@@ -51,6 +52,7 @@
+ #define pure
+ #define tighly_packed
+ #define flatten
++#define nonnull
+ #define page_aligned
+ #endif
+
+@@ -61,20 +63,18 @@
+ #define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse")))
+ #endif
+
+-#if HAS_GCC(4, 7)
+-#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse")))
+-#endif
+-
+ #if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
+ #define fast __attribute__((optimize("Ofast")))
+ #else
+ #define fast
+ #endif
+
+-#if HAS_GCC(4, 6) && defined(__OPTIMIZE__)
+-#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops")))
+-#elif HAS_GCC(4, 5) && defined(__OPTIMIZE__)
+-#define fast_memcpy __attribute__((target("inline-all-stringops")))
++#if HAS_GCC(4, 7)
++#define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse")))
++#endif
++
++#if HAS_GCC(4, 5) && defined(__OPTIMIZE__)
++#define fast_memcpy fast __attribute__((target("inline-all-stringops")))
+ #else
+ #define fast_memcpy
+ #endif
+diff --git a/src/sna/fb/fb.h b/src/sna/fb/fb.h
+index 8bf9008..9043174 100644
+--- a/src/sna/fb/fb.h
++++ b/src/sna/fb/fb.h
+@@ -24,10 +24,6 @@
+ #ifndef FB_H
+ #define FB_H
+
+-#ifdef HAVE_CONFIG_H
+-#include "config.h"
+-#endif
+-
+ #include <xorg-server.h>
+ #include <servermd.h>
+ #include <gcstruct.h>
+diff --git a/src/sna/fb/fbpict.h b/src/sna/fb/fbpict.h
+index 932032f..2087777 100644
+--- a/src/sna/fb/fbpict.h
++++ b/src/sna/fb/fbpict.h
+@@ -24,10 +24,6 @@
+ #ifndef FBPICT_H
+ #define FBPICT_H
+
+-#ifdef HAVE_CONFIG_H
+-#include "config.h"
+-#endif
+-
+ #include <xorg-server.h>
+ #include <picturestr.h>
+
+diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
+index 1104f46..12b741c 100644
+--- a/src/sna/gen2_render.c
++++ b/src/sna/gen2_render.c
+@@ -1572,12 +1572,12 @@ gen2_composite_picture(struct sna *sna,
+ if (channel->repeat &&
+ (x >= 0 &&
+ y >= 0 &&
+- x + w < pixmap->drawable.width &&
+- y + h < pixmap->drawable.height)) {
++ x + w <= pixmap->drawable.width &&
++ y + h <= pixmap->drawable.height)) {
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ if (priv && priv->clear) {
+ DBG(("%s: converting large pixmap source into solid [%08x]\n", __FUNCTION__, priv->clear_color));
+- return gen2_composite_solid_init(sna, channel, priv->clear_color);
++ return gen2_composite_solid_init(sna, channel, solid_color(picture->format, priv->clear_color));
+ }
+ }
+ } else
+diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
+index 78289f0..2d3fb1e 100644
+--- a/src/sna/gen3_render.c
++++ b/src/sna/gen3_render.c
+@@ -531,6 +531,7 @@ gen3_emit_composite_primitive_affine_gradient(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + r->width;
+ v[1] = dst_y + r->height;
+@@ -596,6 +597,7 @@ gen3_emit_composite_primitive_identity_source(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+@@ -643,6 +645,7 @@ gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x;
+ v[9] = r->dst.y;
+@@ -693,6 +696,7 @@ gen3_emit_composite_primitive_affine_source(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + r->width;
+ v[5] = v[1] = dst_y + r->height;
+@@ -756,6 +760,7 @@ gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+@@ -781,6 +786,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x;
+ v[9] = r->dst.y;
+@@ -817,6 +823,7 @@ gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+@@ -862,6 +869,7 @@ gen3_emit_composite_primitive_affine_source_mask(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+@@ -978,6 +986,7 @@ gen3_emit_composite_primitive_constant__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[4] = v[2] = r->dst.x + op->dst.x;
+ v[5] = r->dst.y + op->dst.y;
+@@ -1013,6 +1022,7 @@ gen3_emit_composite_primitive_identity_gradient__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ x = r->dst.x + op->dst.x;
+ y = r->dst.y + op->dst.y;
+@@ -1067,6 +1077,7 @@ gen3_emit_composite_primitive_affine_gradient__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + r->width;
+ v[1] = dst_y + r->height;
+@@ -1132,6 +1143,7 @@ gen3_emit_composite_primitive_identity_source__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+@@ -1179,6 +1191,7 @@ gen3_emit_composite_primitive_identity_source_no_offset__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x;
+ v[9] = r->dst.y;
+@@ -1229,6 +1242,7 @@ gen3_emit_composite_primitive_affine_source__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + r->width;
+ v[5] = v[1] = dst_y + r->height;
+@@ -1292,6 +1306,7 @@ gen3_emit_composite_primitive_constant_identity_mask__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x + op->dst.x;
+ v[0] = v[4] + w;
+@@ -1317,6 +1332,7 @@ gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2(struct sna
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 12;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[8] = v[4] = r->dst.x;
+ v[9] = r->dst.y;
+@@ -1353,6 +1369,7 @@ gen3_emit_composite_primitive_identity_source_mask__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+@@ -1398,6 +1415,7 @@ gen3_emit_composite_primitive_affine_source_mask__sse2(struct sna *sna,
+
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 18;
++ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ v[0] = dst_x + w;
+ v[1] = dst_y + h;
+@@ -2233,6 +2251,7 @@ static void gen3_vertex_flush(struct sna *sna)
+ static int gen3_vertex_finish(struct sna *sna)
+ {
+ struct kgem_bo *bo;
++ unsigned hint, size;
+
+ DBG(("%s: used=%d/%d, vbo active? %d\n",
+ __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
+@@ -2243,6 +2262,7 @@ static int gen3_vertex_finish(struct sna *sna)
+
+ sna_vertex_wait__locked(&sna->render);
+
++ hint = CREATE_GTT_MAP;
+ bo = sna->render.vbo;
+ if (bo) {
+ DBG(("%s: reloc = %d\n", __FUNCTION__,
+@@ -2251,7 +2271,7 @@ static int gen3_vertex_finish(struct sna *sna)
+ if (sna->render.vertex_reloc[0]) {
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
+- bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
++ bo, I915_GEM_DOMAIN_VERTEX << 16 | KGEM_RELOC_FENCED, 0);
+
+ sna->render.vertex_reloc[0] = 0;
+ }
+@@ -2260,17 +2280,29 @@ static int gen3_vertex_finish(struct sna *sna)
+ sna->render.vbo = NULL;
+
+ kgem_bo_destroy(&sna->kgem, bo);
++ hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
+ }
+
++ size = 256*1024;
+ sna->render.vertices = NULL;
+- sna->render.vbo = kgem_create_linear(&sna->kgem,
+- 256*1024, CREATE_GTT_MAP);
+- if (sna->render.vbo)
++ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
++ while (sna->render.vbo == NULL && size > sizeof(sna->render.vertex_data)) {
++ size /= 2;
++ sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
++ }
++ if (sna->render.vbo == NULL)
++ sna->render.vbo = kgem_create_linear(&sna->kgem,
++ 256*1024, CREATE_GTT_MAP);
++ if (sna->render.vbo &&
++ kgem_check_bo(&sna->kgem, sna->render.vbo, NULL))
+ sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+ if (sna->render.vertices == NULL) {
+- if (sna->render.vbo)
++ if (sna->render.vbo) {
+ kgem_bo_destroy(&sna->kgem, sna->render.vbo);
+- sna->render.vbo = NULL;
++ sna->render.vbo = NULL;
++ }
++ sna->render.vertices = sna->render.vertex_data;
++ sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
+ return 0;
+ }
+ assert(sna->render.vbo->snoop == false);
+@@ -2280,8 +2312,14 @@ static int gen3_vertex_finish(struct sna *sna)
+ sna->render.vertex_data,
+ sizeof(float)*sna->render.vertex_used);
+ }
+- sna->render.vertex_size = 64 * 1024 - 1;
+- return sna->render.vertex_size - sna->render.vertex_used;
++
++ size = __kgem_bo_size(sna->render.vbo)/4;
++ if (size >= UINT16_MAX)
++ size = UINT16_MAX - 1;
++ assert(size > sna->render.vertex_used);
++
++ sna->render.vertex_size = size;
++ return size - sna->render.vertex_used;
+ }
+
+ static void gen3_vertex_close(struct sna *sna)
+@@ -2345,7 +2383,7 @@ static void gen3_vertex_close(struct sna *sna)
+ DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
+ sna->kgem.batch[sna->render.vertex_reloc[0]] =
+ kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
<Skipped 25553 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/xorg-driver-video-intel.git/commitdiff/96bae86b9a37f6ed2340946a458a2ee5909ce60e
More information about the pld-cvs-commit
mailing list