[packages/llvm] - up to 3.3

arekm arekm at pld-linux.org
Sat Jun 15 20:17:36 CEST 2013


commit c0fd916863545c83a6eaac1b5048b1da5ea0c5ba
Author: Arkadiusz Miśkiewicz <arekm at maven.pl>
Date:   Sat Jun 15 20:17:33 2013 +0200

    - up to 3.3

 llvm-r600.patch | 23023 ------------------------------------------------------
 1 file changed, 23023 deletions(-)
---
diff --git a/llvm-r600.patch b/llvm-r600.patch
deleted file mode 100644
index 0957c01..0000000
--- a/llvm-r600.patch
+++ /dev/null
@@ -1,23023 +0,0 @@
-diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
---- llvm-3.2.src/autoconf/configure.ac	2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/autoconf/configure.ac	2013-01-25 19:43:56.096716416 +0100
-@@ -751,6 +751,11 @@
- 
- if test ${enableval} != "disable"
- then
-+  if test ${enableval} = "AMDGPU"
-+  then
-+    AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
-+    enableval="R600"
-+  fi
-   TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
- 
-diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
---- llvm-3.2.src/configure	2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/configure	2013-01-25 19:43:56.173383081 +0100
-@@ -5473,6 +5473,13 @@
- 
- if test ${enableval} != "disable"
- then
-+  if test ${enableval} = "AMDGPU"
-+  then
-+    { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
-+echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
-+   { (exit 1); exit 1; }; }
-+    enableval="R600"
-+  fi
-   TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
- 
-@@ -10316,7 +10323,7 @@
-   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
-   lt_status=$lt_dlunknown
-   cat > conftest.$ac_ext <<EOF
--#line 10317 "configure"
-+#line 10326 "configure"
- #include "confdefs.h"
- 
- #if HAVE_DLFCN_H
-diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
---- llvm-3.2.src/include/llvm/IntrinsicsR600.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/include/llvm/IntrinsicsR600.td	2013-01-25 19:43:56.433383075 +0100
-@@ -0,0 +1,36 @@
-+//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines all of the R600-specific intrinsics.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "r600" in {
-+
-+class R600ReadPreloadRegisterIntrinsic<string name>
-+  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-+    GCCBuiltin<name>;
-+
-+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-+  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-+  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-+  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-+}
-+
-+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_global_size">;
-+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_local_size">;
-+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_ngroups">;
-+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_tgid">;
-+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-+                                       "__builtin_r600_read_tidig">;
-+} // End TargetPrefix = "r600"
-diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
---- llvm-3.2.src/include/llvm/Intrinsics.td	2012-10-20 01:00:20.000000000 +0200
-+++ llvm-r600/include/llvm/Intrinsics.td	2013-01-25 19:43:56.426716409 +0100
-@@ -469,3 +469,4 @@
- include "llvm/IntrinsicsHexagon.td"
- include "llvm/IntrinsicsNVVM.td"
- include "llvm/IntrinsicsMips.td"
-+include "llvm/IntrinsicsR600.td"
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp	2012-11-26 18:01:12.000000000 +0100
-+++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp	2013-01-25 19:43:56.720049736 +0100
-@@ -8514,11 +8514,8 @@
-     if (Opcode == ISD::DELETED_NODE &&
-         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
-       Opcode = Opc;
--      // If not supported by target, bail out.
--      if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
--          TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
--        return SDValue();
-     }
-+
-     if (Opc != Opcode)
-       return SDValue();
- 
-@@ -8543,6 +8540,10 @@
-   assert(SrcVT != MVT::Other && "Cannot determine source type!");
- 
-   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-+
-+  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
-+    return SDValue();
-+
-   SmallVector<SDValue, 8> Opnds;
-   for (unsigned i = 0; i != NumInScalars; ++i) {
-     SDValue In = N->getOperand(i);
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	2012-10-24 19:25:11.000000000 +0200
-+++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp	2013-01-25 19:43:56.733383069 +0100
-@@ -731,9 +731,10 @@
-           return;
-         }
-         case TargetLowering::Promote: {
--          assert(VT.isVector() && "Unknown legal promote case!");
--          Value = DAG.getNode(ISD::BITCAST, dl,
--                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
-+          EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
-+          assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+                 "Can only promote stores to same size type");
-+          Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
-           SDValue Result =
-             DAG.getStore(Chain, dl, Value, Ptr,
-                          ST->getPointerInfo(), isVolatile,
-@@ -889,10 +890,9 @@
-       break;
-     }
-     case TargetLowering::Promote: {
--      // Only promote a load of vector type to another.
--      assert(VT.isVector() && "Cannot promote this load!");
--      // Change base type to a different vector type.
-       EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-+      assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+             "Can only promote loads to same size type");
- 
-       SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
-                          LD->isVolatile(), LD->isNonTemporal(),
-diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/LLVMBuild.txt	2012-07-16 20:19:46.000000000 +0200
-+++ llvm-r600/lib/Target/LLVMBuild.txt	2013-01-25 19:43:57.173383060 +0100
-@@ -16,7 +16,7 @@
- ;===------------------------------------------------------------------------===;
- 
- [common]
--subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
-+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
- 
- ; This is a special group whose required libraries are extended (by llvm-build)
- ; with the best execution engine (the native JIT, if available, or the
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp	2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,138 @@
-+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer --------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+///
-+/// The AMDGPUAsmPrinter is used to print both assembly strings and binary
-+/// code.  When passed an MCAsmStreamer it prints assembly, and when passed
-+/// an MCObjectStreamer it outputs binary code.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+
-+#include "AMDGPUAsmPrinter.h"
-+#include "AMDGPU.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/Target/TargetLoweringObjectFile.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+
-+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
-+                                              MCStreamer &Streamer) {
-+  return new AMDGPUAsmPrinter(tm, Streamer);
-+}
-+
-+extern "C" void LLVMInitializeR600AsmPrinter() {
-+  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
-+}
-+
-+/// We need to override this function so we can avoid
-+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
-+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-+  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-+  if (STM.dumpCode()) {
-+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-+    MF.dump();
-+#endif
-+  }
-+  SetupMachineFunction(MF);
-+  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-+  if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+    EmitProgramInfo(MF);
-+  }
-+  EmitFunctionBody();
-+  return false;
-+}
-+
-+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
-+  unsigned MaxSGPR = 0;
-+  unsigned MaxVGPR = 0;
-+  bool VCCUsed = false;
-+  const SIRegisterInfo * RI =
-+                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-+
-+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+                                                  BB != BB_E; ++BB) {
-+    MachineBasicBlock &MBB = *BB;
-+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+                                                    I != E; ++I) {
-+      MachineInstr &MI = *I;
-+
-+      unsigned numOperands = MI.getNumOperands();
-+      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
-+        MachineOperand & MO = MI.getOperand(op_idx);
-+        unsigned maxUsed;
-+        unsigned width = 0;
-+        bool isSGPR = false;
-+        unsigned reg;
-+        unsigned hwReg;
-+        if (!MO.isReg()) {
-+          continue;
-+        }
-+        reg = MO.getReg();
-+        if (reg == AMDGPU::VCC) {
-+          VCCUsed = true;
-+          continue;
-+        }
-+        switch (reg) {
-+        default: break;
-+        case AMDGPU::EXEC:
-+        case AMDGPU::SI_LITERAL_CONSTANT:
-+        case AMDGPU::SREG_LIT_0:
-+        case AMDGPU::M0:
-+          continue;
-+        }
-+
-+        if (AMDGPU::SReg_32RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 1;
-+        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 1;
-+        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 2;
-+        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 2;
-+        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 4;
-+        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
-+          isSGPR = false;
-+          width = 4;
-+        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
-+          isSGPR = true;
-+          width = 8;
-+        } else {
-+          assert(!"Unknown register class");
-+        }
-+        hwReg = RI->getEncodingValue(reg);
-+        maxUsed = hwReg + width - 1;
-+        if (isSGPR) {
-+          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
-+        } else {
-+          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
-+        }
-+      }
-+    }
-+  }
-+  if (VCCUsed) {
-+    MaxSGPR += 2;
-+  }
-+  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-+  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-+  OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,44 @@
-+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Assembly printer class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_ASMPRINTER_H
-+#define AMDGPU_ASMPRINTER_H
-+
-+#include "llvm/CodeGen/AsmPrinter.h"
-+
-+namespace llvm {
-+
-+class AMDGPUAsmPrinter : public AsmPrinter {
-+
-+public:
-+  explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-+    : AsmPrinter(TM, Streamer) { }
-+
-+  virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+  virtual const char *getPassName() const {
-+    return "AMDGPU Assembly Printer";
-+  }
-+
-+  /// \brief Emit register usage information so that the GPU driver
-+  /// can correctly set up the GPU state.
-+  void EmitProgramInfo(MachineFunction &MF);
-+
-+  /// Implemented in AMDGPUMCInstLower.cpp
-+  virtual void EmitInstruction(const MachineInstr *MI);
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_ASMPRINTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,49 @@
-+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+namespace llvm {
-+
-+class AMDGPUCodeEmitter {
-+public:
-+  uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-+  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-+                                   const MachineOperand &MO) const { return 0; }
-+  virtual unsigned GPR4AlignEncode(const MachineInstr  &MI,
-+                                     unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
-+                                   unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-+                                 uint64_t Value) const {
-+    return Value;
-+  }
-+  virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
-+                                    unsigned OpNo) const {
-+    return 0;
-+  }
-+  virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-+                                                                   const {
-+    return 0;
-+  }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers AMDIL machine instructions to the appropriate
-+/// hardware instructions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class AMDGPUConvertToISAPass : public MachineFunctionPass {
-+
-+private:
-+  static char ID;
-+  TargetMachine &TM;
-+
-+public:
-+  AMDGPUConvertToISAPass(TargetMachine &tm) :
-+    MachineFunctionPass(ID), TM(tm) { }
-+
-+  virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+  virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char AMDGPUConvertToISAPass::ID = 0;
-+
-+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
-+  return new AMDGPUConvertToISAPass(tm);
-+}
-+
-+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
-+  const AMDGPUInstrInfo * TII =
-+                      static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-+
-+  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+                                                  BB != BB_E; ++BB) {
-+    MachineBasicBlock &MBB = *BB;
-+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+                                                      I != E; ++I) {
-+      MachineInstr &MI = *I;
-+      TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
-+    }
-+  }
-+  return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
---- llvm-3.2.src/lib/Target/R600/AMDGPU.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.h	2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_H
-+#define AMDGPU_H
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+namespace llvm {
-+
-+class FunctionPass;
-+class AMDGPUTargetMachine;
-+
-+// R600 Passes
-+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
-+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
-+
-+// SI Passes
-+FunctionPass *createSIAnnotateControlFlowPass();
-+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
-+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-+FunctionPass *createSIInsertWaits(TargetMachine &tm);
-+
-+// Passes common to R600 and SI
-+Pass *createAMDGPUStructurizeCFGPass();
-+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-+
-+} // End namespace llvm
-+
-+namespace ShaderType {
-+  enum Type {
-+    PIXEL = 0,
-+    VERTEX = 1,
-+    GEOMETRY = 2,
-+    COMPUTE = 3
-+  };
-+}
-+
-+#endif // AMDGPU_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,257 @@
-+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implementation of the TargetInstrInfo class that is common to all
-+/// AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDIL.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+#define GET_INSTRINFO_CTOR
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+using namespace llvm;
-+
-+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
-+  : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
-+
-+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
-+  return RI;
-+}
-+
-+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
-+                                           unsigned &SrcReg, unsigned &DstReg,
-+                                           unsigned &SubIdx) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-+                                             int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+                                                   int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+
-+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
-+                                          const MachineMemOperand *&MMO,
-+                                          int &FrameIndex) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
-+                                              int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+                                                    int &FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
-+                                           const MachineMemOperand *&MMO,
-+                                           int &FrameIndex) const {
-+// TODO: Implement this function
-+  return false;
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-+                                      MachineBasicBlock::iterator &MBBI,
-+                                      LiveVariables *LV) const {
-+// TODO: Implement this function
-+  return NULL;
-+}
-+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+                                        MachineBasicBlock &MBB) const {
-+  while (iter != MBB.end()) {
-+    switch (iter->getOpcode()) {
-+    default:
-+      break;
-+    case AMDGPU::BRANCH_COND_i32:
-+    case AMDGPU::BRANCH_COND_f32:
-+    case AMDGPU::BRANCH:
-+      return true;
-+    };
-+    ++iter;
-+  }
-+  return false;
-+}
-+
-+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
-+  MachineBasicBlock::iterator tmp = MBB->end();
-+  if (!MBB->size()) {
-+    return MBB->end();
-+  }
-+  while (--tmp) {
-+    if (tmp->getOpcode() == AMDGPU::ENDLOOP
-+        || tmp->getOpcode() == AMDGPU::ENDIF
-+        || tmp->getOpcode() == AMDGPU::ELSE) {
-+      if (tmp == MBB->begin()) {
-+        return tmp;
-+      } else {
-+        continue;
-+      }
-+    }  else {
-+      return ++tmp;
-+    }
-+  }
-+  return MBB->end();
-+}
-+
-+void
-+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-+                                    MachineBasicBlock::iterator MI,
-+                                    unsigned SrcReg, bool isKill,
-+                                    int FrameIndex,
-+                                    const TargetRegisterClass *RC,
-+                                    const TargetRegisterInfo *TRI) const {
-+  assert(!"Not Implemented");
-+}
-+
-+void
-+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-+                                     MachineBasicBlock::iterator MI,
-+                                     unsigned DestReg, int FrameIndex,
-+                                     const TargetRegisterClass *RC,
-+                                     const TargetRegisterInfo *TRI) const {
-+  assert(!"Not Implemented");
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      int FrameIndex) const {
-+// TODO: Implement this function
-+  return 0;
-+}
-+MachineInstr*
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      MachineInstr *LoadMI) const {
-+  // TODO: Implement this function
-+  return 0;
-+}
-+bool
-+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-+                                     const SmallVectorImpl<unsigned> &Ops) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+                                 unsigned Reg, bool UnfoldLoad,
-+                                 bool UnfoldStore,
-+                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+                                    SmallVectorImpl<SDNode*> &NewNodes) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+unsigned
-+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-+                                           bool UnfoldLoad, bool UnfoldStore,
-+                                           unsigned *LoadRegIndex) const {
-+  // TODO: Implement this function
-+  return 0;
-+}
-+
-+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+                                             int64_t Offset1, int64_t Offset2,
-+                                             unsigned NumLoads) const {
-+  assert(Offset2 > Offset1
-+         && "Second offset should be larger than first offset!");
-+  // If we have fewer than 16 loads in a row, and the offsets are within 16,
-+  // then schedule together.
-+  // TODO: Make the loads schedule near if it fits in a cacheline
-+  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
-+}
-+
-+bool
-+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
-+  const {
-+  // TODO: Implement this function
-+  return true;
-+}
-+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
-+                                MachineBasicBlock::iterator MI) const {
-+  // TODO: Implement this function
-+}
-+
-+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+bool
-+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+                                  const SmallVectorImpl<MachineOperand> &Pred2)
-+  const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
-+                                      std::vector<MachineOperand> &Pred) const {
-+  // TODO: Implement this function
-+  return false;
-+}
-+
-+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
-+  // TODO: Implement this function
-+  return MI->getDesc().isPredicable();
-+}
-+
-+bool
-+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
-+  // TODO: Implement this function
-+  return true;
-+}
-+ 
-+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-+    DebugLoc DL) const {
-+  MachineRegisterInfo &MRI = MF.getRegInfo();
-+  const AMDGPURegisterInfo & RI = getRegisterInfo();
-+
-+  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-+    MachineOperand &MO = MI.getOperand(i);
-+    // Convert dst regclass to one that is supported by the ISA
-+    if (MO.isReg() && MO.isDef()) {
-+      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-+        const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
-+        const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-+
-+        assert(newRegClass);
-+
-+        MRI.setRegClass(MO.getReg(), newRegClass);
-+      }
-+    }
-+  }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,149 @@
-+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Contains the definition of a TargetInstrInfo class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUINSTRUCTIONINFO_H
-+#define AMDGPUINSTRUCTIONINFO_H
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+#include <map>
-+
-+#define GET_INSTRINFO_HEADER
-+#define GET_INSTRINFO_ENUM
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class MachineFunction;
-+class MachineInstr;
-+class MachineInstrBuilder;
-+
-+/// \brief Instruction-info base class that is common to all AMD GPUs.
-+///
-+/// Derives from AMDGPUGenInstrInfo, which comes from the generated
-+/// AMDGPUGenInstrInfo.inc. The class is abstract: getRegisterInfo(),
-+/// copyPhysReg(), getMovImmInstr(), getIEQOpcode() and isMov() are pure
-+/// virtual and have to be supplied by a subclass.
-+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-+private:
-+  const AMDGPURegisterInfo RI;
-+  TargetMachine &TM;
-+  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+                          MachineBasicBlock &MBB) const;
-+public:
-+  explicit AMDGPUInstrInfo(TargetMachine &tm);
-+
-+  // Pure virtual: every subclass hands out its own register info.
-+  virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-+
-+  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
-+                             unsigned &DstReg, unsigned &SubIdx) const;
-+
-+  // Stack-slot load/store queries.
-+  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+                                     int &FrameIndex) const;
-+  bool hasLoadFromStackSlot(const MachineInstr *MI,
-+                            const MachineMemOperand *&MMO,
-+                            int &FrameIndex) const;
-+  unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+  unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+                                      int &FrameIndex) const;
-+  bool hasStoreFromStackSlot(const MachineInstr *MI,
-+                             const MachineMemOperand *&MMO,
-+                             int &FrameIndex) const;
-+
-+  MachineInstr *
-+  convertToThreeAddress(MachineFunction::iterator &MFI,
-+                        MachineBasicBlock::iterator &MBBI,
-+                        LiveVariables *LV) const;
-+
-+
-+  // Pure virtual: the register-to-register copy is target specific.
-+  virtual void copyPhysReg(MachineBasicBlock &MBB,
-+                           MachineBasicBlock::iterator MI, DebugLoc DL,
-+                           unsigned DestReg, unsigned SrcReg,
-+                           bool KillSrc) const = 0;
-+
-+  // Spill and reload of a register to / from a stack slot.
-+  void storeRegToStackSlot(MachineBasicBlock &MBB,
-+                           MachineBasicBlock::iterator MI,
-+                           unsigned SrcReg, bool isKill, int FrameIndex,
-+                           const TargetRegisterClass *RC,
-+                           const TargetRegisterInfo *TRI) const;
-+  void loadRegFromStackSlot(MachineBasicBlock &MBB,
-+                            MachineBasicBlock::iterator MI,
-+                            unsigned DestReg, int FrameIndex,
-+                            const TargetRegisterClass *RC,
-+                            const TargetRegisterInfo *TRI) const;
-+
-+protected:
-+  // Two overloads: fold against a frame index, or against a load instruction.
-+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      int FrameIndex) const;
-+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+                                      MachineInstr *MI,
-+                                      const SmallVectorImpl<unsigned> &Ops,
-+                                      MachineInstr *LoadMI) const;
-+public:
-+  // Memory-operand folding / unfolding, for MachineInstrs and for DAG nodes.
-+  bool canFoldMemoryOperand(const MachineInstr *MI,
-+                            const SmallVectorImpl<unsigned> &Ops) const;
-+  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+                           unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-+                           SmallVectorImpl<MachineInstr *> &NewMIs) const;
-+  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+                           SmallVectorImpl<SDNode *> &NewNodes) const;
-+  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-+                                      bool UnfoldLoad, bool UnfoldStore,
-+                                      unsigned *LoadRegIndex = 0) const;
-+  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+                               int64_t Offset1, int64_t Offset2,
-+                               unsigned NumLoads) const;
-+
-+  // Branch-condition and predication queries.
-+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+  void insertNoop(MachineBasicBlock &MBB,
-+                  MachineBasicBlock::iterator MI) const;
-+  bool isPredicated(const MachineInstr *MI) const;
-+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
-+  bool DefinesPredicate(MachineInstr *MI,
-+                        std::vector<MachineOperand> &Pred) const;
-+  bool isPredicable(MachineInstr *MI) const;
-+  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-+
-+  // Helper functions that check the opcode for status information
-+  bool isLoadInst(llvm::MachineInstr *MI) const;
-+  bool isExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
-+  bool isStoreInst(llvm::MachineInstr *MI) const;
-+  bool isTruncStoreInst(llvm::MachineInstr *MI) const;
-+
-+  // Pure virtual hooks that each subclass has to implement.
-+  virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+                                       int64_t Imm) const = 0;
-+  virtual unsigned getIEQOpcode() const = 0;
-+  virtual bool isMov(unsigned opcode) const = 0;
-+
-+  /// \brief Convert the AMDIL MachineInstr to a supported ISA
-+  /// MachineInstr
-+  ///
-+  /// Virtual but not pure, so a subclass may override it or keep this one.
-+  virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
-+    DebugLoc DL) const;
-+
-+};
-+
-+} // End llvm namespace
-+
-+#endif // AMDGPUINSTRUCTIONINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,74 @@
-+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains DAG node definitions for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Profiles
-+//===----------------------------------------------------------------------===//
-+
-+// Type profile for integer ternary operations: one result, three operands.
-+// The result and the first two operands share one integer type; the third
-+// operand must also be an integer but is not tied to that type.
-+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
-+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-+]>;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Nodes
-+//
-+
-+// out = ((a << 32) | b) >> c
-+//
-+// Can be used to optimize rotl:
-+// rotl(a, b) = bitalign(a, a, 32 - b)
-+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-+
-+// The argument to this node is a dword address.
-+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-+
-+// out = a - floor(a)
-+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-+
-+// out = max(a, b) a and b are floats
-+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are signed ints
-+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are unsigned ints
-+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are floats
-+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are signed ints
-+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are unsigned ints
-+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
-+  [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// urecip - This operation is a helper for integer division, it returns the
-+// result of 1 / a as a fractional unsigned integer.
-+// out = (2^32 / a) + e
-+// e is rounding error
-+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
-+
-+// DAG node for the target-independent ISD::FPOW opcode (binary FP operation).
-+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,190 @@
-+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains instruction defs that are common to all hw codegen
-+// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+// Base class for AMDGPU instruction definitions. It places the instruction in
-+// the "AMDGPU" namespace, forwards the operand lists, asm string and
-+// selection pattern, selects the NullALU itinerary, and packs two extra
-+// fields into TSFlags: Gen into bits 42-40 and AMDILOp into bits 63-48.
-+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
-+  field bits<16> AMDILOp = 0;
-+  field bits<3> Gen = 0;
-+
-+  let Namespace = "AMDGPU";
-+  let OutOperandList = outs;
-+  let InOperandList = ins;
-+  let AsmString = asm;
-+  let Pattern = pattern;
-+  let Itinerary = NullALU;
-+  let TSFlags{42-40} = Gen;
-+  let TSFlags{63-48} = AMDILOp;
-+}
-+
-+// AMDGPUInst extended with a 32-bit Inst field that defaults to all ones.
-+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
-+    : AMDGPUInst<outs, ins, asm, pattern> {
-+
-+  field bits<32> Inst = 0xffffffff;
-+
-+}
-+
-+// An i32 operand whose default value is 0 when it is left unspecified.
-+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-+
-+// Condition-code leaves. Each COND_xx accepts a condition code node whose code
-+// is the SETOxx, SETUxx or un-prefixed SETxx variant of the comparison and
-+// rejects every other code.
-+
-+// Equal.
-+def COND_EQ : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOEQ: case ISD::SETUEQ:
-+                     case ISD::SETEQ: return true;}}}]
-+>;
-+
-+// Not equal.
-+def COND_NE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETONE: case ISD::SETUNE:
-+                     case ISD::SETNE: return true;}}}]
-+>;
-+// Greater than.
-+def COND_GT : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOGT: case ISD::SETUGT:
-+                     case ISD::SETGT: return true;}}}]
-+>;
-+
-+// Greater than or equal.
-+def COND_GE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOGE: case ISD::SETUGE:
-+                     case ISD::SETGE: return true;}}}]
-+>;
-+
-+// Less than.
-+def COND_LT : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOLT: case ISD::SETULT:
-+                     case ISD::SETLT: return true;}}}]
-+>;
-+
-+// Less than or equal.
-+def COND_LE : PatLeaf <
-+  (cond),
-+  [{switch(N->get()){{default: return false;
-+                     case ISD::SETOLE: case ISD::SETULE:
-+                     case ISD::SETLE: return true;}}}]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load/Store Pattern Fragments
-+//===----------------------------------------------------------------------===//
-+
-+// Matches a zextloadi8 whose load node is accepted by isGlobalLoad().
-+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
-+    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+
-+// Named constants stored as IEEE-754 single-precision bit patterns:
-+//   0x40c90fdb ~ 6.2831855 (2 * pi)
-+//   0x40490fdb ~ 3.1415927 (pi)
-+//   0x3e22f983 ~ 0.1591549 (1 / (2 * pi))
-+class Constants {
-+int TWO_PI = 0x40c90fdb;
-+int PI = 0x40490fdb;
-+int TWO_PI_INV = 0x3e22f983;
-+}
-+// Single instance through which the values above are referenced.
-+def CONST : Constants;
-+
-+// Matches a floating-point immediate for which APFloat::isZero() holds.
-+def FP_ZERO : PatLeaf <
-+  (fpimm),
-+  [{return N->getValueAPF().isZero();}]
-+>;
-+
-+// Matches a floating-point immediate that is exactly 1.0.
-+def FP_ONE : PatLeaf <
-+  (fpimm),
-+  [{return N->isExactlyValue(1.0);}]
-+>;
-+
-+// Code-gen-only pseudo instructions that are expanded by a custom inserter.
-+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1  in {
-+
-+// Selected for int_AMDIL_clamp when the bounds are the immediates 0.0 and 1.0.
-+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "CLAMP $dst, $src0",
-+  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
-+>;
-+
-+// Selected for fabs.
-+class FABS <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "FABS $dst, $src0",
-+  [(set rc:$dst, (fabs rc:$src0))]
-+>;
-+
-+// Selected for fneg.
-+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
-+  (outs rc:$dst),
-+  (ins rc:$src0),
-+  "FNEG $dst, $src0",
-+  [(set rc:$dst, (fneg rc:$src0))]
-+>;
-+
-+// Selected for int_AMDGPU_shader_type; takes an immediate and has no outputs.
-+def SHADER_TYPE : AMDGPUShaderInst <
-+  (outs),
-+  (ins i32imm:$type),
-+  "SHADER_TYPE $type",
-+  [(int_AMDGPU_shader_type imm:$type)]
-+>;
-+
-+} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
-+
-+/* Generic helper patterns for intrinsics */
-+/* -------------------------------------- */
-+
-+// Selects fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))), using the
-+// three instructions passed as class arguments.
-+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
-+                  RegisterClass rc> : Pat <
-+  (fpow rc:$src0, rc:$src1),
-+  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
-+>;
-+
-+/* Other helper patterns */
-+/* --------------------- */
-+
-+/* Extract element pattern: reading element sub_idx of a vector becomes an
-+ * EXTRACT_SUBREG of sub-register sub_reg. */
-+class Extract_Element <ValueType sub_type, ValueType vec_type,
-+                     RegisterClass vec_class, int sub_idx, 
-+                     SubRegIndex sub_reg>: Pat<
-+  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
-+  (EXTRACT_SUBREG vec_class:$src, sub_reg)
-+>;
-+
-+/* Insert element pattern: writing element sub_idx of a vector becomes an
-+ * INSERT_SUBREG into sub-register sub_reg. */
-+class Insert_Element <ValueType elem_type, ValueType vec_type,
-+                      RegisterClass elem_class, RegisterClass vec_class,
-+                      int sub_idx, SubRegIndex sub_reg> : Pat <
-+
-+  (vec_type (vector_insert (vec_type vec_class:$vec),
-+                           (elem_type elem_class:$elem), sub_idx)),
-+  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
-+>;
-+
-+// Vector Build pattern: a four-element build_vector becomes a chain of four
-+// INSERT_SUBREGs that starts from IMPLICIT_DEF and writes $x, $y, $z and $w
-+// into sel_x, sel_y, sel_z and sel_w in that order.
-+class Vector_Build <ValueType vecType, RegisterClass vectorClass,
-+                    ValueType elemType, RegisterClass elemClass> : Pat <
-+  (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
-+                         (elemType elemClass:$z), (elemType elemClass:$w))),
-+  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-+  (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
-+                            elemClass:$z, sel_z), elemClass:$w, sel_w)
-+>;
-+
-+// bitconvert pattern: a bitconvert from st to dt within register class rc
-+// emits no instruction; the source register is reused with the new type.
-+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
-+  (dt (bitconvert (st rc:$src0))),
-+  (dt rc:$src0)
-+>;
-+
-+// dword-address pattern: the AMDGPUdwordaddr wrapper is dropped and the
-+// address register is used directly.
-+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
-+  (vt (AMDGPUdwordaddr (vt rc:$addr))),
-+  (vt rc:$addr)
-+>;
-+
-+include "R600Instructions.td"
-+
-+include "SIInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUIntrinsics.td - Common intrinsics  -*- tablegen -*-----------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines intrinsics that are used by all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+// Intrinsics in the "AMDGPU" target namespace. Each entry reads
-+// Intrinsic<[result types], [parameter types], [properties]>. Most entries are
-+// marked IntrNoMem; store_output, kill, kilp and shader_type carry an empty
-+// property list.
-+let TargetPrefix = "AMDGPU", isTarget = 1 in {
-+
-+  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-+  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-+
-+  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
-+  def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-+  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-+
-+  def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
-+}
-+
-+// Intrinsics in the "TGSI" target namespace.
-+let TargetPrefix = "TGSI", isTarget = 1 in {
-+
-+  // Three float parameters, one float result, no memory access.
-+  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
-+}
-+
-+include "SIIntrinsics.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,418 @@
-+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This is the parent TargetLowering class for hardware code gen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+
-+using namespace llvm;
-+
-+/// Sets up the lowering rules shared by the hardware code-gen targets: the
-+/// AMDIL initialisation runs first and the actions below are registered on
-+/// top of it. The base class receives a new TargetLoweringObjectFileELF.
-+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
-+  TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-+
-+  // Initialize target lowering borrowed from AMDIL
-+  InitAMDILLowering();
-+
-+  // We need to custom lower some of the intrinsics
-+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+  // Library functions.  These default to Expand, but we have instructions
-+  // for them.
-+  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
-+  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
-+  setOperationAction(ISD::FPOW,   MVT::f32, Legal);
-+  setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
-+  setOperationAction(ISD::FABS,   MVT::f32, Legal);
-+  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-+  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
-+
-+  // Lower floating point store/load to integer store/load to reduce the number
-+  // of patterns in tablegen.
-+  setOperationAction(ISD::STORE, MVT::f32, Promote);
-+  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
-+
-+  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
-+  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
-+
-+  setOperationAction(ISD::LOAD, MVT::f32, Promote);
-+  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
-+
-+  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
-+  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
-+
-+  // 32-bit unsigned division: UDIV and UREM are expanded, while UDIVREM is
-+  // custom lowered (see LowerUDIVREM).
-+  setOperationAction(ISD::UDIV, MVT::i32, Expand);
-+  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-+  setOperationAction(ISD::UREM, MVT::i32, Expand);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// TargetLowering Callbacks
-+//===---------------------------------------------------------------------===//
-+
-+/// Appends one empty SDValue to \p InVals for every entry of \p Ins and hands
-+/// \p Chain back unchanged; no argument is materialised here.
-+SDValue AMDGPUTargetLowering::LowerFormalArguments(
-+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-+    const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc DL, SelectionDAG &DAG,
-+    SmallVectorImpl<SDValue> &InVals) const {
-+  // The argument count is sampled once, before any placeholder is appended.
-+  unsigned Remaining = Ins.size();
-+  while (Remaining-- != 0)
-+    InVals.push_back(SDValue());
-+
-+  return Chain;
-+}
-+
-+/// Lowers a function return to a single AMDGPUISD::RET_FLAG node chained on
-+/// \p Chain; \p Outs and \p OutVals are not consulted.
-+SDValue AMDGPUTargetLowering::LowerReturn(
-+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
-+    const SmallVectorImpl<ISD::OutputArg> &Outs,
-+    const SmallVectorImpl<SDValue> &OutVals, DebugLoc DL,
-+    SelectionDAG &DAG) const {
-+  SDValue RetFlag = DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-+  return RetFlag;
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Target specific lowering
-+//===---------------------------------------------------------------------===//
-+
-+/// Dispatches custom lowering of \p Op to the handler for its opcode.
-+///
-+/// An opcode without a handler is a programming error: the node is dumped and
-+/// an assertion fires. When assertions are compiled out, \p Op is returned
-+/// unchanged.
-+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
-+    const {
-+  switch (Op.getOpcode()) {
-+  default:
-+    Op.getNode()->dump();
-+    // The two literals are concatenated by the compiler, so the first one has
-+    // to end in a space to keep the words apart.
-+    assert(0 && "Custom lowering code for this "
-+        "instruction is not implemented yet!");
-+    break;
-+  // AMDIL DAG lowering
-+  case ISD::SDIV: return LowerSDIV(Op, DAG);
-+  case ISD::SREM: return LowerSREM(Op, DAG);
-+  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
-+  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+  // AMDGPU DAG lowering
-+  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
-+  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
-+  }
-+  return Op;
-+}
-+
-+/// Lowers the chain-less intrinsics that have a direct DAG equivalent.
-+///
-+/// Operand 0 of \p Op carries the intrinsic ID and the intrinsic's own
-+/// arguments start at operand 1. Intrinsics not listed below are returned
-+/// untouched.
-+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  unsigned ID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+  DebugLoc Loc = Op.getDebugLoc();
-+  EVT Ty = Op.getValueType();
-+
-+  // Opcode of the two-operand min/max node, set by the cases that break out.
-+  unsigned BinaryOpc = 0;
-+
-+  switch (ID) {
-+  default:
-+    return Op;
-+
-+  // Intrinsics with a dedicated lowering helper.
-+  case AMDGPUIntrinsic::AMDIL_abs:
-+    return LowerIntrinsicIABS(Op, DAG);
-+  case AMDGPUIntrinsic::AMDGPU_lrp:
-+    return LowerIntrinsicLRP(Op, DAG);
-+
-+  // One-argument intrinsics.
-+  case AMDGPUIntrinsic::AMDIL_exp:
-+    return DAG.getNode(ISD::FEXP2, Loc, Ty, Op.getOperand(1));
-+  case AMDGPUIntrinsic::AMDIL_fraction:
-+    return DAG.getNode(AMDGPUISD::FRACT, Loc, Ty, Op.getOperand(1));
-+  case AMDGPUIntrinsic::AMDIL_round_nearest:
-+    return DAG.getNode(ISD::FRINT, Loc, Ty, Op.getOperand(1));
-+
-+  // Three-argument multiply-add.
-+  case AMDGPUIntrinsic::AMDIL_mad:
-+    return DAG.getNode(AMDGPUISD::MAD, Loc, Ty, Op.getOperand(1),
-+                       Op.getOperand(2), Op.getOperand(3));
-+
-+  // Two-argument min/max: pick the node opcode, build the node below.
-+  case AMDGPUIntrinsic::AMDIL_max:   BinaryOpc = AMDGPUISD::FMAX; break;
-+  case AMDGPUIntrinsic::AMDGPU_imax: BinaryOpc = AMDGPUISD::SMAX; break;
-+  case AMDGPUIntrinsic::AMDGPU_umax: BinaryOpc = AMDGPUISD::UMAX; break;
-+  case AMDGPUIntrinsic::AMDIL_min:   BinaryOpc = AMDGPUISD::FMIN; break;
-+  case AMDGPUIntrinsic::AMDGPU_imin: BinaryOpc = AMDGPUISD::SMIN; break;
-+  case AMDGPUIntrinsic::AMDGPU_umin: BinaryOpc = AMDGPUISD::UMIN; break;
-+  }
-+
-+  return DAG.getNode(BinaryOpc, Loc, Ty, Op.getOperand(1), Op.getOperand(2));
-+}
-+
-+/// Integer absolute value: IABS(a) = SMAX(0 - a, a), where a is operand 1 of
-+/// \p Op.
-+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
-+                                                 SelectionDAG &DAG) const {
-+  DebugLoc Loc = Op.getDebugLoc();
-+  EVT Ty = Op.getValueType();
-+
-+  // 0 - a
-+  SDValue Negated = DAG.getNode(ISD::SUB, Loc, Ty, DAG.getConstant(0, Ty),
-+                                Op.getOperand(1));
-+
-+  SDValue AbsValue = DAG.getNode(AMDGPUISD::SMAX, Loc, Ty, Negated,
-+                                 Op.getOperand(1));
-+  return AbsValue;
-+}
-+
-+/// Linear interpolation, built as MAD(a, b, (1 - a) * c) where a, b and c are
-+/// operands 1, 2 and 3 of \p Op.
-+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-+                                                SelectionDAG &DAG) const {
-+  DebugLoc Loc = Op.getDebugLoc();
-+  EVT Ty = Op.getValueType();
-+
-+  // 1 - a; the constant 1.0 is always created as an f32.
-+  SDValue One = DAG.getConstantFP(1.0f, MVT::f32);
-+  SDValue OneMinusA = DAG.getNode(ISD::FSUB, Loc, Ty, One, Op.getOperand(1));
-+
-+  // (1 - a) * c
-+  SDValue Scaled = DAG.getNode(ISD::FMUL, Loc, Ty, OneMinusA,
-+                               Op.getOperand(3));
-+
-+  // a * b + (1 - a) * c
-+  return DAG.getNode(AMDGPUISD::MAD, Loc, Ty, Op.getOperand(1),
-+                     Op.getOperand(2), Scaled);
-+}
-+
-+/// \brief Generate Min/Max node
-+///
-+/// Reads operands 0-4 of \p Op as LHS, RHS, True, False and condition code.
-+/// When the value type is f32 and {True, False} is the pair {LHS, RHS} in
-+/// either order, the select is replaced by AMDGPUISD::FMIN or
-+/// AMDGPUISD::FMAX.
-+///
-+/// \returns the new node, or an empty SDValue when the pattern does not
-+/// apply. Equality and ordering-only condition codes are expected to have
-+/// been folded earlier; they assert, and also yield an empty SDValue when
-+/// assertions are compiled out.
-+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+
-+  SDValue LHS = Op.getOperand(0);
-+  SDValue RHS = Op.getOperand(1);
-+  SDValue True = Op.getOperand(2);
-+  SDValue False = Op.getOperand(3);
-+  SDValue CC = Op.getOperand(4);
-+
-+  if (VT != MVT::f32 ||
-+      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
-+    return SDValue();
-+  }
-+
-+  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+  switch (CCOpcode) {
-+  case ISD::SETOEQ:
-+  case ISD::SETONE:
-+  case ISD::SETUNE:
-+  case ISD::SETNE:
-+  case ISD::SETUEQ:
-+  case ISD::SETEQ:
-+  case ISD::SETFALSE:
-+  case ISD::SETFALSE2:
-+  case ISD::SETTRUE:
-+  case ISD::SETTRUE2:
-+  case ISD::SETUO:
-+  case ISD::SETO:
-+    assert(0 && "Operation should already be optimised !");
-+    // Without this return a build with assertions disabled would fall through
-+    // into the less-than cases and emit a min/max for an equality compare.
-+    return SDValue();
-+  case ISD::SETULE:
-+  case ISD::SETULT:
-+  case ISD::SETOLE:
-+  case ISD::SETOLT:
-+  case ISD::SETLE:
-+  case ISD::SETLT: {
-+    // select(LHS < RHS, LHS, RHS) is a minimum; with the arms swapped it is a
-+    // maximum.
-+    if (LHS == True)
-+      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+    else
-+      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+  }
-+  case ISD::SETGT:
-+  case ISD::SETGE:
-+  case ISD::SETUGE:
-+  case ISD::SETOGE:
-+  case ISD::SETUGT:
-+  case ISD::SETOGT: {
-+    // select(LHS > RHS, LHS, RHS) is a maximum; with the arms swapped it is a
-+    // minimum.
-+    if (LHS == True)
-+      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+    else
-+      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+  }
-+  case ISD::SETCC_INVALID:
-+    assert(0 && "Invalid setcc condcode !");
-+  }
-+  return Op;
-+}
-+
-+
-+
-+/// Custom lowering of ISD::UDIVREM.
-+///
-+/// Starts from the URECIP estimate of 2^32 / Den, corrects it by its rounding
-+/// error, multiplies by Num to get a trial quotient, and then adjusts the
-+/// quotient and remainder by one step in either direction.
-+///
-+/// \returns a merged pair of values: the quotient first, then the remainder.
-+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
-+    SelectionDAG &DAG) const {
-+  DebugLoc DL = Op.getDebugLoc();
-+  EVT VT = Op.getValueType();
-+
-+  SDValue Num = Op.getOperand(0);
-+  SDValue Den = Op.getOperand(1);
-+
-+  // NOTE(review): Results is never used below.
-+  SmallVector<SDValue, 8> Results;
-+
-+  // RCP =  URECIP(Den) = 2^32 / Den + e
-+  // e is rounding error.
-+  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-+
-+  // RCP_LO = umulo(RCP, Den)
-+  // NOTE(review): UMULO is created with a single result type and appears to
-+  // be used as a plain low-half multiply - confirm.
-+  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-+
-+  // RCP_HI = mulhu (RCP, Den)
-+  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-+
-+  // NEG_RCP_LO = -RCP_LO
-+  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+                                                     RCP_LO);
-+
-+  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
-+  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+                                           NEG_RCP_LO, RCP_LO,
-+                                           ISD::SETEQ);
-+  // Calculate the rounding error from the URECIP instruction
-+  // E = mulhu(ABS_RCP_LO, RCP)
-+  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-+
-+  // RCP_A_E = RCP + E
-+  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-+
-+  // RCP_S_E = RCP - E
-+  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-+
-+  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
-+  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+                                     RCP_A_E, RCP_S_E,
-+                                     ISD::SETEQ);
-+  // Quotient = mulhu(Tmp0, Num)
-+  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-+
-+  // Num_S_Remainder = Quotient * Den
-+  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-+
-+  // Remainder = Num - Num_S_Remainder
-+  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-+
-+  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-+  // NOTE(review): SETGE is a signed comparison although the operands belong
-+  // to an unsigned division - confirm the behaviour for values with the top
-+  // bit set.
-+  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
-+                                                 DAG.getConstant(-1, VT),
-+                                                 DAG.getConstant(0, VT),
-+                                                 ISD::SETGE);
-+  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-+  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-+                                                  DAG.getConstant(0, VT),
-+                                                  DAG.getConstant(-1, VT),
-+                                                  DAG.getConstant(0, VT),
-+                                                  ISD::SETGE);
-+  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
-+  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
-+                                               Remainder_GE_Zero);
-+
-+  // Calculate Division result:
-+
-+  // Quotient_A_One = Quotient + 1
-+  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
-+                                                         DAG.getConstant(1, VT));
-+
-+  // Quotient_S_One = Quotient - 1
-+  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
-+                                                         DAG.getConstant(1, VT));
-+
-+  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-+  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+                                     Quotient, Quotient_A_One, ISD::SETEQ);
-+
-+  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-+  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+                            Quotient_S_One, Div, ISD::SETEQ);
-+
-+  // Calculate Rem result:
-+
-+  // Remainder_S_Den = Remainder - Den
-+  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-+
-+  // Remainder_A_Den = Remainder + Den
-+  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-+
-+  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-+  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+                                    Remainder, Remainder_S_Den, ISD::SETEQ);
-+
-+  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-+  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+                            Remainder_A_Den, Rem, ISD::SETEQ);
-+  // Result 0 is the quotient, result 1 the remainder.
-+  SDValue Ops[2];
-+  Ops[0] = Div;
-+  Ops[1] = Rem;
-+  return DAG.getMergeValues(Ops, 2, DL);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Helper functions
-+//===----------------------------------------------------------------------===//
-+
-+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { // true iff Op is a constant FP 1.0 or an all-ones integer constant
-+  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+    return CFP->isExactlyValue(1.0); // FP constants: only exactly 1.0 counts as true
-+  }
-+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+    return C->isAllOnesValue(); // integer constants: only the all-ones value counts as true
-+  }
-+  return false; // Op is neither kind of constant node, so it is not a known true value
-+}
-+
-+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { // true iff Op is a constant FP zero or a constant integer zero
-+  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+    return CFP->getValueAPF().isZero(); // FP constants: decided by APFloat::isZero()
-+  }
-+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+    return C->isNullValue(); // integer constants: decided by isNullValue()
-+  }
-+  return false; // Op is neither kind of constant node, so it is not a known false value
-+}
-+
-+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
-+                                                  const TargetRegisterClass *RC,
-+                                                   unsigned Reg, EVT VT) const { // returns a VT-typed register node for the virtual register paired with live-in Reg
-+  MachineFunction &MF = DAG.getMachineFunction();
-+  MachineRegisterInfo &MRI = MF.getRegInfo();
-+  unsigned VirtualRegister;
-+  if (!MRI.isLiveIn(Reg)) { // Reg has not been registered as a live-in yet
-+    VirtualRegister = MRI.createVirtualRegister(RC); // fresh virtual register of class RC
-+    MRI.addLiveIn(Reg, VirtualRegister); // record the (Reg, virtual register) pair
-+  } else {
-+    VirtualRegister = MRI.getLiveInVirtReg(Reg); // reuse the virtual register recorded earlier
-+  }
-+  return DAG.getRegister(VirtualRegister, VT);
-+}
-+
-+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-+
-+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { // maps an AMDGPUISD opcode to its enumerator name as a string
-+  switch (Opcode) {
-+  default: return 0; // opcodes without a case below yield a null name
-+  // AMDIL DAG nodes (NODE_NAME_CASE already ends in ';', so the extra ';' on these lines is an empty statement)
-+  NODE_NAME_CASE(MAD);
-+  NODE_NAME_CASE(CALL);
-+  NODE_NAME_CASE(UMUL);
-+  NODE_NAME_CASE(DIV_INF);
-+  NODE_NAME_CASE(RET_FLAG);
-+  NODE_NAME_CASE(BRANCH_COND);
-+
-+  // AMDGPU DAG nodes. NOTE(review): AMDGPUISD::BITALIGN has no case here and falls to the default - confirm this is intended.
-+  NODE_NAME_CASE(DWORDADDR)
-+  NODE_NAME_CASE(FRACT)
-+  NODE_NAME_CASE(FMAX)
-+  NODE_NAME_CASE(SMAX)
-+  NODE_NAME_CASE(UMAX)
-+  NODE_NAME_CASE(FMIN)
-+  NODE_NAME_CASE(SMIN)
-+  NODE_NAME_CASE(UMIN)
-+  NODE_NAME_CASE(URECIP)
-+  NODE_NAME_CASE(INTERP)
-+  NODE_NAME_CASE(INTERP_P0)
-+  NODE_NAME_CASE(EXPORT)
-+  NODE_NAME_CASE(CONST_ADDRESS)
-+  }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h llvm-r600/lib/Target/R600/AMDGPUISelLowering.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.h	2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,145 @@
-+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition of the TargetLowering class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUISELLOWERING_H
-+#define AMDGPUISELLOWERING_H
-+
-+#include "llvm/Target/TargetLowering.h"
-+
-+namespace llvm {
-+
-+class MachineRegisterInfo;
-+
-+class AMDGPUTargetLowering : public TargetLowering { // TargetLowering code that is common to all AMD GPU targets
-+private:
-+  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; // yields two merged values: quotient first, remainder second
-+
-+protected:
-+
-+  /// \brief Helper function that adds \p Reg to the LiveIn list of the DAG's
-+  /// MachineFunction, unless it is already listed there.
-+  ///
-+  /// \returns a register node of type \p VT for the virtual register paired with \p Reg.
-+  SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
-+                                                  unsigned Reg, EVT VT) const;
-+
-+  bool isHWTrueValue(SDValue Op) const; // Op is a constant FP 1.0 or an all-ones integer constant
-+  bool isHWFalseValue(SDValue Op) const; // Op is a constant FP zero or a constant integer zero
-+
-+public:
-+  AMDGPUTargetLowering(TargetMachine &TM);
-+
-+  virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
-+                             bool isVarArg,
-+                             const SmallVectorImpl<ISD::InputArg> &Ins,
-+                             DebugLoc DL, SelectionDAG &DAG,
-+                             SmallVectorImpl<SDValue> &InVals) const;
-+
-+  virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-+                              bool isVarArg,
-+                              const SmallVectorImpl<ISD::OutputArg> &Outs,
-+                              const SmallVectorImpl<SDValue> &OutVals,
-+                              DebugLoc DL, SelectionDAG &DAG) const;
-+
-+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
-+  virtual const char* getTargetNodeName(unsigned Opcode) const; // enumerator name of an AMDGPUISD opcode, or null if it has no entry
-+
-+// The functions below are defined in AMDILISelLowering.cpp, not in AMDGPUISelLowering.cpp
-+public:
-+
-+  /// \brief Determine which of the bits specified in \p Mask are known to be
-+  /// either zero or one and return them in the \p KnownZero and \p KnownOne
-+  /// bitsets.
-+  virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-+                                              APInt &KnownZero,
-+                                              APInt &KnownOne,
-+                                              const SelectionDAG &DAG,
-+                                              unsigned Depth = 0) const;
-+
-+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
-+                                  const CallInst &I, unsigned Intrinsic) const;
-+
-+  /// We want to mark f32/f64 floating point values as legal.
-+  bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-+
-+  /// We don't want to shrink f64/f32 constants.
-+  bool ShouldShrinkFPConstant(EVT VT) const;
-+
-+private:
-+  void InitAMDILLowering();
-+  SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-+  EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; // presumably an integer type of `size` bits with `numEle` elements - TODO confirm in AMDILISelLowering.cpp
-+  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-+};
-+
-+namespace AMDGPUISD {
-+
-+enum {
-+  // AMDIL ISD Opcodes
-+  FIRST_NUMBER = ISD::BUILTIN_OP_END, // numbering of the opcodes below starts at ISD::BUILTIN_OP_END + 1
-+  MAD,         // 32bit Fused Multiply Add instruction
-+  CALL,        // Function call based on a single integer
-+  UMUL,        // 32bit unsigned multiplication
-+  DIV_INF,      // Divide with infinity returned on zero divisor
-+  RET_FLAG,
-+  BRANCH_COND,
-+  // End AMDIL ISD Opcodes
-+  BITALIGN, // NOTE(review): the only opcode here without a case in getTargetNodeName()
-+  DWORDADDR,
-+  FRACT,
-+  FMAX,
-+  SMAX,
-+  UMAX,
-+  FMIN,
-+  SMIN,
-+  UMIN,
-+  URECIP,
-+  INTERP,
-+  INTERP_P0,
-+  EXPORT,
-+  CONST_ADDRESS,
-+  LAST_AMDGPU_ISD_NUMBER // end marker; SIISD::SI_FIRST is defined as this value
-+};
-+
-+
-+} // End namespace AMDGPUISD
-+
-+namespace SIISD {
-+
-+enum {
-+  SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER, // SI opcodes are numbered after the last AMDGPUISD opcode
-+  VCC_AND,
-+  VCC_BITCAST
-+};
-+
-+} // End namespace SIISD
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,83 @@
-+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+#include "AMDGPUMCInstLower.h"
-+#include "AMDGPUAsmPrinter.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/CodeGen/MachineBasicBlock.h"
-+#include "llvm/CodeGen/MachineInstr.h"
-+#include "llvm/Constants.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/MC/MCExpr.h"
-+#include "llvm/Support/ErrorHandling.h"
-+
-+using namespace llvm;
-+
-+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx): // keeps a reference to ctx; lower() passes it to MCSymbolRefExpr::Create
-+  Ctx(ctx)
-+{ }
-+
-+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { // fills OutMI with MI's opcode and one MCOperand per explicit operand of MI
-+  OutMI.setOpcode(MI->getOpcode());
-+
-+  for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
-+    const MachineOperand &MO = MI->getOperand(i);
-+
-+    MCOperand MCOp;
-+    switch (MO.getType()) {
-+    default: // any operand kind other than the four handled below
-+      llvm_unreachable("unknown operand type");
-+    case MachineOperand::MO_FPImmediate: {
-+      const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
-+      assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle && // i.e. only IEEE single-precision immediates pass this check
-+             "Only floating point immediates are supported at the moment.");
-+      MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
-+      break;
-+    }
-+    case MachineOperand::MO_Immediate:
-+      MCOp = MCOperand::CreateImm(MO.getImm());
-+      break;
-+    case MachineOperand::MO_Register:
-+      MCOp = MCOperand::CreateReg(MO.getReg());
-+      break;
-+    case MachineOperand::MO_MachineBasicBlock: // block operands become a symbol reference to the block's symbol
-+      MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-+                                   MO.getMBB()->getSymbol(), Ctx)); // last case: no break, control simply leaves the switch
-+    }
-+    OutMI.addOperand(MCOp);
-+  }
-+}
-+
-+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) { // lowers MI to MCInst(s) and hands them to OutStreamer
-+  AMDGPUMCInstLower MCInstLowering(OutContext);
-+
-+  if (MI->isBundle()) { // bundle header: the instructions inside the bundle are emitted, the header itself is not
-+    const MachineBasicBlock *MBB = MI->getParent();
-+    MachineBasicBlock::const_instr_iterator I = MI;
-+    ++I; // step past the bundle header
-+    while (I != MBB->end() && I->isInsideBundle()) { // stop at the end of the block or at the first instruction outside the bundle
-+      MCInst MCBundleInst;
-+      const MachineInstr *BundledInst = I;
-+      MCInstLowering.lower(BundledInst, MCBundleInst);
-+      OutStreamer.EmitInstruction(MCBundleInst);
-+      ++I;
-+    }
-+  } else { // ordinary instruction: lower and emit it directly
-+    MCInst TmpInst;
-+    MCInstLowering.lower(MI, TmpInst);
-+    OutStreamer.EmitInstruction(TmpInst);
-+  }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,34 @@
-+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_MCINSTLOWER_H
-+#define AMDGPU_MCINSTLOWER_H
-+
-+namespace llvm {
-+
-+class MCInst;
-+class MCContext;
-+class MachineInstr;
-+
-+class AMDGPUMCInstLower { // converts a MachineInstr into an MCInst
-+
-+  MCContext &Ctx; // context used when creating symbol-reference expressions for basic-block operands
-+
-+public:
-+  AMDGPUMCInstLower(MCContext &ctx);
-+
-+  /// \brief Lower a MachineInstr to an MCInst: copy the opcode and convert each explicit operand
-+  void lower(const MachineInstr *MI, MCInst &OutMI) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_MCINSTLOWER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+
-+using namespace llvm;
-+
-+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
-+    const TargetInstrInfo &tii)
-+: AMDGPUGenRegisterInfo(0), // NOTE(review): meaning of the 0 is defined by the generated base class (presumably the return-address register) - confirm
-+  TM(tm), // TM and TII are reference members, so tm and tii must outlive this object
-+  TII(tii)
-+  { }
-+
-+//===----------------------------------------------------------------------===//
-+// Function handling callbacks - Functions are a seldom used feature of GPUs, so
-+// they are not supported at this time.
-+//===----------------------------------------------------------------------===//
-+
-+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister; // single entry holding AMDGPU::NoRegister; presumably acts as the terminator of an empty list - confirm
-+
-+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
-+                                                                         const {
-+  return &CalleeSavedReg; // MF is ignored: the same pointer is returned for every function
-+}
-+
-+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
-+                                             int SPAdj,
-+                                             RegScavenger *RS) const {
-+  assert(!"Subroutines not supported yet"); // always fails when assertions are enabled; otherwise the function does nothing
-+}
-+
-+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-+  assert(!"Subroutines not supported yet"); // always fails when assertions are enabled
-+  return 0; // only reached when assertions are compiled out
-+}
-+
-+#define GET_REGINFO_TARGET_DESC
-+#include "AMDGPUGenRegisterInfo.inc"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h	2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,63 @@
-+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUREGISTERINFO_H
-+#define AMDGPUREGISTERINFO_H
-+
-+#include "llvm/ADT/BitVector.h"
-+#include "llvm/Target/TargetRegisterInfo.h"
-+
-+#define GET_REGINFO_HEADER
-+#define GET_REGINFO_ENUM
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class TargetInstrInfo;
-+
-+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
-+  TargetMachine &TM; // reference members: both are bound in the constructor and never reseated
-+  const TargetInstrInfo &TII;
-+  static const uint16_t CalleeSavedReg; // storage returned by getCalleeSavedRegs()
-+
-+  AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
-+
-+  virtual BitVector getReservedRegs(const MachineFunction &MF) const { // placeholder: asserts; presumably overridden by each target - confirm
-+    assert(!"Unimplemented");  return BitVector();
-+  }
-+
-+  /// \param RC is an AMDIL reg class.
-+  ///
-+  /// \returns The ISA reg class that is equivalent to \p RC (this base version only asserts).
-+  virtual const TargetRegisterClass * getISARegClass(
-+                                         const TargetRegisterClass * RC) const {
-+    assert(!"Unimplemented"); return NULL;
-+  }
-+
-+  virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { // placeholder: asserts, returns NULL when assertions are off
-+    assert(!"Unimplemented"); return NULL;
-+  }
-+
-+  const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const; // always returns &CalleeSavedReg
-+  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
-+                           RegScavenger *RS) const; // asserts "Subroutines not supported yet"
-+  unsigned getFrameRegister(const MachineFunction &MF) const; // asserts "Subroutines not supported yet"
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUREGISTERINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,22 @@
-+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// Tablegen register definitions common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let Namespace = "AMDGPU" in { // the four sub-register indices below are defined in the AMDGPU namespace
-+  def sel_x : SubRegIndex; // presumably selects the x component of a vector register - confirm against R600RegisterInfo.td
-+  def sel_y : SubRegIndex;
-+  def sel_z : SubRegIndex;
-+  def sel_w : SubRegIndex;
-+}
-+
-+include "R600RegisterInfo.td"
-+include "SIRegisterInfo.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,714 @@
-+//===-- AMDGPUStructurizeCFG.cpp -  ------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// The pass implemented in this file transforms the programs control flow
-+/// graph into a form that's suitable for code generation on hardware that
-+/// implements control flow by execution masking. This currently includes all
-+/// AMD GPUs but may as well be useful for other types of hardware.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/Module.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/Analysis/RegionIterator.h"
-+#include "llvm/Analysis/RegionInfo.h"
-+#include "llvm/Analysis/RegionPass.h"
-+#include "llvm/Transforms/Utils/SSAUpdater.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+// Definition of the container types used in this pass.
-+
-+typedef std::pair<BasicBlock *, Value *> BBValuePair; // (predecessor block, incoming value) of a removed PHI entry
-+typedef ArrayRef<BasicBlock*> BBVecRef;
-+
-+typedef SmallVector<RegionNode*, 8> RNVector;
-+typedef SmallVector<BasicBlock*, 8> BBVector;
-+typedef SmallVector<BBValuePair, 2> BBValueVector;
-+
-+typedef DenseMap<PHINode *, BBValueVector> PhiMap; // removed incoming values, per PHI node
-+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; // PhiMap per block that contains the PHI nodes
-+typedef DenseMap<BasicBlock *, Value *> BBPredicates; // block -> boolean predicate value
-+typedef DenseMap<BasicBlock *, BBPredicates> PredMap; // target block -> predicates of its incoming edges
-+typedef DenseMap<BasicBlock *, unsigned> VisitedMap; // entry block -> visit number
-+
-+// The name given to newly created flow blocks.
-+
-+static const char *FlowBlockName = "Flow";
-+
-+/// \brief Transforms the control flow graph on one single entry/exit region
-+/// at a time.
-+///
-+/// After the transform all "If"/"Then"/"Else" style control flow looks like
-+/// this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 |
-+/// | /
-+/// |/   
-+/// 3
-+/// ||   Where:
-+/// | |  1 = "If" block, calculates the condition
-+/// 4 |  2 = "Then" subregion, runs if the condition is true
-+/// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
-+/// |/   4 = "Else" optional subregion, runs if the condition is false
-+/// 5    5 = "End" block, also rejoins the control flow
-+/// \endverbatim
-+///
-+/// Control flow is expressed as a branch where the true exit goes into the
-+/// "Then"/"Else" region, while the false exit skips the region.
-+/// The condition for the optional "Else" region is expressed as a PHI node.
-+/// The incoming values of the PHI node are true for the "If" edge and false
-+/// for the "Then" edge.
-+///
-+/// In addition, even complicated loops look like this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 ^  Where:
-+/// | /  1 = "Entry" block
-+/// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
-+/// 3    3 = "Flow" block, with back edge to entry block
-+/// |
-+/// \endverbatim
-+///
-+/// The back edge of the "Flow" block is always on the false side of the branch
-+/// while the true side continues the general flow. So the loop condition
-+/// consists of a network of PHI nodes where the true incoming values express
-+/// breaks and the false values express continue states.
-+class AMDGPUStructurizeCFG : public RegionPass {
-+
-+  static char ID;
-+
-+  Type *Boolean; // the four members below are set in doInitialization()
-+  ConstantInt *BoolTrue;
-+  ConstantInt *BoolFalse;
-+  UndefValue *BoolUndef;
-+
-+  Function *Func;
-+  Region *ParentRegion;
-+
-+  DominatorTree *DT;
-+
-+  RNVector Order; // region nodes in the order built by orderNodes()
-+  VisitedMap Visited; // visit number given to each node's entry block by collectInfos()
-+  PredMap Predicates; // per block: the predicates recorded for its incoming edges
-+  BBPhiMap DeletedPhis; // PHI incoming values removed by delPhiValues(), keyed by the PHI's block
-+  BBVector FlowsInserted; // blocks recorded by getNextFlow() and skipChained()
-+
-+  BasicBlock *LoopStart; // loop state below is reset in collectInfos() and filled by analyzeLoop()
-+  BasicBlock *LoopEnd;
-+  BBPredicates LoopPred;
-+
-+  void orderNodes();
-+
-+  void buildPredicate(BranchInst *Term, unsigned Idx,
-+                      BBPredicates &Pred, bool Invert);
-+
-+  void analyzeBlock(BasicBlock *BB);
-+
-+  void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
-+
-+  void collectInfos();
-+
-+  bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
-+
-+  void killTerminator(BasicBlock *BB);
-+
-+  RegionNode *skipChained(RegionNode *Node);
-+
-+  void delPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+  void addPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+  BasicBlock *getNextFlow(BasicBlock *Prev);
-+
-+  bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
-+
-+  BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
-+
-+  void createFlow();
-+
-+  void insertConditions();
-+
-+  void rebuildSSA();
-+
-+public:
-+  AMDGPUStructurizeCFG():
-+    RegionPass(ID) {
-+
-+    initializeRegionInfoPass(*PassRegistry::getPassRegistry());
-+  }
-+
-+  virtual bool doInitialization(Region *R, RGPassManager &RGM);
-+
-+  virtual bool runOnRegion(Region *R, RGPassManager &RGM);
-+
-+  virtual const char *getPassName() const {
-+    return "AMDGPU simplify control flow";
-+  }
-+
-+  void getAnalysisUsage(AnalysisUsage &AU) const {
-+
-+    AU.addRequired<DominatorTree>(); // the dominator tree is both required and declared preserved
-+    AU.addPreserved<DominatorTree>();
-+    RegionPass::getAnalysisUsage(AU);
-+  }
-+
-+};
-+
-+} // end anonymous namespace
-+
-+char AMDGPUStructurizeCFG::ID = 0;
-+
-+/// \brief Cache the i1 type and its true/false/undef constants; always returns false
-+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
-+  LLVMContext &Context = R->getEntry()->getContext(); // context is taken from the region's entry block
-+
-+  Boolean = Type::getInt1Ty(Context);
-+  BoolTrue = ConstantInt::getTrue(Context);
-+  BoolFalse = ConstantInt::getFalse(Context);
-+  BoolUndef = UndefValue::get(Boolean);
-+
-+  return false;
-+}
-+
-+/// \brief Rebuild Order from the SCCs of ParentRegion, in scc_iterator order
-+void AMDGPUStructurizeCFG::orderNodes() {
-+  scc_iterator<Region *> I = scc_begin(ParentRegion),
-+                         E = scc_end(ParentRegion);
-+  for (Order.clear(); I != E; ++I) { // Order is emptied once, in the loop initializer
-+    std::vector<RegionNode *> &Nodes = *I;
-+    Order.append(Nodes.begin(), Nodes.end()); // all nodes of one SCC are appended together
-+  }
-+}
-+
-+/// \brief Record in Pred the predicate for successor Idx of branch Term (swapped/negated if Invert)
-+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
-+                                          BBPredicates &Pred, bool Invert) {
-+  Value *True = Invert ? BoolFalse : BoolTrue; // with Invert set, the two constants swap roles
-+  Value *False = Invert ? BoolTrue : BoolFalse;
-+
-+  RegionInfo *RI = ParentRegion->getRegionInfo();
-+  BasicBlock *BB = Term->getParent();
-+
-+  // Handle the case where multiple regions start at the same block
-+  Region *R = BB != ParentRegion->getEntry() ?
-+              RI->getRegionFor(BB) : ParentRegion;
-+
-+  if (R == ParentRegion) {
-+    // It's a top level block in our region
-+    Value *Cond = True; // used as-is for unconditional branches
-+    if (Term->isConditional()) {
-+      BasicBlock *Other = Term->getSuccessor(!Idx);
-+
-+      if (Visited.count(Other)) { // the other successor is already visited: use constants, without overwriting existing entries
-+        if (!Pred.count(Other))
-+          Pred[Other] = False;
-+
-+        if (!Pred.count(BB))
-+          Pred[BB] = True;
-+        return;
-+      }
-+      Cond = Term->getCondition();
-+
-+      if (Idx != Invert) // negate for successor 1 without Invert, or for successor 0 with Invert
-+        Cond = BinaryOperator::CreateNot(Cond, "", Term); // Term is passed as the insertion point
-+    }
-+
-+    Pred[BB] = Cond;
-+
-+  } else if (ParentRegion->contains(R)) {
-+    // It's a block in a sub region
-+    while(R->getParent() != ParentRegion) // climb to the direct child region of ParentRegion
-+      R = R->getParent();
-+
-+    Pred[R->getEntry()] = True; // the predicate is attached to that sub region's entry block
-+
-+  } else {
-+    // It's a branch from outside into our parent region
-+    Pred[BB] = True;
-+  }
-+}
-+
-+/// \brief For every predecessor edge into BB, record its predicate in Predicates[BB]
-+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
-+  pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-+  BBPredicates &Pred = Predicates[BB];
-+
-+  for (; PI != PE; ++PI) {
-+    BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); // every predecessor is expected to end in a BranchInst
-+
-+    for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+      BasicBlock *Succ = Term->getSuccessor(i);
-+      if (Succ != BB) // only the edges that actually target BB are of interest
-+        continue;
-+      buildPredicate(Term, i, Pred, false);
-+    }
-+  }
-+}
-+
-+/// \brief Record loop predicates for edges from BB to already visited blocks; update LoopStart/LoopEnd
-+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
-+  BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-+
-+  for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+    BasicBlock *Succ = Term->getSuccessor(i);
-+
-+    // Ignore it if it's not a back edge
-+    if (!Visited.count(Succ))
-+      continue;
-+
-+    buildPredicate(Term, i, LoopPred, true); // loop predicates are recorded inverted
-+
-+    LoopEnd = BB; // the latest block found with such an edge becomes LoopEnd
-+    if (Visited[Succ] < LoopIdx) { // keep the target with the smallest visit number as LoopStart
-+      LoopIdx = Visited[Succ];
-+      LoopStart = Succ;
-+    }
-+  }
-+}
-+
-+/// \brief Rebuild Visited, Predicates and the loop state by walking Order back to front
-+void AMDGPUStructurizeCFG::collectInfos() {
-+  unsigned Number = 0, LoopIdx = ~0; // LoopIdx starts at the maximum so the first back-edge target always wins
-+
-+  // Reset predicate
-+  Predicates.clear();
-+
-+  // and loop infos
-+  LoopStart = LoopEnd = 0;
-+  LoopPred.clear();
-+
-+  RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
-+  for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) { // the increment step numbers each entry block (1, 2, ...) after its body ran, also on 'continue'
-+
-+    // Analyze all the conditions leading to a node
-+    analyzeBlock((*OI)->getEntry());
-+
-+    if ((*OI)->isSubRegion()) // sub regions are not analyzed for loops
-+      continue;
-+
-+    // Find the first/last loop nodes and loop predicates
-+    analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
-+  }
-+}
-+
-+/// \brief Does A dominate every block that has a predicate recorded for B?
-+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
-+  BBPredicates &Preds = Predicates[B];
-+  for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+       PI != PE; ++PI) {
-+
-+    if (!DT->dominates(A, PI->first)) // PI->first is the block the predicate was recorded for
-+      return false;
-+  }
-+  return true; // also the result when B has no predicates at all
-+}
-+
-+/// \brief Remove phi values from all successors and then remove the terminator.
-+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
-+  TerminatorInst *Term = BB->getTerminator();
-+  if (!Term) // nothing to do for a block without a terminator
-+    return;
-+
-+  for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
-+       SI != SE; ++SI) {
-+
-+    delPhiValues(BB, *SI); // the removed values are remembered in DeletedPhis
-+  }
-+
-+  Term->eraseFromParent();
-+}
-+
-+/// First: Skip forward to the first region node that either isn't a subregion or
-+/// doesn't dominate its exit, and remove all the skipped nodes from the node order.
-+///
-+/// Second: Handle the first successor directly if the resulting node's successor
-+/// predicates are still dominated by the original entry
-+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
-+  BasicBlock *Entry = Node->getEntry(); // entry of the original node; used by the dominatesPredicates() checks below
-+
-+  // Skip forward as long as it is just a linear flow
-+  while (true) {
-+    BasicBlock *Entry = Node->getEntry(); // shadows the outer Entry inside the loop
-+    BasicBlock *Exit;
-+
-+    if (Node->isSubRegion()) {
-+      Exit = Node->getNodeAs<Region>()->getExit();
-+    } else {
-+      TerminatorInst *Term = Entry->getTerminator();
-+      if (Term->getNumSuccessors() != 1) // a block without exactly one successor ends the linear chain
-+        break;
-+      Exit = Term->getSuccessor(0);
-+    }
-+
-+    // It's a back edge, break here so we can insert a loop node
-+    if (!Visited.count(Exit))
-+      return Node;
-+
-+    // Entry does not dominate Exit, so other edges are pointing to Exit as well
-+    if (!DT->dominates(Entry, Exit))
-+      return Node;
-+
-+    RegionNode *Next = ParentRegion->getNode(Exit);
-+    RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
-+    assert(I != Order.end());
-+
-+    Visited.erase(Next->getEntry()); // Next joins the chain: drop it from Visited and Order, then continue from it
-+    Order.erase(I);
-+    Node = Next;
-+  }
-+
-+  BasicBlock *BB = Node->getEntry();
-+  TerminatorInst *Term = BB->getTerminator();
-+  if (Term->getNumSuccessors() != 2)
-+    return Node;
-+
-+  // Our node has exactly two successors, check if we can handle
-+  // any of them directly
-+  BasicBlock *Succ = Term->getSuccessor(0);
-+  if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) { // successor 0 is not usable: fall back to successor 1 or give up
-+    Succ = Term->getSuccessor(1);
-+    if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
-+      return Node;
-+  } else { // successor 0 is usable: still prefer successor 1 if it is usable too and has the smaller visit number
-+    BasicBlock *Succ2 = Term->getSuccessor(1);
-+    if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
-+        dominatesPredicates(Entry, Succ2))
-+      Succ = Succ2;
-+  }
-+
-+  RegionNode *Next = ParentRegion->getNode(Succ);
-+  RNVector::iterator E = Order.end();
-+  RNVector::iterator I = std::find(Order.begin(), E, Next);
-+  assert(I != E);
-+
-+  killTerminator(BB); // BB loses its terminator and is recorded in FlowsInserted like a flow block
-+  FlowsInserted.push_back(BB);
-+  Visited.erase(Succ);
-+  Order.erase(I);
-+  return ParentRegion->getNode(wireFlowBlock(BB, Next)); // the node of the block returned by wireFlowBlock() is the result
-+}
-+
-+/// \brief Remove all PHI values coming from "From" into "To" and remember
-+/// them in DeletedPhis (keyed by "To", then by PHI node)
-+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
-+  PhiMap &Map = DeletedPhis[To];
-+  for (BasicBlock::iterator I = To->begin(), E = To->end();
-+       I != E && isa<PHINode>(*I);) { // only the leading run of PHI nodes is visited
-+
-+    PHINode &Phi = cast<PHINode>(*I++); // the iterator is advanced before the PHI is modified
-+    while (Phi.getBasicBlockIndex(From) != -1) { // repeat until no entry for "From" is left in this PHI
-+      Value *Deleted = Phi.removeIncomingValue(From, false); // false: the PHI itself is kept even if it becomes empty
-+      Map[&Phi].push_back(std::make_pair(From, Deleted));
-+    }
-+  }
-+}
-+
-+/// \brief Add the PHI values back once we know the new predecessor
-+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
-+  if (!DeletedPhis.count(To)) // nothing was removed from the PHIs of "To"
-+    return;
-+
-+  PhiMap &Map = DeletedPhis[To];
-+  SSAUpdater Updater;
-+
-+  for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
-+
-+    PHINode *Phi = I->first;
-+    Updater.Initialize(Phi->getType(), "");
-+    BasicBlock *Fallback = To; // becomes the common dominator of "To" and all blocks that supplied a value
-+    bool HaveFallback = false; // tracks whether one of the supplying blocks is that common dominator
-+
-+    for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
-+         VI != VE; ++VI) {
-+
-+      Updater.AddAvailableValue(VI->first, VI->second);
-+      BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
-+      if (Dom == VI->first) // the supplying block itself is the common dominator
-+        HaveFallback = true;
-+      else if (Dom != Fallback) // the dominator moved to a block for which no value is recorded
-+        HaveFallback = false;
-+      Fallback = Dom;
-+    }
-+    if (!HaveFallback) { // no recorded value sits in the common dominator: make undef available there
-+      Value *Undef = UndefValue::get(Phi->getType());
-+      Updater.AddAvailableValue(Fallback, Undef);
-+    }
-+
-+    Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
-+  }
-+  DeletedPhis.erase(To); // the remembered values for "To" are used only once
-+}
-+
-+/// \brief Create a new flow node and update dominator tree and region info
-+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
-+  LLVMContext &Context = Func->getContext();
-+  BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-+                       Order.back()->getEntry();
-+  BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
-+                                        Func, Insert);
-+  DT->addNewBlock(Flow, Prev);
-+  ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
-+  FlowsInserted.push_back(Flow);
-+  return Flow;
-+}
-+
-+/// \brief Can we predict that this node will always be called?
-+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
-+                                             BasicBlock *Node) {
-+  BBPredicates &Preds = Predicates[Node];
-+  bool Dominated = false;
-+
-+  for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
-+       I != E; ++I) {
-+
-+    if (I->second != BoolTrue)
-+      return false;
-+
-+    if (!Dominated && DT->dominates(I->first, Prev))
-+      Dominated = true;
-+  }
-+  return Dominated;
-+}
-+
-+/// \brief Wire up the new control flow by inserting or updating the branch
-+/// instructions at node exits
-+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
-+                                                RegionNode *Node) {
-+  BasicBlock *Entry = Node->getEntry();
-+
-+  if (LoopStart == Entry) {
-+    LoopStart = Prev;
-+    LoopPred[Prev] = BoolTrue;
-+  }
-+
-+  // Wire it up temporarily, skipChained may recurse into us
-+  BranchInst::Create(Entry, Prev);
-+  DT->changeImmediateDominator(Entry, Prev);
-+  addPhiValues(Prev, Entry);
-+
-+  Node = skipChained(Node);
-+
-+  BasicBlock *Next = getNextFlow(Prev);
-+  if (!isPredictableTrue(Prev, Entry)) {
-+    // Let Prev point to entry and next block
-+    Prev->getTerminator()->eraseFromParent();
-+    BranchInst::Create(Entry, Next, BoolUndef, Prev);
-+  } else {
-+    DT->changeImmediateDominator(Next, Entry);
-+  }
-+
-+  // Let node exit(s) point to next block
-+  if (Node->isSubRegion()) {
-+    Region *SubRegion = Node->getNodeAs<Region>();
-+    BasicBlock *Exit = SubRegion->getExit();
-+
-+    // Find all the edges from the sub region to the exit
-+    BBVector ToDo;
-+    for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
-+      if (SubRegion->contains(*I))
-+        ToDo.push_back(*I);
-+    }
-+
-+    // Modify the edges to point to the new flow block
-+    for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
-+      delPhiValues(*I, Exit);
-+      TerminatorInst *Term = (*I)->getTerminator();
-+      Term->replaceUsesOfWith(Exit, Next);
-+    }
-+
-+    // Update the region info
-+    SubRegion->replaceExit(Next);
-+
-+  } else {
-+    BasicBlock *BB = Node->getNodeAs<BasicBlock>();
-+    killTerminator(BB);
-+    BranchInst::Create(Next, BB);
-+
-+    if (BB == LoopEnd)
-+      LoopEnd = 0;
-+  }
-+
-+  return Next;
-+}
-+
-+/// Destroy node order and visited map, build up flow order instead.
-+/// After this function control flow looks like it should be, but
-+/// branches only have undefined conditions.
-+void AMDGPUStructurizeCFG::createFlow() {
-+  DeletedPhis.clear();
-+
-+  BasicBlock *Prev = Order.pop_back_val()->getEntry();
-+  assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
-+  Visited.erase(Prev);
-+
-+  if (LoopStart == Prev) {
-+    // Loop starts at entry, split entry so that we can predicate it
-+    BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
-+    BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
-+    DT->addNewBlock(Split, Prev);
-+    ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+    Predicates[Split] = Predicates[Prev];
-+    Order.push_back(ParentRegion->getBBNode(Split));
-+    LoopPred[Prev] = BoolTrue;
-+
-+  } else if (LoopStart == Order.back()->getEntry()) {
-+    // Loop starts behind entry, split entry so that we can jump to it
-+    Instruction *Term = Prev->getTerminator();
-+    BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
-+    DT->addNewBlock(Split, Prev);
-+    ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+    Prev = Split;
-+  }
-+
-+  killTerminator(Prev);
-+  FlowsInserted.clear();
-+  FlowsInserted.push_back(Prev);
-+
-+  while (!Order.empty()) {
-+    RegionNode *Node = Order.pop_back_val();
-+    Visited.erase(Node->getEntry());
-+    Prev = wireFlowBlock(Prev, Node);
-+    if (LoopStart && !LoopEnd) {
-+      // Create an extra loop end node
-+      LoopEnd = Prev;
-+      Prev = getNextFlow(LoopEnd);
-+      BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
-+      addPhiValues(LoopEnd, LoopStart);
-+    }
-+  }
-+
-+  BasicBlock *Exit = ParentRegion->getExit();
-+  BranchInst::Create(Exit, Prev);
-+  addPhiValues(Prev, Exit);
-+  if (DT->dominates(ParentRegion->getEntry(), Exit))
-+    DT->changeImmediateDominator(Exit, Prev);
-+
-+  if (LoopStart && LoopEnd) {
-+    BBVector::iterator FI = std::find(FlowsInserted.begin(),
-+                                      FlowsInserted.end(),
-+                                      LoopStart);
-+    for (; *FI != LoopEnd; ++FI) {
-+      addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
-+    }
-+  }
-+
-+  assert(Order.empty());
-+  assert(Visited.empty());
-+  assert(DeletedPhis.empty());
-+}
-+
-+/// \brief Insert the missing branch conditions
-+void AMDGPUStructurizeCFG::insertConditions() {
-+  SSAUpdater PhiInserter;
-+
-+  for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
-+       FI != FE; ++FI) {
-+
-+    BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
-+    if (Term->isUnconditional())
-+      continue;
-+
-+    PhiInserter.Initialize(Boolean, "");
-+    PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
-+
-+    BasicBlock *Succ = Term->getSuccessor(0);
-+    BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
-+    for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+         PI != PE; ++PI) {
-+
-+      PhiInserter.AddAvailableValue(PI->first, PI->second);
-+    }
-+
-+    Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
-+  }
-+}
-+
-+/// Handle a rare case where the disintegrated nodes' instructions
-+/// no longer dominate all their uses. Not sure if this is really necessary.
-+void AMDGPUStructurizeCFG::rebuildSSA() {
-+  SSAUpdater Updater;
-+  for (Region::block_iterator I = ParentRegion->block_begin(),
-+                              E = ParentRegion->block_end();
-+       I != E; ++I) {
-+
-+    BasicBlock *BB = *I;
-+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
-+         II != IE; ++II) {
-+
-+      bool Initialized = false;
-+      for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-+
-+        Next = I->getNext();
-+
-+        Instruction *User = cast<Instruction>(I->getUser());
-+        if (User->getParent() == BB) {
-+          continue;
-+
-+        } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-+          if (UserPN->getIncomingBlock(*I) == BB)
-+            continue;
-+        }
-+
-+        if (DT->dominates(II, User))
-+          continue;
-+
-+        if (!Initialized) {
-+          Value *Undef = UndefValue::get(II->getType());
-+          Updater.Initialize(II->getType(), "");
-+          Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
-+          Updater.AddAvailableValue(BB, II);
-+          Initialized = true;
-+        }
-+        Updater.RewriteUseAfterInsertions(*I);
-+      }
-+    }
-+  }
-+}
-+
-+/// \brief Run the transformation for each region found
-+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
-+  if (R->isTopLevelRegion())
-+    return false;
-+
-+  Func = R->getEntry()->getParent();
-+  ParentRegion = R;
-+
-+  DT = &getAnalysis<DominatorTree>();
-+
-+  orderNodes();
-+  collectInfos();
-+  createFlow();
-+  insertConditions();
-+  rebuildSSA();
-+
-+  Order.clear();
-+  Visited.clear();
-+  Predicates.clear();
-+  DeletedPhis.clear();
-+  FlowsInserted.clear();
-+
-+  return true;
-+}
-+
-+/// \brief Create the pass
-+Pass *llvm::createAMDGPUStructurizeCFGPass() {
-+  return new AMDGPUStructurizeCFG();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,87 @@
-+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+
-+#define GET_SUBTARGETINFO_ENUM
-+#define GET_SUBTARGETINFO_TARGET_DESC
-+#define GET_SUBTARGETINFO_CTOR
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
-+  AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
-+    InstrItins = getInstrItineraryForCPU(CPU);
-+
-+  memset(CapsOverride, 0, sizeof(*CapsOverride)
-+      * AMDGPUDeviceInfo::MaxNumberCapabilities);
-+  // Default card
-+  StringRef GPU = CPU;
-+  Is64bit = false;
-+  DefaultSize[0] = 64;
-+  DefaultSize[1] = 1;
-+  DefaultSize[2] = 1;
-+  ParseSubtargetFeatures(GPU, FS);
-+  DevName = GPU;
-+  Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
-+}
-+
-+AMDGPUSubtarget::~AMDGPUSubtarget() {
-+  delete Device;
-+}
-+
-+bool
-+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
-+  assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
-+      "Caps index is out of bounds!");
-+  return CapsOverride[caps];
-+}
-+bool
-+AMDGPUSubtarget::is64bit() const  {
-+  return Is64bit;
-+}
-+bool
-+AMDGPUSubtarget::isTargetELF() const {
-+  return false;
-+}
-+size_t
-+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
-+  if (dim > 3) {
-+    return 1;
-+  } else {
-+    return DefaultSize[dim];
-+  }
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDataLayout() const {
-+    if (!Device) {
-+        return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
-+                "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
-+                "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
-+                "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
-+                "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
-+    }
-+    return Device->getDataLayout();
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDeviceName() const {
-+  return DevName;
-+}
-+const AMDGPUDevice *
-+AMDGPUSubtarget::device() const {
-+  return Device;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,65 @@
-+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUSUBTARGET_H
-+#define AMDGPUSUBTARGET_H
-+#include "AMDILDevice.h"
-+#include "llvm/ADT/StringExtras.h"
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/Target/TargetSubtargetInfo.h"
-+
-+#define GET_SUBTARGETINFO_HEADER
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#define MAX_CB_SIZE (1 << 16)
-+
-+namespace llvm {
-+
-+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-+private:
-+  bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
-+  const AMDGPUDevice *Device;
-+  size_t DefaultSize[3];
-+  std::string DevName;
-+  bool Is64bit;
-+  bool Is32on64bit;
-+  bool DumpCode;
-+  bool R600ALUInst;
-+
-+  InstrItineraryData InstrItins;
-+
-+public:
-+  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
-+  virtual ~AMDGPUSubtarget();
-+
-+  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-+  virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
-+
-+  bool isOverride(AMDGPUDeviceInfo::Caps) const;
-+  bool is64bit() const;
-+
-+  // Helper functions to simplify if statements
-+  bool isTargetELF() const;
-+  const AMDGPUDevice* device() const;
-+  std::string getDataLayout() const;
-+  std::string getDeviceName() const;
-+  virtual size_t getDefaultSize(uint32_t dim) const;
-+  bool dumpCode() const { return DumpCode; }
-+  bool r600ALUEncoding() const { return R600ALUInst; }
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUSUBTARGET_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,148 @@
-+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The AMDGPU target machine contains all of the hardware specific
-+/// information needed to emit code for R600 and SI GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDGPU.h"
-+#include "R600ISelLowering.h"
-+#include "R600InstrInfo.h"
-+#include "SIISelLowering.h"
-+#include "SIInstrInfo.h"
-+#include "llvm/Analysis/Passes.h"
-+#include "llvm/Analysis/Verifier.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineModuleInfo.h"
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/MC/MCAsmInfo.h"
-+#include "llvm/PassManager.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Support/raw_os_ostream.h"
-+#include "llvm/Transforms/IPO.h"
-+#include "llvm/Transforms/Scalar.h"
-+#include <llvm/CodeGen/Passes.h>
-+
-+using namespace llvm;
-+
-+extern "C" void LLVMInitializeR600Target() {
-+  // Register the target
-+  RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
-+}
-+
-+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
-+    StringRef CPU, StringRef FS,
-+  TargetOptions Options,
-+  Reloc::Model RM, CodeModel::Model CM,
-+  CodeGenOpt::Level OptLevel
-+)
-+:
-+  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
-+  Subtarget(TT, CPU, FS),
-+  Layout(Subtarget.getDataLayout()),
-+  FrameLowering(TargetFrameLowering::StackGrowsUp,
-+      Subtarget.device()->getStackAlignment(), 0),
-+  IntrinsicInfo(this),
-+  InstrItins(&Subtarget.getInstrItineraryData()) {
-+  // TLInfo uses InstrInfo so it must be initialized after.
-+  if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+    InstrInfo = new R600InstrInfo(*this);
-+    TLInfo = new R600TargetLowering(*this);
-+  } else {
-+    InstrInfo = new SIInstrInfo(*this);
-+    TLInfo = new SITargetLowering(*this);
-+  }
-+}
-+
-+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
-+}
-+
-+namespace {
-+class AMDGPUPassConfig : public TargetPassConfig {
-+public:
-+  AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-+    : TargetPassConfig(TM, PM) {}
-+
-+  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
-+    return getTM<AMDGPUTargetMachine>();
-+  }
-+
-+  virtual bool addPreISel();
-+  virtual bool addInstSelector();
-+  virtual bool addPreRegAlloc();
-+  virtual bool addPostRegAlloc();
-+  virtual bool addPreSched2();
-+  virtual bool addPreEmitPass();
-+};
-+} // End of anonymous namespace
-+
-+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
-+  return new AMDGPUPassConfig(this, PM);
-+}
-+
-+bool
-+AMDGPUPassConfig::addPreISel() {
-+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+    addPass(createAMDGPUStructurizeCFGPass());
-+    addPass(createSIAnnotateControlFlowPass());
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUPassConfig::addInstSelector() {
-+  addPass(createAMDGPUPeepholeOpt(*TM));
-+  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-+  return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreRegAlloc() {
-+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+    addPass(createSIAssignInterpRegsPass(*TM));
-+  }
-+  addPass(createAMDGPUConvertToISAPass(*TM));
-+  return false;
-+}
-+
-+bool AMDGPUPassConfig::addPostRegAlloc() {
-+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+    addPass(createSIInsertWaits(*TM));
-+  }
-+  return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreSched2() {
-+
-+  addPass(&IfConverterID);
-+  return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreEmitPass() {
-+  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+    addPass(createAMDGPUCFGPreparationPass(*TM));
-+    addPass(createAMDGPUCFGStructurizerPass(*TM));
-+    addPass(createR600ExpandSpecialInstrsPass(*TM));
-+    addPass(createR600LowerConstCopy(*TM));
-+    addPass(&FinalizeMachineBundlesID);
-+  } else {
-+    addPass(createSILowerLiteralConstantsPass(*TM));
-+    addPass(createSILowerControlFlowPass(*TM));
-+  }
-+
-+  return false;
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,70 @@
-+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_TARGET_MACHINE_H
-+#define AMDGPU_TARGET_MACHINE_H
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILFrameLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "R600ISelLowering.h"
-+#include "llvm/ADT/OwningPtr.h"
-+#include "llvm/DataLayout.h"
-+
-+namespace llvm {
-+
-+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
-+
-+class AMDGPUTargetMachine : public LLVMTargetMachine {
-+
-+  AMDGPUSubtarget Subtarget;
-+  const DataLayout Layout;
-+  AMDGPUFrameLowering FrameLowering;
-+  AMDGPUIntrinsicInfo IntrinsicInfo;
-+  const AMDGPUInstrInfo * InstrInfo;
-+  AMDGPUTargetLowering * TLInfo;
-+  const InstrItineraryData* InstrItins;
-+
-+public:
-+   AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
-+                       StringRef CPU,
-+                       TargetOptions Options,
-+                       Reloc::Model RM, CodeModel::Model CM,
-+                       CodeGenOpt::Level OL);
-+   ~AMDGPUTargetMachine();
-+   virtual const AMDGPUFrameLowering* getFrameLowering() const {
-+     return &FrameLowering;
-+   }
-+   virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
-+     return &IntrinsicInfo;
-+   }
-+   virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
-+   virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
-+   virtual const AMDGPURegisterInfo *getRegisterInfo() const {
-+      return &InstrInfo->getRegisterInfo();
-+   }
-+   virtual AMDGPUTargetLowering * getTargetLowering() const {
-+      return TLInfo;
-+   }
-+   virtual const InstrItineraryData* getInstrItineraryData() const {
-+      return InstrItins;
-+   }
-+   virtual const DataLayout* getDataLayout() const { return &Layout; }
-+   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPU_TARGET_MACHINE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td
---- llvm-3.2.src/lib/Target/R600/AMDGPU.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.td	2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,40 @@
-+//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen -*-----------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+
-+// Include AMDIL TD files
-+include "AMDILBase.td"
-+
-+
-+def AMDGPUInstrInfo : InstrInfo {
-+  let guessInstructionProperties = 1;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Declare the target which we are implementing
-+//===----------------------------------------------------------------------===//
-+def AMDGPUAsmWriter : AsmWriter {
-+    string AsmWriterClassName = "InstPrinter";
-+    int Variant = 0;
-+    bit isMCAsmWriter = 1;
-+}
-+
-+def AMDGPU : Target {
-+  // Pull in Instruction Info:
-+  let InstructionSet = AMDGPUInstrInfo;
-+  let AssemblyWriters = [AMDGPUAsmWriter];
-+}
-+
-+// Include AMDGPU TD files
-+include "R600Schedule.td"
-+include "SISchedule.td"
-+include "Processors.td"
-+include "AMDGPUInstrInfo.td"
-+include "AMDGPUIntrinsics.td"
-+include "AMDGPURegisterInfo.td"
-+include "AMDGPUInstructions.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp	2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,115 @@
-+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDIL7XXDevice.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILDevice.h"
-+
-+using namespace llvm;
-+
-+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
-+  setCaps();
-+  std::string name = mSTM->getDeviceName();
-+  if (name == "rv710") {
-+    DeviceFlag = OCL_DEVICE_RV710;
-+  } else if (name == "rv730") {
-+    DeviceFlag = OCL_DEVICE_RV730;
-+  } else {
-+    DeviceFlag = OCL_DEVICE_RV770;
-+  }
-+}
-+
-+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
-+}
-+
-+void AMDGPU7XXDevice::setCaps() {
-+  mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
-+  if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+    return MAX_LDS_SIZE_700;
-+  }
-+  return 0;
-+}
-+
-+size_t AMDGPU7XXDevice::getWavefrontSize() const {
-+  return AMDGPUDevice::HalfWavefrontSize;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getGeneration() const {
-+  return AMDGPUDeviceInfo::HD4XXX;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
-+  switch (DeviceID) {
-+  default:
-+    assert(0 && "ID type passed in is unknown!");
-+    break;
-+  case GLOBAL_ID:
-+  case CONSTANT_ID:
-+  case RAW_UAV_ID:
-+  case ARENA_UAV_ID:
-+    break;
-+  case LDS_ID:
-+    if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+      return DEFAULT_LDS_ID;
-+    }
-+    break;
-+  case SCRATCH_ID:
-+    if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
-+      return DEFAULT_SCRATCH_ID;
-+    }
-+    break;
-+  case GDS_ID:
-+    assert(0 && "GDS UAV ID is not supported on this chip");
-+    if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
-+      return DEFAULT_GDS_ID;
-+    }
-+    break;
-+  };
-+
-+  return 0;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
-+  return 1;
-+}
-+
-+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
-+  setCaps();
-+}
-+
-+AMDGPU770Device::~AMDGPU770Device() {
-+}
-+
-+void AMDGPU770Device::setCaps() {
-+  if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
-+    mSWBits.set(AMDGPUDeviceInfo::FMA);
-+    mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
-+  }
-+  mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
-+  mHWBits.reset(AMDGPUDeviceInfo::LongOps);
-+  mSWBits.set(AMDGPUDeviceInfo::LongOps);
-+  mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU770Device::getWavefrontSize() const {
-+  return AMDGPUDevice::WavefrontSize;
-+}
-+
-+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
-+}
-+
-+AMDGPU710Device::~AMDGPU710Device() {
-+}
-+
-+size_t AMDGPU710Device::getWavefrontSize() const {
-+  return AMDGPUDevice::QuarterWavefrontSize;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h	2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,72 @@
-+//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDIL7XXDEVICEIMPL_H
-+#define AMDIL7XXDEVICEIMPL_H
-+#include "AMDILDevice.h"
-+
-+namespace llvm {
-+class AMDGPUSubtarget;
-+
-+//===----------------------------------------------------------------------===//
-+// 7XX generation of devices and their respective sub classes
-+//===----------------------------------------------------------------------===//
-+
-+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
-+///
-+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
-+/// support the minimal features that are required to be considered OpenCL 1.0
-+/// compliant and nothing more.
-+class AMDGPU7XXDevice : public AMDGPUDevice {
-+public:
-+  AMDGPU7XXDevice(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPU7XXDevice();
-+  virtual size_t getMaxLDSSize() const;
-+  virtual size_t getWavefrontSize() const;
-+  virtual uint32_t getGeneration() const;
-+  virtual uint32_t getResourceID(uint32_t DeviceID) const;
-+  virtual uint32_t getMaxNumUAVs() const;
-+
-+protected:
-+  virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU770Device class represents the RV770 chip and its
-+/// derivative cards.
-+///
-+/// The difference between this device and the base class is that this device
-+/// adds support for double precision and has a larger wavefront size.
-+class AMDGPU770Device : public AMDGPU7XXDevice {
-+public:
-+  AMDGPU770Device(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPU770Device();
-+  virtual size_t getWavefrontSize() const;
-+private:
-+  virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU710Device class derives from the 7XX base class.
-+///
-+/// This class is a smaller derivative, so we need to overload some of the
-+/// functions in order to correctly specify this information.
-+class AMDGPU710Device : public AMDGPU7XXDevice {
-+public:
-+  AMDGPU710Device(AMDGPUSubtarget *ST);
-+  virtual ~AMDGPU710Device();
-+  virtual size_t getWavefrontSize() const;
-+};
-+
-+} // namespace llvm
-+#endif // AMDIL7XXDEVICEIMPL_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td
---- llvm-3.2.src/lib/Target/R600/AMDILBase.td	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILBase.td	2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,85 @@
-+//===- AMDILBase.td - AMDIL Target Machine ---------*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+// Target-independent interfaces which we are implementing
-+//===----------------------------------------------------------------------===//
-+
-+include "llvm/Target/Target.td"
-+
-+// Dummy Instruction itineraries for pseudo instructions
-+def ALU_NULL : FuncUnit;
-+def NullALU : InstrItinClass;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDIL Subtarget features.
-+//===----------------------------------------------------------------------===//
-+def FeatureFP64     : SubtargetFeature<"fp64",
-+        "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
-+        "true",
-+        "Enable 64bit double precision operations">;
-+def FeatureByteAddress    : SubtargetFeature<"byte_addressable_store",
-+        "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
-+        "true",
-+        "Enable byte addressable stores">;
-+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
-+        "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
-+        "true",
-+        "Enable duplicate barrier detection(HD5XXX or later).">;
-+def FeatureImages : SubtargetFeature<"images",
-+        "CapsOverride[AMDGPUDeviceInfo::Images]",
-+        "true",
-+        "Enable image functions">;
-+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
-+        "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
-+        "true",
-+        "Generate multiple UAV code(HD5XXX family or later)">;
-+def FeatureMacroDB : SubtargetFeature<"macrodb",
-+        "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
-+        "true",
-+        "Use internal macrodb, instead of macrodb in driver">;
-+def FeatureNoAlias : SubtargetFeature<"noalias",
-+        "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
-+        "true",
-+        "assert that all kernel argument pointers are not aliased">;
-+def FeatureNoInline : SubtargetFeature<"no-inline",
-+        "CapsOverride[AMDGPUDeviceInfo::NoInline]",
-+        "true",
-+        "specify whether to not inline functions">;
-+
-+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
-+        "Is64bit",
-+        "false",
-+        "Specify if 64bit addressing should be used.">;
-+
-+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
-+        "Is32on64bit",
-+        "false",
-+        "Specify if 64bit sized pointers with 32bit addressing should be used.">;
-+def FeatureDebug : SubtargetFeature<"debug",
-+        "CapsOverride[AMDGPUDeviceInfo::Debug]",
-+        "true",
-+        "Debug mode is enabled, so disable hardware accelerated address spaces.">;
-+def FeatureDumpCode : SubtargetFeature <"DumpCode",
-+        "DumpCode",
-+        "true",
-+        "Dump MachineInstrs in the CodeEmitter">;
-+
-+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-+        "R600ALUInst",
-+        "false",
-+        "Older version of ALU instructions encoding.">;
-+
-+
-+//===----------------------------------------------------------------------===//
-+// Register File, Calling Conv, Instruction Descriptions
-+//===----------------------------------------------------------------------===//
-+
-+
-+include "AMDILRegisterInfo.td"
-+include "AMDILInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp	1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp	2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,3045 @@
-+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+
-+#define DEBUGME 0
-+#define DEBUG_TYPE "structcfg"
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDIL.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/ADT/SmallVector.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/Analysis/DominatorInternals.h"
-+#include "llvm/Analysis/Dominators.h"
-+#include "llvm/CodeGen/MachinePostDominators.h"
-+#include "llvm/CodeGen/MachineDominators.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineJumpTableInfo.h"
-+#include "llvm/CodeGen/MachineLoopInfo.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+using namespace llvm;
-+
-+// TODO: move-begin.
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Statistics for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
-+    "matched");
-+STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
-+    "matched");
-+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
-+    "pattern matched");
-+STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
-+    "pattern matched");
-+STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
-+    "matched");
-+STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
-+STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Miscellaneous utility for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+namespace llvmCFGStruct {
-+#define SHOWNEWINSTR(i) \
-+  if (DEBUGME) errs() << "New instr: " << *i << "\n"
-+
-+#define SHOWNEWBLK(b, msg) \
-+if (DEBUGME) { \
-+  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+  errs() << "\n"; \
-+}
-+
-+#define SHOWBLK_DETAIL(b, msg) \
-+if (DEBUGME) { \
-+  if (b) { \
-+  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+  b->print(errs()); \
-+  errs() << "\n"; \
-+  } \
-+}
-+
-+#define INVALIDSCCNUM -1
-+#define INVALIDREGNUM 0
-+
-+template<class LoopinfoT>
-+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
-+  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
-+       iterEnd = LoopInfo.end();
-+       iter != iterEnd; ++iter) {
-+    (*iter)->print(OS, 0);
-+  }
-+}
-+
-+template<class NodeT>
-+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
-+  size_t sz = Src.size();
-+  for (size_t i = 0; i < sz/2; ++i) {
-+    NodeT *t = Src[i];
-+    Src[i] = Src[sz - i - 1];
-+    Src[sz - i - 1] = t;
-+  }
-+}
-+
-+} //end namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// supporting data structure for CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+template<class PassT>
-+struct CFGStructTraits {
-+};
-+
-+template <class InstrT>
-+class BlockInformation {
-+public:
-+  bool isRetired;
-+  int  sccNum;
-+  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
-+  //Instructions defining the corresponding successor.
-+  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-+};
-+
-+template <class BlockT, class InstrT, class RegiT>
-+class LandInformation {
-+public:
-+  BlockT *landBlk;
-+  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
-+                                  //WHILELOOP(thisloop) init before entering
-+                                  //thisloop.
-+  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
-+                                  //WHILELOOP(thisloop) init after entering
-+                                  //thisloop.
-+  std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
-+                                     //land block, branch cond on this reg.
-+  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
-+                                     //endif" after ENDLOOP(thisloop) break
-+                                     //outerLoopOf(thisLoop).
-+  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
-+                                    //endif" after ENDLOOP(thisloop) continue on
-+                                    //outerLoopOf(thisLoop).
-+  LandInformation() : landBlk(NULL) {}
-+};
-+
-+} //end of namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-+template<class PassT>
-+class  CFGStructurizer {
-+public:
-+  typedef enum {
-+    Not_SinglePath = 0,
-+    SinglePath_InPath = 1,
-+    SinglePath_NotInPath = 2
-+  } PathToKind;
-+
-+public:
-+  typedef typename PassT::InstructionType         InstrT;
-+  typedef typename PassT::FunctionType            FuncT;
-+  typedef typename PassT::DominatortreeType       DomTreeT;
-+  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
-+  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
-+  typedef typename PassT::LoopinfoType            LoopInfoT;
-+
-+  typedef GraphTraits<FuncT *>                    FuncGTraits;
-+  //typedef FuncGTraits::nodes_iterator BlockIterator;
-+  typedef typename FuncT::iterator                BlockIterator;
-+
-+  typedef typename FuncGTraits::NodeType          BlockT;
-+  typedef GraphTraits<BlockT *>                   BlockGTraits;
-+  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
-+  //typedef BlockGTraits::succ_iterator InstructionIterator;
-+  typedef typename BlockT::iterator               InstrIterator;
-+
-+  typedef CFGStructTraits<PassT>                  CFGTraits;
-+  typedef BlockInformation<InstrT>                BlockInfo;
-+  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
-+
-+  typedef int                                     RegiT;
-+  typedef typename PassT::LoopType                LoopT;
-+  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
-+        typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
-+        //landing info for loop break
-+  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
-+
-+public:
-+  CFGStructurizer();
-+  ~CFGStructurizer();
-+
-+  /// Perform the CFG structurization
-+  bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+  /// Perform the CFG preparation
-+  bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+private:
-+  void reversePredicateSetter(typename BlockT::iterator);
-+  void   orderBlocks();
-+  void   printOrderedBlocks(llvm::raw_ostream &OS);
-+  int patternMatch(BlockT *CurBlock);
-+  int patternMatchGroup(BlockT *CurBlock);
-+
-+  int serialPatternMatch(BlockT *CurBlock);
-+  int ifPatternMatch(BlockT *CurBlock);
-+  int switchPatternMatch(BlockT *CurBlock);
-+  int loopendPatternMatch(BlockT *CurBlock);
-+  int loopPatternMatch(BlockT *CurBlock);
-+
-+  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+  //int loopWithoutBreak(BlockT *);
-+
-+  void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
-+                        BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
-+  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
-+                           BlockT *ContBlock, LoopT *contLoop);
-+  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
-+  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+                       BlockT *FalseBlock);
-+  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
-+                          BlockT *FalseBlock);
-+  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+                              BlockT *FalseBlock, BlockT **LandBlockPtr);
-+  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+                                   BlockT *FalseBlock, BlockT *LandBlock,
-+                                   bool Detail = false);
-+  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
-+                          bool AllowSideEntry = true);
-+  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
-+                        bool AllowSideEntry = true);
-+  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
-+  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-+
-+  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
-+                            BlockT *TrueBlock, BlockT *FalseBlock,
-+                            BlockT *LandBlock);
-+  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
-+  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
-+                           BlockT *ExitLandBlock, RegiT SetReg);
-+  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
-+                           RegiT SetReg);
-+  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
-+                                std::set<BlockT*> &ExitBlockSet,
-+                                BlockT *ExitLandBlk);
-+  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
-+                                BlockTSmallerVector &ExitingBlocks,
-+                                BlockTSmallerVector &ExitBlocks);
-+  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
-+  void removeUnconditionalBranch(BlockT *SrcBlock);
-+  void removeRedundantConditionalBranch(BlockT *SrcBlock);
-+  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-+
-+  void removeSuccessor(BlockT *SrcBlock);
-+  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
-+  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-+
-+  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
-+                          InstrIterator InsertPos);
-+
-+  void recordSccnum(BlockT *SrcBlock, int SCCNum);
-+  int getSCCNum(BlockT *srcBlk);
-+
-+  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
-+  bool isRetiredBlock(BlockT *SrcBlock);
-+  bool isActiveLoophead(BlockT *CurBlock);
-+  bool needMigrateBlock(BlockT *Block);
-+
-+  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
-+                              BlockTSmallerVector &exitBlocks,
-+                              std::set<BlockT*> &ExitBlockSet);
-+  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
-+  BlockT *getLoopLandBlock(LoopT *LoopRep);
-+  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-+
-+  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
-+  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
-+  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
-+  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
-+  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-+
-+  bool hasBackEdge(BlockT *curBlock);
-+  unsigned getLoopDepth  (LoopT *LoopRep);
-+  int countActiveBlock(
-+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
-+    typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
-+    BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
-+  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
-+
-+private:
-+  DomTreeT *domTree;
-+  PostDomTreeT *postDomTree;
-+  LoopInfoT *loopInfo;
-+  PassT *passRep;
-+  FuncT *funcRep;
-+
-+  BlockInfoMap blockInfoMap;
-+  LoopLandInfoMap loopLandInfoMap;
-+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
-+  const AMDGPURegisterInfo *TRI;
-+
-+};  //template class CFGStructurizer
-+
-+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
-+  : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
-+}
-+
-+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
-+  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
-+       E = blockInfoMap.end(); I != E; ++I) {
-+    delete I->second;
-+  }
-+}
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
-+                                     const AMDGPURegisterInfo * tri) {
-+  passRep = &pass;
-+  funcRep = &func;
-+  TRI = tri;
-+
-+  bool changed = false;
-+
-+  //FIXME: if not reducible flow graph, make it so ???
-+
-+  if (DEBUGME) {
-+        errs() << "AMDGPUCFGStructurizer::prepare\n";
-+  }
-+
-+  loopInfo = CFGTraits::getLoopInfo(pass);
-+  if (DEBUGME) {
-+    errs() << "LoopInfo:\n";
-+    PrintLoopinfo(*loopInfo, errs());
-+  }
-+
-+  orderBlocks();
-+  if (DEBUGME) {
-+    errs() << "Ordered blocks:\n";
-+    printOrderedBlocks(errs());
-+  }
-+
-+  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
-+
-+  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
-+       iterEnd = loopInfo->end();
-+       iter != iterEnd; ++iter) {
-+    LoopT* loopRep = (*iter);
-+    BlockTSmallerVector exitingBlks;
-+    loopRep->getExitingBlocks(exitingBlks);
-+    
-+    if (exitingBlks.size() == 0) {
-+      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
-+      if (dummyExitBlk != NULL)
-+        retBlks.push_back(dummyExitBlk);
-+    }
-+  }
-+
-+  // Remove unconditional branch instr.
-+  // Add dummy exit block iff there are multiple returns.
-+
-+  for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
-+       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
-+       iterBlk != iterEndBlk;
-+       ++iterBlk) {
-+    BlockT *curBlk = *iterBlk;
-+    removeUnconditionalBranch(curBlk);
-+    removeRedundantConditionalBranch(curBlk);
-+    if (CFGTraits::isReturnBlock(curBlk)) {
-+      retBlks.push_back(curBlk);
-+    }
-+    assert(curBlk->succ_size() <= 2);
-+  } //for
-+
-+  if (retBlks.size() >= 2) {
-+    addDummyExitBlock(retBlks);
-+    changed = true;
-+  }
-+
-+  return changed;
-+} //CFGStructurizer::prepare
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
-+    const AMDGPURegisterInfo * tri) {
-+  passRep = &pass;
-+  funcRep = &func;
-+  TRI = tri;
-+
-+  //Assume reducible CFG...
-+  if (DEBUGME) {
-+    errs() << "AMDGPUCFGStructurizer::run\n";
-+    func.viewCFG();
-+  }
-+
-+  domTree = CFGTraits::getDominatorTree(pass);
-+  if (DEBUGME) {
-+    domTree->print(errs(), (const llvm::Module*)0);
-+  }
-+
-+  postDomTree = CFGTraits::getPostDominatorTree(pass);
-+  if (DEBUGME) {
-+    postDomTree->print(errs());
-+  }
-+
-+  loopInfo = CFGTraits::getLoopInfo(pass);
-+  if (DEBUGME) {
-+    errs() << "LoopInfo:\n";
-+    PrintLoopinfo(*loopInfo, errs());
-+  }
-+
<Skipped 19052 lines>
================================================================

---- gitweb:

http://git.pld-linux.org/gitweb.cgi/packages/llvm.git/commitdiff/c0fd916863545c83a6eaac1b5048b1da5ea0c5ba




More information about the pld-cvs-commit mailing list