[packages/llvm] - added r600 patch (R600 target support)
qboosh
qboosh at pld-linux.org
Fri Jan 25 22:28:01 CET 2013
commit 251890425981d4d0076266858a1b1ef0ea2b617a
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Fri Jan 25 22:28:37 2013 +0100
- added r600 patch (R600 target support)
llvm-r600.patch | 23023 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
llvm.spec | 4 +
2 files changed, 23027 insertions(+)
---
diff --git a/llvm.spec b/llvm.spec
index 8e0eff3..a51d296 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -30,6 +30,8 @@ Patch0: %{name}-config.patch
# Data files should be installed with timestamps preserved
Patch1: %{name}-2.6-timestamp.patch
Patch2: %{name}-pld.patch
+# R600 target support from git://people.freedesktop.org/~tstellar/llvm
+Patch3: %{name}-r600.patch
URL: http://llvm.org/
BuildRequires: autoconf >= 2.60
BuildRequires: automake >= 1:1.9.6
@@ -269,6 +271,7 @@ mv clang-*.* tools/clang
%patch0 -p1
%patch1 -p1
%patch2 -p1
+%patch3 -p1
# configure does not properly specify libdir
sed -i 's|(PROJ_prefix)/lib|(PROJ_prefix)/%{_lib}|g' Makefile.config.in
@@ -305,6 +308,7 @@ bash ../%configure \
%if %{with apidocs}
--enable-doxygen \
%endif
+ --enable-experimental-targets=R600 \
--enable-jit \
--enable-optimized \
--enable-shared \
diff --git a/llvm-r600.patch b/llvm-r600.patch
new file mode 100644
index 0000000..0957c01
--- /dev/null
+++ b/llvm-r600.patch
@@ -0,0 +1,23023 @@
+diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
+--- llvm-3.2.src/autoconf/configure.ac 2012-11-21 17:13:35.000000000 +0100
++++ llvm-r600/autoconf/configure.ac 2013-01-25 19:43:56.096716416 +0100
+@@ -751,6 +751,11 @@
+
+ if test ${enableval} != "disable"
+ then
++ if test ${enableval} = "AMDGPU"
++ then
++ AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
++ enableval="R600"
++ fi
+ TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
+ fi
+
+diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
+--- llvm-3.2.src/configure 2012-11-21 17:13:35.000000000 +0100
++++ llvm-r600/configure 2013-01-25 19:43:56.173383081 +0100
+@@ -5473,6 +5473,13 @@
+
+ if test ${enableval} != "disable"
+ then
++ if test ${enableval} = "AMDGPU"
++ then
++ { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
++echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
++ { (exit 1); exit 1; }; }
++ enableval="R600"
++ fi
+ TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
+ fi
+
+@@ -10316,7 +10323,7 @@
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<EOF
+-#line 10317 "configure"
++#line 10326 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
+--- llvm-3.2.src/include/llvm/IntrinsicsR600.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/include/llvm/IntrinsicsR600.td 2013-01-25 19:43:56.433383075 +0100
+@@ -0,0 +1,36 @@
++//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines all of the R600-specific intrinsics.
++//
++//===----------------------------------------------------------------------===//
++
++let TargetPrefix = "r600" in {
++
++class R600ReadPreloadRegisterIntrinsic<string name>
++ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
++ GCCBuiltin<name>;
++
++multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
++ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
++ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
++ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
++}
++
++defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_global_size">;
++defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_local_size">;
++defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_ngroups">;
++defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_tgid">;
++defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_tidig">;
++} // End TargetPrefix = "r600"
+diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
+--- llvm-3.2.src/include/llvm/Intrinsics.td 2012-10-20 01:00:20.000000000 +0200
++++ llvm-r600/include/llvm/Intrinsics.td 2013-01-25 19:43:56.426716409 +0100
+@@ -469,3 +469,4 @@
+ include "llvm/IntrinsicsHexagon.td"
+ include "llvm/IntrinsicsNVVM.td"
+ include "llvm/IntrinsicsMips.td"
++include "llvm/IntrinsicsR600.td"
+diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+--- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2012-11-26 18:01:12.000000000 +0100
++++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2013-01-25 19:43:56.720049736 +0100
+@@ -8514,11 +8514,8 @@
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+- // If not supported by target, bail out.
+- if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+- TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+- return SDValue();
+ }
++
+ if (Opc != Opcode)
+ return SDValue();
+
+@@ -8543,6 +8540,10 @@
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
++
++ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
++ return SDValue();
++
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+--- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2012-10-24 19:25:11.000000000 +0200
++++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2013-01-25 19:43:56.733383069 +0100
+@@ -731,9 +731,10 @@
+ return;
+ }
+ case TargetLowering::Promote: {
+- assert(VT.isVector() && "Unknown legal promote case!");
+- Value = DAG.getNode(ISD::BITCAST, dl,
+- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
++ EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
++ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
++ "Can only promote stores to same size type");
++ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+@@ -889,10 +890,9 @@
+ break;
+ }
+ case TargetLowering::Promote: {
+- // Only promote a load of vector type to another.
+- assert(VT.isVector() && "Cannot promote this load!");
+- // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
++ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
++ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
+--- llvm-3.2.src/lib/Target/LLVMBuild.txt 2012-07-16 20:19:46.000000000 +0200
++++ llvm-r600/lib/Target/LLVMBuild.txt 2013-01-25 19:43:57.173383060 +0100
+@@ -16,7 +16,7 @@
+ ;===------------------------------------------------------------------------===;
+
+ [common]
+-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
++subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+
+ ; This is a special group whose required libraries are extended (by llvm-build)
+ ; with the best execution engine (the native JIT, if available, or the
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,138 @@
++//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++///
++/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
++/// code. When passed an MCAsmStreamer it prints assembly and when passed
++/// an MCObjectStreamer it outputs binary code.
++//
++//===----------------------------------------------------------------------===//
++//
++
++
++#include "AMDGPUAsmPrinter.h"
++#include "AMDGPU.h"
++#include "SIMachineFunctionInfo.h"
++#include "SIRegisterInfo.h"
++#include "llvm/MC/MCStreamer.h"
++#include "llvm/Target/TargetLoweringObjectFile.h"
++#include "llvm/Support/TargetRegistry.h"
++
++using namespace llvm;
++
++
++static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
++ MCStreamer &Streamer) {
++ return new AMDGPUAsmPrinter(tm, Streamer);
++}
++
++extern "C" void LLVMInitializeR600AsmPrinter() {
++ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
++}
++
++/// We need to override this function so we can avoid
++/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
++bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
++ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
++ if (STM.dumpCode()) {
++#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
++ MF.dump();
++#endif
++ }
++ SetupMachineFunction(MF);
++ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
++ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ EmitProgramInfo(MF);
++ }
++ EmitFunctionBody();
++ return false;
++}
++
++void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
++ unsigned MaxSGPR = 0;
++ unsigned MaxVGPR = 0;
++ bool VCCUsed = false;
++ const SIRegisterInfo * RI =
++ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
++
++ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
++ BB != BB_E; ++BB) {
++ MachineBasicBlock &MBB = *BB;
++ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
++ I != E; ++I) {
++ MachineInstr &MI = *I;
++
++ unsigned numOperands = MI.getNumOperands();
++ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
++ MachineOperand & MO = MI.getOperand(op_idx);
++ unsigned maxUsed;
++ unsigned width = 0;
++ bool isSGPR = false;
++ unsigned reg;
++ unsigned hwReg;
++ if (!MO.isReg()) {
++ continue;
++ }
++ reg = MO.getReg();
++ if (reg == AMDGPU::VCC) {
++ VCCUsed = true;
++ continue;
++ }
++ switch (reg) {
++ default: break;
++ case AMDGPU::EXEC:
++ case AMDGPU::SI_LITERAL_CONSTANT:
++ case AMDGPU::SREG_LIT_0:
++ case AMDGPU::M0:
++ continue;
++ }
++
++ if (AMDGPU::SReg_32RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 1;
++ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 1;
++ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 2;
++ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 2;
++ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 4;
++ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 4;
++ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 8;
++ } else {
++ assert(!"Unknown register class");
++ }
++ hwReg = RI->getEncodingValue(reg);
++ maxUsed = hwReg + width - 1;
++ if (isSGPR) {
++ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
++ } else {
++ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
++ }
++ }
++ }
++ }
++ if (VCCUsed) {
++ MaxSGPR += 2;
++ }
++ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
++ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
++ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
++ OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,44 @@
++//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief AMDGPU Assembly printer class.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_ASMPRINTER_H
++#define AMDGPU_ASMPRINTER_H
++
++#include "llvm/CodeGen/AsmPrinter.h"
++
++namespace llvm {
++
++class AMDGPUAsmPrinter : public AsmPrinter {
++
++public:
++ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
++ : AsmPrinter(TM, Streamer) { }
++
++ virtual bool runOnMachineFunction(MachineFunction &MF);
++
++ virtual const char *getPassName() const {
++ return "AMDGPU Assembly Printer";
++ }
++
++ /// \brief Emit register usage information so that the GPU driver
++ /// can correctly setup the GPU state.
++ void EmitProgramInfo(MachineFunction &MF);
++
++ /// Implemented in AMDGPUMCInstLower.cpp
++ virtual void EmitInstruction(const MachineInstr *MI);
++};
++
++} // End namespace llvm
++
++#endif //AMDGPU_ASMPRINTER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,49 @@
++//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief CodeEmitter interface for R600 and SI codegen.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUCODEEMITTER_H
++#define AMDGPUCODEEMITTER_H
++
++namespace llvm {
++
++class AMDGPUCodeEmitter {
++public:
++ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
++ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
++ const MachineOperand &MO) const { return 0; }
++ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
++ uint64_t Value) const {
++ return Value;
++ }
++ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
++ const {
++ return 0;
++ }
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUCODEEMITTER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,62 @@
++//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief This pass lowers AMDIL machine instructions to the appropriate
++/// hardware instructions.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPU.h"
++#include "AMDGPUInstrInfo.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++
++using namespace llvm;
++
++namespace {
++
++class AMDGPUConvertToISAPass : public MachineFunctionPass {
++
++private:
++ static char ID;
++ TargetMachine &TM;
++
++public:
++ AMDGPUConvertToISAPass(TargetMachine &tm) :
++ MachineFunctionPass(ID), TM(tm) { }
++
++ virtual bool runOnMachineFunction(MachineFunction &MF);
++
++ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
++
++};
++
++} // End anonymous namespace
++
++char AMDGPUConvertToISAPass::ID = 0;
++
++FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
++ return new AMDGPUConvertToISAPass(tm);
++}
++
++bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
++ const AMDGPUInstrInfo * TII =
++ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
++
++ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
++ BB != BB_E; ++BB) {
++ MachineBasicBlock &MBB = *BB;
++ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
++ I != E; ++I) {
++ MachineInstr &MI = *I;
++ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
++ }
++ }
++ return false;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPU.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPU.h 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,51 @@
++//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_H
++#define AMDGPU_H
++
++#include "AMDGPUTargetMachine.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/Target/TargetMachine.h"
++
++namespace llvm {
++
++class FunctionPass;
++class AMDGPUTargetMachine;
++
++// R600 Passes
++FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
++FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
++FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
++
++// SI Passes
++FunctionPass *createSIAnnotateControlFlowPass();
++FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
++FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
++FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
++FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
++FunctionPass *createSIInsertWaits(TargetMachine &tm);
++
++// Passes common to R600 and SI
++Pass *createAMDGPUStructurizeCFGPass();
++FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
++
++} // End namespace llvm
++
++namespace ShaderType {
++ enum Type {
++ PIXEL = 0,
++ VERTEX = 1,
++ GEOMETRY = 2,
++ COMPUTE = 3
++ };
++}
++
++#endif // AMDGPU_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,257 @@
++//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Implementation of the TargetInstrInfo class that is common to all
++/// AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUTargetMachine.h"
++#include "AMDIL.h"
++#include "llvm/CodeGen/MachineFrameInfo.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++
++#define GET_INSTRINFO_CTOR
++#include "AMDGPUGenInstrInfo.inc"
++
++using namespace llvm;
++
++AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
++ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
++
++const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
++ return RI;
++}
++
++bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
++ unsigned &SrcReg, unsigned &DstReg,
++ unsigned &SubIdx) const {
++// TODO: Implement this function
++ return false;
++}
++
++unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++
++unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++
++bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return false;
++}
++unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return false;
++}
++
++MachineInstr *
++AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
++ MachineBasicBlock::iterator &MBBI,
++ LiveVariables *LV) const {
++// TODO: Implement this function
++ return NULL;
++}
++bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
++ MachineBasicBlock &MBB) const {
++ while (iter != MBB.end()) {
++ switch (iter->getOpcode()) {
++ default:
++ break;
++ case AMDGPU::BRANCH_COND_i32:
++ case AMDGPU::BRANCH_COND_f32:
++ case AMDGPU::BRANCH:
++ return true;
++ };
++ ++iter;
++ }
++ return false;
++}
++
++MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
++ MachineBasicBlock::iterator tmp = MBB->end();
++ if (!MBB->size()) {
++ return MBB->end();
++ }
++ while (--tmp) {
++ if (tmp->getOpcode() == AMDGPU::ENDLOOP
++ || tmp->getOpcode() == AMDGPU::ENDIF
++ || tmp->getOpcode() == AMDGPU::ELSE) {
++ if (tmp == MBB->begin()) {
++ return tmp;
++ } else {
++ continue;
++ }
++ } else {
++ return ++tmp;
++ }
++ }
++ return MBB->end();
++}
++
++void
++AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned SrcReg, bool isKill,
++ int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const {
++ assert(!"Not Implemented");
++}
++
++void
++AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned DestReg, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const {
++ assert(!"Not Implemented");
++}
++
++MachineInstr *
++AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ int FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++MachineInstr*
++AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ MachineInstr *LoadMI) const {
++ // TODO: Implement this function
++ return 0;
++}
++bool
++AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops) const {
++ // TODO: Implement this function
++ return false;
++}
++bool
++AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
++ unsigned Reg, bool UnfoldLoad,
++ bool UnfoldStore,
++ SmallVectorImpl<MachineInstr*> &NewMIs) const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool
++AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
++ SmallVectorImpl<SDNode*> &NewNodes) const {
++ // TODO: Implement this function
++ return false;
++}
++
++unsigned
++AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
++ bool UnfoldLoad, bool UnfoldStore,
++ unsigned *LoadRegIndex) const {
++ // TODO: Implement this function
++ return 0;
++}
++
++bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
++ int64_t Offset1, int64_t Offset2,
++ unsigned NumLoads) const {
++ assert(Offset2 > Offset1
++ && "Second offset should be larger than first offset!");
++ // If we have less than 16 loads in a row, and the offsets are within 16,
++ // then schedule together.
++ // TODO: Make the loads schedule near if it fits in a cacheline
++ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
++}
++
++bool
++AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
++ const {
++ // TODO: Implement this function
++ return true;
++}
++void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI) const {
++ // TODO: Implement this function
++}
++
++bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
++ // TODO: Implement this function
++ return false;
++}
++bool
++AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
++ const SmallVectorImpl<MachineOperand> &Pred2)
++ const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
++ std::vector<MachineOperand> &Pred) const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
++ // TODO: Implement this function
++ return MI->getDesc().isPredicable();
++}
++
++bool
++AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
++ // TODO: Implement this function
++ return true;
++}
++
++void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
++ DebugLoc DL) const {
++ MachineRegisterInfo &MRI = MF.getRegInfo();
++ const AMDGPURegisterInfo & RI = getRegisterInfo();
++
++ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
++ MachineOperand &MO = MI.getOperand(i);
++ // Convert dst regclass to one that is supported by the ISA
++ if (MO.isReg() && MO.isDef()) {
++ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
++ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
++ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
++
++ assert(newRegClass);
++
++ MRI.setRegClass(MO.getReg(), newRegClass);
++ }
++ }
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,149 @@
++//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Contains the definition of a TargetInstrInfo class that is common
++/// to all AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUINSTRUCTIONINFO_H
++#define AMDGPUINSTRUCTIONINFO_H
++
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUInstrInfo.h"
++#include "llvm/Target/TargetInstrInfo.h"
++
++#include <map>
++
++#define GET_INSTRINFO_HEADER
++#define GET_INSTRINFO_ENUM
++#include "AMDGPUGenInstrInfo.inc"
++
++#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
++#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
++#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
++#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
++
++namespace llvm {
++
++class AMDGPUTargetMachine;
++class MachineFunction;
++class MachineInstr;
++class MachineInstrBuilder;
++
++class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
++private:
++ const AMDGPURegisterInfo RI;
++ TargetMachine &TM;
++ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
++ MachineBasicBlock &MBB) const;
++public:
++ explicit AMDGPUInstrInfo(TargetMachine &tm);
++
++ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
++
++ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
++ unsigned &DstReg, unsigned &SubIdx) const;
++
++ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
++ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const;
++ bool hasLoadFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const;
++ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
++ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const;
++ bool hasStoreFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const;
++
++ MachineInstr *
++ convertToThreeAddress(MachineFunction::iterator &MFI,
++ MachineBasicBlock::iterator &MBBI,
++ LiveVariables *LV) const;
++
++
++ virtual void copyPhysReg(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI, DebugLoc DL,
++ unsigned DestReg, unsigned SrcReg,
++ bool KillSrc) const = 0;
++
++ void storeRegToStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned SrcReg, bool isKill, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const;
++ void loadRegFromStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned DestReg, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const;
++
++protected:
++ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ int FrameIndex) const;
++ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ MachineInstr *LoadMI) const;
++public:
++ bool canFoldMemoryOperand(const MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops) const;
++ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
++ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
++ SmallVectorImpl<MachineInstr *> &NewMIs) const;
++ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
++ SmallVectorImpl<SDNode *> &NewNodes) const;
++ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
++ bool UnfoldLoad, bool UnfoldStore,
++ unsigned *LoadRegIndex = 0) const;
++ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
++ int64_t Offset1, int64_t Offset2,
++ unsigned NumLoads) const;
++
++ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
++ void insertNoop(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI) const;
++ bool isPredicated(const MachineInstr *MI) const;
++ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
++ const SmallVectorImpl<MachineOperand> &Pred2) const;
++ bool DefinesPredicate(MachineInstr *MI,
++ std::vector<MachineOperand> &Pred) const;
++ bool isPredicable(MachineInstr *MI) const;
++ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
++
++ // Helper functions that check the opcode for status information
++ bool isLoadInst(llvm::MachineInstr *MI) const;
++ bool isExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isStoreInst(llvm::MachineInstr *MI) const;
++ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
++
++ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
++ int64_t Imm) const = 0;
++ virtual unsigned getIEQOpcode() const = 0;
++ virtual bool isMov(unsigned opcode) const = 0;
++
++ /// \brief Convert the AMDIL MachineInstr to a supported ISA
++ /// MachineInstr
++ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
++ DebugLoc DL) const;
++
++};
++
++} // End llvm namespace
++
++#endif // AMDGPUINSTRINFO_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,74 @@
++//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains DAG node definitions for the AMDGPU target.
++//
++//===----------------------------------------------------------------------===//
++
++//===----------------------------------------------------------------------===//
++// AMDGPU DAG Profiles
++//===----------------------------------------------------------------------===//
++
++def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
++]>;
++
++//===----------------------------------------------------------------------===//
++// AMDGPU DAG Nodes
++//
++
++// out = ((a << 32) | b) >> c
++//
++// Can be used to optimize rotl:
++// rotl(a, b) = bitalign(a, a, 32 - b)
++def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
++
++// This argument to this node is a dword address.
++def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
++
++// out = a - floor(a)
++def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
++
++// out = max(a, b) a and b are floats
++def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = max(a, b) a and b are signed ints
++def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = max(a, b) a and b are unsigned ints
++def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are floats
++def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are signed ints
++def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are unsigned ints
++def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// urecip - This operation is a helper for integer division, it returns the
++// result of 1 / a as a fractional unsigned integer.
++// out = (2^32 / a) + e
++// e is rounding error
++def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
++
++def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,190 @@
++//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains instruction defs that are common to all hw codegen
++// targets.
++//
++//===----------------------------------------------------------------------===//
++
++class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
++ field bits<16> AMDILOp = 0;
++ field bits<3> Gen = 0;
++
++ let Namespace = "AMDGPU";
++ let OutOperandList = outs;
++ let InOperandList = ins;
++ let AsmString = asm;
++ let Pattern = pattern;
++ let Itinerary = NullALU;
++ let TSFlags{42-40} = Gen;
++ let TSFlags{63-48} = AMDILOp;
++}
++
++class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
++ : AMDGPUInst<outs, ins, asm, pattern> {
++
++ field bits<32> Inst = 0xffffffff;
++
++}
++
++def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
++
++def COND_EQ : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOEQ: case ISD::SETUEQ:
++ case ISD::SETEQ: return true;}}}]
++>;
++
++def COND_NE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETONE: case ISD::SETUNE:
++ case ISD::SETNE: return true;}}}]
++>;
++def COND_GT : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOGT: case ISD::SETUGT:
++ case ISD::SETGT: return true;}}}]
++>;
++
++def COND_GE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOGE: case ISD::SETUGE:
++ case ISD::SETGE: return true;}}}]
++>;
++
++def COND_LT : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOLT: case ISD::SETULT:
++ case ISD::SETLT: return true;}}}]
++>;
++
++def COND_LE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOLE: case ISD::SETULE:
++ case ISD::SETLE: return true;}}}]
++>;
++
++//===----------------------------------------------------------------------===//
++// Load/Store Pattern Fragments
++//===----------------------------------------------------------------------===//
++
++def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
++ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
++}]>;
++
++class Constants {
++int TWO_PI = 0x40c90fdb;
++int PI = 0x40490fdb;
++int TWO_PI_INV = 0x3e22f983;
++}
++def CONST : Constants;
++
++def FP_ZERO : PatLeaf <
++ (fpimm),
++ [{return N->getValueAPF().isZero();}]
++>;
++
++def FP_ONE : PatLeaf <
++ (fpimm),
++ [{return N->isExactlyValue(1.0);}]
++>;
++
++let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
++
++class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "CLAMP $dst, $src0",
++ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
++>;
++
++class FABS <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "FABS $dst, $src0",
++ [(set rc:$dst, (fabs rc:$src0))]
++>;
++
++class FNEG <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "FNEG $dst, $src0",
++ [(set rc:$dst, (fneg rc:$src0))]
++>;
++
++def SHADER_TYPE : AMDGPUShaderInst <
++ (outs),
++ (ins i32imm:$type),
++ "SHADER_TYPE $type",
++ [(int_AMDGPU_shader_type imm:$type)]
++>;
++
++} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
++
++/* Generic helper patterns for intrinsics */
++/* -------------------------------------- */
++
++class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
++ RegisterClass rc> : Pat <
++ (fpow rc:$src0, rc:$src1),
++ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
++>;
++
++/* Other helper patterns */
++/* --------------------- */
++
++/* Extract element pattern */
++class Extract_Element <ValueType sub_type, ValueType vec_type,
++ RegisterClass vec_class, int sub_idx,
++ SubRegIndex sub_reg>: Pat<
++ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
++ (EXTRACT_SUBREG vec_class:$src, sub_reg)
++>;
++
++/* Insert element pattern */
++class Insert_Element <ValueType elem_type, ValueType vec_type,
++ RegisterClass elem_class, RegisterClass vec_class,
++ int sub_idx, SubRegIndex sub_reg> : Pat <
++
++ (vec_type (vector_insert (vec_type vec_class:$vec),
++ (elem_type elem_class:$elem), sub_idx)),
++ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
++>;
++
++// Vector Build pattern
++class Vector_Build <ValueType vecType, RegisterClass vectorClass,
++ ValueType elemType, RegisterClass elemClass> : Pat <
++ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
++ (elemType elemClass:$z), (elemType elemClass:$w))),
++ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
++ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
++ elemClass:$z, sel_z), elemClass:$w, sel_w)
++>;
++
++// bitconvert pattern
++class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
++ (dt (bitconvert (st rc:$src0))),
++ (dt rc:$src0)
++>;
++
++class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
++ (vt (AMDGPUdwordaddr (vt rc:$addr))),
++ (vt rc:$addr)
++>;
++
++include "R600Instructions.td"
++
++include "SIInstrInfo.td"
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,62 @@
++//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines intrinsics that are used by all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++let TargetPrefix = "AMDGPU", isTarget = 1 in {
++
++ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
++ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
++
++ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
++ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
++ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
++ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
++
++ def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
++}
++
++let TargetPrefix = "TGSI", isTarget = 1 in {
++
++ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
++}
++
++include "SIIntrinsics.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,418 @@
++//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief This is the parent TargetLowering class for hardware code gen
++/// targets.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUISelLowering.h"
++#include "AMDILIntrinsicInfo.h"
++#include "llvm/CodeGen/MachineFunction.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/CodeGen/SelectionDAG.h"
++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
++
++using namespace llvm;
++
++AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
++ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
++
++ // Initialize target lowering borrowed from AMDIL
++ InitAMDILLowering();
++
++ // We need to custom lower some of the intrinsics
++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
++
++ // Library functions. These default to Expand, but we have instructions
++ // for them.
++ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
++ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
++ setOperationAction(ISD::FPOW, MVT::f32, Legal);
++ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
++ setOperationAction(ISD::FABS, MVT::f32, Legal);
++ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
++ setOperationAction(ISD::FRINT, MVT::f32, Legal);
++
++ // Lower floating point store/load to integer store/load to reduce the number
++ // of patterns in tablegen.
++ setOperationAction(ISD::STORE, MVT::f32, Promote);
++ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
++
++ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
++ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
++
++ setOperationAction(ISD::LOAD, MVT::f32, Promote);
++ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
++
++ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
++ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
++
++ setOperationAction(ISD::UDIV, MVT::i32, Expand);
++ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
++ setOperationAction(ISD::UREM, MVT::i32, Expand);
++}
++
++//===---------------------------------------------------------------------===//
++// TargetLowering Callbacks
++//===---------------------------------------------------------------------===//
++
++SDValue AMDGPUTargetLowering::LowerFormalArguments(
++ SDValue Chain,
++ CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::InputArg> &Ins,
++ DebugLoc DL, SelectionDAG &DAG,
++ SmallVectorImpl<SDValue> &InVals) const {
++ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
++ InVals.push_back(SDValue());
++ }
++ return Chain;
++}
++
++SDValue AMDGPUTargetLowering::LowerReturn(
++ SDValue Chain,
++ CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::OutputArg> &Outs,
++ const SmallVectorImpl<SDValue> &OutVals,
++ DebugLoc DL, SelectionDAG &DAG) const {
++ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
++}
++
++//===---------------------------------------------------------------------===//
++// Target specific lowering
++//===---------------------------------------------------------------------===//
++
++SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
++ const {
++ switch (Op.getOpcode()) {
++ default:
++ Op.getNode()->dump();
++ assert(0 && "Custom lowering code for this"
++ "instruction is not implemented yet!");
++ break;
++ // AMDIL DAG lowering
++ case ISD::SDIV: return LowerSDIV(Op, DAG);
++ case ISD::SREM: return LowerSREM(Op, DAG);
++ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
++ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
++ // AMDGPU DAG lowering
++ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
++ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
++ }
++ return Op;
++}
++
++SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
++ SelectionDAG &DAG) const {
++ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ switch (IntrinsicID) {
++ default: return Op;
++ case AMDGPUIntrinsic::AMDIL_abs:
++ return LowerIntrinsicIABS(Op, DAG);
++ case AMDGPUIntrinsic::AMDIL_exp:
++ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
++ case AMDGPUIntrinsic::AMDGPU_lrp:
++ return LowerIntrinsicLRP(Op, DAG);
++ case AMDGPUIntrinsic::AMDIL_fraction:
++ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
++ case AMDGPUIntrinsic::AMDIL_mad:
++ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
++ Op.getOperand(2), Op.getOperand(3));
++ case AMDGPUIntrinsic::AMDIL_max:
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_imax:
++ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_umax:
++ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDIL_min:
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_imin:
++ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_umin:
++ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDIL_round_nearest:
++ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
++ }
++}
++
++///IABS(a) = SMAX(sub(0, a), a)
++SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
++ SelectionDAG &DAG) const {
++
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
++ Op.getOperand(1));
++
++ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
++}
++
++/// Linear Interpolation
++/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
++SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
++ DAG.getConstantFP(1.0f, MVT::f32),
++ Op.getOperand(1));
++ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
++ Op.getOperand(3));
++ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
++ Op.getOperand(2),
++ OneSubAC);
++}
++
++/// \brief Generate Min/Max node
++SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ SDValue LHS = Op.getOperand(0);
++ SDValue RHS = Op.getOperand(1);
++ SDValue True = Op.getOperand(2);
++ SDValue False = Op.getOperand(3);
++ SDValue CC = Op.getOperand(4);
++
++ if (VT != MVT::f32 ||
++ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
++ return SDValue();
++ }
++
++ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
++ switch (CCOpcode) {
++ case ISD::SETOEQ:
++ case ISD::SETONE:
++ case ISD::SETUNE:
++ case ISD::SETNE:
++ case ISD::SETUEQ:
++ case ISD::SETEQ:
++ case ISD::SETFALSE:
++ case ISD::SETFALSE2:
++ case ISD::SETTRUE:
++ case ISD::SETTRUE2:
++ case ISD::SETUO:
++ case ISD::SETO:
++ assert(0 && "Operation should already be optimised !");
++ case ISD::SETULE:
++ case ISD::SETULT:
++ case ISD::SETOLE:
++ case ISD::SETOLT:
++ case ISD::SETLE:
++ case ISD::SETLT: {
++ if (LHS == True)
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
++ else
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
++ }
++ case ISD::SETGT:
++ case ISD::SETGE:
++ case ISD::SETUGE:
++ case ISD::SETOGE:
++ case ISD::SETUGT:
++ case ISD::SETOGT: {
++ if (LHS == True)
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
++ else
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
++ }
++ case ISD::SETCC_INVALID:
++ assert(0 && "Invalid setcc condcode !");
++ }
++ return Op;
++}
++
++
++
++SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ SDValue Num = Op.getOperand(0);
++ SDValue Den = Op.getOperand(1);
++
++ SmallVector<SDValue, 8> Results;
++
++ // RCP = URECIP(Den) = 2^32 / Den + e
++ // e is rounding error.
++ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
++
++ // RCP_LO = umulo(RCP, Den)
++ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
++
++ // RCP_HI = mulhu (RCP, Den)
++ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
++
++ // NEG_RCP_LO = -RCP_LO
++ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
++ RCP_LO);
++
++ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
++ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
++ NEG_RCP_LO, RCP_LO,
++ ISD::SETEQ);
++ // Calculate the rounding error from the URECIP instruction
++ // E = mulhu(ABS_RCP_LO, RCP)
++ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
++
++ // RCP_A_E = RCP + E
++ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
++
++ // RCP_S_E = RCP - E
++ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
++
++ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
++ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
++ RCP_A_E, RCP_S_E,
++ ISD::SETEQ);
++ // Quotient = mulhu(Tmp0, Num)
++ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
++
++ // Num_S_Remainder = Quotient * Den
++ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
++
++ // Remainder = Num - Num_S_Remainder
++ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
++
++ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
++ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
++ DAG.getConstant(-1, VT),
++ DAG.getConstant(0, VT),
++ ISD::SETGE);
++ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
++ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
++ DAG.getConstant(0, VT),
++ DAG.getConstant(-1, VT),
++ DAG.getConstant(0, VT),
++ ISD::SETGE);
++ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
++ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
++ Remainder_GE_Zero);
++
++ // Calculate Division result:
++
++ // Quotient_A_One = Quotient + 1
++ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
++ DAG.getConstant(1, VT));
++
++ // Quotient_S_One = Quotient - 1
++ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
++ DAG.getConstant(1, VT));
++
++ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
++ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
++ Quotient, Quotient_A_One, ISD::SETEQ);
++
++ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
++ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
++ Quotient_S_One, Div, ISD::SETEQ);
++
++ // Calculate Rem result:
++
++ // Remainder_S_Den = Remainder - Den
++ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
++
++ // Remainder_A_Den = Remainder + Den
++ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
++
++ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
++ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
++ Remainder, Remainder_S_Den, ISD::SETEQ);
++
++ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
++ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
++ Remainder_A_Den, Rem, ISD::SETEQ);
++ SDValue Ops[2];
++ Ops[0] = Div;
++ Ops[1] = Rem;
++ return DAG.getMergeValues(Ops, 2, DL);
++}
++
++//===----------------------------------------------------------------------===//
++// Helper functions
++//===----------------------------------------------------------------------===//
++
++bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
++ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
++ return CFP->isExactlyValue(1.0);
++ }
++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
++ return C->isAllOnesValue();
++ }
++ return false;
++}
++
++bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
++ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
++ return CFP->getValueAPF().isZero();
++ }
++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
++ return C->isNullValue();
++ }
++ return false;
++}
++
++SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
++ const TargetRegisterClass *RC,
++ unsigned Reg, EVT VT) const {
++ MachineFunction &MF = DAG.getMachineFunction();
++ MachineRegisterInfo &MRI = MF.getRegInfo();
++ unsigned VirtualRegister;
++ if (!MRI.isLiveIn(Reg)) {
++ VirtualRegister = MRI.createVirtualRegister(RC);
++ MRI.addLiveIn(Reg, VirtualRegister);
++ } else {
++ VirtualRegister = MRI.getLiveInVirtReg(Reg);
++ }
++ return DAG.getRegister(VirtualRegister, VT);
++}
++
++#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
++
++const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
++ switch (Opcode) {
++ default: return 0;
++ // AMDIL DAG nodes
++ NODE_NAME_CASE(MAD);
++ NODE_NAME_CASE(CALL);
++ NODE_NAME_CASE(UMUL);
++ NODE_NAME_CASE(DIV_INF);
++ NODE_NAME_CASE(RET_FLAG);
++ NODE_NAME_CASE(BRANCH_COND);
++
++ // AMDGPU DAG nodes
++ NODE_NAME_CASE(DWORDADDR)
++ NODE_NAME_CASE(FRACT)
++ NODE_NAME_CASE(FMAX)
++ NODE_NAME_CASE(SMAX)
++ NODE_NAME_CASE(UMAX)
++ NODE_NAME_CASE(FMIN)
++ NODE_NAME_CASE(SMIN)
++ NODE_NAME_CASE(UMIN)
++ NODE_NAME_CASE(URECIP)
++ NODE_NAME_CASE(INTERP)
++ NODE_NAME_CASE(INTERP_P0)
++ NODE_NAME_CASE(EXPORT)
++ NODE_NAME_CASE(CONST_ADDRESS)
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h llvm-r600/lib/Target/R600/AMDGPUISelLowering.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,145 @@
++//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Interface definition of the TargetLowering class that is common
++/// to all AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUISELLOWERING_H
++#define AMDGPUISELLOWERING_H
++
++#include "llvm/Target/TargetLowering.h"
++
++namespace llvm {
++
++class MachineRegisterInfo;
++
++class AMDGPUTargetLowering : public TargetLowering {
++private:
++ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
++
++protected:
++
++ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
++ /// MachineFunction.
++ ///
++ /// \returns a RegisterSDNode representing Reg.
++ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
++ unsigned Reg, EVT VT) const;
++
++ bool isHWTrueValue(SDValue Op) const;
++ bool isHWFalseValue(SDValue Op) const;
++
++public:
++ AMDGPUTargetLowering(TargetMachine &TM);
++
++ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::InputArg> &Ins,
++ DebugLoc DL, SelectionDAG &DAG,
++ SmallVectorImpl<SDValue> &InVals) const;
++
++ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::OutputArg> &Outs,
++ const SmallVectorImpl<SDValue> &OutVals,
++ DebugLoc DL, SelectionDAG &DAG) const;
++
++ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
++ virtual const char* getTargetNodeName(unsigned Opcode) const;
++
++// Functions defined in AMDILISelLowering.cpp
++public:
++
++ /// \brief Determine which of the bits specified in \p Mask are known to be
++ /// either zero or one and return them in the \p KnownZero and \p KnownOne
++ /// bitsets.
++ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
++ APInt &KnownZero,
++ APInt &KnownOne,
++ const SelectionDAG &DAG,
++ unsigned Depth = 0) const;
++
++ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
++ const CallInst &I, unsigned Intrinsic) const;
++
++ /// We want to mark f32/f64 floating point values as legal.
++ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
++
++ /// We don't want to shrink f64/f32 constants.
++ bool ShouldShrinkFPConstant(EVT VT) const;
++
++private:
++ void InitAMDILLowering();
++ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
++ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
++ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
++};
++
++namespace AMDGPUISD {
++
++enum {
++ // AMDIL ISD Opcodes
++ FIRST_NUMBER = ISD::BUILTIN_OP_END,
++ MAD, // 32bit Fused Multiply Add instruction
++ CALL, // Function call based on a single integer
++ UMUL, // 32bit unsigned multiplication
++ DIV_INF, // Divide with infinity returned on zero divisor
++ RET_FLAG,
++ BRANCH_COND,
++ // End AMDIL ISD Opcodes
++ BITALIGN,
++ DWORDADDR,
++ FRACT,
++ FMAX,
++ SMAX,
++ UMAX,
++ FMIN,
++ SMIN,
++ UMIN,
++ URECIP,
++ INTERP,
++ INTERP_P0,
++ EXPORT,
++ CONST_ADDRESS,
++ LAST_AMDGPU_ISD_NUMBER
++};
++
++
++} // End namespace AMDGPUISD
++
++namespace SIISD {
++
++enum {
++ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
++ VCC_AND,
++ VCC_BITCAST
++};
++
++} // End namespace SIISD
++
++} // End namespace llvm
++
++#endif // AMDGPUISELLOWERING_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,83 @@
++//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
++//
++//===----------------------------------------------------------------------===//
++//
++
++#include "AMDGPUMCInstLower.h"
++#include "AMDGPUAsmPrinter.h"
++#include "R600InstrInfo.h"
++#include "llvm/CodeGen/MachineBasicBlock.h"
++#include "llvm/CodeGen/MachineInstr.h"
++#include "llvm/Constants.h"
++#include "llvm/MC/MCInst.h"
++#include "llvm/MC/MCStreamer.h"
++#include "llvm/MC/MCExpr.h"
++#include "llvm/Support/ErrorHandling.h"
++
++using namespace llvm;
++
++AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
++ Ctx(ctx)
++{ }
++
++void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
++ OutMI.setOpcode(MI->getOpcode());
++
++ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
++ const MachineOperand &MO = MI->getOperand(i);
++
++ MCOperand MCOp;
++ switch (MO.getType()) {
++ default:
++ llvm_unreachable("unknown operand type");
++ case MachineOperand::MO_FPImmediate: {
++ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
++ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
++ "Only floating point immediates are supported at the moment.");
++ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
++ break;
++ }
++ case MachineOperand::MO_Immediate:
++ MCOp = MCOperand::CreateImm(MO.getImm());
++ break;
++ case MachineOperand::MO_Register:
++ MCOp = MCOperand::CreateReg(MO.getReg());
++ break;
++ case MachineOperand::MO_MachineBasicBlock:
++ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
++ MO.getMBB()->getSymbol(), Ctx));
++ }
++ OutMI.addOperand(MCOp);
++ }
++}
++
++void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
++ AMDGPUMCInstLower MCInstLowering(OutContext);
++
++ if (MI->isBundle()) {
++ const MachineBasicBlock *MBB = MI->getParent();
++ MachineBasicBlock::const_instr_iterator I = MI;
++ ++I;
++ while (I != MBB->end() && I->isInsideBundle()) {
++ MCInst MCBundleInst;
++ const MachineInstr *BundledInst = I;
++ MCInstLowering.lower(BundledInst, MCBundleInst);
++ OutStreamer.EmitInstruction(MCBundleInst);
++ ++I;
++ }
++ } else {
++ MCInst TmpInst;
++ MCInstLowering.lower(MI, TmpInst);
++ OutStreamer.EmitInstruction(TmpInst);
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,34 @@
++//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_MCINSTLOWER_H
++#define AMDGPU_MCINSTLOWER_H
++
++namespace llvm {
++
++class MCInst;
++class MCContext;
++class MachineInstr;
++
++class AMDGPUMCInstLower {
++
++ MCContext &Ctx;
++
++public:
++ AMDGPUMCInstLower(MCContext &ctx);
++
++ /// \brief Lower a MachineInstr to an MCInst
++ void lower(const MachineInstr *MI, MCInst &OutMI) const;
++
++};
++
++} // End namespace llvm
++
++#endif //AMDGPU_MCINSTLOWER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,51 @@
++//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUTargetMachine.h"
++
++using namespace llvm;
++
++AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
++ const TargetInstrInfo &tii)
++: AMDGPUGenRegisterInfo(0),
++ TM(tm),
++ TII(tii)
++ { }
++
++//===----------------------------------------------------------------------===//
++// Function handling callbacks - Functions are a seldom used feature of GPUs, so
++// they are not supported at this time.
++//===----------------------------------------------------------------------===//
++
++const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
++
++const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
++ const {
++ return &CalleeSavedReg;
++}
++
++void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
++ int SPAdj,
++ RegScavenger *RS) const {
++ assert(!"Subroutines not supported yet");
++}
++
++unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
++ assert(!"Subroutines not supported yet");
++ return 0;
++}
++
++#define GET_REGINFO_TARGET_DESC
++#include "AMDGPUGenRegisterInfo.inc"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,63 @@
++//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
++/// targets.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUREGISTERINFO_H
++#define AMDGPUREGISTERINFO_H
++
++#include "llvm/ADT/BitVector.h"
++#include "llvm/Target/TargetRegisterInfo.h"
++
++#define GET_REGINFO_HEADER
++#define GET_REGINFO_ENUM
++#include "AMDGPUGenRegisterInfo.inc"
++
++namespace llvm {
++
++class AMDGPUTargetMachine;
++class TargetInstrInfo;
++
++struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
++ TargetMachine &TM;
++ const TargetInstrInfo &TII;
++ static const uint16_t CalleeSavedReg;
++
++ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
++
++ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
++ assert(!"Unimplemented"); return BitVector();
++ }
++
++ /// \param RC is an AMDIL reg class.
++ ///
++ /// \returns The ISA reg class that is equivalent to \p RC.
++ virtual const TargetRegisterClass * getISARegClass(
++ const TargetRegisterClass * RC) const {
++ assert(!"Unimplemented"); return NULL;
++ }
++
++ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
++ assert(!"Unimplemented"); return NULL;
++ }
++
++ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
++ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
++ RegScavenger *RS) const;
++ unsigned getFrameRegister(const MachineFunction &MF) const;
++
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUREGISTERINFO_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,22 @@
++//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// Tablegen register definitions common to all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++let Namespace = "AMDGPU" in {
++ def sel_x : SubRegIndex;
++ def sel_y : SubRegIndex;
++ def sel_z : SubRegIndex;
++ def sel_w : SubRegIndex;
++}
++
++include "R600RegisterInfo.td"
++include "SIRegisterInfo.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,714 @@
++//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// The pass implemented in this file transforms the programs control flow
++/// graph into a form that's suitable for code generation on hardware that
++/// implements control flow by execution masking. This currently includes all
++/// AMD GPUs but may as well be useful for other types of hardware.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPU.h"
++#include "llvm/Module.h"
++#include "llvm/ADT/SCCIterator.h"
++#include "llvm/Analysis/RegionIterator.h"
++#include "llvm/Analysis/RegionInfo.h"
++#include "llvm/Analysis/RegionPass.h"
++#include "llvm/Transforms/Utils/SSAUpdater.h"
++
++using namespace llvm;
++
++namespace {
++
++// Definition of the complex types used in this pass.
++
++typedef std::pair<BasicBlock *, Value *> BBValuePair;
++typedef ArrayRef<BasicBlock*> BBVecRef;
++
++typedef SmallVector<RegionNode*, 8> RNVector;
++typedef SmallVector<BasicBlock*, 8> BBVector;
++typedef SmallVector<BBValuePair, 2> BBValueVector;
++
++typedef DenseMap<PHINode *, BBValueVector> PhiMap;
++typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
++typedef DenseMap<BasicBlock *, Value *> BBPredicates;
++typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
++typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
++
++// The name for newly created blocks.
++
++static const char *FlowBlockName = "Flow";
++
++/// @brief Transforms the control flow graph on one single entry/exit region
++/// at a time.
++///
++/// After the transform all "If"/"Then"/"Else" style control flow looks like
++/// this:
++///
++/// \verbatim
++/// 1
++/// ||
++/// | |
++/// 2 |
++/// | /
++/// |/
++/// 3
++/// || Where:
++/// | | 1 = "If" block, calculates the condition
++/// 4 | 2 = "Then" subregion, runs if the condition is true
++/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
++/// |/ 4 = "Else" optional subregion, runs if the condition is false
++/// 5 5 = "End" block, also rejoins the control flow
++/// \endverbatim
++///
++/// Control flow is expressed as a branch where the true exit goes into the
++/// "Then"/"Else" region, while the false exit skips the region
++/// The condition for the optional "Else" region is expressed as a PHI node.
++/// The incoming values of the PHI node are true for the "If" edge and false
++/// for the "Then" edge.
++///
++/// Additionally to that even complicated loops look like this:
++///
++/// \verbatim
++/// 1
++/// ||
++/// | |
++/// 2 ^ Where:
++/// | / 1 = "Entry" block
++/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
++/// 3 3 = "Flow" block, with back edge to entry block
++/// |
++/// \endverbatim
++///
++/// The back edge of the "Flow" block is always on the false side of the branch
++/// while the true side continues the general flow. So the loop condition
++/// consists of a network of PHI nodes where the true incoming values express
++/// breaks and the false values express continue states.
++class AMDGPUStructurizeCFG : public RegionPass {
++
++ static char ID;
++
++ Type *Boolean;
++ ConstantInt *BoolTrue;
++ ConstantInt *BoolFalse;
++ UndefValue *BoolUndef;
++
++ Function *Func;
++ Region *ParentRegion;
++
++ DominatorTree *DT;
++
++ RNVector Order;
++ VisitedMap Visited;
++ PredMap Predicates;
++ BBPhiMap DeletedPhis;
++ BBVector FlowsInserted;
++
++ BasicBlock *LoopStart;
++ BasicBlock *LoopEnd;
++ BBPredicates LoopPred;
++
++ void orderNodes();
++
++ void buildPredicate(BranchInst *Term, unsigned Idx,
++ BBPredicates &Pred, bool Invert);
++
++ void analyzeBlock(BasicBlock *BB);
++
++ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
++
++ void collectInfos();
++
++ bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
++
++ void killTerminator(BasicBlock *BB);
++
++ RegionNode *skipChained(RegionNode *Node);
++
++ void delPhiValues(BasicBlock *From, BasicBlock *To);
++
++ void addPhiValues(BasicBlock *From, BasicBlock *To);
++
++ BasicBlock *getNextFlow(BasicBlock *Prev);
++
++ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
++
++ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
++
++ void createFlow();
++
++ void insertConditions();
++
++ void rebuildSSA();
++
++public:
++ AMDGPUStructurizeCFG():
++ RegionPass(ID) {
++
++ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
++ }
++
++ virtual bool doInitialization(Region *R, RGPassManager &RGM);
++
++ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
++
++ virtual const char *getPassName() const {
++ return "AMDGPU simplify control flow";
++ }
++
++ void getAnalysisUsage(AnalysisUsage &AU) const {
++
++ AU.addRequired<DominatorTree>();
++ AU.addPreserved<DominatorTree>();
++ RegionPass::getAnalysisUsage(AU);
++ }
++
++};
++
++} // end anonymous namespace
++
++char AMDGPUStructurizeCFG::ID = 0;
++
++/// \brief Initialize the types and constants used in the pass
++bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
++ LLVMContext &Context = R->getEntry()->getContext();
++
++ Boolean = Type::getInt1Ty(Context);
++ BoolTrue = ConstantInt::getTrue(Context);
++ BoolFalse = ConstantInt::getFalse(Context);
++ BoolUndef = UndefValue::get(Boolean);
++
++ return false;
++}
++
++/// \brief Build up the general order of nodes
++void AMDGPUStructurizeCFG::orderNodes() {
++ scc_iterator<Region *> I = scc_begin(ParentRegion),
++ E = scc_end(ParentRegion);
++ for (Order.clear(); I != E; ++I) {
++ std::vector<RegionNode *> &Nodes = *I;
++ Order.append(Nodes.begin(), Nodes.end());
++ }
++}
++
++/// \brief Build blocks and loop predicates
++void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
++ BBPredicates &Pred, bool Invert) {
++ Value *True = Invert ? BoolFalse : BoolTrue;
++ Value *False = Invert ? BoolTrue : BoolFalse;
++
++ RegionInfo *RI = ParentRegion->getRegionInfo();
++ BasicBlock *BB = Term->getParent();
++
++ // Handle the case where multiple regions start at the same block
++ Region *R = BB != ParentRegion->getEntry() ?
++ RI->getRegionFor(BB) : ParentRegion;
++
++ if (R == ParentRegion) {
++ // It's a top level block in our region
++ Value *Cond = True;
++ if (Term->isConditional()) {
++ BasicBlock *Other = Term->getSuccessor(!Idx);
++
++ if (Visited.count(Other)) {
++ if (!Pred.count(Other))
++ Pred[Other] = False;
++
++ if (!Pred.count(BB))
++ Pred[BB] = True;
++ return;
++ }
++ Cond = Term->getCondition();
++
++ if (Idx != Invert)
++ Cond = BinaryOperator::CreateNot(Cond, "", Term);
++ }
++
++ Pred[BB] = Cond;
++
++ } else if (ParentRegion->contains(R)) {
++ // It's a block in a sub region
++ while(R->getParent() != ParentRegion)
++ R = R->getParent();
++
++ Pred[R->getEntry()] = True;
++
++ } else {
++ // It's a branch from outside into our parent region
++ Pred[BB] = True;
++ }
++}
++
++/// \brief Analyze the successors of each block and build up predicates
++void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
++ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
++ BBPredicates &Pred = Predicates[BB];
++
++ for (; PI != PE; ++PI) {
++ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
++
++ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
++ BasicBlock *Succ = Term->getSuccessor(i);
++ if (Succ != BB)
++ continue;
++ buildPredicate(Term, i, Pred, false);
++ }
++ }
++}
++
++/// \brief Analyze the conditions leading to a loop back to a previous block
++void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
++ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
++
++ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
++ BasicBlock *Succ = Term->getSuccessor(i);
++
++ // Ignore it if it's not a back edge
++ if (!Visited.count(Succ))
++ continue;
++
++ buildPredicate(Term, i, LoopPred, true);
++
++ LoopEnd = BB;
++ if (Visited[Succ] < LoopIdx) {
++ LoopIdx = Visited[Succ];
++ LoopStart = Succ;
++ }
++ }
++}
++
++/// \brief Collect various loop and predicate infos
++void AMDGPUStructurizeCFG::collectInfos() {
++ unsigned Number = 0, LoopIdx = ~0;
++
++ // Reset predicate
++ Predicates.clear();
++
++ // and loop infos
++ LoopStart = LoopEnd = 0;
++ LoopPred.clear();
++
++ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
++ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
++
++ // Analyze all the conditions leading to a node
++ analyzeBlock((*OI)->getEntry());
++
++ if ((*OI)->isSubRegion())
++ continue;
++
++ // Find the first/last loop nodes and loop predicates
++ analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
++ }
++}
++
++/// \brief Does A dominate all the predicates of B ?
++bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
++ BBPredicates &Preds = Predicates[B];
++ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
++ PI != PE; ++PI) {
++
++ if (!DT->dominates(A, PI->first))
++ return false;
++ }
++ return true;
++}
++
++/// \brief Remove phi values from all successors and then remove the terminator.
++void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
++ TerminatorInst *Term = BB->getTerminator();
++ if (!Term)
++ return;
++
++ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
++ SI != SE; ++SI) {
++
++ delPhiValues(BB, *SI);
++ }
++
++ Term->eraseFromParent();
++}
++
++/// First: Skip forward to the first region node that either isn't a subregion or not
++/// dominating its exit, remove all the skipped nodes from the node order.
++///
++/// Second: Handle the first successor directly if the resulting nodes successor
++/// predicates are still dominated by the original entry
++RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
++ BasicBlock *Entry = Node->getEntry();
++
++ // Skip forward as long as it is just a linear flow
++ while (true) {
++ BasicBlock *Entry = Node->getEntry();
++ BasicBlock *Exit;
++
++ if (Node->isSubRegion()) {
++ Exit = Node->getNodeAs<Region>()->getExit();
++ } else {
++ TerminatorInst *Term = Entry->getTerminator();
++ if (Term->getNumSuccessors() != 1)
++ break;
++ Exit = Term->getSuccessor(0);
++ }
++
++ // It's a back edge, break here so we can insert a loop node
++ if (!Visited.count(Exit))
++ return Node;
++
++ // More than node edges are pointing to exit
++ if (!DT->dominates(Entry, Exit))
++ return Node;
++
++ RegionNode *Next = ParentRegion->getNode(Exit);
++ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
++ assert(I != Order.end());
++
++ Visited.erase(Next->getEntry());
++ Order.erase(I);
++ Node = Next;
++ }
++
++ BasicBlock *BB = Node->getEntry();
++ TerminatorInst *Term = BB->getTerminator();
++ if (Term->getNumSuccessors() != 2)
++ return Node;
++
++  // Our node has exactly two successors, check if we can handle
++ // any of them directly
++ BasicBlock *Succ = Term->getSuccessor(0);
++ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
++ Succ = Term->getSuccessor(1);
++ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
++ return Node;
++ } else {
++ BasicBlock *Succ2 = Term->getSuccessor(1);
++ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
++ dominatesPredicates(Entry, Succ2))
++ Succ = Succ2;
++ }
++
++ RegionNode *Next = ParentRegion->getNode(Succ);
++ RNVector::iterator E = Order.end();
++ RNVector::iterator I = std::find(Order.begin(), E, Next);
++ assert(I != E);
++
++ killTerminator(BB);
++ FlowsInserted.push_back(BB);
++ Visited.erase(Succ);
++ Order.erase(I);
++ return ParentRegion->getNode(wireFlowBlock(BB, Next));
++}
++
++/// \brief Remove all PHI values coming from "From" into "To" and remember
++/// them in DeletedPhis
++void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
++ PhiMap &Map = DeletedPhis[To];
++ for (BasicBlock::iterator I = To->begin(), E = To->end();
++ I != E && isa<PHINode>(*I);) {
++
++ PHINode &Phi = cast<PHINode>(*I++);
++ while (Phi.getBasicBlockIndex(From) != -1) {
++ Value *Deleted = Phi.removeIncomingValue(From, false);
++ Map[&Phi].push_back(std::make_pair(From, Deleted));
++ }
++ }
++}
++
++/// \brief Add the PHI values back once we knew the new predecessor
++void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
++ if (!DeletedPhis.count(To))
++ return;
++
++ PhiMap &Map = DeletedPhis[To];
++ SSAUpdater Updater;
++
++ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
++
++ PHINode *Phi = I->first;
++ Updater.Initialize(Phi->getType(), "");
++ BasicBlock *Fallback = To;
++ bool HaveFallback = false;
++
++ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
++ VI != VE; ++VI) {
++
++ Updater.AddAvailableValue(VI->first, VI->second);
++ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
++ if (Dom == VI->first)
++ HaveFallback = true;
++ else if (Dom != Fallback)
++ HaveFallback = false;
++ Fallback = Dom;
++ }
++ if (!HaveFallback) {
++ Value *Undef = UndefValue::get(Phi->getType());
++ Updater.AddAvailableValue(Fallback, Undef);
++ }
++
++ Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
++ }
++ DeletedPhis.erase(To);
++}
++
++/// \brief Create a new flow node and update dominator tree and region info
++BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
++ LLVMContext &Context = Func->getContext();
++ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
++ Order.back()->getEntry();
++ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
++ Func, Insert);
++ DT->addNewBlock(Flow, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
++ FlowsInserted.push_back(Flow);
++ return Flow;
++}
++
++/// \brief Can we predict that this node will always be called?
++bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
++ BasicBlock *Node) {
++ BBPredicates &Preds = Predicates[Node];
++ bool Dominated = false;
++
++ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
++ I != E; ++I) {
++
++ if (I->second != BoolTrue)
++ return false;
++
++ if (!Dominated && DT->dominates(I->first, Prev))
++ Dominated = true;
++ }
++ return Dominated;
++}
++
++/// \brief Wire up the new control flow by inserting or updating the branch
++/// instructions at node exits
++BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
++ RegionNode *Node) {
++ BasicBlock *Entry = Node->getEntry();
++
++ if (LoopStart == Entry) {
++ LoopStart = Prev;
++ LoopPred[Prev] = BoolTrue;
++ }
++
++  // Wire it up temporarily, skipChained may recurse into us
++ BranchInst::Create(Entry, Prev);
++ DT->changeImmediateDominator(Entry, Prev);
++ addPhiValues(Prev, Entry);
++
++ Node = skipChained(Node);
++
++ BasicBlock *Next = getNextFlow(Prev);
++ if (!isPredictableTrue(Prev, Entry)) {
++ // Let Prev point to entry and next block
++ Prev->getTerminator()->eraseFromParent();
++ BranchInst::Create(Entry, Next, BoolUndef, Prev);
++ } else {
++ DT->changeImmediateDominator(Next, Entry);
++ }
++
++ // Let node exit(s) point to next block
++ if (Node->isSubRegion()) {
++ Region *SubRegion = Node->getNodeAs<Region>();
++ BasicBlock *Exit = SubRegion->getExit();
++
++ // Find all the edges from the sub region to the exit
++ BBVector ToDo;
++ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
++ if (SubRegion->contains(*I))
++ ToDo.push_back(*I);
++ }
++
++ // Modify the edges to point to the new flow block
++ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
++ delPhiValues(*I, Exit);
++ TerminatorInst *Term = (*I)->getTerminator();
++ Term->replaceUsesOfWith(Exit, Next);
++ }
++
++ // Update the region info
++ SubRegion->replaceExit(Next);
++
++ } else {
++ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
++ killTerminator(BB);
++ BranchInst::Create(Next, BB);
++
++ if (BB == LoopEnd)
++ LoopEnd = 0;
++ }
++
++ return Next;
++}
++
++/// Destroy node order and visited map, build up flow order instead.
++/// After this function control flow looks like it should be, but
++/// branches only have undefined conditions.
++void AMDGPUStructurizeCFG::createFlow() {
++ DeletedPhis.clear();
++
++ BasicBlock *Prev = Order.pop_back_val()->getEntry();
++ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
++ Visited.erase(Prev);
++
++ if (LoopStart == Prev) {
++ // Loop starts at entry, split entry so that we can predicate it
++ BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
++ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
++ DT->addNewBlock(Split, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
++ Predicates[Split] = Predicates[Prev];
++ Order.push_back(ParentRegion->getBBNode(Split));
++ LoopPred[Prev] = BoolTrue;
++
++ } else if (LoopStart == Order.back()->getEntry()) {
++ // Loop starts behind entry, split entry so that we can jump to it
++ Instruction *Term = Prev->getTerminator();
++ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
++ DT->addNewBlock(Split, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
++ Prev = Split;
++ }
++
++ killTerminator(Prev);
++ FlowsInserted.clear();
++ FlowsInserted.push_back(Prev);
++
++ while (!Order.empty()) {
++ RegionNode *Node = Order.pop_back_val();
++ Visited.erase(Node->getEntry());
++ Prev = wireFlowBlock(Prev, Node);
++ if (LoopStart && !LoopEnd) {
++ // Create an extra loop end node
++ LoopEnd = Prev;
++ Prev = getNextFlow(LoopEnd);
++ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
++ addPhiValues(LoopEnd, LoopStart);
++ }
++ }
++
++ BasicBlock *Exit = ParentRegion->getExit();
++ BranchInst::Create(Exit, Prev);
++ addPhiValues(Prev, Exit);
++ if (DT->dominates(ParentRegion->getEntry(), Exit))
++ DT->changeImmediateDominator(Exit, Prev);
++
++ if (LoopStart && LoopEnd) {
++ BBVector::iterator FI = std::find(FlowsInserted.begin(),
++ FlowsInserted.end(),
++ LoopStart);
++ for (; *FI != LoopEnd; ++FI) {
++ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
++ }
++ }
++
++ assert(Order.empty());
++ assert(Visited.empty());
++ assert(DeletedPhis.empty());
++}
++
++/// \brief Insert the missing branch conditions
++void AMDGPUStructurizeCFG::insertConditions() {
++ SSAUpdater PhiInserter;
++
++ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
++ FI != FE; ++FI) {
++
++ BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
++ if (Term->isUnconditional())
++ continue;
++
++ PhiInserter.Initialize(Boolean, "");
++ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
++
++ BasicBlock *Succ = Term->getSuccessor(0);
++ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
++ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
++ PI != PE; ++PI) {
++
++ PhiInserter.AddAvailableValue(PI->first, PI->second);
++ }
++
++ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
++ }
++}
++
++/// Handle a rare case where the disintegrated node's instructions
++/// no longer dominate all their uses. Not sure if this is really necessary
++void AMDGPUStructurizeCFG::rebuildSSA() {
++ SSAUpdater Updater;
++ for (Region::block_iterator I = ParentRegion->block_begin(),
++ E = ParentRegion->block_end();
++ I != E; ++I) {
++
++ BasicBlock *BB = *I;
++ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
++ II != IE; ++II) {
++
++ bool Initialized = false;
++ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
++
++ Next = I->getNext();
++
++ Instruction *User = cast<Instruction>(I->getUser());
++ if (User->getParent() == BB) {
++ continue;
++
++ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
++ if (UserPN->getIncomingBlock(*I) == BB)
++ continue;
++ }
++
++ if (DT->dominates(II, User))
++ continue;
++
++ if (!Initialized) {
++ Value *Undef = UndefValue::get(II->getType());
++ Updater.Initialize(II->getType(), "");
++ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
++ Updater.AddAvailableValue(BB, II);
++ Initialized = true;
++ }
++ Updater.RewriteUseAfterInsertions(*I);
++ }
++ }
++ }
++}
++
++/// \brief Run the transformation for each region found
++bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
++ if (R->isTopLevelRegion())
++ return false;
++
++ Func = R->getEntry()->getParent();
++ ParentRegion = R;
++
++ DT = &getAnalysis<DominatorTree>();
++
++ orderNodes();
++ collectInfos();
++ createFlow();
++ insertConditions();
++ rebuildSSA();
++
++ Order.clear();
++ Visited.clear();
++ Predicates.clear();
++ DeletedPhis.clear();
++ FlowsInserted.clear();
++
++ return true;
++}
++
++/// \brief Create the pass
++Pass *llvm::createAMDGPUStructurizeCFGPass() {
++ return new AMDGPUStructurizeCFG();
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,87 @@
++//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUSubtarget.h"
++
++using namespace llvm;
++
++#define GET_SUBTARGETINFO_ENUM
++#define GET_SUBTARGETINFO_TARGET_DESC
++#define GET_SUBTARGETINFO_CTOR
++#include "AMDGPUGenSubtargetInfo.inc"
++
++AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
++ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
++ InstrItins = getInstrItineraryForCPU(CPU);
++
++ memset(CapsOverride, 0, sizeof(*CapsOverride)
++ * AMDGPUDeviceInfo::MaxNumberCapabilities);
++ // Default card
++ StringRef GPU = CPU;
++ Is64bit = false;
++ DefaultSize[0] = 64;
++ DefaultSize[1] = 1;
++ DefaultSize[2] = 1;
++ ParseSubtargetFeatures(GPU, FS);
++ DevName = GPU;
++ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
++}
++
++AMDGPUSubtarget::~AMDGPUSubtarget() {
++ delete Device;
++}
++
++bool
++AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
++ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
++ "Caps index is out of bounds!");
++ return CapsOverride[caps];
++}
++bool
++AMDGPUSubtarget::is64bit() const {
++ return Is64bit;
++}
++bool
++AMDGPUSubtarget::isTargetELF() const {
++ return false;
++}
++size_t
++AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
++ if (dim > 3) {
++ return 1;
++ } else {
++ return DefaultSize[dim];
++ }
++}
++
++std::string
++AMDGPUSubtarget::getDataLayout() const {
++ if (!Device) {
++ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
++ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
++ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
++ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
++ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
++ }
++ return Device->getDataLayout();
++}
++
++std::string
++AMDGPUSubtarget::getDeviceName() const {
++ return DevName;
++}
++const AMDGPUDevice *
++AMDGPUSubtarget::device() const {
++ return Device;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,65 @@
++//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief AMDGPU specific subclass of TargetSubtarget.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUSUBTARGET_H
++#define AMDGPUSUBTARGET_H
++#include "AMDILDevice.h"
++#include "llvm/ADT/StringExtras.h"
++#include "llvm/ADT/StringRef.h"
++#include "llvm/Target/TargetSubtargetInfo.h"
++
++#define GET_SUBTARGETINFO_HEADER
++#include "AMDGPUGenSubtargetInfo.inc"
++
++#define MAX_CB_SIZE (1 << 16)
++
++namespace llvm {
++
++class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
++private:
++ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
++ const AMDGPUDevice *Device;
++ size_t DefaultSize[3];
++ std::string DevName;
++ bool Is64bit;
++ bool Is32on64bit;
++ bool DumpCode;
++ bool R600ALUInst;
++
++ InstrItineraryData InstrItins;
++
++public:
++ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
++ virtual ~AMDGPUSubtarget();
++
++ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
++ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
++
++ bool isOverride(AMDGPUDeviceInfo::Caps) const;
++ bool is64bit() const;
++
++ // Helper functions to simplify if statements
++ bool isTargetELF() const;
++ const AMDGPUDevice* device() const;
++ std::string getDataLayout() const;
++ std::string getDeviceName() const;
++ virtual size_t getDefaultSize(uint32_t dim) const;
++ bool dumpCode() const { return DumpCode; }
++ bool r600ALUEncoding() const { return R600ALUInst; }
++
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUSUBTARGET_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,148 @@
++//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief The AMDGPU target machine contains all of the hardware specific
++/// information needed to emit code for R600 and SI GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUTargetMachine.h"
++#include "AMDGPU.h"
++#include "R600ISelLowering.h"
++#include "R600InstrInfo.h"
++#include "SIISelLowering.h"
++#include "SIInstrInfo.h"
++#include "llvm/Analysis/Passes.h"
++#include "llvm/Analysis/Verifier.h"
++#include "llvm/CodeGen/MachineFunctionAnalysis.h"
++#include "llvm/CodeGen/MachineModuleInfo.h"
++#include "llvm/CodeGen/Passes.h"
++#include "llvm/MC/MCAsmInfo.h"
++#include "llvm/PassManager.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/Support/raw_os_ostream.h"
++#include "llvm/Transforms/IPO.h"
++#include "llvm/Transforms/Scalar.h"
++#include <llvm/CodeGen/Passes.h>
++
++using namespace llvm;
++
++extern "C" void LLVMInitializeR600Target() {
++ // Register the target
++ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
++}
++
++AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
++ StringRef CPU, StringRef FS,
++ TargetOptions Options,
++ Reloc::Model RM, CodeModel::Model CM,
++ CodeGenOpt::Level OptLevel
++)
++:
++ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
++ Subtarget(TT, CPU, FS),
++ Layout(Subtarget.getDataLayout()),
++ FrameLowering(TargetFrameLowering::StackGrowsUp,
++ Subtarget.device()->getStackAlignment(), 0),
++ IntrinsicInfo(this),
++ InstrItins(&Subtarget.getInstrItineraryData()) {
++ // TLInfo uses InstrInfo so it must be initialized after.
++ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
++ InstrInfo = new R600InstrInfo(*this);
++ TLInfo = new R600TargetLowering(*this);
++ } else {
++ InstrInfo = new SIInstrInfo(*this);
++ TLInfo = new SITargetLowering(*this);
++ }
++}
++
++AMDGPUTargetMachine::~AMDGPUTargetMachine() {
++}
++
++namespace {
++class AMDGPUPassConfig : public TargetPassConfig {
++public:
++ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
++ : TargetPassConfig(TM, PM) {}
++
++ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
++ return getTM<AMDGPUTargetMachine>();
++ }
++
++ virtual bool addPreISel();
++ virtual bool addInstSelector();
++ virtual bool addPreRegAlloc();
++ virtual bool addPostRegAlloc();
++ virtual bool addPreSched2();
++ virtual bool addPreEmitPass();
++};
++} // End of anonymous namespace
++
++TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
++ return new AMDGPUPassConfig(this, PM);
++}
++
++bool
++AMDGPUPassConfig::addPreISel() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createAMDGPUStructurizeCFGPass());
++ addPass(createSIAnnotateControlFlowPass());
++ }
++ return false;
++}
++
++bool AMDGPUPassConfig::addInstSelector() {
++ addPass(createAMDGPUPeepholeOpt(*TM));
++ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreRegAlloc() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createSIAssignInterpRegsPass(*TM));
++ }
++ addPass(createAMDGPUConvertToISAPass(*TM));
++ return false;
++}
++
++bool AMDGPUPassConfig::addPostRegAlloc() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createSIInsertWaits(*TM));
++ }
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreSched2() {
++
++ addPass(&IfConverterID);
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreEmitPass() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createAMDGPUCFGPreparationPass(*TM));
++ addPass(createAMDGPUCFGStructurizerPass(*TM));
++ addPass(createR600ExpandSpecialInstrsPass(*TM));
++ addPass(createR600LowerConstCopy(*TM));
++ addPass(&FinalizeMachineBundlesID);
++ } else {
++ addPass(createSILowerLiteralConstantsPass(*TM));
++ addPass(createSILowerControlFlowPass(*TM));
++ }
++
++ return false;
++}
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,70 @@
++//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_TARGET_MACHINE_H
++#define AMDGPU_TARGET_MACHINE_H
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDGPUSubtarget.h"
++#include "AMDILFrameLowering.h"
++#include "AMDILIntrinsicInfo.h"
++#include "R600ISelLowering.h"
++#include "llvm/ADT/OwningPtr.h"
++#include "llvm/DataLayout.h"
++
++namespace llvm {
++
++MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
++
++class AMDGPUTargetMachine : public LLVMTargetMachine {
++
++ AMDGPUSubtarget Subtarget;
++ const DataLayout Layout;
++ AMDGPUFrameLowering FrameLowering;
++ AMDGPUIntrinsicInfo IntrinsicInfo;
++ const AMDGPUInstrInfo * InstrInfo;
++ AMDGPUTargetLowering * TLInfo;
++ const InstrItineraryData* InstrItins;
++
++public:
++ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
++ StringRef CPU,
++ TargetOptions Options,
++ Reloc::Model RM, CodeModel::Model CM,
++ CodeGenOpt::Level OL);
++ ~AMDGPUTargetMachine();
++ virtual const AMDGPUFrameLowering* getFrameLowering() const {
++ return &FrameLowering;
++ }
++ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
++ return &IntrinsicInfo;
++ }
++ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
++ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
++ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
++ return &InstrInfo->getRegisterInfo();
++ }
++ virtual AMDGPUTargetLowering * getTargetLowering() const {
++ return TLInfo;
++ }
++ virtual const InstrItineraryData* getInstrItineraryData() const {
++ return InstrItins;
++ }
++ virtual const DataLayout* getDataLayout() const { return &Layout; }
++ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
++};
++
++} // End namespace llvm
++
++#endif // AMDGPU_TARGET_MACHINE_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPU.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPU.td 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,40 @@
++//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++
++// Include AMDIL TD files
++include "AMDILBase.td"
++
++
++def AMDGPUInstrInfo : InstrInfo {
++ let guessInstructionProperties = 1;
++}
++
++//===----------------------------------------------------------------------===//
++// Declare the target which we are implementing
++//===----------------------------------------------------------------------===//
++def AMDGPUAsmWriter : AsmWriter {
++ string AsmWriterClassName = "InstPrinter";
++ int Variant = 0;
++ bit isMCAsmWriter = 1;
++}
++
++def AMDGPU : Target {
++ // Pull in Instruction Info:
++ let InstructionSet = AMDGPUInstrInfo;
++ let AssemblyWriters = [AMDGPUAsmWriter];
++}
++
++// Include AMDGPU TD files
++include "R600Schedule.td"
++include "SISchedule.td"
++include "Processors.td"
++include "AMDGPUInstrInfo.td"
++include "AMDGPUIntrinsics.td"
++include "AMDGPURegisterInfo.td"
++include "AMDGPUInstructions.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,115 @@
++//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++// \file
++//==-----------------------------------------------------------------------===//
++#include "AMDIL7XXDevice.h"
++#include "AMDGPUSubtarget.h"
++#include "AMDILDevice.h"
++
++using namespace llvm;
++
++AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
++ setCaps();
++ std::string name = mSTM->getDeviceName();
++ if (name == "rv710") {
++ DeviceFlag = OCL_DEVICE_RV710;
++ } else if (name == "rv730") {
++ DeviceFlag = OCL_DEVICE_RV730;
++ } else {
++ DeviceFlag = OCL_DEVICE_RV770;
++ }
++}
++
++AMDGPU7XXDevice::~AMDGPU7XXDevice() {
++}
++
++void AMDGPU7XXDevice::setCaps() {
++ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
++}
++
++size_t AMDGPU7XXDevice::getMaxLDSSize() const {
++ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
++ return MAX_LDS_SIZE_700;
++ }
++ return 0;
++}
++
++size_t AMDGPU7XXDevice::getWavefrontSize() const {
++ return AMDGPUDevice::HalfWavefrontSize;
++}
++
++uint32_t AMDGPU7XXDevice::getGeneration() const {
++ return AMDGPUDeviceInfo::HD4XXX;
++}
++
++uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
++ switch (DeviceID) {
++ default:
++ assert(0 && "ID type passed in is unknown!");
++ break;
++ case GLOBAL_ID:
++ case CONSTANT_ID:
++ case RAW_UAV_ID:
++ case ARENA_UAV_ID:
++ break;
++ case LDS_ID:
++ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
++ return DEFAULT_LDS_ID;
++ }
++ break;
++ case SCRATCH_ID:
++ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
++ return DEFAULT_SCRATCH_ID;
++ }
++ break;
++ case GDS_ID:
++ assert(0 && "GDS UAV ID is not supported on this chip");
++ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
++ return DEFAULT_GDS_ID;
++ }
++ break;
++ };
++
++ return 0;
++}
++
++uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
++ return 1;
++}
++
++AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
++ setCaps();
++}
++
++AMDGPU770Device::~AMDGPU770Device() {
++}
++
++void AMDGPU770Device::setCaps() {
++ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
++ mSWBits.set(AMDGPUDeviceInfo::FMA);
++ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
++ }
++ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
++ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
++ mSWBits.set(AMDGPUDeviceInfo::LongOps);
++ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
++}
++
++size_t AMDGPU770Device::getWavefrontSize() const {
++ return AMDGPUDevice::WavefrontSize;
++}
++
++AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
++}
++
++AMDGPU710Device::~AMDGPU710Device() {
++}
++
++size_t AMDGPU710Device::getWavefrontSize() const {
++ return AMDGPUDevice::QuarterWavefrontSize;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h
+--- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,72 @@
++//==-- AMDIL7XXDevice.h - Define 7XX Devices for AMDIL --------*- C++ -*--===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++/// \file
++/// \brief Interface for the subtarget data classes.
++///
++/// This file will define the interface that each generation needs to
++/// implement in order to correctly answer queries on the capabilities of the
++/// specific hardware.
++//===----------------------------------------------------------------------===//
++#ifndef AMDIL7XXDEVICEIMPL_H
++#define AMDIL7XXDEVICEIMPL_H
++#include "AMDILDevice.h"
++
++namespace llvm {
++class AMDGPUSubtarget;
++
++//===----------------------------------------------------------------------===//
++// 7XX generation of devices and their respective sub classes
++//===----------------------------------------------------------------------===//
++
++/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
++///
++/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
++/// support the minimal features that are required to be considered OpenCL 1.0
++/// compliant and nothing more.
++class AMDGPU7XXDevice : public AMDGPUDevice {
++public:
++ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU7XXDevice();
++ virtual size_t getMaxLDSSize() const;
++ virtual size_t getWavefrontSize() const;
++ virtual uint32_t getGeneration() const;
++ virtual uint32_t getResourceID(uint32_t DeviceID) const;
++ virtual uint32_t getMaxNumUAVs() const;
++
++protected:
++ virtual void setCaps();
++};
++
++/// \brief The AMDGPU770Device class represents the RV770 chip and its
++/// derivative cards.
++///
++/// The difference between this device and the base class is that this device
++/// adds support for double precision and has a larger wavefront size.
++class AMDGPU770Device : public AMDGPU7XXDevice {
++public:
++ AMDGPU770Device(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU770Device();
++ virtual size_t getWavefrontSize() const;
++private:
++ virtual void setCaps();
++};
++
++/// \brief The AMDGPU710Device class derives from the 7XX base class.
++///
++/// This class is a smaller derivative, so we need to overload some of the
++/// functions in order to correctly specify this information.
++class AMDGPU710Device : public AMDGPU7XXDevice {
++public:
++ AMDGPU710Device(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU710Device();
++ virtual size_t getWavefrontSize() const;
++};
++
++} // namespace llvm
++#endif // AMDIL7XXDEVICEIMPL_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td
+--- llvm-3.2.src/lib/Target/R600/AMDILBase.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDILBase.td 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,85 @@
++//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++// Target-independent interfaces which we are implementing
++//===----------------------------------------------------------------------===//
++
++include "llvm/Target/Target.td"
++
++// Dummy Instruction itineraries for pseudo instructions
++def ALU_NULL : FuncUnit;
++def NullALU : InstrItinClass;
++
++//===----------------------------------------------------------------------===//
++// AMDIL Subtarget features.
++//===----------------------------------------------------------------------===//
++def FeatureFP64 : SubtargetFeature<"fp64",
++ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
++ "true",
++ "Enable 64bit double precision operations">;
++def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
++ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
++ "true",
++ "Enable byte addressable stores">;
++def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
++ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
++ "true",
++ "Enable duplicate barrier detection(HD5XXX or later).">;
++def FeatureImages : SubtargetFeature<"images",
++ "CapsOverride[AMDGPUDeviceInfo::Images]",
++ "true",
++ "Enable image functions">;
++def FeatureMultiUAV : SubtargetFeature<"multi_uav",
++ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
++ "true",
++ "Generate multiple UAV code(HD5XXX family or later)">;
++def FeatureMacroDB : SubtargetFeature<"macrodb",
++ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
++ "true",
++ "Use internal macrodb, instead of macrodb in driver">;
++def FeatureNoAlias : SubtargetFeature<"noalias",
++ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
++ "true",
++ "assert that all kernel argument pointers are not aliased">;
++def FeatureNoInline : SubtargetFeature<"no-inline",
++ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
++ "true",
++ "specify whether to not inline functions">;
++
++def Feature64BitPtr : SubtargetFeature<"64BitPtr",
++ "Is64bit",
++ "false",
++ "Specify if 64bit addressing should be used.">;
++
++def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
++ "Is32on64bit",
++ "false",
++ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
++def FeatureDebug : SubtargetFeature<"debug",
++ "CapsOverride[AMDGPUDeviceInfo::Debug]",
++ "true",
++ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
++def FeatureDumpCode : SubtargetFeature <"DumpCode",
++ "DumpCode",
++ "true",
++ "Dump MachineInstrs in the CodeEmitter">;
++
++def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
++ "R600ALUInst",
++ "false",
++ "Older version of ALU instructions encoding.">;
++
++
++//===----------------------------------------------------------------------===//
++// Register File, Calling Conv, Instruction Descriptions
++//===----------------------------------------------------------------------===//
++
++
++include "AMDILRegisterInfo.td"
++include "AMDILInstrInfo.td"
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,3045 @@
++//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//==-----------------------------------------------------------------------===//
++
++#define DEBUGME 0
++#define DEBUG_TYPE "structcfg"
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDIL.h"
++#include "llvm/ADT/SCCIterator.h"
++#include "llvm/ADT/SmallVector.h"
++#include "llvm/ADT/Statistic.h"
++#include "llvm/Analysis/DominatorInternals.h"
++#include "llvm/Analysis/Dominators.h"
++#include "llvm/CodeGen/MachinePostDominators.h"
++#include "llvm/CodeGen/MachineDominators.h"
++#include "llvm/CodeGen/MachineFunction.h"
++#include "llvm/CodeGen/MachineFunctionAnalysis.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineJumpTableInfo.h"
++#include "llvm/CodeGen/MachineLoopInfo.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/Target/TargetInstrInfo.h"
++
++using namespace llvm;
++
++// TODO: move-begin.
++
++//===----------------------------------------------------------------------===//
++//
++// Statistics for CFGStructurizer.
++//
++//===----------------------------------------------------------------------===//
++
++STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
++ "matched");
++STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
++ "matched");
++STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
++ "pattern matched");
++STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
++ "pattern matched");
++STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
++ "matched");
++STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
++STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
++
++//===----------------------------------------------------------------------===//
++//
++// Miscellaneous utility for CFGStructurizer.
++//
++//===----------------------------------------------------------------------===//
++namespace llvmCFGStruct {
++#define SHOWNEWINSTR(i) \
++ if (DEBUGME) errs() << "New instr: " << *i << "\n"
++
++#define SHOWNEWBLK(b, msg) \
++if (DEBUGME) { \
++ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
++ errs() << "\n"; \
++}
++
++#define SHOWBLK_DETAIL(b, msg) \
++if (DEBUGME) { \
++ if (b) { \
++ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
++ b->print(errs()); \
++ errs() << "\n"; \
++ } \
++}
++
++#define INVALIDSCCNUM -1
++#define INVALIDREGNUM 0
++
++template<class LoopinfoT>
++void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
++ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
++ iterEnd = LoopInfo.end();
++ iter != iterEnd; ++iter) {
++ (*iter)->print(OS, 0);
++ }
++}
++
++template<class NodeT>
++void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
++ size_t sz = Src.size();
++ for (size_t i = 0; i < sz/2; ++i) {
++ NodeT *t = Src[i];
++ Src[i] = Src[sz - i - 1];
++ Src[sz - i - 1] = t;
++ }
++}
++
++} //end namespace llvmCFGStruct
++
++//===----------------------------------------------------------------------===//
++//
++// supporting data structure for CFGStructurizer
++//
++//===----------------------------------------------------------------------===//
++
++namespace llvmCFGStruct {
++template<class PassT>
++struct CFGStructTraits {
++};
++
++template <class InstrT>
++class BlockInformation {
++public:
++ bool isRetired;
++ int sccNum;
++ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
++ //Instructions defining the corresponding successor.
++ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
++};
++
++template <class BlockT, class InstrT, class RegiT>
++class LandInformation {
++public:
++ BlockT *landBlk;
++ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
++ //WHILELOOP(thisloop) init before entering
++ //thisloop.
++ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
++ //WHILELOOP(thisloop) init after entering
++ //thisloop.
++ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
++ //land block, branch cond on this reg.
++ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
++ //endif" after ENDLOOP(thisloop) break
++ //outerLoopOf(thisLoop).
++ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
++ //endif" after ENDLOOP(thisloop) continue on
++ //outerLoopOf(thisLoop).
++ LandInformation() : landBlk(NULL) {}
++};
++
++} //end of namespace llvmCFGStruct
++
++//===----------------------------------------------------------------------===//
++//
++// CFGStructurizer
++//
++//===----------------------------------------------------------------------===//
++
++namespace llvmCFGStruct {
++// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
++template<class PassT>
++class CFGStructurizer {
++public:
++ typedef enum {
++ Not_SinglePath = 0,
++ SinglePath_InPath = 1,
++ SinglePath_NotInPath = 2
++ } PathToKind;
++
++public:
++ typedef typename PassT::InstructionType InstrT;
++ typedef typename PassT::FunctionType FuncT;
++ typedef typename PassT::DominatortreeType DomTreeT;
++ typedef typename PassT::PostDominatortreeType PostDomTreeT;
++ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
++ typedef typename PassT::LoopinfoType LoopInfoT;
++
++ typedef GraphTraits<FuncT *> FuncGTraits;
++ //typedef FuncGTraits::nodes_iterator BlockIterator;
++ typedef typename FuncT::iterator BlockIterator;
++
++ typedef typename FuncGTraits::NodeType BlockT;
++ typedef GraphTraits<BlockT *> BlockGTraits;
++ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
++ //typedef BlockGTraits::succ_iterator InstructionIterator;
++ typedef typename BlockT::iterator InstrIterator;
++
++ typedef CFGStructTraits<PassT> CFGTraits;
++ typedef BlockInformation<InstrT> BlockInfo;
++ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
++
++ typedef int RegiT;
++ typedef typename PassT::LoopType LoopT;
++ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
++ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
++ //landing info for loop break
++ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
++
++public:
++ CFGStructurizer();
++ ~CFGStructurizer();
++
++ /// Perform the CFG structurization
++ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
++
++ /// Perform the CFG preparation
++ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
++
++private:
++ void reversePredicateSetter(typename BlockT::iterator);
++ void orderBlocks();
++ void printOrderedBlocks(llvm::raw_ostream &OS);
++ int patternMatch(BlockT *CurBlock);
++ int patternMatchGroup(BlockT *CurBlock);
++
++ int serialPatternMatch(BlockT *CurBlock);
++ int ifPatternMatch(BlockT *CurBlock);
++ int switchPatternMatch(BlockT *CurBlock);
++ int loopendPatternMatch(BlockT *CurBlock);
++ int loopPatternMatch(BlockT *CurBlock);
++
++ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
++ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
++ //int loopWithoutBreak(BlockT *);
++
++ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
++ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
++ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
++ BlockT *ContBlock, LoopT *contLoop);
++ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
++ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock);
++ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock);
++ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock, BlockT **LandBlockPtr);
++ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock, BlockT *LandBlock,
++ bool Detail = false);
++ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
++ bool AllowSideEntry = true);
++ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
++ bool AllowSideEntry = true);
++ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
++ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
++
++ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
++ BlockT *TrueBlock, BlockT *FalseBlock,
++ BlockT *LandBlock);
++ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
++ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
++ BlockT *ExitLandBlock, RegiT SetReg);
++ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
++ RegiT SetReg);
++ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
++ std::set<BlockT*> &ExitBlockSet,
++ BlockT *ExitLandBlk);
++ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
++ BlockTSmallerVector &ExitingBlocks,
++ BlockTSmallerVector &ExitBlocks);
++ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
++ void removeUnconditionalBranch(BlockT *SrcBlock);
++ void removeRedundantConditionalBranch(BlockT *SrcBlock);
++ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
++
++ void removeSuccessor(BlockT *SrcBlock);
++ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
++ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
++
++ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
++ InstrIterator InsertPos);
++
++ void recordSccnum(BlockT *SrcBlock, int SCCNum);
++ int getSCCNum(BlockT *srcBlk);
++
++ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
++ bool isRetiredBlock(BlockT *SrcBlock);
++ bool isActiveLoophead(BlockT *CurBlock);
++ bool needMigrateBlock(BlockT *Block);
++
++ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
++ BlockTSmallerVector &exitBlocks,
++ std::set<BlockT*> &ExitBlockSet);
++ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
++ BlockT *getLoopLandBlock(LoopT *LoopRep);
++ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
++
++ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
++
++ bool hasBackEdge(BlockT *curBlock);
++ unsigned getLoopDepth (LoopT *LoopRep);
++ int countActiveBlock(
++ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
++ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
++ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
++ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
++
++private:
++ DomTreeT *domTree;
++ PostDomTreeT *postDomTree;
++ LoopInfoT *loopInfo;
++ PassT *passRep;
++ FuncT *funcRep;
++
++ BlockInfoMap blockInfoMap;
++ LoopLandInfoMap loopLandInfoMap;
++ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
++ const AMDGPURegisterInfo *TRI;
++
++}; //template class CFGStructurizer
++
++template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
++ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
++}
++
++template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
++ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
++ E = blockInfoMap.end(); I != E; ++I) {
++ delete I->second;
++ }
++}
++
++template<class PassT>
++bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
++ const AMDGPURegisterInfo * tri) {
++ passRep = &pass;
++ funcRep = &func;
++ TRI = tri;
++
++ bool changed = false;
++
++ //FIXME: if not reducible flow graph, make it so ???
++
++ if (DEBUGME) {
++ errs() << "AMDGPUCFGStructurizer::prepare\n";
++ }
++
++ loopInfo = CFGTraits::getLoopInfo(pass);
++ if (DEBUGME) {
++ errs() << "LoopInfo:\n";
++ PrintLoopinfo(*loopInfo, errs());
++ }
++
++ orderBlocks();
++ if (DEBUGME) {
++ errs() << "Ordered blocks:\n";
++ printOrderedBlocks(errs());
++ }
++
++ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
++
++ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
++ iterEnd = loopInfo->end();
++ iter != iterEnd; ++iter) {
++ LoopT* loopRep = (*iter);
++ BlockTSmallerVector exitingBlks;
++ loopRep->getExitingBlocks(exitingBlks);
++
++ if (exitingBlks.size() == 0) {
++ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
++ if (dummyExitBlk != NULL)
++ retBlks.push_back(dummyExitBlk);
++ }
++ }
++
++ // Remove unconditional branch instr.
++ // Add dummy exit block iff there are multiple returns.
++
++ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
++ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
++ iterBlk != iterEndBlk;
++ ++iterBlk) {
++ BlockT *curBlk = *iterBlk;
++ removeUnconditionalBranch(curBlk);
++ removeRedundantConditionalBranch(curBlk);
++ if (CFGTraits::isReturnBlock(curBlk)) {
++ retBlks.push_back(curBlk);
++ }
++ assert(curBlk->succ_size() <= 2);
++ } //for
++
++ if (retBlks.size() >= 2) {
++ addDummyExitBlock(retBlks);
++ changed = true;
++ }
++
++ return changed;
++} //CFGStructurizer::prepare
<Skipped 19082 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/llvm.git/commitdiff/251890425981d4d0076266858a1b1ef0ea2b617a
More information about the pld-cvs-commit
mailing list