[packages/llvm] - up to 3.3
arekm
arekm at pld-linux.org
Sat Jun 15 20:17:36 CEST 2013
commit c0fd916863545c83a6eaac1b5048b1da5ea0c5ba
Author: Arkadiusz Miśkiewicz <arekm at maven.pl>
Date: Sat Jun 15 20:17:33 2013 +0200
- up to 3.3
llvm-r600.patch | 23023 ------------------------------------------------------
1 file changed, 23023 deletions(-)
---
diff --git a/llvm-r600.patch b/llvm-r600.patch
deleted file mode 100644
index 0957c01..0000000
--- a/llvm-r600.patch
+++ /dev/null
@@ -1,23023 +0,0 @@
-diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
---- llvm-3.2.src/autoconf/configure.ac 2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/autoconf/configure.ac 2013-01-25 19:43:56.096716416 +0100
-@@ -751,6 +751,11 @@
-
- if test ${enableval} != "disable"
- then
-+ if test ${enableval} = "AMDGPU"
-+ then
-+ AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
-+ enableval="R600"
-+ fi
- TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
-
-diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
---- llvm-3.2.src/configure 2012-11-21 17:13:35.000000000 +0100
-+++ llvm-r600/configure 2013-01-25 19:43:56.173383081 +0100
-@@ -5473,6 +5473,13 @@
-
- if test ${enableval} != "disable"
- then
-+ if test ${enableval} = "AMDGPU"
-+ then
-+ { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
-+echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
-+ { (exit 1); exit 1; }; }
-+ enableval="R600"
-+ fi
- TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
- fi
-
-@@ -10316,7 +10323,7 @@
- lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
- lt_status=$lt_dlunknown
- cat > conftest.$ac_ext <<EOF
--#line 10317 "configure"
-+#line 10326 "configure"
- #include "confdefs.h"
-
- #if HAVE_DLFCN_H
-diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
---- llvm-3.2.src/include/llvm/IntrinsicsR600.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/include/llvm/IntrinsicsR600.td 2013-01-25 19:43:56.433383075 +0100
-@@ -0,0 +1,36 @@
-+//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines all of the R600-specific intrinsics.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "r600" in {
-+
-+class R600ReadPreloadRegisterIntrinsic<string name>
-+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
-+ GCCBuiltin<name>;
-+
-+multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
-+ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
-+ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
-+ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
-+}
-+
-+defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_global_size">;
-+defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_local_size">;
-+defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_ngroups">;
-+defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_tgid">;
-+defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
-+ "__builtin_r600_read_tidig">;
-+} // End TargetPrefix = "r600"
-diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
---- llvm-3.2.src/include/llvm/Intrinsics.td 2012-10-20 01:00:20.000000000 +0200
-+++ llvm-r600/include/llvm/Intrinsics.td 2013-01-25 19:43:56.426716409 +0100
-@@ -469,3 +469,4 @@
- include "llvm/IntrinsicsHexagon.td"
- include "llvm/IntrinsicsNVVM.td"
- include "llvm/IntrinsicsMips.td"
-+include "llvm/IntrinsicsR600.td"
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2012-11-26 18:01:12.000000000 +0100
-+++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2013-01-25 19:43:56.720049736 +0100
-@@ -8514,11 +8514,8 @@
- if (Opcode == ISD::DELETED_NODE &&
- (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
- Opcode = Opc;
-- // If not supported by target, bail out.
-- if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
-- TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
-- return SDValue();
- }
-+
- if (Opc != Opcode)
- return SDValue();
-
-@@ -8543,6 +8540,10 @@
- assert(SrcVT != MVT::Other && "Cannot determine source type!");
-
- EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
-+
-+ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
-+ return SDValue();
-+
- SmallVector<SDValue, 8> Opnds;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- SDValue In = N->getOperand(i);
-diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
---- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2012-10-24 19:25:11.000000000 +0200
-+++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2013-01-25 19:43:56.733383069 +0100
-@@ -731,9 +731,10 @@
- return;
- }
- case TargetLowering::Promote: {
-- assert(VT.isVector() && "Unknown legal promote case!");
-- Value = DAG.getNode(ISD::BITCAST, dl,
-- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
-+ EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
-+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+ "Can only promote stores to same size type");
-+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr,
- ST->getPointerInfo(), isVolatile,
-@@ -889,10 +890,9 @@
- break;
- }
- case TargetLowering::Promote: {
-- // Only promote a load of vector type to another.
-- assert(VT.isVector() && "Cannot promote this load!");
-- // Change base type to a different vector type.
- EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
-+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
-+ "Can only promote loads to same size type");
-
- SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
- LD->isVolatile(), LD->isNonTemporal(),
-diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
---- llvm-3.2.src/lib/Target/LLVMBuild.txt 2012-07-16 20:19:46.000000000 +0200
-+++ llvm-r600/lib/Target/LLVMBuild.txt 2013-01-25 19:43:57.173383060 +0100
-@@ -16,7 +16,7 @@
- ;===------------------------------------------------------------------------===;
-
- [common]
--subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
-+subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
-
- ; This is a special group whose required libraries are extended (by llvm-build)
- ; with the best execution engine (the native JIT, if available, or the
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,138 @@
-+//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assebly printer --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+///
-+/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
-+/// code. When passed an MCAsmStreamer it prints assembly and when passed
-+/// an MCObjectStreamer it outputs binary code.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+
-+#include "AMDGPUAsmPrinter.h"
-+#include "AMDGPU.h"
-+#include "SIMachineFunctionInfo.h"
-+#include "SIRegisterInfo.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/Target/TargetLoweringObjectFile.h"
-+#include "llvm/Support/TargetRegistry.h"
-+
-+using namespace llvm;
-+
-+
-+static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
-+ MCStreamer &Streamer) {
-+ return new AMDGPUAsmPrinter(tm, Streamer);
-+}
-+
-+extern "C" void LLVMInitializeR600AsmPrinter() {
-+ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
-+}
-+
-+/// We need to override this function so we can avoid
-+/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
-+bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
-+ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
-+ if (STM.dumpCode()) {
-+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-+ MF.dump();
-+#endif
-+ }
-+ SetupMachineFunction(MF);
-+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
-+ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ EmitProgramInfo(MF);
-+ }
-+ EmitFunctionBody();
-+ return false;
-+}
-+
-+void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
-+ unsigned MaxSGPR = 0;
-+ unsigned MaxVGPR = 0;
-+ bool VCCUsed = false;
-+ const SIRegisterInfo * RI =
-+ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
-+
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E; ++I) {
-+ MachineInstr &MI = *I;
-+
-+ unsigned numOperands = MI.getNumOperands();
-+ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
-+ MachineOperand & MO = MI.getOperand(op_idx);
-+ unsigned maxUsed;
-+ unsigned width = 0;
-+ bool isSGPR = false;
-+ unsigned reg;
-+ unsigned hwReg;
-+ if (!MO.isReg()) {
-+ continue;
-+ }
-+ reg = MO.getReg();
-+ if (reg == AMDGPU::VCC) {
-+ VCCUsed = true;
-+ continue;
-+ }
-+ switch (reg) {
-+ default: break;
-+ case AMDGPU::EXEC:
-+ case AMDGPU::SI_LITERAL_CONSTANT:
-+ case AMDGPU::SREG_LIT_0:
-+ case AMDGPU::M0:
-+ continue;
-+ }
-+
-+ if (AMDGPU::SReg_32RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 1;
-+ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 1;
-+ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 2;
-+ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 2;
-+ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 4;
-+ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
-+ isSGPR = false;
-+ width = 4;
-+ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
-+ isSGPR = true;
-+ width = 8;
-+ } else {
-+ assert(!"Unknown register class");
-+ }
-+ hwReg = RI->getEncodingValue(reg);
-+ maxUsed = hwReg + width - 1;
-+ if (isSGPR) {
-+ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
-+ } else {
-+ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
-+ }
-+ }
-+ }
-+ }
-+ if (VCCUsed) {
-+ MaxSGPR += 2;
-+ }
-+ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
-+ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
-+ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
-+ OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,44 @@
-+//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU Assembly printer class.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_ASMPRINTER_H
-+#define AMDGPU_ASMPRINTER_H
-+
-+#include "llvm/CodeGen/AsmPrinter.h"
-+
-+namespace llvm {
-+
-+class AMDGPUAsmPrinter : public AsmPrinter {
-+
-+public:
-+ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
-+ : AsmPrinter(TM, Streamer) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ virtual const char *getPassName() const {
-+ return "AMDGPU Assembly Printer";
-+ }
-+
-+ /// \brief Emit register usage information so that the GPU driver
-+ /// can correctly setup the GPU state.
-+ void EmitProgramInfo(MachineFunction &MF);
-+
-+ /// Implemented in AMDGPUMCInstLower.cpp
-+ virtual void EmitInstruction(const MachineInstr *MI);
-+};
-+
-+} // End anonymous llvm
-+
-+#endif //AMDGPU_ASMPRINTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,49 @@
-+//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief CodeEmitter interface for R600 and SI codegen.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUCODEEMITTER_H
-+#define AMDGPUCODEEMITTER_H
-+
-+namespace llvm {
-+
-+class AMDGPUCodeEmitter {
-+public:
-+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
-+ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
-+ const MachineOperand &MO) const { return 0; }
-+ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
-+ uint64_t Value) const {
-+ return Value;
-+ }
-+ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
-+ unsigned OpNo) const {
-+ return 0;
-+ }
-+ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
-+ const {
-+ return 0;
-+ }
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUCODEEMITTER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This pass lowers AMDIL machine instructions to the appropriate
-+/// hardware instructions.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class AMDGPUConvertToISAPass : public MachineFunctionPass {
-+
-+private:
-+ static char ID;
-+ TargetMachine &TM;
-+
-+public:
-+ AMDGPUConvertToISAPass(TargetMachine &tm) :
-+ MachineFunctionPass(ID), TM(tm) { }
-+
-+ virtual bool runOnMachineFunction(MachineFunction &MF);
-+
-+ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
-+
-+};
-+
-+} // End anonymous namespace
-+
-+char AMDGPUConvertToISAPass::ID = 0;
-+
-+FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
-+ return new AMDGPUConvertToISAPass(tm);
-+}
-+
-+bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
-+ const AMDGPUInstrInfo * TII =
-+ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-+
-+ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
-+ BB != BB_E; ++BB) {
-+ MachineBasicBlock &MBB = *BB;
-+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
-+ I != E; ++I) {
-+ MachineInstr &MI = *I;
-+ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
-+ }
-+ }
-+ return false;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
---- llvm-3.2.src/lib/Target/R600/AMDGPU.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.h 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_H
-+#define AMDGPU_H
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Target/TargetMachine.h"
-+
-+namespace llvm {
-+
-+class FunctionPass;
-+class AMDGPUTargetMachine;
-+
-+// R600 Passes
-+FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
-+FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
-+FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
-+
-+// SI Passes
-+FunctionPass *createSIAnnotateControlFlowPass();
-+FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
-+FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
-+FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
-+FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
-+FunctionPass *createSIInsertWaits(TargetMachine &tm);
-+
-+// Passes common to R600 and SI
-+Pass *createAMDGPUStructurizeCFGPass();
-+FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
-+
-+} // End namespace llvm
-+
-+namespace ShaderType {
-+ enum Type {
-+ PIXEL = 0,
-+ VERTEX = 1,
-+ GEOMETRY = 2,
-+ COMPUTE = 3
-+ };
-+}
-+
-+#endif // AMDGPU_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,257 @@
-+//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implementation of the TargetInstrInfo class that is common to all
-+/// AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDIL.h"
-+#include "llvm/CodeGen/MachineFrameInfo.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+
-+#define GET_INSTRINFO_CTOR
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+using namespace llvm;
-+
-+AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
-+ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
-+
-+const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
-+ return RI;
-+}
-+
-+bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
-+ unsigned &SrcReg, unsigned &DstReg,
-+ unsigned &SubIdx) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+
-+unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+
-+bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const {
-+// TODO: Implement this function
-+ return false;
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-+ MachineBasicBlock::iterator &MBBI,
-+ LiveVariables *LV) const {
-+// TODO: Implement this function
-+ return NULL;
-+}
-+bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+ MachineBasicBlock &MBB) const {
-+ while (iter != MBB.end()) {
-+ switch (iter->getOpcode()) {
-+ default:
-+ break;
-+ case AMDGPU::BRANCH_COND_i32:
-+ case AMDGPU::BRANCH_COND_f32:
-+ case AMDGPU::BRANCH:
-+ return true;
-+ };
-+ ++iter;
-+ }
-+ return false;
-+}
-+
-+MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
-+ MachineBasicBlock::iterator tmp = MBB->end();
-+ if (!MBB->size()) {
-+ return MBB->end();
-+ }
-+ while (--tmp) {
-+ if (tmp->getOpcode() == AMDGPU::ENDLOOP
-+ || tmp->getOpcode() == AMDGPU::ENDIF
-+ || tmp->getOpcode() == AMDGPU::ELSE) {
-+ if (tmp == MBB->begin()) {
-+ return tmp;
-+ } else {
-+ continue;
-+ }
-+ } else {
-+ return ++tmp;
-+ }
-+ }
-+ return MBB->end();
-+}
-+
-+void
-+AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned SrcReg, bool isKill,
-+ int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const {
-+ assert(!"Not Implemented");
-+}
-+
-+void
-+AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned DestReg, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const {
-+ assert(!"Not Implemented");
-+}
-+
-+MachineInstr *
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ int FrameIndex) const {
-+// TODO: Implement this function
-+ return 0;
-+}
-+MachineInstr*
-+AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ MachineInstr *LoadMI) const {
-+ // TODO: Implement this function
-+ return 0;
-+}
-+bool
-+AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+ unsigned Reg, bool UnfoldLoad,
-+ bool UnfoldStore,
-+ SmallVectorImpl<MachineInstr*> &NewMIs) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool
-+AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+ SmallVectorImpl<SDNode*> &NewNodes) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+unsigned
-+AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
-+ bool UnfoldLoad, bool UnfoldStore,
-+ unsigned *LoadRegIndex) const {
-+ // TODO: Implement this function
-+ return 0;
-+}
-+
-+bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+ int64_t Offset1, int64_t Offset2,
-+ unsigned NumLoads) const {
-+ assert(Offset2 > Offset1
-+ && "Second offset should be larger than first offset!");
-+ // If we have less than 16 loads in a row, and the offsets are within 16,
-+ // then schedule together.
-+ // TODO: Make the loads schedule near if it fits in a cacheline
-+ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
-+}
-+
-+bool
-+AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
-+ const {
-+ // TODO: Implement this function
-+ return true;
-+}
-+void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI) const {
-+ // TODO: Implement this function
-+}
-+
-+bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+bool
-+AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2)
-+ const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const {
-+ // TODO: Implement this function
-+ return false;
-+}
-+
-+bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
-+ // TODO: Implement this function
-+ return MI->getDesc().isPredicable();
-+}
-+
-+bool
-+AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
-+ // TODO: Implement this function
-+ return true;
-+}
-+
-+void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
-+ DebugLoc DL) const {
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ const AMDGPURegisterInfo & RI = getRegisterInfo();
-+
-+ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
-+ MachineOperand &MO = MI.getOperand(i);
-+ // Convert dst regclass to one that is supported by the ISA
-+ if (MO.isReg() && MO.isDef()) {
-+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
-+ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
-+ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
-+
-+ assert(newRegClass);
-+
-+ MRI.setRegClass(MO.getReg(), newRegClass);
-+ }
-+ }
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,149 @@
-+//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Contains the definition of a TargetInstrInfo class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUINSTRUCTIONINFO_H
-+#define AMDGPUINSTRUCTIONINFO_H
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUInstrInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+#include <map>
-+
-+#define GET_INSTRINFO_HEADER
-+#define GET_INSTRINFO_ENUM
-+#include "AMDGPUGenInstrInfo.inc"
-+
-+#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-+#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-+#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-+#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class MachineFunction;
-+class MachineInstr;
-+class MachineInstrBuilder;
-+
-+class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-+private:
-+ const AMDGPURegisterInfo RI;
-+ TargetMachine &TM;
-+ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
-+ MachineBasicBlock &MBB) const;
-+public:
-+ explicit AMDGPUInstrInfo(TargetMachine &tm);
-+
-+ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
-+
-+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
-+ unsigned &DstReg, unsigned &SubIdx) const;
-+
-+ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const;
-+ bool hasLoadFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const;
-+ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
-+ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
-+ int &FrameIndex) const;
-+ bool hasStoreFromStackSlot(const MachineInstr *MI,
-+ const MachineMemOperand *&MMO,
-+ int &FrameIndex) const;
-+
-+ MachineInstr *
-+ convertToThreeAddress(MachineFunction::iterator &MFI,
-+ MachineBasicBlock::iterator &MBBI,
-+ LiveVariables *LV) const;
-+
-+
-+ virtual void copyPhysReg(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI, DebugLoc DL,
-+ unsigned DestReg, unsigned SrcReg,
-+ bool KillSrc) const = 0;
-+
-+ void storeRegToStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned SrcReg, bool isKill, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const;
-+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI,
-+ unsigned DestReg, int FrameIndex,
-+ const TargetRegisterClass *RC,
-+ const TargetRegisterInfo *TRI) const;
-+
-+protected:
-+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ int FrameIndex) const;
-+ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
-+ MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops,
-+ MachineInstr *LoadMI) const;
-+public:
-+ bool canFoldMemoryOperand(const MachineInstr *MI,
-+ const SmallVectorImpl<unsigned> &Ops) const;
-+ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
-+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
-+ SmallVectorImpl<MachineInstr *> &NewMIs) const;
-+ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
-+ SmallVectorImpl<SDNode *> &NewNodes) const;
-+ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
-+ bool UnfoldLoad, bool UnfoldStore,
-+ unsigned *LoadRegIndex = 0) const;
-+ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
-+ int64_t Offset1, int64_t Offset2,
-+ unsigned NumLoads) const;
-+
-+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-+ void insertNoop(MachineBasicBlock &MBB,
-+ MachineBasicBlock::iterator MI) const;
-+ bool isPredicated(const MachineInstr *MI) const;
-+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-+ const SmallVectorImpl<MachineOperand> &Pred2) const;
-+ bool DefinesPredicate(MachineInstr *MI,
-+ std::vector<MachineOperand> &Pred) const;
-+ bool isPredicable(MachineInstr *MI) const;
-+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
-+
-+ // Helper functions that check the opcode for status information
-+ bool isLoadInst(llvm::MachineInstr *MI) const;
-+ bool isExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
-+ bool isStoreInst(llvm::MachineInstr *MI) const;
-+ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
-+
-+ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
-+ int64_t Imm) const = 0;
-+ virtual unsigned getIEQOpcode() const = 0;
-+ virtual bool isMov(unsigned opcode) const = 0;
-+
-+ /// \brief Convert the AMDIL MachineInstr to a supported ISA
-+ /// MachineInstr
-+ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
-+ DebugLoc DL) const;
-+
-+};
-+
-+} // End llvm namespace
-+
-+#endif // AMDGPUINSTRINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,74 @@
-+//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains DAG node definitions for the AMDGPU target.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Profiles
-+//===----------------------------------------------------------------------===//
-+
-+def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
-+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
-+]>;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDGPU DAG Nodes
-+//
-+
-+// out = (((a << 32) | b) >> c)
-+//
-+// Can be used to optimize rotl:
-+// rotl(a, b) = bitalign(a, a, 32 - b)
-+def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
-+
-+// The argument to this node is a dword address.
-+def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
-+
-+// out = a - floor(a)
-+def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
-+
-+// out = max(a, b) a and b are floats
-+def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are signed ints
-+def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = max(a, b) a and b are unsigned ints
-+def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are floats
-+def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are signed ints
-+def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// out = min(a, b) a and b are unsigned ints
-+def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
-+ [SDNPCommutative, SDNPAssociative]
-+>;
-+
-+// urecip - This operation is a helper for integer division, it returns the
-+// result of 1 / a as a fractional unsigned integer.
-+// out = (2^32 / a) + e
-+// e is rounding error
-+def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
-+
-+def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,190 @@
-+//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file contains instruction defs that are common to all hw codegen
-+// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
-+ field bits<16> AMDILOp = 0;
-+ field bits<3> Gen = 0;
-+
-+ let Namespace = "AMDGPU";
-+ let OutOperandList = outs;
-+ let InOperandList = ins;
-+ let AsmString = asm;
-+ let Pattern = pattern;
-+ let Itinerary = NullALU;
-+ let TSFlags{42-40} = Gen;
-+ let TSFlags{63-48} = AMDILOp;
-+}
-+
-+class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
-+ : AMDGPUInst<outs, ins, asm, pattern> {
-+
-+ field bits<32> Inst = 0xffffffff;
-+
-+}
-+
-+def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-+
-+def COND_EQ : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOEQ: case ISD::SETUEQ:
-+ case ISD::SETEQ: return true;}}}]
-+>;
-+
-+def COND_NE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETONE: case ISD::SETUNE:
-+ case ISD::SETNE: return true;}}}]
-+>;
-+def COND_GT : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOGT: case ISD::SETUGT:
-+ case ISD::SETGT: return true;}}}]
-+>;
-+
-+def COND_GE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOGE: case ISD::SETUGE:
-+ case ISD::SETGE: return true;}}}]
-+>;
-+
-+def COND_LT : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOLT: case ISD::SETULT:
-+ case ISD::SETLT: return true;}}}]
-+>;
-+
-+def COND_LE : PatLeaf <
-+ (cond),
-+ [{switch(N->get()){{default: return false;
-+ case ISD::SETOLE: case ISD::SETULE:
-+ case ISD::SETLE: return true;}}}]
-+>;
-+
-+//===----------------------------------------------------------------------===//
-+// Load/Store Pattern Fragments
-+//===----------------------------------------------------------------------===//
-+
-+def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
-+ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
-+}]>;
-+
-+class Constants {
-+int TWO_PI = 0x40c90fdb;
-+int PI = 0x40490fdb;
-+int TWO_PI_INV = 0x3e22f983;
-+}
-+def CONST : Constants;
-+
-+def FP_ZERO : PatLeaf <
-+ (fpimm),
-+ [{return N->getValueAPF().isZero();}]
-+>;
-+
-+def FP_ONE : PatLeaf <
-+ (fpimm),
-+ [{return N->isExactlyValue(1.0);}]
-+>;
-+
-+let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
-+
-+class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "CLAMP $dst, $src0",
-+ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
-+>;
-+
-+class FABS <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "FABS $dst, $src0",
-+ [(set rc:$dst, (fabs rc:$src0))]
-+>;
-+
-+class FNEG <RegisterClass rc> : AMDGPUShaderInst <
-+ (outs rc:$dst),
-+ (ins rc:$src0),
-+ "FNEG $dst, $src0",
-+ [(set rc:$dst, (fneg rc:$src0))]
-+>;
-+
-+def SHADER_TYPE : AMDGPUShaderInst <
-+ (outs),
-+ (ins i32imm:$type),
-+ "SHADER_TYPE $type",
-+ [(int_AMDGPU_shader_type imm:$type)]
-+>;
-+
-+} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
-+
-+/* Generic helper patterns for intrinsics */
-+/* -------------------------------------- */
-+
-+class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
-+ RegisterClass rc> : Pat <
-+ (fpow rc:$src0, rc:$src1),
-+ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
-+>;
-+
-+/* Other helper patterns */
-+/* --------------------- */
-+
-+/* Extract element pattern */
-+class Extract_Element <ValueType sub_type, ValueType vec_type,
-+ RegisterClass vec_class, int sub_idx,
-+ SubRegIndex sub_reg>: Pat<
-+ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
-+ (EXTRACT_SUBREG vec_class:$src, sub_reg)
-+>;
-+
-+/* Insert element pattern */
-+class Insert_Element <ValueType elem_type, ValueType vec_type,
-+ RegisterClass elem_class, RegisterClass vec_class,
-+ int sub_idx, SubRegIndex sub_reg> : Pat <
-+
-+ (vec_type (vector_insert (vec_type vec_class:$vec),
-+ (elem_type elem_class:$elem), sub_idx)),
-+ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
-+>;
-+
-+// Vector Build pattern
-+class Vector_Build <ValueType vecType, RegisterClass vectorClass,
-+ ValueType elemType, RegisterClass elemClass> : Pat <
-+ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
-+ (elemType elemClass:$z), (elemType elemClass:$w))),
-+ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
-+ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
-+ elemClass:$z, sel_z), elemClass:$w, sel_w)
-+>;
-+
-+// bitconvert pattern
-+class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
-+ (dt (bitconvert (st rc:$src0))),
-+ (dt rc:$src0)
-+>;
-+
-+class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
-+ (vt (AMDGPUdwordaddr (vt rc:$addr))),
-+ (vt rc:$addr)
-+>;
-+
-+include "R600Instructions.td"
-+
-+include "SIInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
---- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,62 @@
-+//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines intrinsics that are used by all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "AMDGPU", isTarget = 1 in {
-+
-+ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
-+ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
-+
-+ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
-+ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
-+ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
-+ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
-+ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
-+
-+ def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
-+}
-+
-+let TargetPrefix = "TGSI", isTarget = 1 in {
-+
-+ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
-+}
-+
-+include "SIIntrinsics.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,418 @@
-+//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief This is the parent TargetLowering class for hardware code gen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUISelLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/CodeGen/SelectionDAG.h"
-+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+
-+using namespace llvm;
-+
-+AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
-+ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
-+
-+ // Initialize target lowering borrowed from AMDIL
-+ InitAMDILLowering();
-+
-+ // We need to custom lower some of the intrinsics
-+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
-+
-+ // Library functions. These default to Expand, but we have instructions
-+ // for them.
-+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
-+ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
-+ setOperationAction(ISD::FPOW, MVT::f32, Legal);
-+ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
-+ setOperationAction(ISD::FABS, MVT::f32, Legal);
-+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
-+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
-+
-+ // Lower floating point store/load to integer store/load to reduce the number
-+ // of patterns in tablegen.
-+ setOperationAction(ISD::STORE, MVT::f32, Promote);
-+ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
-+
-+ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
-+ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
-+
-+ setOperationAction(ISD::LOAD, MVT::f32, Promote);
-+ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
-+
-+ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
-+ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
-+
-+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
-+ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
-+ setOperationAction(ISD::UREM, MVT::i32, Expand);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// TargetLowering Callbacks
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerFormalArguments(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const {
-+ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
-+ InVals.push_back(SDValue());
-+ }
-+ return Chain;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerReturn(
-+ SDValue Chain,
-+ CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::OutputArg> &Outs,
-+ const SmallVectorImpl<SDValue> &OutVals,
-+ DebugLoc DL, SelectionDAG &DAG) const {
-+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
-+}
-+
-+//===---------------------------------------------------------------------===//
-+// Target specific lowering
-+//===---------------------------------------------------------------------===//
-+
-+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
-+ const {
-+ switch (Op.getOpcode()) {
-+ default:
-+ Op.getNode()->dump();
-+ assert(0 && "Custom lowering code for this"
-+ "instruction is not implemented yet!");
-+ break;
-+ // AMDIL DAG lowering
-+ case ISD::SDIV: return LowerSDIV(Op, DAG);
-+ case ISD::SREM: return LowerSREM(Op, DAG);
-+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
-+ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
-+ // AMDGPU DAG lowering
-+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
-+ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
-+ }
-+ return Op;
-+}
-+
-+SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ switch (IntrinsicID) {
-+ default: return Op;
-+ case AMDGPUIntrinsic::AMDIL_abs:
-+ return LowerIntrinsicIABS(Op, DAG);
-+ case AMDGPUIntrinsic::AMDIL_exp:
-+ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
-+ case AMDGPUIntrinsic::AMDGPU_lrp:
-+ return LowerIntrinsicLRP(Op, DAG);
-+ case AMDGPUIntrinsic::AMDIL_fraction:
-+ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
-+ case AMDGPUIntrinsic::AMDIL_mad:
-+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2), Op.getOperand(3));
-+ case AMDGPUIntrinsic::AMDIL_max:
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_imax:
-+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_umax:
-+ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDIL_min:
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_imin:
-+ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDGPU_umin:
-+ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2));
-+ case AMDGPUIntrinsic::AMDIL_round_nearest:
-+ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
-+ }
-+}
-+
-+/// IABS(a) = SMAX(sub(0, a), a)
-+SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
-+ SelectionDAG &DAG) const {
-+
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+ Op.getOperand(1));
-+
-+ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
-+}
-+
-+/// Linear Interpolation
-+/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
-+SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
-+ DAG.getConstantFP(1.0f, MVT::f32),
-+ Op.getOperand(1));
-+ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
-+ Op.getOperand(3));
-+ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
-+ Op.getOperand(2),
-+ OneSubAC);
-+}
-+
-+/// \brief Generate Min/Max node
-+SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue LHS = Op.getOperand(0);
-+ SDValue RHS = Op.getOperand(1);
-+ SDValue True = Op.getOperand(2);
-+ SDValue False = Op.getOperand(3);
-+ SDValue CC = Op.getOperand(4);
-+
-+ if (VT != MVT::f32 ||
-+ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
-+ return SDValue();
-+ }
-+
-+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
-+ switch (CCOpcode) {
-+ case ISD::SETOEQ:
-+ case ISD::SETONE:
-+ case ISD::SETUNE:
-+ case ISD::SETNE:
-+ case ISD::SETUEQ:
-+ case ISD::SETEQ:
-+ case ISD::SETFALSE:
-+ case ISD::SETFALSE2:
-+ case ISD::SETTRUE:
-+ case ISD::SETTRUE2:
-+ case ISD::SETUO:
-+ case ISD::SETO:
-+ assert(0 && "Operation should already be optimised !");
-+ case ISD::SETULE:
-+ case ISD::SETULT:
-+ case ISD::SETOLE:
-+ case ISD::SETOLT:
-+ case ISD::SETLE:
-+ case ISD::SETLT: {
-+ if (LHS == True)
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+ else
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+ }
-+ case ISD::SETGT:
-+ case ISD::SETGE:
-+ case ISD::SETUGE:
-+ case ISD::SETOGE:
-+ case ISD::SETUGT:
-+ case ISD::SETOGT: {
-+ if (LHS == True)
-+ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
-+ else
-+ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
-+ }
-+ case ISD::SETCC_INVALID:
-+ assert(0 && "Invalid setcc condcode !");
-+ }
-+ return Op;
-+}
-+
-+
-+
-+SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
-+ SelectionDAG &DAG) const {
-+ DebugLoc DL = Op.getDebugLoc();
-+ EVT VT = Op.getValueType();
-+
-+ SDValue Num = Op.getOperand(0);
-+ SDValue Den = Op.getOperand(1);
-+
-+ SmallVector<SDValue, 8> Results;
-+
-+ // RCP = URECIP(Den) = 2^32 / Den + e
-+ // e is rounding error.
-+ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
-+
-+ // RCP_LO = umulo(RCP, Den)
-+ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
-+
-+ // RCP_HI = mulhu(RCP, Den)
-+ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
-+
-+ // NEG_RCP_LO = -RCP_LO
-+ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
-+ RCP_LO);
-+
-+ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
-+ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+ NEG_RCP_LO, RCP_LO,
-+ ISD::SETEQ);
-+ // Calculate the rounding error from the URECIP instruction
-+ // E = mulhu(ABS_RCP_LO, RCP)
-+ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
-+
-+ // RCP_A_E = RCP + E
-+ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
-+
-+ // RCP_S_E = RCP - E
-+ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
-+
-+ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E)
-+ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
-+ RCP_A_E, RCP_S_E,
-+ ISD::SETEQ);
-+ // Quotient = mulhu(Tmp0, Num)
-+ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
-+
-+ // Num_S_Remainder = Quotient * Den
-+ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
-+
-+ // Remainder = Num - Num_S_Remainder
-+ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
-+
-+ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
-+ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
-+ DAG.getConstant(-1, VT),
-+ DAG.getConstant(0, VT),
-+ ISD::SETGE);
-+ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
-+ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
-+ DAG.getConstant(0, VT),
-+ DAG.getConstant(-1, VT),
-+ DAG.getConstant(0, VT),
-+ ISD::SETGE);
-+ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
-+ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
-+ Remainder_GE_Zero);
-+
-+ // Calculate Division result:
-+
-+ // Quotient_A_One = Quotient + 1
-+ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
-+ DAG.getConstant(1, VT));
-+
-+ // Quotient_S_One = Quotient - 1
-+ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
-+ DAG.getConstant(1, VT));
-+
-+ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
-+ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+ Quotient, Quotient_A_One, ISD::SETEQ);
-+
-+ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
-+ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+ Quotient_S_One, Div, ISD::SETEQ);
-+
-+ // Calculate Rem result:
-+
-+ // Remainder_S_Den = Remainder - Den
-+ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
-+
-+ // Remainder_A_Den = Remainder + Den
-+ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
-+
-+ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
-+ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
-+ Remainder, Remainder_S_Den, ISD::SETEQ);
-+
-+ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
-+ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
-+ Remainder_A_Den, Rem, ISD::SETEQ);
-+ SDValue Ops[2];
-+ Ops[0] = Div;
-+ Ops[1] = Rem;
-+ return DAG.getMergeValues(Ops, 2, DL);
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Helper functions
-+//===----------------------------------------------------------------------===//
-+
-+bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
-+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+ return CFP->isExactlyValue(1.0);
-+ }
-+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+ return C->isAllOnesValue();
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
-+ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
-+ return CFP->getValueAPF().isZero();
-+ }
-+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-+ return C->isNullValue();
-+ }
-+ return false;
-+}
-+
-+SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
-+ const TargetRegisterClass *RC,
-+ unsigned Reg, EVT VT) const {
-+ MachineFunction &MF = DAG.getMachineFunction();
-+ MachineRegisterInfo &MRI = MF.getRegInfo();
-+ unsigned VirtualRegister;
-+ if (!MRI.isLiveIn(Reg)) {
-+ VirtualRegister = MRI.createVirtualRegister(RC);
-+ MRI.addLiveIn(Reg, VirtualRegister);
-+ } else {
-+ VirtualRegister = MRI.getLiveInVirtReg(Reg);
-+ }
-+ return DAG.getRegister(VirtualRegister, VT);
-+}
-+
-+#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
-+
-+const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
-+ switch (Opcode) {
-+ default: return 0;
-+ // AMDIL DAG nodes
-+ NODE_NAME_CASE(MAD);
-+ NODE_NAME_CASE(CALL);
-+ NODE_NAME_CASE(UMUL);
-+ NODE_NAME_CASE(DIV_INF);
-+ NODE_NAME_CASE(RET_FLAG);
-+ NODE_NAME_CASE(BRANCH_COND);
-+
-+ // AMDGPU DAG nodes
-+ NODE_NAME_CASE(DWORDADDR)
-+ NODE_NAME_CASE(FRACT)
-+ NODE_NAME_CASE(FMAX)
-+ NODE_NAME_CASE(SMAX)
-+ NODE_NAME_CASE(UMAX)
-+ NODE_NAME_CASE(FMIN)
-+ NODE_NAME_CASE(SMIN)
-+ NODE_NAME_CASE(UMIN)
-+ NODE_NAME_CASE(URECIP)
-+ NODE_NAME_CASE(INTERP)
-+ NODE_NAME_CASE(INTERP_P0)
-+ NODE_NAME_CASE(EXPORT)
-+ NODE_NAME_CASE(CONST_ADDRESS)
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h llvm-r600/lib/Target/R600/AMDGPUISelLowering.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.h 2013-01-25 19:43:57.426716388 +0100
-@@ -0,0 +1,145 @@
-+//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Interface definition of the TargetLowering class that is common
-+/// to all AMD GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUISELLOWERING_H
-+#define AMDGPUISELLOWERING_H
-+
-+#include "llvm/Target/TargetLowering.h"
-+
-+namespace llvm {
-+
-+class MachineRegisterInfo;
-+
-+class AMDGPUTargetLowering : public TargetLowering {
-+private:
-+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
-+
-+protected:
-+
-+ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
-+ /// MachineFunction.
-+ ///
-+ /// \returns a RegisterSDNode representing Reg.
-+ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
-+ unsigned Reg, EVT VT) const;
-+
-+ bool isHWTrueValue(SDValue Op) const;
-+ bool isHWFalseValue(SDValue Op) const;
-+
-+public:
-+ AMDGPUTargetLowering(TargetMachine &TM);
-+
-+ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::InputArg> &Ins,
-+ DebugLoc DL, SelectionDAG &DAG,
-+ SmallVectorImpl<SDValue> &InVals) const;
-+
-+ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
-+ bool isVarArg,
-+ const SmallVectorImpl<ISD::OutputArg> &Outs,
-+ const SmallVectorImpl<SDValue> &OutVals,
-+ DebugLoc DL, SelectionDAG &DAG) const;
-+
-+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
-+ virtual const char* getTargetNodeName(unsigned Opcode) const;
-+
-+// Functions defined in AMDILISelLowering.cpp
-+public:
-+
-+ /// \brief Determine which of the bits specified in \p Mask are known to be
-+ /// either zero or one and return them in the \p KnownZero and \p KnownOne
-+ /// bitsets.
-+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
-+ APInt &KnownZero,
-+ APInt &KnownOne,
-+ const SelectionDAG &DAG,
-+ unsigned Depth = 0) const;
-+
-+ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
-+ const CallInst &I, unsigned Intrinsic) const;
-+
-+ /// We want to mark f32/f64 floating point values as legal.
-+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
-+
-+ /// We don't want to shrink f64/f32 constants.
-+ bool ShouldShrinkFPConstant(EVT VT) const;
-+
-+private:
-+ void InitAMDILLowering();
-+ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
-+ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
-+ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
-+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
-+};
-+
-+namespace AMDGPUISD {
-+
-+enum {
-+ // AMDIL ISD Opcodes
-+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
-+ MAD, // 32bit Fused Multiply Add instruction
-+ CALL, // Function call based on a single integer
-+ UMUL, // 32bit unsigned multiplication
-+ DIV_INF, // Divide with infinity returned on zero divisor
-+ RET_FLAG,
-+ BRANCH_COND,
-+ // End AMDIL ISD Opcodes
-+ BITALIGN,
-+ DWORDADDR,
-+ FRACT,
-+ FMAX,
-+ SMAX,
-+ UMAX,
-+ FMIN,
-+ SMIN,
-+ UMIN,
-+ URECIP,
-+ INTERP,
-+ INTERP_P0,
-+ EXPORT,
-+ CONST_ADDRESS,
-+ LAST_AMDGPU_ISD_NUMBER
-+};
-+
-+
-+} // End namespace AMDGPUISD
-+
-+namespace SIISD {
-+
-+enum {
-+ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
-+ VCC_AND,
-+ VCC_BITCAST
-+};
-+
-+} // End namespace SIISD
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUISELLOWERING_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,83 @@
-+//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+
-+#include "AMDGPUMCInstLower.h"
-+#include "AMDGPUAsmPrinter.h"
-+#include "R600InstrInfo.h"
-+#include "llvm/CodeGen/MachineBasicBlock.h"
-+#include "llvm/CodeGen/MachineInstr.h"
-+#include "llvm/Constants.h"
-+#include "llvm/MC/MCInst.h"
-+#include "llvm/MC/MCStreamer.h"
-+#include "llvm/MC/MCExpr.h"
-+#include "llvm/Support/ErrorHandling.h"
-+
-+using namespace llvm;
-+
-+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
-+ Ctx(ctx)
-+{ }
-+
-+void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
-+ OutMI.setOpcode(MI->getOpcode());
-+
-+ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
-+ const MachineOperand &MO = MI->getOperand(i);
-+
-+ MCOperand MCOp;
-+ switch (MO.getType()) {
-+ default:
-+ llvm_unreachable("unknown operand type");
-+ case MachineOperand::MO_FPImmediate: {
-+ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
-+ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
-+ "Only floating point immediates are supported at the moment.");
-+ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
-+ break;
-+ }
-+ case MachineOperand::MO_Immediate:
-+ MCOp = MCOperand::CreateImm(MO.getImm());
-+ break;
-+ case MachineOperand::MO_Register:
-+ MCOp = MCOperand::CreateReg(MO.getReg());
-+ break;
-+ case MachineOperand::MO_MachineBasicBlock:
-+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
-+ MO.getMBB()->getSymbol(), Ctx));
-+ }
-+ OutMI.addOperand(MCOp);
-+ }
-+}
-+
-+void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-+ AMDGPUMCInstLower MCInstLowering(OutContext);
-+
-+ if (MI->isBundle()) {
-+ const MachineBasicBlock *MBB = MI->getParent();
-+ MachineBasicBlock::const_instr_iterator I = MI;
-+ ++I;
-+ while (I != MBB->end() && I->isInsideBundle()) {
-+ MCInst MCBundleInst;
-+ const MachineInstr *BundledInst = I;
-+ MCInstLowering.lower(BundledInst, MCBundleInst);
-+ OutStreamer.EmitInstruction(MCBundleInst);
-+ ++I;
-+ }
-+ } else {
-+ MCInst TmpInst;
-+ MCInstLowering.lower(MI, TmpInst);
-+ OutStreamer.EmitInstruction(TmpInst);
-+ }
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,34 @@
-+//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_MCINSTLOWER_H
-+#define AMDGPU_MCINSTLOWER_H
-+
-+namespace llvm {
-+
-+class MCInst;
-+class MCContext;
-+class MachineInstr;
-+
-+class AMDGPUMCInstLower {
-+
-+ MCContext &Ctx;
-+
-+public:
-+ AMDGPUMCInstLower(MCContext &ctx);
-+
-+ /// \brief Lower a MachineInstr to an MCInst
-+ void lower(const MachineInstr *MI, MCInst &OutMI) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif //AMDGPU_MCINSTLOWER_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,51 @@
-+//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPURegisterInfo.h"
-+#include "AMDGPUTargetMachine.h"
-+
-+using namespace llvm;
-+
-+AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
-+ const TargetInstrInfo &tii)
-+: AMDGPUGenRegisterInfo(0),
-+ TM(tm),
-+ TII(tii)
-+ { }
-+
-+//===----------------------------------------------------------------------===//
-+// Function handling callbacks - Functions are a seldom used feature of GPUs, so
-+// they are not supported at this time.
-+//===----------------------------------------------------------------------===//
-+
-+const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
-+
-+const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
-+ const {
-+ return &CalleeSavedReg;
-+}
-+
-+void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
-+ int SPAdj,
-+ RegScavenger *RS) const {
-+ assert(!"Subroutines not supported yet");
-+}
-+
-+unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
-+ assert(!"Subroutines not supported yet");
-+ return 0;
-+}
-+
-+#define GET_REGINFO_TARGET_DESC
-+#include "AMDGPUGenRegisterInfo.inc"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h 2013-01-25 19:43:57.430049721 +0100
-@@ -0,0 +1,63 @@
-+//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
-+/// targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUREGISTERINFO_H
-+#define AMDGPUREGISTERINFO_H
-+
-+#include "llvm/ADT/BitVector.h"
-+#include "llvm/Target/TargetRegisterInfo.h"
-+
-+#define GET_REGINFO_HEADER
-+#define GET_REGINFO_ENUM
-+#include "AMDGPUGenRegisterInfo.inc"
-+
-+namespace llvm {
-+
-+class AMDGPUTargetMachine;
-+class TargetInstrInfo;
-+
-+struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
-+ TargetMachine &TM;
-+ const TargetInstrInfo &TII;
-+ static const uint16_t CalleeSavedReg;
-+
-+ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
-+
-+ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
-+ assert(!"Unimplemented"); return BitVector();
-+ }
-+
-+ /// \param RC is an AMDIL reg class.
-+ ///
-+ /// \returns The ISA reg class that is equivalent to \p RC.
-+ virtual const TargetRegisterClass * getISARegClass(
-+ const TargetRegisterClass * RC) const {
-+ assert(!"Unimplemented"); return NULL;
-+ }
-+
-+ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
-+ assert(!"Unimplemented"); return NULL;
-+ }
-+
-+ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
-+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
-+ RegScavenger *RS) const;
-+ unsigned getFrameRegister(const MachineFunction &MF) const;
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUREGISTERINFO_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td
---- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,22 @@
-+//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// Tablegen register definitions common to all hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let Namespace = "AMDGPU" in {
-+ def sel_x : SubRegIndex;
-+ def sel_y : SubRegIndex;
-+ def sel_z : SubRegIndex;
-+ def sel_w : SubRegIndex;
-+}
-+
-+include "R600RegisterInfo.td"
-+include "SIRegisterInfo.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,714 @@
-+//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// The pass implemented in this file transforms the program's control flow
-+/// graph into a form that's suitable for code generation on hardware that
-+/// implements control flow by execution masking. This currently includes all
-+/// AMD GPUs, but it may be useful for other types of hardware as well.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPU.h"
-+#include "llvm/Module.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/Analysis/RegionIterator.h"
-+#include "llvm/Analysis/RegionInfo.h"
-+#include "llvm/Analysis/RegionPass.h"
-+#include "llvm/Transforms/Utils/SSAUpdater.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+// Definition of the complex types used in this pass.
-+
-+typedef std::pair<BasicBlock *, Value *> BBValuePair;
-+typedef ArrayRef<BasicBlock*> BBVecRef;
-+
-+typedef SmallVector<RegionNode*, 8> RNVector;
-+typedef SmallVector<BasicBlock*, 8> BBVector;
-+typedef SmallVector<BBValuePair, 2> BBValueVector;
-+
-+typedef DenseMap<PHINode *, BBValueVector> PhiMap;
-+typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
-+typedef DenseMap<BasicBlock *, Value *> BBPredicates;
-+typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
-+typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
-+
-+// The name for newly created blocks.
-+
-+static const char *FlowBlockName = "Flow";
-+
-+/// \brief Transforms the control flow graph on one single entry/exit region
-+/// at a time.
-+///
-+/// After the transform all "If"/"Then"/"Else" style control flow looks like
-+/// this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 |
-+/// | /
-+/// |/
-+/// 3
-+/// || Where:
-+/// | | 1 = "If" block, calculates the condition
-+/// 4 | 2 = "Then" subregion, runs if the condition is true
-+/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
-+/// |/ 4 = "Else" optional subregion, runs if the condition is false
-+/// 5 5 = "End" block, also rejoins the control flow
-+/// \endverbatim
-+///
-+/// Control flow is expressed as a branch where the true exit goes into the
-+/// "Then"/"Else" region, while the false exit skips the region.
-+/// The condition for the optional "Else" region is expressed as a PHI node.
-+/// The incoming values of the PHI node are true for the "If" edge and false
-+/// for the "Then" edge.
-+///
-+/// In addition, even complicated loops look like this:
-+///
-+/// \verbatim
-+/// 1
-+/// ||
-+/// | |
-+/// 2 ^ Where:
-+/// | / 1 = "Entry" block
-+/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
-+/// 3 3 = "Flow" block, with back edge to entry block
-+/// |
-+/// \endverbatim
-+///
-+/// The back edge of the "Flow" block is always on the false side of the branch
-+/// while the true side continues the general flow. So the loop condition
-+/// consists of a network of PHI nodes where the true incoming values express
-+/// breaks and the false values express continue states.
-+class AMDGPUStructurizeCFG : public RegionPass {
-+
-+ static char ID;
-+
-+ Type *Boolean;
-+ ConstantInt *BoolTrue;
-+ ConstantInt *BoolFalse;
-+ UndefValue *BoolUndef;
-+
-+ Function *Func;
-+ Region *ParentRegion;
-+
-+ DominatorTree *DT;
-+
-+ RNVector Order;
-+ VisitedMap Visited;
-+ PredMap Predicates;
-+ BBPhiMap DeletedPhis;
-+ BBVector FlowsInserted;
-+
-+ BasicBlock *LoopStart;
-+ BasicBlock *LoopEnd;
-+ BBPredicates LoopPred;
-+
-+ void orderNodes();
-+
-+ void buildPredicate(BranchInst *Term, unsigned Idx,
-+ BBPredicates &Pred, bool Invert);
-+
-+ void analyzeBlock(BasicBlock *BB);
-+
-+ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
-+
-+ void collectInfos();
-+
-+ bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
-+
-+ void killTerminator(BasicBlock *BB);
-+
-+ RegionNode *skipChained(RegionNode *Node);
-+
-+ void delPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+ void addPhiValues(BasicBlock *From, BasicBlock *To);
-+
-+ BasicBlock *getNextFlow(BasicBlock *Prev);
-+
-+ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
-+
-+ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
-+
-+ void createFlow();
-+
-+ void insertConditions();
-+
-+ void rebuildSSA();
-+
-+public:
-+ AMDGPUStructurizeCFG():
-+ RegionPass(ID) {
-+
-+ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
-+ }
-+
-+ virtual bool doInitialization(Region *R, RGPassManager &RGM);
-+
-+ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
-+
-+ virtual const char *getPassName() const {
-+ return "AMDGPU simplify control flow";
-+ }
-+
-+ void getAnalysisUsage(AnalysisUsage &AU) const {
-+
-+ AU.addRequired<DominatorTree>();
-+ AU.addPreserved<DominatorTree>();
-+ RegionPass::getAnalysisUsage(AU);
-+ }
-+
-+};
-+
-+} // end anonymous namespace
-+
-+char AMDGPUStructurizeCFG::ID = 0;
-+
-+/// \brief Initialize the types and constants used in the pass
-+bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
-+ LLVMContext &Context = R->getEntry()->getContext();
-+
-+ Boolean = Type::getInt1Ty(Context);
-+ BoolTrue = ConstantInt::getTrue(Context);
-+ BoolFalse = ConstantInt::getFalse(Context);
-+ BoolUndef = UndefValue::get(Boolean);
-+
-+ return false;
-+}
-+
-+/// \brief Build up the general order of nodes
-+void AMDGPUStructurizeCFG::orderNodes() {
-+ scc_iterator<Region *> I = scc_begin(ParentRegion),
-+ E = scc_end(ParentRegion);
-+ for (Order.clear(); I != E; ++I) {
-+ std::vector<RegionNode *> &Nodes = *I;
-+ Order.append(Nodes.begin(), Nodes.end());
-+ }
-+}
-+
-+/// \brief Build blocks and loop predicates
-+void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
-+ BBPredicates &Pred, bool Invert) {
-+ Value *True = Invert ? BoolFalse : BoolTrue;
-+ Value *False = Invert ? BoolTrue : BoolFalse;
-+
-+ RegionInfo *RI = ParentRegion->getRegionInfo();
-+ BasicBlock *BB = Term->getParent();
-+
-+ // Handle the case where multiple regions start at the same block
-+ Region *R = BB != ParentRegion->getEntry() ?
-+ RI->getRegionFor(BB) : ParentRegion;
-+
-+ if (R == ParentRegion) {
-+ // It's a top level block in our region
-+ Value *Cond = True;
-+ if (Term->isConditional()) {
-+ BasicBlock *Other = Term->getSuccessor(!Idx);
-+
-+ if (Visited.count(Other)) {
-+ if (!Pred.count(Other))
-+ Pred[Other] = False;
-+
-+ if (!Pred.count(BB))
-+ Pred[BB] = True;
-+ return;
-+ }
-+ Cond = Term->getCondition();
-+
-+ if (Idx != Invert)
-+ Cond = BinaryOperator::CreateNot(Cond, "", Term);
-+ }
-+
-+ Pred[BB] = Cond;
-+
-+ } else if (ParentRegion->contains(R)) {
-+ // It's a block in a sub region
-+ while(R->getParent() != ParentRegion)
-+ R = R->getParent();
-+
-+ Pred[R->getEntry()] = True;
-+
-+ } else {
-+ // It's a branch from outside into our parent region
-+ Pred[BB] = True;
-+ }
-+}
-+
-+/// \brief Analyze the incoming edges of a block and build up its predicates
-+void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
-+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
-+ BBPredicates &Pred = Predicates[BB];
-+
-+ for (; PI != PE; ++PI) {
-+ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
-+
-+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+ BasicBlock *Succ = Term->getSuccessor(i);
-+ if (Succ != BB)
-+ continue;
-+ buildPredicate(Term, i, Pred, false);
-+ }
-+ }
-+}
-+
-+/// \brief Analyze the conditions that lead to looping back to a previous block
-+void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
-+ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
-+
-+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
-+ BasicBlock *Succ = Term->getSuccessor(i);
-+
-+ // Ignore it if it's not a back edge
-+ if (!Visited.count(Succ))
-+ continue;
-+
-+ buildPredicate(Term, i, LoopPred, true);
-+
-+ LoopEnd = BB;
-+ if (Visited[Succ] < LoopIdx) {
-+ LoopIdx = Visited[Succ];
-+ LoopStart = Succ;
-+ }
-+ }
-+}
-+
-+/// \brief Collect various loop and predicate infos
-+void AMDGPUStructurizeCFG::collectInfos() {
-+ unsigned Number = 0, LoopIdx = ~0;
-+
-+ // Reset predicate
-+ Predicates.clear();
-+
-+ // and loop infos
-+ LoopStart = LoopEnd = 0;
-+ LoopPred.clear();
-+
-+ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
-+ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
-+
-+ // Analyze all the conditions leading to a node
-+ analyzeBlock((*OI)->getEntry());
-+
-+ if ((*OI)->isSubRegion())
-+ continue;
-+
-+ // Find the first/last loop nodes and loop predicates
-+ analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
-+ }
-+}
-+
-+/// \brief Does A dominate all the predicates of B ?
-+bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
-+ BBPredicates &Preds = Predicates[B];
-+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+ PI != PE; ++PI) {
-+
-+ if (!DT->dominates(A, PI->first))
-+ return false;
-+ }
-+ return true;
-+}
-+
-+/// \brief Remove phi values from all successors and then remove the terminator.
-+void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
-+ TerminatorInst *Term = BB->getTerminator();
-+ if (!Term)
-+ return;
-+
-+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
-+ SI != SE; ++SI) {
-+
-+ delPhiValues(BB, *SI);
-+ }
-+
-+ Term->eraseFromParent();
-+}
-+
-+/// First: Skip forward to the first region node that either isn't a subregion
-+/// or doesn't dominate its exit, removing all skipped nodes from the node order.
-+///
-+/// Second: Handle the first successor directly if the resulting node's successor
-+/// predicates are still dominated by the original entry.
-+RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
-+ BasicBlock *Entry = Node->getEntry();
-+
-+ // Skip forward as long as it is just a linear flow
-+ while (true) {
-+ BasicBlock *Entry = Node->getEntry(); // NOTE: shadows the outer Entry
-+ BasicBlock *Exit;
-+
-+ if (Node->isSubRegion()) {
-+ Exit = Node->getNodeAs<Region>()->getExit();
-+ } else {
-+ TerminatorInst *Term = Entry->getTerminator();
-+ if (Term->getNumSuccessors() != 1)
-+ break;
-+ Exit = Term->getSuccessor(0);
-+ }
-+
-+ // It's a back edge, break here so we can insert a loop node
-+ if (!Visited.count(Exit))
-+ return Node;
-+
-+ // The exit can also be reached without passing through this node's entry
-+ if (!DT->dominates(Entry, Exit))
-+ return Node;
-+
-+ RegionNode *Next = ParentRegion->getNode(Exit);
-+ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
-+ assert(I != Order.end());
-+
-+ Visited.erase(Next->getEntry());
-+ Order.erase(I);
-+ Node = Next;
-+ }
-+
-+ BasicBlock *BB = Node->getEntry();
-+ TerminatorInst *Term = BB->getTerminator();
-+ if (Term->getNumSuccessors() != 2)
-+ return Node;
-+
-+ // Our node has exactly two successors, check if we can handle
-+ // one of them directly (Entry here is the original, function-level entry)
-+ BasicBlock *Succ = Term->getSuccessor(0);
-+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
-+ Succ = Term->getSuccessor(1);
-+ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
-+ return Node;
-+ } else {
-+ BasicBlock *Succ2 = Term->getSuccessor(1);
-+ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
-+ dominatesPredicates(Entry, Succ2))
-+ Succ = Succ2;
-+ }
-+
-+ RegionNode *Next = ParentRegion->getNode(Succ);
-+ RNVector::iterator E = Order.end();
-+ RNVector::iterator I = std::find(Order.begin(), E, Next);
-+ assert(I != E);
-+
-+ killTerminator(BB);
-+ FlowsInserted.push_back(BB);
-+ Visited.erase(Succ);
-+ Order.erase(I);
-+ return ParentRegion->getNode(wireFlowBlock(BB, Next));
-+}
-+
-+/// \brief Remove all PHI values coming from "From" into "To" and remember
-+/// them in DeletedPhis
-+void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
-+ PhiMap &Map = DeletedPhis[To];
-+ for (BasicBlock::iterator I = To->begin(), E = To->end();
-+ I != E && isa<PHINode>(*I);) {
-+
-+ PHINode &Phi = cast<PHINode>(*I++);
-+ while (Phi.getBasicBlockIndex(From) != -1) {
-+ Value *Deleted = Phi.removeIncomingValue(From, false);
-+ Map[&Phi].push_back(std::make_pair(From, Deleted));
-+ }
-+ }
-+}
-+
-+/// \brief Add the PHI values back once we know the new predecessor
-+void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
-+ if (!DeletedPhis.count(To))
-+ return;
-+
-+ PhiMap &Map = DeletedPhis[To];
-+ SSAUpdater Updater;
-+
-+ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
-+
-+ PHINode *Phi = I->first;
-+ Updater.Initialize(Phi->getType(), "");
-+ BasicBlock *Fallback = To;
-+ bool HaveFallback = false;
-+
-+ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
-+ VI != VE; ++VI) {
-+
-+ Updater.AddAvailableValue(VI->first, VI->second);
-+ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
-+ if (Dom == VI->first)
-+ HaveFallback = true;
-+ else if (Dom != Fallback)
-+ HaveFallback = false;
-+ Fallback = Dom;
-+ }
-+ if (!HaveFallback) {
-+ Value *Undef = UndefValue::get(Phi->getType());
-+ Updater.AddAvailableValue(Fallback, Undef);
-+ }
-+
-+ Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
-+ }
-+ DeletedPhis.erase(To);
-+}
-+
-+/// \brief Create a new flow node and update dominator tree and region info
-+BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
-+ LLVMContext &Context = Func->getContext();
-+ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
-+ Order.back()->getEntry();
-+ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
-+ Func, Insert);
-+ DT->addNewBlock(Flow, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
-+ FlowsInserted.push_back(Flow);
-+ return Flow;
-+}
-+
-+/// \brief Can we predict that this node will always be called?
-+bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
-+ BasicBlock *Node) {
-+ BBPredicates &Preds = Predicates[Node];
-+ bool Dominated = false;
-+
-+ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
-+ I != E; ++I) {
-+
-+ if (I->second != BoolTrue)
-+ return false;
-+
-+ if (!Dominated && DT->dominates(I->first, Prev))
-+ Dominated = true;
-+ }
-+ return Dominated;
-+}
-+
-+/// \brief Wire up the new control flow by inserting or updating the branch
-+/// instructions at node exits
-+BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
-+ RegionNode *Node) {
-+ BasicBlock *Entry = Node->getEntry();
-+
-+ if (LoopStart == Entry) {
-+ LoopStart = Prev;
-+ LoopPred[Prev] = BoolTrue;
-+ }
-+
-+ // Wire it up temporarily; skipChained may recurse into us
-+ BranchInst::Create(Entry, Prev);
-+ DT->changeImmediateDominator(Entry, Prev);
-+ addPhiValues(Prev, Entry);
-+
-+ Node = skipChained(Node);
-+
-+ BasicBlock *Next = getNextFlow(Prev);
-+ if (!isPredictableTrue(Prev, Entry)) {
-+ // Let Prev point to entry and next block (condition is still undefined)
-+ Prev->getTerminator()->eraseFromParent();
-+ BranchInst::Create(Entry, Next, BoolUndef, Prev);
-+ } else {
-+ DT->changeImmediateDominator(Next, Entry);
-+ }
-+
-+ // Let the node's exit(s) point to the next block
-+ if (Node->isSubRegion()) {
-+ Region *SubRegion = Node->getNodeAs<Region>();
-+ BasicBlock *Exit = SubRegion->getExit();
-+
-+ // Find all the edges from the sub region to the exit
-+ BBVector ToDo;
-+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
-+ if (SubRegion->contains(*I))
-+ ToDo.push_back(*I);
-+ }
-+
-+ // Modify the edges to point to the new flow block
-+ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
-+ delPhiValues(*I, Exit);
-+ TerminatorInst *Term = (*I)->getTerminator();
-+ Term->replaceUsesOfWith(Exit, Next);
-+ }
-+
-+ // Update the region info
-+ SubRegion->replaceExit(Next);
-+
-+ } else {
-+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
-+ killTerminator(BB);
-+ BranchInst::Create(Next, BB);
-+
-+ if (BB == LoopEnd)
-+ LoopEnd = 0;
-+ }
-+
-+ return Next;
-+}
-+
-+/// Destroy node order and visited map, build up flow order instead.
-+/// After this function control flow looks like it should be, but
-+/// branches only have undefined conditions.
-+void AMDGPUStructurizeCFG::createFlow() {
-+ DeletedPhis.clear();
-+
-+ BasicBlock *Prev = Order.pop_back_val()->getEntry();
-+ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
-+ Visited.erase(Prev);
-+
-+ if (LoopStart == Prev) {
-+ // Loop starts at entry, split entry so that we can predicate it
-+ BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
-+ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
-+ DT->addNewBlock(Split, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+ Predicates[Split] = Predicates[Prev];
-+ Order.push_back(ParentRegion->getBBNode(Split));
-+ LoopPred[Prev] = BoolTrue;
-+
-+ } else if (LoopStart == Order.back()->getEntry()) {
-+ // Loop starts behind entry, split entry so that we can jump to it
-+ Instruction *Term = Prev->getTerminator();
-+ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
-+ DT->addNewBlock(Split, Prev);
-+ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
-+ Prev = Split;
-+ }
-+
-+ killTerminator(Prev);
-+ FlowsInserted.clear();
-+ FlowsInserted.push_back(Prev);
-+
-+ while (!Order.empty()) {
-+ RegionNode *Node = Order.pop_back_val();
-+ Visited.erase(Node->getEntry());
-+ Prev = wireFlowBlock(Prev, Node);
-+ if (LoopStart && !LoopEnd) {
-+ // Create an extra loop end node
-+ LoopEnd = Prev;
-+ Prev = getNextFlow(LoopEnd);
-+ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
-+ addPhiValues(LoopEnd, LoopStart);
-+ }
-+ }
-+
-+ BasicBlock *Exit = ParentRegion->getExit();
-+ BranchInst::Create(Exit, Prev);
-+ addPhiValues(Prev, Exit);
-+ if (DT->dominates(ParentRegion->getEntry(), Exit))
-+ DT->changeImmediateDominator(Exit, Prev);
-+
-+ if (LoopStart && LoopEnd) {
-+ BBVector::iterator FI = std::find(FlowsInserted.begin(),
-+ FlowsInserted.end(),
-+ LoopStart);
-+ for (; *FI != LoopEnd; ++FI) {
-+ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
-+ }
-+ }
-+
-+ assert(Order.empty());
-+ assert(Visited.empty());
-+ assert(DeletedPhis.empty());
-+}
-+
-+/// \brief Insert the missing branch conditions
-+void AMDGPUStructurizeCFG::insertConditions() {
-+ SSAUpdater PhiInserter;
-+
-+ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
-+ FI != FE; ++FI) {
-+
-+ BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
-+ if (Term->isUnconditional())
-+ continue;
-+
-+ PhiInserter.Initialize(Boolean, "");
-+ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
-+
-+ BasicBlock *Succ = Term->getSuccessor(0);
-+ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
-+ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
-+ PI != PE; ++PI) {
-+
-+ PhiInserter.AddAvailableValue(PI->first, PI->second);
-+ }
-+
-+ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
-+ }
-+}
-+
-+/// Handle a rare case where the disintegrated nodes' instructions
-+/// no longer dominate all their uses. Not sure if this is really necessary.
-+void AMDGPUStructurizeCFG::rebuildSSA() {
-+ SSAUpdater Updater;
-+ for (Region::block_iterator I = ParentRegion->block_begin(),
-+ E = ParentRegion->block_end();
-+ I != E; ++I) {
-+
-+ BasicBlock *BB = *I;
-+ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
-+ II != IE; ++II) {
-+
-+ bool Initialized = false;
-+ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
-+
-+ Next = I->getNext();
-+
-+ Instruction *User = cast<Instruction>(I->getUser());
-+ if (User->getParent() == BB) {
-+ continue;
-+
-+ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
-+ if (UserPN->getIncomingBlock(*I) == BB)
-+ continue;
-+ }
-+
-+ if (DT->dominates(II, User))
-+ continue;
-+
-+ if (!Initialized) {
-+ Value *Undef = UndefValue::get(II->getType());
-+ Updater.Initialize(II->getType(), "");
-+ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
-+ Updater.AddAvailableValue(BB, II);
-+ Initialized = true;
-+ }
-+ Updater.RewriteUseAfterInsertions(*I);
-+ }
-+ }
-+ }
-+}
-+
-+/// \brief Run the transformation for each region found
-+bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
-+ if (R->isTopLevelRegion())
-+ return false;
-+
-+ Func = R->getEntry()->getParent();
-+ ParentRegion = R;
-+
-+ DT = &getAnalysis<DominatorTree>();
-+
-+ orderNodes();
-+ collectInfos();
-+ createFlow();
-+ insertConditions();
-+ rebuildSSA();
-+
-+ Order.clear();
-+ Visited.clear();
-+ Predicates.clear();
-+ DeletedPhis.clear();
-+ FlowsInserted.clear();
-+
-+ return true;
-+}
-+
-+/// \brief Create the pass
-+Pass *llvm::createAMDGPUStructurizeCFGPass() {
-+ return new AMDGPUStructurizeCFG();
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,87 @@
-+//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUSubtarget.h"
-+
-+using namespace llvm;
-+
-+#define GET_SUBTARGETINFO_ENUM
-+#define GET_SUBTARGETINFO_TARGET_DESC
-+#define GET_SUBTARGETINFO_CTOR
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+/// Build the subtarget for \p CPU and apply the feature string \p FS.
-+AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
-+  AMDGPUGenSubtargetInfo(TT, CPU, FS), Is64bit(false), Is32on64bit(false),
-+  DumpCode(false), R600ALUInst(false) {
-+  InstrItins = getInstrItineraryForCPU(CPU);
-+  memset(CapsOverride, 0, sizeof(CapsOverride));
-+  // Default card
-+  StringRef GPU = CPU;
-+  DefaultSize[0] = 64;
-+  DefaultSize[1] = 1;
-+  DefaultSize[2] = 1;
-+  // ParseSubtargetFeatures() presumably only writes the flags named in FS.
-+  ParseSubtargetFeatures(GPU, FS);
-+  DevName = GPU;
-+  Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
-+}
-+
-+AMDGPUSubtarget::~AMDGPUSubtarget() {
-+ delete Device;
-+}
-+
-+bool
-+AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
-+ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
-+ "Caps index is out of bounds!");
-+ return CapsOverride[caps];
-+}
-+bool
-+AMDGPUSubtarget::is64bit() const {
-+ return Is64bit;
-+}
-+bool
-+AMDGPUSubtarget::isTargetELF() const {
-+ return false;
-+}
-+size_t
-+AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
-+  // DefaultSize holds exactly three entries, so index 3 is already out of
-+  // range; every dimension past the last one reports a size of 1.
-+  if (dim >= 3)
-+    return 1;
-+  return DefaultSize[dim];
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDataLayout() const {
-+  // A device, when present, supplies the layout; the literal is the fallback.
-+  if (Device)
-+    return Device->getDataLayout();
-+  return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
-+      "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
-+      "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
-+      "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
-+      "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
-+}
-+
-+std::string
-+AMDGPUSubtarget::getDeviceName() const {
-+ return DevName;
-+}
-+const AMDGPUDevice *
-+AMDGPUSubtarget::device() const {
-+ return Device;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,65 @@
-+//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU specific subclass of TargetSubtarget.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPUSUBTARGET_H
-+#define AMDGPUSUBTARGET_H
-+#include "AMDILDevice.h"
-+#include "llvm/ADT/StringExtras.h"
-+#include "llvm/ADT/StringRef.h"
-+#include "llvm/Target/TargetSubtargetInfo.h"
-+
-+#define GET_SUBTARGETINFO_HEADER
-+#include "AMDGPUGenSubtargetInfo.inc"
-+
-+#define MAX_CB_SIZE (1 << 16)
-+
-+namespace llvm {
-+
-+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
-+private:
-+ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
-+ const AMDGPUDevice *Device;
-+ size_t DefaultSize[3];
-+ std::string DevName;
-+ bool Is64bit;
-+ bool Is32on64bit;
-+ bool DumpCode;
-+ bool R600ALUInst;
-+
-+ InstrItineraryData InstrItins;
-+
-+public:
-+ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
-+ virtual ~AMDGPUSubtarget();
-+
-+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
-+ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
-+
-+ bool isOverride(AMDGPUDeviceInfo::Caps) const;
-+ bool is64bit() const;
-+
-+ // Helper functions to simplify if statements
-+ bool isTargetELF() const;
-+ const AMDGPUDevice* device() const;
-+ std::string getDataLayout() const;
-+ std::string getDeviceName() const;
-+ virtual size_t getDefaultSize(uint32_t dim) const;
-+ bool dumpCode() const { return DumpCode; }
-+ bool r600ALUEncoding() const { return R600ALUInst; }
-+
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPUSUBTARGET_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,148 @@
-+//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief The AMDGPU target machine contains all of the hardware specific
-+/// information needed to emit code for R600 and SI GPUs.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "AMDGPUTargetMachine.h"
-+#include "AMDGPU.h"
-+#include "R600ISelLowering.h"
-+#include "R600InstrInfo.h"
-+#include "SIISelLowering.h"
-+#include "SIInstrInfo.h"
-+#include "llvm/Analysis/Passes.h"
-+#include "llvm/Analysis/Verifier.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineModuleInfo.h"
-+#include "llvm/CodeGen/Passes.h"
-+#include "llvm/MC/MCAsmInfo.h"
-+#include "llvm/PassManager.h"
-+#include "llvm/Support/TargetRegistry.h"
-+#include "llvm/Support/raw_os_ostream.h"
-+#include "llvm/Transforms/IPO.h"
-+#include "llvm/Transforms/Scalar.h"
-+#include <llvm/CodeGen/Passes.h>
-+
-+using namespace llvm;
-+
-+extern "C" void LLVMInitializeR600Target() {
-+ // Register the target
-+ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
-+}
-+
-+AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
-+    StringRef CPU, StringRef FS, TargetOptions Options,
-+    Reloc::Model RM, CodeModel::Model CM,
-+    CodeGenOpt::Level OptLevel) :
-+  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
-+  Subtarget(TT, CPU, FS),
-+  Layout(Subtarget.getDataLayout()),
-+  FrameLowering(TargetFrameLowering::StackGrowsUp,
-+                Subtarget.device()->getStackAlignment(), 0),
-+  IntrinsicInfo(this),
-+  InstrItins(&Subtarget.getInstrItineraryData()) {
-+  // TLInfo uses InstrInfo so it must be initialized after.
-+  if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+    InstrInfo = new R600InstrInfo(*this);
-+    TLInfo = new R600TargetLowering(*this);
-+  } else {
-+    InstrInfo = new SIInstrInfo(*this);
-+    TLInfo = new SITargetLowering(*this);
-+  }
-+}
-+
-+AMDGPUTargetMachine::~AMDGPUTargetMachine() {
-+  // Both objects come from new in the constructor; TLInfo was built last.
-+  delete TLInfo;
-+  delete InstrInfo;
-+}
-+
-+namespace {
-+class AMDGPUPassConfig : public TargetPassConfig {
-+public:
-+ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
-+ : TargetPassConfig(TM, PM) {}
-+
-+ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
-+ return getTM<AMDGPUTargetMachine>();
-+ }
-+
-+ virtual bool addPreISel();
-+ virtual bool addInstSelector();
-+ virtual bool addPreRegAlloc();
-+ virtual bool addPostRegAlloc();
-+ virtual bool addPreSched2();
-+ virtual bool addPreEmitPass();
-+};
-+} // End of anonymous namespace
-+
-+TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
-+ return new AMDGPUPassConfig(this, PM);
-+}
-+
-+bool
-+AMDGPUPassConfig::addPreISel() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createAMDGPUStructurizeCFGPass());
-+ addPass(createSIAnnotateControlFlowPass());
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addInstSelector() {
-+ addPass(createAMDGPUPeepholeOpt(*TM));
-+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreRegAlloc() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createSIAssignInterpRegsPass(*TM));
-+ }
-+ addPass(createAMDGPUConvertToISAPass(*TM));
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPostRegAlloc() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+
-+ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createSIInsertWaits(*TM));
-+ }
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreSched2() {
-+
-+ addPass(&IfConverterID);
-+ return false;
-+}
-+
-+bool AMDGPUPassConfig::addPreEmitPass() {
-+ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
-+ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
-+ addPass(createAMDGPUCFGPreparationPass(*TM));
-+ addPass(createAMDGPUCFGStructurizerPass(*TM));
-+ addPass(createR600ExpandSpecialInstrsPass(*TM));
-+ addPass(createR600LowerConstCopy(*TM));
-+ addPass(&FinalizeMachineBundlesID);
-+ } else {
-+ addPass(createSILowerLiteralConstantsPass(*TM));
-+ addPass(createSILowerControlFlowPass(*TM));
-+ }
-+
-+ return false;
-+}
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h
---- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,70 @@
-+//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+/// \file
-+/// \brief AMDGPU TargetMachine interface definition for hw codegen targets.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#ifndef AMDGPU_TARGET_MACHINE_H
-+#define AMDGPU_TARGET_MACHINE_H
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILFrameLowering.h"
-+#include "AMDILIntrinsicInfo.h"
-+#include "R600ISelLowering.h"
-+#include "llvm/ADT/OwningPtr.h"
-+#include "llvm/DataLayout.h"
-+
-+namespace llvm {
-+
-+MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
-+
-+class AMDGPUTargetMachine : public LLVMTargetMachine {
-+  // Declaration order matters: members are initialized in exactly this order.
-+  AMDGPUSubtarget Subtarget;
-+  const DataLayout Layout;
-+  AMDGPUFrameLowering FrameLowering;
-+  AMDGPUIntrinsicInfo IntrinsicInfo;
-+  const AMDGPUInstrInfo * InstrInfo;
-+  AMDGPUTargetLowering * TLInfo;
-+  const InstrItineraryData* InstrItins;
-+
-+public:
-+  /// Parameter order (CPU, then FS) matches the out-of-line definition.
-+  AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef CPU,
-+                      StringRef FS, TargetOptions Options,
-+                      Reloc::Model RM, CodeModel::Model CM,
-+                      CodeGenOpt::Level OL);
-+  ~AMDGPUTargetMachine();
-+  virtual const AMDGPUFrameLowering* getFrameLowering() const {
-+    return &FrameLowering;
-+  }
-+  virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
-+    return &IntrinsicInfo;
-+  }
-+  virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
-+  virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
-+  virtual const AMDGPURegisterInfo *getRegisterInfo() const {
-+    return &InstrInfo->getRegisterInfo();
-+  }
-+  virtual AMDGPUTargetLowering * getTargetLowering() const {
-+    return TLInfo;
-+  }
-+  virtual const InstrItineraryData* getInstrItineraryData() const {
-+    return InstrItins;
-+  }
-+  virtual const DataLayout* getDataLayout() const { return &Layout; }
-+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
-+};
-+
-+} // End namespace llvm
-+
-+#endif // AMDGPU_TARGET_MACHINE_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td
---- llvm-3.2.src/lib/Target/R600/AMDGPU.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDGPU.td 2013-01-25 19:43:57.423383055 +0100
-@@ -0,0 +1,40 @@
-+//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+
-+// Include AMDIL TD files
-+include "AMDILBase.td"
-+
-+
-+def AMDGPUInstrInfo : InstrInfo {
-+ let guessInstructionProperties = 1;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Declare the target which we are implementing
-+//===----------------------------------------------------------------------===//
-+def AMDGPUAsmWriter : AsmWriter {
-+ string AsmWriterClassName = "InstPrinter";
-+ int Variant = 0;
-+ bit isMCAsmWriter = 1;
-+}
-+
-+def AMDGPU : Target {
-+ // Pull in Instruction Info:
-+ let InstructionSet = AMDGPUInstrInfo;
-+ let AssemblyWriters = [AMDGPUAsmWriter];
-+}
-+
-+// Include AMDGPU TD files
-+include "R600Schedule.td"
-+include "SISchedule.td"
-+include "Processors.td"
-+include "AMDGPUInstrInfo.td"
-+include "AMDGPUIntrinsics.td"
-+include "AMDGPURegisterInfo.td"
-+include "AMDGPUInstructions.td"
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp 2013-01-25 19:43:57.433383055 +0100
-@@ -0,0 +1,115 @@
-+//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+#include "AMDIL7XXDevice.h"
-+#include "AMDGPUSubtarget.h"
-+#include "AMDILDevice.h"
-+
-+using namespace llvm;
-+
-+AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
-+ setCaps();
-+ std::string name = mSTM->getDeviceName();
-+ if (name == "rv710") {
-+ DeviceFlag = OCL_DEVICE_RV710;
-+ } else if (name == "rv730") {
-+ DeviceFlag = OCL_DEVICE_RV730;
-+ } else {
-+ DeviceFlag = OCL_DEVICE_RV770;
-+ }
-+}
-+
-+AMDGPU7XXDevice::~AMDGPU7XXDevice() {
-+}
-+
-+void AMDGPU7XXDevice::setCaps() {
-+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU7XXDevice::getMaxLDSSize() const {
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return MAX_LDS_SIZE_700;
-+ }
-+ return 0;
-+}
-+
-+size_t AMDGPU7XXDevice::getWavefrontSize() const {
-+ return AMDGPUDevice::HalfWavefrontSize;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getGeneration() const {
-+ return AMDGPUDeviceInfo::HD4XXX;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
-+ switch (DeviceID) {
-+ default:
-+ assert(0 && "ID type passed in is unknown!");
-+ break;
-+ case GLOBAL_ID:
-+ case CONSTANT_ID:
-+ case RAW_UAV_ID:
-+ case ARENA_UAV_ID:
-+ break;
-+ case LDS_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
-+ return DEFAULT_LDS_ID;
-+ }
-+ break;
-+ case SCRATCH_ID:
-+ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
-+ return DEFAULT_SCRATCH_ID;
-+ }
-+ break;
-+ case GDS_ID:
-+ assert(0 && "GDS UAV ID is not supported on this chip");
-+ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
-+ return DEFAULT_GDS_ID;
-+ }
-+ break;
-+ };
-+
-+ return 0;
-+}
-+
-+uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
-+ return 1;
-+}
-+
-+AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
-+ setCaps();
-+}
-+
-+AMDGPU770Device::~AMDGPU770Device() {
-+}
-+
-+void AMDGPU770Device::setCaps() {
-+ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
-+ mSWBits.set(AMDGPUDeviceInfo::FMA);
-+ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
-+ }
-+ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
-+ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
-+ mSWBits.set(AMDGPUDeviceInfo::LongOps);
-+ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
-+}
-+
-+size_t AMDGPU770Device::getWavefrontSize() const {
-+ return AMDGPUDevice::WavefrontSize;
-+}
-+
-+AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
-+}
-+
-+AMDGPU710Device::~AMDGPU710Device() {
-+}
-+
-+size_t AMDGPU710Device::getWavefrontSize() const {
-+ return AMDGPUDevice::QuarterWavefrontSize;
-+}
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h
---- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,72 @@
-+//==-- AMDIL7XXDevice.h - Define 7XX Device for AMDIL ----------*- C++ -*--===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//==-----------------------------------------------------------------------===//
-+/// \file
-+/// \brief Interface for the subtarget data classes.
-+///
-+/// This file will define the interface that each generation needs to
-+/// implement in order to correctly answer queries on the capabilities of the
-+/// specific hardware.
-+//===----------------------------------------------------------------------===//
-+#ifndef AMDIL7XXDEVICEIMPL_H
-+#define AMDIL7XXDEVICEIMPL_H
-+#include "AMDILDevice.h"
-+
-+namespace llvm {
-+class AMDGPUSubtarget;
-+
-+//===----------------------------------------------------------------------===//
-+// 7XX generation of devices and their respective sub classes
-+//===----------------------------------------------------------------------===//
-+
-+/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
-+///
-+/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
-+/// support the minimal features that are required to be considered OpenCL 1.0
-+/// compliant and nothing more.
-+class AMDGPU7XXDevice : public AMDGPUDevice {
-+public:
-+ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU7XXDevice();
-+ virtual size_t getMaxLDSSize() const;
-+ virtual size_t getWavefrontSize() const;
-+ virtual uint32_t getGeneration() const;
-+ virtual uint32_t getResourceID(uint32_t DeviceID) const;
-+ virtual uint32_t getMaxNumUAVs() const;
-+
-+protected:
-+ virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU770Device class represents the RV770 chip and its
-+/// derivative cards.
-+///
-+/// The difference between this device and the base class is that this device
-+/// adds support for double precision and has a larger wavefront size.
-+class AMDGPU770Device : public AMDGPU7XXDevice {
-+public:
-+ AMDGPU770Device(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU770Device();
-+ virtual size_t getWavefrontSize() const;
-+private:
-+ virtual void setCaps();
-+};
-+
-+/// \brief The AMDGPU710Device class derives from the 7XX base class.
-+///
-+/// This class is a smaller derivative, so we need to overload some of the
-+/// functions in order to correctly specify this information.
-+class AMDGPU710Device : public AMDGPU7XXDevice {
-+public:
-+ AMDGPU710Device(AMDGPUSubtarget *ST);
-+ virtual ~AMDGPU710Device();
-+ virtual size_t getWavefrontSize() const;
-+};
-+
-+} // namespace llvm
-+#endif // AMDIL7XXDEVICEIMPL_H
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td
---- llvm-3.2.src/lib/Target/R600/AMDILBase.td 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILBase.td 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,85 @@
-+//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+// Target-independent interfaces which we are implementing
-+//===----------------------------------------------------------------------===//
-+
-+include "llvm/Target/Target.td"
-+
-+// Dummy Instruction itineraries for pseudo instructions
-+def ALU_NULL : FuncUnit;
-+def NullALU : InstrItinClass;
-+
-+//===----------------------------------------------------------------------===//
-+// AMDIL Subtarget features.
-+//===----------------------------------------------------------------------===//
-+def FeatureFP64 : SubtargetFeature<"fp64",
-+ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
-+ "true",
-+ "Enable 64bit double precision operations">;
-+def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
-+ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
-+ "true",
-+ "Enable byte addressable stores">;
-+def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
-+ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
-+ "true",
-+ "Enable duplicate barrier detection(HD5XXX or later).">;
-+def FeatureImages : SubtargetFeature<"images",
-+ "CapsOverride[AMDGPUDeviceInfo::Images]",
-+ "true",
-+ "Enable image functions">;
-+def FeatureMultiUAV : SubtargetFeature<"multi_uav",
-+ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
-+ "true",
-+ "Generate multiple UAV code(HD5XXX family or later)">;
-+def FeatureMacroDB : SubtargetFeature<"macrodb",
-+ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
-+ "true",
-+ "Use internal macrodb, instead of macrodb in driver">;
-+def FeatureNoAlias : SubtargetFeature<"noalias",
-+ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
-+ "true",
-+ "assert that all kernel argument pointers are not aliased">;
-+def FeatureNoInline : SubtargetFeature<"no-inline",
-+ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
-+ "true",
-+ "specify whether to not inline functions">;
-+
-+// The constructor defaults Is64bit to false, so the feature must set "true".
-+def Feature64BitPtr : SubtargetFeature<"64BitPtr",
-+        "Is64bit", "true",
-+        "Specify if 64bit addressing should be used.">;
-+
-+def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
-+ "Is32on64bit",
-+ "false",
-+ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
-+def FeatureDebug : SubtargetFeature<"debug",
-+ "CapsOverride[AMDGPUDeviceInfo::Debug]",
-+ "true",
-+ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
-+def FeatureDumpCode : SubtargetFeature <"DumpCode",
-+ "DumpCode",
-+ "true",
-+ "Dump MachineInstrs in the CodeEmitter">;
-+
-+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-+ "R600ALUInst",
-+ "false",
-+ "Older version of ALU instructions encoding.">;
-+
-+
-+//===----------------------------------------------------------------------===//
-+// Register File, Calling Conv, Instruction Descriptions
-+//===----------------------------------------------------------------------===//
-+
-+
-+include "AMDILRegisterInfo.td"
-+include "AMDILInstrInfo.td"
-+
-diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp
---- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp 1970-01-01 01:00:00.000000000 +0100
-+++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp 2013-01-25 19:43:57.436716388 +0100
-@@ -0,0 +1,3045 @@
-+//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
-+//
-+// The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+/// \file
-+//==-----------------------------------------------------------------------===//
-+
-+#define DEBUGME 0
-+#define DEBUG_TYPE "structcfg"
-+
-+#include "AMDGPUInstrInfo.h"
-+#include "AMDIL.h"
-+#include "llvm/ADT/SCCIterator.h"
-+#include "llvm/ADT/SmallVector.h"
-+#include "llvm/ADT/Statistic.h"
-+#include "llvm/Analysis/DominatorInternals.h"
-+#include "llvm/Analysis/Dominators.h"
-+#include "llvm/CodeGen/MachinePostDominators.h"
-+#include "llvm/CodeGen/MachineDominators.h"
-+#include "llvm/CodeGen/MachineFunction.h"
-+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineJumpTableInfo.h"
-+#include "llvm/CodeGen/MachineLoopInfo.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+
-+using namespace llvm;
-+
-+// TODO: move-begin.
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Statistics for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
-+ "matched");
-+STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
-+ "matched");
-+STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
-+ "pattern matched");
-+STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
-+ "pattern matched");
-+STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
-+ "matched");
-+STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
-+STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Miscellaneous utility for CFGStructurizer.
-+//
-+//===----------------------------------------------------------------------===//
-+namespace llvmCFGStruct {
-+#define SHOWNEWINSTR(i) \
-+ if (DEBUGME) errs() << "New instr: " << *i << "\n"
-+
-+#define SHOWNEWBLK(b, msg) \
-+if (DEBUGME) { \
-+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+ errs() << "\n"; \
-+}
-+
-+#define SHOWBLK_DETAIL(b, msg) \
-+if (DEBUGME) { \
-+ if (b) { \
-+ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
-+ b->print(errs()); \
-+ errs() << "\n"; \
-+ } \
-+}
-+
-+#define INVALIDSCCNUM -1
-+#define INVALIDREGNUM 0
-+
-+template<class LoopinfoT>
-+void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
-+ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
-+ iterEnd = LoopInfo.end();
-+ iter != iterEnd; ++iter) {
-+ (*iter)->print(OS, 0);
-+ }
-+}
-+
-+template<class NodeT>
-+void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
-+  // Exchange mirrored slots, walking inwards from both ends.
-+  for (size_t lo = 0, hi = Src.size(); lo + 1 < hi; ++lo) {
-+    NodeT *held = Src[--hi];
-+    Src[hi] = Src[lo];
-+    Src[lo] = held;
-+  }
-+}
-+
-+} //end namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// supporting data structure for CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+template<class PassT>
-+struct CFGStructTraits {
-+};
-+
-+template <class InstrT>
-+class BlockInformation {
-+public:
-+ bool isRetired;
-+ int sccNum;
-+ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
-+ //Instructions defining the corresponding successor.
-+ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
-+};
-+
-+template <class BlockT, class InstrT, class RegiT>
-+class LandInformation {
-+public:
-+ BlockT *landBlk;
-+ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
-+ //WHILELOOP(thisloop) init before entering
-+ //thisloop.
-+ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
-+ //WHILELOOP(thisloop) init after entering
-+ //thisloop.
-+ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
-+ //land block, branch cond on this reg.
-+ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
-+ //endif" after ENDLOOP(thisloop) break
-+ //outerLoopOf(thisLoop).
-+ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
-+ //endif" after ENDLOOP(thisloop) continue on
-+ //outerLoopOf(thisLoop).
-+ LandInformation() : landBlk(NULL) {}
-+};
-+
-+} //end of namespace llvmCFGStruct
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// CFGStructurizer
-+//
-+//===----------------------------------------------------------------------===//
-+
-+namespace llvmCFGStruct {
-+// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
-+template<class PassT>
-+class CFGStructurizer {
-+public:
-+ typedef enum {
-+ Not_SinglePath = 0,
-+ SinglePath_InPath = 1,
-+ SinglePath_NotInPath = 2
-+ } PathToKind;
-+
-+public:
-+ typedef typename PassT::InstructionType InstrT;
-+ typedef typename PassT::FunctionType FuncT;
-+ typedef typename PassT::DominatortreeType DomTreeT;
-+ typedef typename PassT::PostDominatortreeType PostDomTreeT;
-+ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
-+ typedef typename PassT::LoopinfoType LoopInfoT;
-+
-+ typedef GraphTraits<FuncT *> FuncGTraits;
-+ //typedef FuncGTraits::nodes_iterator BlockIterator;
-+ typedef typename FuncT::iterator BlockIterator;
-+
-+ typedef typename FuncGTraits::NodeType BlockT;
-+ typedef GraphTraits<BlockT *> BlockGTraits;
-+ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
-+ //typedef BlockGTraits::succ_iterator InstructionIterator;
-+ typedef typename BlockT::iterator InstrIterator;
-+
-+ typedef CFGStructTraits<PassT> CFGTraits;
-+ typedef BlockInformation<InstrT> BlockInfo;
-+ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
-+
-+ typedef int RegiT;
-+ typedef typename PassT::LoopType LoopT;
-+ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
-+ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
-+ //landing info for loop break
-+ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
-+
-+public:
-+ CFGStructurizer();
-+ ~CFGStructurizer();
-+
-+ /// Perform the CFG structurization
-+ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+ /// Perform the CFG preparation
-+ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
-+
-+private:
-+ void reversePredicateSetter(typename BlockT::iterator);
-+ void orderBlocks();
-+ void printOrderedBlocks(llvm::raw_ostream &OS);
-+ int patternMatch(BlockT *CurBlock);
-+ int patternMatchGroup(BlockT *CurBlock);
-+
-+ int serialPatternMatch(BlockT *CurBlock);
-+ int ifPatternMatch(BlockT *CurBlock);
-+ int switchPatternMatch(BlockT *CurBlock);
-+ int loopendPatternMatch(BlockT *CurBlock);
-+ int loopPatternMatch(BlockT *CurBlock);
-+
-+ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
-+ //int loopWithoutBreak(BlockT *);
-+
-+ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
-+ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
-+ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
-+ BlockT *ContBlock, LoopT *contLoop);
-+ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
-+ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock);
-+ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock);
-+ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock, BlockT **LandBlockPtr);
-+ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
-+ BlockT *FalseBlock, BlockT *LandBlock,
-+ bool Detail = false);
-+ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
-+ bool AllowSideEntry = true);
-+ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
-+ bool AllowSideEntry = true);
-+ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
-+ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
-+
-+ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
-+ BlockT *TrueBlock, BlockT *FalseBlock,
-+ BlockT *LandBlock);
-+ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
-+ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
-+ BlockT *ExitLandBlock, RegiT SetReg);
-+ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
-+ RegiT SetReg);
-+ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
-+ std::set<BlockT*> &ExitBlockSet,
-+ BlockT *ExitLandBlk);
-+ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
-+ BlockTSmallerVector &ExitingBlocks,
-+ BlockTSmallerVector &ExitBlocks);
-+ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
-+ void removeUnconditionalBranch(BlockT *SrcBlock);
-+ void removeRedundantConditionalBranch(BlockT *SrcBlock);
-+ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
-+
-+ void removeSuccessor(BlockT *SrcBlock);
-+ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
-+ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
-+
-+ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
-+ InstrIterator InsertPos);
-+
-+ void recordSccnum(BlockT *SrcBlock, int SCCNum);
-+ int getSCCNum(BlockT *srcBlk);
-+
-+ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
-+ bool isRetiredBlock(BlockT *SrcBlock);
-+ bool isActiveLoophead(BlockT *CurBlock);
-+ bool needMigrateBlock(BlockT *Block);
-+
-+ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
-+ BlockTSmallerVector &exitBlocks,
-+ std::set<BlockT*> &ExitBlockSet);
-+ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
-+ BlockT *getLoopLandBlock(LoopT *LoopRep);
-+ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
-+
-+ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
-+ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
-+
-+ bool hasBackEdge(BlockT *curBlock);
-+ unsigned getLoopDepth (LoopT *LoopRep);
-+ int countActiveBlock(
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
-+ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
-+ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
-+ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
-+
-+private:
-+ DomTreeT *domTree; // NULL after construction; run() assigns CFGTraits::getDominatorTree(pass)
-+ PostDomTreeT *postDomTree; // NULL after construction; run() assigns CFGTraits::getPostDominatorTree(pass)
-+ LoopInfoT *loopInfo; // NULL after construction; prepare() and run() assign CFGTraits::getLoopInfo(pass)
-+ PassT *passRep; // address of the pass object handed to prepare()/run()
-+ FuncT *funcRep; // address of the function object handed to prepare()/run()
-+
-+ BlockInfoMap blockInfoMap; // the destructor deletes the mapped value of every entry
-+ LoopLandInfoMap loopLandInfoMap;
-+ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; // walked by prepare() after orderBlocks(); presumably filled there -- TODO confirm
-+ const AMDGPURegisterInfo *TRI; // register-info pointer handed to prepare()/run()
-+
-+}; //template class CFGStructurizer
-+
-+// Start with every cached pointer cleared. passRep, funcRep and TRI are
-+// assigned on entry to prepare() and run(); clearing them here as well means
-+// no pointer member is ever left holding an indeterminate value. The
-+// initializers follow the member declaration order.
-+template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
-+  : domTree(NULL), postDomTree(NULL), loopInfo(NULL), passRep(NULL),
-+    funcRep(NULL), TRI(NULL) {
-+}
-+
-+// Delete the value stored under every key of blockInfoMap.
-+template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
-+  typename BlockInfoMap::iterator Entry = blockInfoMap.begin();
-+  typename BlockInfoMap::iterator Last = blockInfoMap.end();
-+  while (Entry != Last) {
-+    delete Entry->second;
-+    ++Entry;
-+  }
-+}
-+
-+// Pre-structurization clean-up of one function.
-+//
-+// Records func/pass/tri in the members, fetches the loop info, orders the
-+// blocks, then:
-+//   1. for each top-level loop that has no exiting block, calls
-+//      normalizeInfiniteLoopExit() and remembers the block it returns (if any);
-+//   2. for each ordered block, runs removeUnconditionalBranch() and
-+//      removeRedundantConditionalBranch(), and remembers return blocks;
-+//   3. calls addDummyExitBlock() when two or more blocks were remembered.
-+//
-+// Returns true only when step 3 ran.
-+// NOTE(review): steps 1 and 2 are not reflected in the return value; whether
-+// those helpers modify the function is not visible here -- confirm.
-+template<class PassT>
-+bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
-+                                     const AMDGPURegisterInfo * tri) {
-+  typedef SmallVector<BlockT *, DEFAULT_VEC_SLOTS> BlockVector;
-+
-+  TRI = tri;
-+  funcRep = &func;
-+  passRep = &pass;
-+
-+  //FIXME: if not reducible flow graph, make it so ???
-+
-+  if (DEBUGME)
-+    errs() << "AMDGPUCFGStructurizer::prepare\n";
-+
-+  loopInfo = CFGTraits::getLoopInfo(pass);
-+  if (DEBUGME) {
-+    errs() << "LoopInfo:\n";
-+    PrintLoopinfo(*loopInfo, errs());
-+  }
-+
-+  orderBlocks();
-+  if (DEBUGME) {
-+    errs() << "Ordered blocks:\n";
-+    printOrderedBlocks(errs());
-+  }
-+
-+  BlockVector returnBlocks;
-+
-+  // Step 1: loops without any exiting block.
-+  for (typename LoopInfoT::iterator loopIt = loopInfo->begin(),
-+       loopEnd = loopInfo->end(); loopIt != loopEnd; ++loopIt) {
-+    LoopT *loop = *loopIt;
-+    BlockTSmallerVector exiting;
-+    loop->getExitingBlocks(exiting);
-+    if (exiting.size() != 0)
-+      continue;
-+
-+    if (BlockT *dummyExit = normalizeInfiniteLoopExit(loop))
-+      returnBlocks.push_back(dummyExit);
-+  }
-+
-+  // Step 2: strip branches block by block and collect the return blocks.
-+  for (typename BlockVector::const_iterator blockIt = orderedBlks.begin(),
-+       blockEnd = orderedBlks.end(); blockIt != blockEnd; ++blockIt) {
-+    BlockT *block = *blockIt;
-+    removeUnconditionalBranch(block);
-+    removeRedundantConditionalBranch(block);
-+    if (CFGTraits::isReturnBlock(block))
-+      returnBlocks.push_back(block);
-+    assert(block->succ_size() <= 2);
-+  }
-+
-+  // Step 3: a dummy exit block is added iff there are multiple returns.
-+  if (returnBlocks.size() < 2)
-+    return false;
-+
-+  addDummyExitBlock(returnBlocks);
-+  return true;
-+} //CFGStructurizer::prepare
-+
-+template<class PassT>
-+bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
-+ const AMDGPURegisterInfo * tri) {
-+ passRep = &pass;
-+ funcRep = &func;
-+ TRI = tri;
-+
-+ //Assume reducible CFG...
-+ if (DEBUGME) {
-+ errs() << "AMDGPUCFGStructurizer::run\n";
-+ func.viewCFG();
-+ }
-+
-+ domTree = CFGTraits::getDominatorTree(pass);
-+ if (DEBUGME) {
-+ domTree->print(errs(), (const llvm::Module*)0);
-+ }
-+
-+ postDomTree = CFGTraits::getPostDominatorTree(pass);
-+ if (DEBUGME) {
-+ postDomTree->print(errs());
-+ }
-+
-+ loopInfo = CFGTraits::getLoopInfo(pass);
-+ if (DEBUGME) {
-+ errs() << "LoopInfo:\n";
-+ PrintLoopinfo(*loopInfo, errs());
-+ }
-+
<Skipped 19052 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/llvm.git/commitdiff/c0fd916863545c83a6eaac1b5048b1da5ea0c5ba
More information about the pld-cvs-commit
mailing list