[packages/llvm] - added r600 patch (R600 target support)
qboosh
qboosh at pld-linux.org
Fri Jan 25 22:28:01 CET 2013
commit 251890425981d4d0076266858a1b1ef0ea2b617a
Author: Jakub Bogusz <qboosh at pld-linux.org>
Date: Fri Jan 25 22:28:37 2013 +0100
- added r600 patch (R600 target support)
llvm-r600.patch | 23023 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
llvm.spec | 4 +
2 files changed, 23027 insertions(+)
---
diff --git a/llvm.spec b/llvm.spec
index 8e0eff3..a51d296 100644
--- a/llvm.spec
+++ b/llvm.spec
@@ -30,6 +30,8 @@ Patch0: %{name}-config.patch
# Data files should be installed with timestamps preserved
Patch1: %{name}-2.6-timestamp.patch
Patch2: %{name}-pld.patch
+# R600 target support from git://people.freedesktop.org/~tstellar/llvm
+Patch3: %{name}-r600.patch
URL: http://llvm.org/
BuildRequires: autoconf >= 2.60
BuildRequires: automake >= 1:1.9.6
@@ -269,6 +271,7 @@ mv clang-*.* tools/clang
%patch0 -p1
%patch1 -p1
%patch2 -p1
+%patch3 -p1
# configure does not properly specify libdir
sed -i 's|(PROJ_prefix)/lib|(PROJ_prefix)/%{_lib}|g' Makefile.config.in
@@ -305,6 +308,7 @@ bash ../%configure \
%if %{with apidocs}
--enable-doxygen \
%endif
+ --enable-experimental-targets=R600 \
--enable-jit \
--enable-optimized \
--enable-shared \
diff --git a/llvm-r600.patch b/llvm-r600.patch
new file mode 100644
index 0000000..0957c01
--- /dev/null
+++ b/llvm-r600.patch
@@ -0,0 +1,23023 @@
+diff -Nur -x .git llvm-3.2.src/autoconf/configure.ac llvm-r600/autoconf/configure.ac
+--- llvm-3.2.src/autoconf/configure.ac 2012-11-21 17:13:35.000000000 +0100
++++ llvm-r600/autoconf/configure.ac 2013-01-25 19:43:56.096716416 +0100
+@@ -751,6 +751,11 @@
+
+ if test ${enableval} != "disable"
+ then
++ if test ${enableval} = "AMDGPU"
++ then
++ AC_MSG_ERROR([The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600])
++ enableval="R600"
++ fi
+ TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
+ fi
+
+diff -Nur -x .git llvm-3.2.src/configure llvm-r600/configure
+--- llvm-3.2.src/configure 2012-11-21 17:13:35.000000000 +0100
++++ llvm-r600/configure 2013-01-25 19:43:56.173383081 +0100
+@@ -5473,6 +5473,13 @@
+
+ if test ${enableval} != "disable"
+ then
++ if test ${enableval} = "AMDGPU"
++ then
++ { { echo "$as_me:$LINENO: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&5
++echo "$as_me: error: The AMDGPU target has been renamed to R600, please reconfigure with --enable-experimental-targets=R600" >&2;}
++ { (exit 1); exit 1; }; }
++ enableval="R600"
++ fi
+ TARGETS_TO_BUILD="$enableval $TARGETS_TO_BUILD"
+ fi
+
+@@ -10316,7 +10323,7 @@
+ lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+ lt_status=$lt_dlunknown
+ cat > conftest.$ac_ext <<EOF
+-#line 10317 "configure"
++#line 10326 "configure"
+ #include "confdefs.h"
+
+ #if HAVE_DLFCN_H
+diff -Nur -x .git llvm-3.2.src/include/llvm/IntrinsicsR600.td llvm-r600/include/llvm/IntrinsicsR600.td
+--- llvm-3.2.src/include/llvm/IntrinsicsR600.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/include/llvm/IntrinsicsR600.td 2013-01-25 19:43:56.433383075 +0100
+@@ -0,0 +1,36 @@
++//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines all of the R600-specific intrinsics.
++//
++//===----------------------------------------------------------------------===//
++
++let TargetPrefix = "r600" in {
++
++class R600ReadPreloadRegisterIntrinsic<string name>
++ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
++ GCCBuiltin<name>;
++
++multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
++ def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
++ def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
++ def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
++}
++
++defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_global_size">;
++defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_local_size">;
++defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_ngroups">;
++defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_tgid">;
++defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
++ "__builtin_r600_read_tidig">;
++} // End TargetPrefix = "r600"
+diff -Nur -x .git llvm-3.2.src/include/llvm/Intrinsics.td llvm-r600/include/llvm/Intrinsics.td
+--- llvm-3.2.src/include/llvm/Intrinsics.td 2012-10-20 01:00:20.000000000 +0200
++++ llvm-r600/include/llvm/Intrinsics.td 2013-01-25 19:43:56.426716409 +0100
+@@ -469,3 +469,4 @@
+ include "llvm/IntrinsicsHexagon.td"
+ include "llvm/IntrinsicsNVVM.td"
+ include "llvm/IntrinsicsMips.td"
++include "llvm/IntrinsicsR600.td"
+diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+--- llvm-3.2.src/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2012-11-26 18:01:12.000000000 +0100
++++ llvm-r600/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2013-01-25 19:43:56.720049736 +0100
+@@ -8514,11 +8514,8 @@
+ if (Opcode == ISD::DELETED_NODE &&
+ (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
+ Opcode = Opc;
+- // If not supported by target, bail out.
+- if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
+- TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
+- return SDValue();
+ }
++
+ if (Opc != Opcode)
+ return SDValue();
+
+@@ -8543,6 +8540,10 @@
+ assert(SrcVT != MVT::Other && "Cannot determine source type!");
+
+ EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
++
++ if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
++ return SDValue();
++
+ SmallVector<SDValue, 8> Opnds;
+ for (unsigned i = 0; i != NumInScalars; ++i) {
+ SDValue In = N->getOperand(i);
+diff -Nur -x .git llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+--- llvm-3.2.src/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2012-10-24 19:25:11.000000000 +0200
++++ llvm-r600/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp 2013-01-25 19:43:56.733383069 +0100
+@@ -731,9 +731,10 @@
+ return;
+ }
+ case TargetLowering::Promote: {
+- assert(VT.isVector() && "Unknown legal promote case!");
+- Value = DAG.getNode(ISD::BITCAST, dl,
+- TLI.getTypeToPromoteTo(ISD::STORE, VT), Value);
++ EVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
++ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
++ "Can only promote stores to same size type");
++ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr,
+ ST->getPointerInfo(), isVolatile,
+@@ -889,10 +890,9 @@
+ break;
+ }
+ case TargetLowering::Promote: {
+- // Only promote a load of vector type to another.
+- assert(VT.isVector() && "Cannot promote this load!");
+- // Change base type to a different vector type.
+ EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
++ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
++ "Can only promote loads to same size type");
+
+ SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+diff -Nur -x .git llvm-3.2.src/lib/Target/LLVMBuild.txt llvm-r600/lib/Target/LLVMBuild.txt
+--- llvm-3.2.src/lib/Target/LLVMBuild.txt 2012-07-16 20:19:46.000000000 +0200
++++ llvm-r600/lib/Target/LLVMBuild.txt 2013-01-25 19:43:57.173383060 +0100
+@@ -16,7 +16,7 @@
+ ;===------------------------------------------------------------------------===;
+
+ [common]
+-subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
++subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC R600 Sparc X86 XCore
+
+ ; This is a special group whose required libraries are extended (by llvm-build)
+ ; with the best execution engine (the native JIT, if available, or the
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.cpp 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,138 @@
++//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer -------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++///
++/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
++/// code. When passed an MCAsmStreamer it prints assembly and when passed
++/// an MCObjectStreamer it outputs binary code.
++//
++//===----------------------------------------------------------------------===//
++//
++
++
++#include "AMDGPUAsmPrinter.h"
++#include "AMDGPU.h"
++#include "SIMachineFunctionInfo.h"
++#include "SIRegisterInfo.h"
++#include "llvm/MC/MCStreamer.h"
++#include "llvm/Target/TargetLoweringObjectFile.h"
++#include "llvm/Support/TargetRegistry.h"
++
++using namespace llvm;
++
++
++static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
++ MCStreamer &Streamer) {
++ return new AMDGPUAsmPrinter(tm, Streamer);
++}
++
++extern "C" void LLVMInitializeR600AsmPrinter() {
++ TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
++}
++
++/// We need to override this function so we can avoid
++/// the call to EmitFunctionHeader(), which the MCPureStreamer can't handle.
++bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
++ const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
++ if (STM.dumpCode()) {
++#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
++ MF.dump();
++#endif
++ }
++ SetupMachineFunction(MF);
++ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
++ if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ EmitProgramInfo(MF);
++ }
++ EmitFunctionBody();
++ return false;
++}
++
++void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
++ unsigned MaxSGPR = 0;
++ unsigned MaxVGPR = 0;
++ bool VCCUsed = false;
++ const SIRegisterInfo * RI =
++ static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
++
++ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
++ BB != BB_E; ++BB) {
++ MachineBasicBlock &MBB = *BB;
++ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
++ I != E; ++I) {
++ MachineInstr &MI = *I;
++
++ unsigned numOperands = MI.getNumOperands();
++ for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
++ MachineOperand & MO = MI.getOperand(op_idx);
++ unsigned maxUsed;
++ unsigned width = 0;
++ bool isSGPR = false;
++ unsigned reg;
++ unsigned hwReg;
++ if (!MO.isReg()) {
++ continue;
++ }
++ reg = MO.getReg();
++ if (reg == AMDGPU::VCC) {
++ VCCUsed = true;
++ continue;
++ }
++ switch (reg) {
++ default: break;
++ case AMDGPU::EXEC:
++ case AMDGPU::SI_LITERAL_CONSTANT:
++ case AMDGPU::SREG_LIT_0:
++ case AMDGPU::M0:
++ continue;
++ }
++
++ if (AMDGPU::SReg_32RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 1;
++ } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 1;
++ } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 2;
++ } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 2;
++ } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 4;
++ } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
++ isSGPR = false;
++ width = 4;
++ } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
++ isSGPR = true;
++ width = 8;
++ } else {
++ assert(!"Unknown register class");
++ }
++ hwReg = RI->getEncodingValue(reg);
++ maxUsed = hwReg + width - 1;
++ if (isSGPR) {
++ MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
++ } else {
++ MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
++ }
++ }
++ }
++ }
++ if (VCCUsed) {
++ MaxSGPR += 2;
++ }
++ SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
++ OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
++ OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
++ OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUAsmPrinter.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUAsmPrinter.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,44 @@
++//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code -------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief AMDGPU Assembly printer class.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_ASMPRINTER_H
++#define AMDGPU_ASMPRINTER_H
++
++#include "llvm/CodeGen/AsmPrinter.h"
++
++namespace llvm {
++
++class AMDGPUAsmPrinter : public AsmPrinter {
++
++public:
++ explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
++ : AsmPrinter(TM, Streamer) { }
++
++ virtual bool runOnMachineFunction(MachineFunction &MF);
++
++ virtual const char *getPassName() const {
++ return "AMDGPU Assembly Printer";
++ }
++
++ /// \brief Emit register usage information so that the GPU driver
++ /// can correctly setup the GPU state.
++ void EmitProgramInfo(MachineFunction &MF);
++
++ /// Implemented in AMDGPUMCInstLower.cpp
++ virtual void EmitInstruction(const MachineInstr *MI);
++};
++
++} // End namespace llvm
++
++#endif //AMDGPU_ASMPRINTER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUCodeEmitter.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUCodeEmitter.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,49 @@
++//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief CodeEmitter interface for R600 and SI codegen.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUCODEEMITTER_H
++#define AMDGPUCODEEMITTER_H
++
++namespace llvm {
++
++class AMDGPUCodeEmitter {
++public:
++ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
++ virtual uint64_t getMachineOpValue(const MachineInstr &MI,
++ const MachineOperand &MO) const { return 0; }
++ virtual unsigned GPR4AlignEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual uint64_t VOPPostEncode(const MachineInstr &MI,
++ uint64_t Value) const {
++ return Value;
++ }
++ virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
++ unsigned OpNo) const {
++ return 0;
++ }
++ virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
++ const {
++ return 0;
++ }
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUCODEEMITTER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUConvertToISA.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUConvertToISA.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,62 @@
++//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief This pass lowers AMDIL machine instructions to the appropriate
++/// hardware instructions.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPU.h"
++#include "AMDGPUInstrInfo.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++
++using namespace llvm;
++
++namespace {
++
++class AMDGPUConvertToISAPass : public MachineFunctionPass {
++
++private:
++ static char ID;
++ TargetMachine &TM;
++
++public:
++ AMDGPUConvertToISAPass(TargetMachine &tm) :
++ MachineFunctionPass(ID), TM(tm) { }
++
++ virtual bool runOnMachineFunction(MachineFunction &MF);
++
++ virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
++
++};
++
++} // End anonymous namespace
++
++char AMDGPUConvertToISAPass::ID = 0;
++
++FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
++ return new AMDGPUConvertToISAPass(tm);
++}
++
++bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
++ const AMDGPUInstrInfo * TII =
++ static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
++
++ for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
++ BB != BB_E; ++BB) {
++ MachineBasicBlock &MBB = *BB;
++ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
++ I != E; ++I) {
++ MachineInstr &MI = *I;
++ TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
++ }
++ }
++ return false;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.h llvm-r600/lib/Target/R600/AMDGPU.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPU.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPU.h 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,51 @@
++//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_H
++#define AMDGPU_H
++
++#include "AMDGPUTargetMachine.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/Target/TargetMachine.h"
++
++namespace llvm {
++
++class FunctionPass;
++class AMDGPUTargetMachine;
++
++// R600 Passes
++FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
++FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
++FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
++
++// SI Passes
++FunctionPass *createSIAnnotateControlFlowPass();
++FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
++FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
++FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
++FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
++FunctionPass *createSIInsertWaits(TargetMachine &tm);
++
++// Passes common to R600 and SI
++Pass *createAMDGPUStructurizeCFGPass();
++FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
++
++} // End namespace llvm
++
++namespace ShaderType {
++ enum Type {
++ PIXEL = 0,
++ VERTEX = 1,
++ GEOMETRY = 2,
++ COMPUTE = 3
++ };
++}
++
++#endif // AMDGPU_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,257 @@
++//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Implementation of the TargetInstrInfo class that is common to all
++/// AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUTargetMachine.h"
++#include "AMDIL.h"
++#include "llvm/CodeGen/MachineFrameInfo.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++
++#define GET_INSTRINFO_CTOR
++#include "AMDGPUGenInstrInfo.inc"
++
++using namespace llvm;
++
++AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
++ : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
++
++const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
++ return RI;
++}
++
++bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
++ unsigned &SrcReg, unsigned &DstReg,
++ unsigned &SubIdx) const {
++// TODO: Implement this function
++ return false;
++}
++
++unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++
++unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++
++bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return false;
++}
++unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const {
++// TODO: Implement this function
++ return false;
++}
++
++MachineInstr *
++AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
++ MachineBasicBlock::iterator &MBBI,
++ LiveVariables *LV) const {
++// TODO: Implement this function
++ return NULL;
++}
++bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
++ MachineBasicBlock &MBB) const {
++ while (iter != MBB.end()) {
++ switch (iter->getOpcode()) {
++ default:
++ break;
++ case AMDGPU::BRANCH_COND_i32:
++ case AMDGPU::BRANCH_COND_f32:
++ case AMDGPU::BRANCH:
++ return true;
++ };
++ ++iter;
++ }
++ return false;
++}
++
++MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
++ MachineBasicBlock::iterator tmp = MBB->end();
++ if (!MBB->size()) {
++ return MBB->end();
++ }
++ while (--tmp) {
++ if (tmp->getOpcode() == AMDGPU::ENDLOOP
++ || tmp->getOpcode() == AMDGPU::ENDIF
++ || tmp->getOpcode() == AMDGPU::ELSE) {
++ if (tmp == MBB->begin()) {
++ return tmp;
++ } else {
++ continue;
++ }
++ } else {
++ return ++tmp;
++ }
++ }
++ return MBB->end();
++}
++
++void
++AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned SrcReg, bool isKill,
++ int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const {
++ assert(!"Not Implemented");
++}
++
++void
++AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned DestReg, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const {
++ assert(!"Not Implemented");
++}
++
++MachineInstr *
++AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ int FrameIndex) const {
++// TODO: Implement this function
++ return 0;
++}
++MachineInstr*
++AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ MachineInstr *LoadMI) const {
++ // TODO: Implement this function
++ return 0;
++}
++bool
++AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops) const {
++ // TODO: Implement this function
++ return false;
++}
++bool
++AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
++ unsigned Reg, bool UnfoldLoad,
++ bool UnfoldStore,
++ SmallVectorImpl<MachineInstr*> &NewMIs) const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool
++AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
++ SmallVectorImpl<SDNode*> &NewNodes) const {
++ // TODO: Implement this function
++ return false;
++}
++
++unsigned
++AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
++ bool UnfoldLoad, bool UnfoldStore,
++ unsigned *LoadRegIndex) const {
++ // TODO: Implement this function
++ return 0;
++}
++
++bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
++ int64_t Offset1, int64_t Offset2,
++ unsigned NumLoads) const {
++ assert(Offset2 > Offset1
++ && "Second offset should be larger than first offset!");
++ // If we have less than 16 loads in a row, and the offsets are within 16,
++ // then schedule together.
++ // TODO: Make the loads schedule near if it fits in a cacheline
++ return (NumLoads < 16 && (Offset2 - Offset1) < 16);
++}
++
++bool
++AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
++ const {
++ // TODO: Implement this function
++ return true;
++}
++void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI) const {
++ // TODO: Implement this function
++}
++
++bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
++ // TODO: Implement this function
++ return false;
++}
++bool
++AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
++ const SmallVectorImpl<MachineOperand> &Pred2)
++ const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
++ std::vector<MachineOperand> &Pred) const {
++ // TODO: Implement this function
++ return false;
++}
++
++bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
++ // TODO: Implement this function
++ return MI->getDesc().isPredicable();
++}
++
++bool
++AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
++ // TODO: Implement this function
++ return true;
++}
++
++void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
++ DebugLoc DL) const {
++ MachineRegisterInfo &MRI = MF.getRegInfo();
++ const AMDGPURegisterInfo & RI = getRegisterInfo();
++
++ for (unsigned i = 0; i < MI.getNumOperands(); i++) {
++ MachineOperand &MO = MI.getOperand(i);
++ // Convert dst regclass to one that is supported by the ISA
++ if (MO.isReg() && MO.isDef()) {
++ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
++ const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg());
++ const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass);
++
++ assert(newRegClass);
++
++ MRI.setRegClass(MO.getReg(), newRegClass);
++ }
++ }
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,149 @@
++//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Contains the definition of a TargetInstrInfo class that is common
++/// to all AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUINSTRUCTIONINFO_H
++#define AMDGPUINSTRUCTIONINFO_H
++
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUInstrInfo.h"
++#include "llvm/Target/TargetInstrInfo.h"
++
++#include <map>
++
++#define GET_INSTRINFO_HEADER
++#define GET_INSTRINFO_ENUM
++#include "AMDGPUGenInstrInfo.inc"
++
++#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
++#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
++#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
++#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
++
++namespace llvm {
++
++class AMDGPUTargetMachine;
++class MachineFunction;
++class MachineInstr;
++class MachineInstrBuilder;
++
++class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
++private:
++ const AMDGPURegisterInfo RI;
++ TargetMachine &TM;
++ bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
++ MachineBasicBlock &MBB) const;
++public:
++ explicit AMDGPUInstrInfo(TargetMachine &tm);
++
++ virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
++
++ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
++ unsigned &DstReg, unsigned &SubIdx) const;
++
++ unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
++ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const;
++ bool hasLoadFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const;
++ unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
++ unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
++ int &FrameIndex) const;
++ bool hasStoreFromStackSlot(const MachineInstr *MI,
++ const MachineMemOperand *&MMO,
++ int &FrameIndex) const;
++
++ MachineInstr *
++ convertToThreeAddress(MachineFunction::iterator &MFI,
++ MachineBasicBlock::iterator &MBBI,
++ LiveVariables *LV) const;
++
++
++ virtual void copyPhysReg(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI, DebugLoc DL,
++ unsigned DestReg, unsigned SrcReg,
++ bool KillSrc) const = 0;
++
++ void storeRegToStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned SrcReg, bool isKill, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const;
++ void loadRegFromStackSlot(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI,
++ unsigned DestReg, int FrameIndex,
++ const TargetRegisterClass *RC,
++ const TargetRegisterInfo *TRI) const;
++
++protected:
++ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ int FrameIndex) const;
++ MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
++ MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops,
++ MachineInstr *LoadMI) const;
++public:
++ bool canFoldMemoryOperand(const MachineInstr *MI,
++ const SmallVectorImpl<unsigned> &Ops) const;
++ bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
++ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
++ SmallVectorImpl<MachineInstr *> &NewMIs) const;
++ bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
++ SmallVectorImpl<SDNode *> &NewNodes) const;
++ unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
++ bool UnfoldLoad, bool UnfoldStore,
++ unsigned *LoadRegIndex = 0) const;
++ bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
++ int64_t Offset1, int64_t Offset2,
++ unsigned NumLoads) const;
++
++ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
++ void insertNoop(MachineBasicBlock &MBB,
++ MachineBasicBlock::iterator MI) const;
++ bool isPredicated(const MachineInstr *MI) const;
++ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
++ const SmallVectorImpl<MachineOperand> &Pred2) const;
++ bool DefinesPredicate(MachineInstr *MI,
++ std::vector<MachineOperand> &Pred) const;
++ bool isPredicable(MachineInstr *MI) const;
++ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
++
++ // Helper functions that check the opcode for status information
++ bool isLoadInst(llvm::MachineInstr *MI) const;
++ bool isExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isSExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isZExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isAExtLoadInst(llvm::MachineInstr *MI) const;
++ bool isStoreInst(llvm::MachineInstr *MI) const;
++ bool isTruncStoreInst(llvm::MachineInstr *MI) const;
++
++ virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned DstReg,
++ int64_t Imm) const = 0;
++ virtual unsigned getIEQOpcode() const = 0;
++ virtual bool isMov(unsigned opcode) const = 0;
++
++ /// \brief Convert the AMDIL MachineInstr to a supported ISA
++ /// MachineInstr
++ virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
++ DebugLoc DL) const;
++
++};
++
++} // End llvm namespace
++
++#endif // AMDGPUINSTRINFO_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstrInfo.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstrInfo.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,74 @@
++//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains DAG node definitions for the AMDGPU target.
++//
++//===----------------------------------------------------------------------===//
++
++//===----------------------------------------------------------------------===//
++// AMDGPU DAG Profiles
++//===----------------------------------------------------------------------===//
++
++def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
++ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
++]>;
++
++//===----------------------------------------------------------------------===//
++// AMDGPU DAG Nodes
++//
++
++// out = ((a << 32) | b) >> c
++//
++// Can be used to optimize rotl:
++// rotl(a, b) = bitalign(a, a, 32 - b)
++def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
++
++// This argument to this node is a dword address.
++def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
++
++// out = a - floor(a)
++def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
++
++// out = max(a, b) a and b are floats
++def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = max(a, b) a and b are signed ints
++def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = max(a, b) a and b are unsigned ints
++def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are floats
++def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are signed ints
++def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// out = min(a, b) a and b are unsigned ints
++def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
++ [SDNPCommutative, SDNPAssociative]
++>;
++
++// urecip - This operation is a helper for integer division, it returns the
++// result of 1 / a as a fractional unsigned integer.
++// out = (2^32 / a) + e
++// e is rounding error
++def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
++
++def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td llvm-r600/lib/Target/R600/AMDGPUInstructions.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUInstructions.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUInstructions.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,190 @@
++//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file contains instruction defs that are common to all hw codegen
++// targets.
++//
++//===----------------------------------------------------------------------===//
++
++class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> : Instruction {
++ field bits<16> AMDILOp = 0;
++ field bits<3> Gen = 0;
++
++ let Namespace = "AMDGPU";
++ let OutOperandList = outs;
++ let InOperandList = ins;
++ let AsmString = asm;
++ let Pattern = pattern;
++ let Itinerary = NullALU;
++ let TSFlags{42-40} = Gen;
++ let TSFlags{63-48} = AMDILOp;
++}
++
++class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
++ : AMDGPUInst<outs, ins, asm, pattern> {
++
++ field bits<32> Inst = 0xffffffff;
++
++}
++
++def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
++
++def COND_EQ : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOEQ: case ISD::SETUEQ:
++ case ISD::SETEQ: return true;}}}]
++>;
++
++def COND_NE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETONE: case ISD::SETUNE:
++ case ISD::SETNE: return true;}}}]
++>;
++def COND_GT : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOGT: case ISD::SETUGT:
++ case ISD::SETGT: return true;}}}]
++>;
++
++def COND_GE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOGE: case ISD::SETUGE:
++ case ISD::SETGE: return true;}}}]
++>;
++
++def COND_LT : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOLT: case ISD::SETULT:
++ case ISD::SETLT: return true;}}}]
++>;
++
++def COND_LE : PatLeaf <
++ (cond),
++ [{switch(N->get()){{default: return false;
++ case ISD::SETOLE: case ISD::SETULE:
++ case ISD::SETLE: return true;}}}]
++>;
++
++//===----------------------------------------------------------------------===//
++// Load/Store Pattern Fragments
++//===----------------------------------------------------------------------===//
++
++def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr), [{
++ return isGlobalLoad(dyn_cast<LoadSDNode>(N));
++}]>;
++
++class Constants {
++int TWO_PI = 0x40c90fdb;
++int PI = 0x40490fdb;
++int TWO_PI_INV = 0x3e22f983;
++}
++def CONST : Constants;
++
++def FP_ZERO : PatLeaf <
++ (fpimm),
++ [{return N->getValueAPF().isZero();}]
++>;
++
++def FP_ONE : PatLeaf <
++ (fpimm),
++ [{return N->isExactlyValue(1.0);}]
++>;
++
++let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1 in {
++
++class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "CLAMP $dst, $src0",
++ [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
++>;
++
++class FABS <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "FABS $dst, $src0",
++ [(set rc:$dst, (fabs rc:$src0))]
++>;
++
++class FNEG <RegisterClass rc> : AMDGPUShaderInst <
++ (outs rc:$dst),
++ (ins rc:$src0),
++ "FNEG $dst, $src0",
++ [(set rc:$dst, (fneg rc:$src0))]
++>;
++
++def SHADER_TYPE : AMDGPUShaderInst <
++ (outs),
++ (ins i32imm:$type),
++ "SHADER_TYPE $type",
++ [(int_AMDGPU_shader_type imm:$type)]
++>;
++
++} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
++
++/* Generic helper patterns for intrinsics */
++/* -------------------------------------- */
++
++class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul,
++ RegisterClass rc> : Pat <
++ (fpow rc:$src0, rc:$src1),
++ (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
++>;
++
++/* Other helper patterns */
++/* --------------------- */
++
++/* Extract element pattern */
++class Extract_Element <ValueType sub_type, ValueType vec_type,
++ RegisterClass vec_class, int sub_idx,
++ SubRegIndex sub_reg>: Pat<
++ (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
++ (EXTRACT_SUBREG vec_class:$src, sub_reg)
++>;
++
++/* Insert element pattern */
++class Insert_Element <ValueType elem_type, ValueType vec_type,
++ RegisterClass elem_class, RegisterClass vec_class,
++ int sub_idx, SubRegIndex sub_reg> : Pat <
++
++ (vec_type (vector_insert (vec_type vec_class:$vec),
++ (elem_type elem_class:$elem), sub_idx)),
++ (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
++>;
++
++// Vector Build pattern
++class Vector_Build <ValueType vecType, RegisterClass vectorClass,
++ ValueType elemType, RegisterClass elemClass> : Pat <
++ (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
++ (elemType elemClass:$z), (elemType elemClass:$w))),
++ (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
++ (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
++ elemClass:$z, sel_z), elemClass:$w, sel_w)
++>;
++
++// bitconvert pattern
++class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
++ (dt (bitconvert (st rc:$src0))),
++ (dt rc:$src0)
++>;
++
++class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
++ (vt (AMDGPUdwordaddr (vt rc:$addr))),
++ (vt rc:$addr)
++>;
++
++include "R600Instructions.td"
++
++include "SIInstrInfo.td"
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPUIntrinsics.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUIntrinsics.td 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,62 @@
++//===-- AMDGPUIntrinsics.td - Common intrinsics -*- tablegen -*-----------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This file defines intrinsics that are used by all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++let TargetPrefix = "AMDGPU", isTarget = 1 in {
++
++ def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>;
++ def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>;
++
++ def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
++ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
++ def int_AMDGPU_kilp : Intrinsic<[], [], []>;
++ def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
++ def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
++ def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
++
++ def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
++}
++
++let TargetPrefix = "TGSI", isTarget = 1 in {
++
++ def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
++}
++
++include "SIIntrinsics.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.cpp 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,418 @@
++//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief This is the parent TargetLowering class for hardware code gen
++/// targets.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUISelLowering.h"
++#include "AMDILIntrinsicInfo.h"
++#include "llvm/CodeGen/MachineFunction.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/CodeGen/SelectionDAG.h"
++#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
++
++using namespace llvm;
++
++AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
++ TargetLowering(TM, new TargetLoweringObjectFileELF()) {
++
++ // Initialize target lowering borrowed from AMDIL
++ InitAMDILLowering();
++
++ // We need to custom lower some of the intrinsics
++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
++
++ // Library functions. These default to Expand, but we have instructions
++ // for them.
++ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
++ setOperationAction(ISD::FEXP2, MVT::f32, Legal);
++ setOperationAction(ISD::FPOW, MVT::f32, Legal);
++ setOperationAction(ISD::FLOG2, MVT::f32, Legal);
++ setOperationAction(ISD::FABS, MVT::f32, Legal);
++ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
++ setOperationAction(ISD::FRINT, MVT::f32, Legal);
++
++ // Lower floating point store/load to integer store/load to reduce the number
++ // of patterns in tablegen.
++ setOperationAction(ISD::STORE, MVT::f32, Promote);
++ AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
++
++ setOperationAction(ISD::STORE, MVT::v4f32, Promote);
++ AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
++
++ setOperationAction(ISD::LOAD, MVT::f32, Promote);
++ AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
++
++ setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
++ AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
++
++ setOperationAction(ISD::UDIV, MVT::i32, Expand);
++ setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
++ setOperationAction(ISD::UREM, MVT::i32, Expand);
++}
++
++//===---------------------------------------------------------------------===//
++// TargetLowering Callbacks
++//===---------------------------------------------------------------------===//
++
++SDValue AMDGPUTargetLowering::LowerFormalArguments(
++ SDValue Chain,
++ CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::InputArg> &Ins,
++ DebugLoc DL, SelectionDAG &DAG,
++ SmallVectorImpl<SDValue> &InVals) const {
++ for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
++ InVals.push_back(SDValue());
++ }
++ return Chain;
++}
++
++SDValue AMDGPUTargetLowering::LowerReturn(
++ SDValue Chain,
++ CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::OutputArg> &Outs,
++ const SmallVectorImpl<SDValue> &OutVals,
++ DebugLoc DL, SelectionDAG &DAG) const {
++ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
++}
++
++//===---------------------------------------------------------------------===//
++// Target specific lowering
++//===---------------------------------------------------------------------===//
++
++SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
++ const {
++ switch (Op.getOpcode()) {
++ default:
++ Op.getNode()->dump();
++ assert(0 && "Custom lowering code for this"
++ "instruction is not implemented yet!");
++ break;
++ // AMDIL DAG lowering
++ case ISD::SDIV: return LowerSDIV(Op, DAG);
++ case ISD::SREM: return LowerSREM(Op, DAG);
++ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
++ case ISD::BRCOND: return LowerBRCOND(Op, DAG);
++ // AMDGPU DAG lowering
++ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
++ case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
++ }
++ return Op;
++}
++
++SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
++ SelectionDAG &DAG) const {
++ unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ switch (IntrinsicID) {
++ default: return Op;
++ case AMDGPUIntrinsic::AMDIL_abs:
++ return LowerIntrinsicIABS(Op, DAG);
++ case AMDGPUIntrinsic::AMDIL_exp:
++ return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
++ case AMDGPUIntrinsic::AMDGPU_lrp:
++ return LowerIntrinsicLRP(Op, DAG);
++ case AMDGPUIntrinsic::AMDIL_fraction:
++ return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
++ case AMDGPUIntrinsic::AMDIL_mad:
++ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
++ Op.getOperand(2), Op.getOperand(3));
++ case AMDGPUIntrinsic::AMDIL_max:
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_imax:
++ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_umax:
++ return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDIL_min:
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_imin:
++ return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDGPU_umin:
++ return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
++ Op.getOperand(2));
++ case AMDGPUIntrinsic::AMDIL_round_nearest:
++ return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
++ }
++}
++
++///IABS(a) = SMAX(sub(0, a), a)
++SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
++ SelectionDAG &DAG) const {
++
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
++ Op.getOperand(1));
++
++ return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
++}
++
++/// Linear Interpolation
++/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
++SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++ SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
++ DAG.getConstantFP(1.0f, MVT::f32),
++ Op.getOperand(1));
++ SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
++ Op.getOperand(3));
++ return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
++ Op.getOperand(2),
++ OneSubAC);
++}
++
++/// \brief Generate Min/Max node
++SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ SDValue LHS = Op.getOperand(0);
++ SDValue RHS = Op.getOperand(1);
++ SDValue True = Op.getOperand(2);
++ SDValue False = Op.getOperand(3);
++ SDValue CC = Op.getOperand(4);
++
++ if (VT != MVT::f32 ||
++ !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
++ return SDValue();
++ }
++
++ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
++ switch (CCOpcode) {
++ case ISD::SETOEQ:
++ case ISD::SETONE:
++ case ISD::SETUNE:
++ case ISD::SETNE:
++ case ISD::SETUEQ:
++ case ISD::SETEQ:
++ case ISD::SETFALSE:
++ case ISD::SETFALSE2:
++ case ISD::SETTRUE:
++ case ISD::SETTRUE2:
++ case ISD::SETUO:
++ case ISD::SETO:
++ assert(0 && "Operation should already be optimised !");
++ case ISD::SETULE:
++ case ISD::SETULT:
++ case ISD::SETOLE:
++ case ISD::SETOLT:
++ case ISD::SETLE:
++ case ISD::SETLT: {
++ if (LHS == True)
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
++ else
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
++ }
++ case ISD::SETGT:
++ case ISD::SETGE:
++ case ISD::SETUGE:
++ case ISD::SETOGE:
++ case ISD::SETUGT:
++ case ISD::SETOGT: {
++ if (LHS == True)
++ return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
++ else
++ return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
++ }
++ case ISD::SETCC_INVALID:
++ assert(0 && "Invalid setcc condcode !");
++ }
++ return Op;
++}
++
++
++
++SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
++ SelectionDAG &DAG) const {
++ DebugLoc DL = Op.getDebugLoc();
++ EVT VT = Op.getValueType();
++
++ SDValue Num = Op.getOperand(0);
++ SDValue Den = Op.getOperand(1);
++
++ SmallVector<SDValue, 8> Results;
++
++ // RCP = URECIP(Den) = 2^32 / Den + e
++ // e is rounding error.
++ SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
++
++ // RCP_LO = umulo(RCP, Den)
++ SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
++
++ // RCP_HI = mulhu (RCP, Den)
++ SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
++
++ // NEG_RCP_LO = -RCP_LO
++ SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
++ RCP_LO);
++
++ // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
++ SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
++ NEG_RCP_LO, RCP_LO,
++ ISD::SETEQ);
++ // Calculate the rounding error from the URECIP instruction
++ // E = mulhu(ABS_RCP_LO, RCP)
++ SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
++
++ // RCP_A_E = RCP + E
++ SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
++
++ // RCP_S_E = RCP - E
++ SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
++
++ // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
++ SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
++ RCP_A_E, RCP_S_E,
++ ISD::SETEQ);
++ // Quotient = mulhu(Tmp0, Num)
++ SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
++
++ // Num_S_Remainder = Quotient * Den
++ SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
++
++ // Remainder = Num - Num_S_Remainder
++ SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
++
++ // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
++ SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
++ DAG.getConstant(-1, VT),
++ DAG.getConstant(0, VT),
++ ISD::SETGE);
++ // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
++ SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
++ DAG.getConstant(0, VT),
++ DAG.getConstant(-1, VT),
++ DAG.getConstant(0, VT),
++ ISD::SETGE);
++ // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
++ SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
++ Remainder_GE_Zero);
++
++ // Calculate Division result:
++
++ // Quotient_A_One = Quotient + 1
++ SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
++ DAG.getConstant(1, VT));
++
++ // Quotient_S_One = Quotient - 1
++ SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
++ DAG.getConstant(1, VT));
++
++ // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
++ SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
++ Quotient, Quotient_A_One, ISD::SETEQ);
++
++ // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
++ Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
++ Quotient_S_One, Div, ISD::SETEQ);
++
++ // Calculate Rem result:
++
++ // Remainder_S_Den = Remainder - Den
++ SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
++
++ // Remainder_A_Den = Remainder + Den
++ SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
++
++ // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
++ SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
++ Remainder, Remainder_S_Den, ISD::SETEQ);
++
++ // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
++ Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
++ Remainder_A_Den, Rem, ISD::SETEQ);
++ SDValue Ops[2];
++ Ops[0] = Div;
++ Ops[1] = Rem;
++ return DAG.getMergeValues(Ops, 2, DL);
++}
++
++//===----------------------------------------------------------------------===//
++// Helper functions
++//===----------------------------------------------------------------------===//
++
++bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
++ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
++ return CFP->isExactlyValue(1.0);
++ }
++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
++ return C->isAllOnesValue();
++ }
++ return false;
++}
++
++bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
++ if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
++ return CFP->getValueAPF().isZero();
++ }
++ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
++ return C->isNullValue();
++ }
++ return false;
++}
++
++SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
++ const TargetRegisterClass *RC,
++ unsigned Reg, EVT VT) const {
++ MachineFunction &MF = DAG.getMachineFunction();
++ MachineRegisterInfo &MRI = MF.getRegInfo();
++ unsigned VirtualRegister;
++ if (!MRI.isLiveIn(Reg)) {
++ VirtualRegister = MRI.createVirtualRegister(RC);
++ MRI.addLiveIn(Reg, VirtualRegister);
++ } else {
++ VirtualRegister = MRI.getLiveInVirtReg(Reg);
++ }
++ return DAG.getRegister(VirtualRegister, VT);
++}
++
++#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
++
++const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
++ switch (Opcode) {
++ default: return 0;
++ // AMDIL DAG nodes
++ NODE_NAME_CASE(MAD);
++ NODE_NAME_CASE(CALL);
++ NODE_NAME_CASE(UMUL);
++ NODE_NAME_CASE(DIV_INF);
++ NODE_NAME_CASE(RET_FLAG);
++ NODE_NAME_CASE(BRANCH_COND);
++
++ // AMDGPU DAG nodes
++ NODE_NAME_CASE(DWORDADDR)
++ NODE_NAME_CASE(FRACT)
++ NODE_NAME_CASE(FMAX)
++ NODE_NAME_CASE(SMAX)
++ NODE_NAME_CASE(UMAX)
++ NODE_NAME_CASE(FMIN)
++ NODE_NAME_CASE(SMIN)
++ NODE_NAME_CASE(UMIN)
++ NODE_NAME_CASE(URECIP)
++ NODE_NAME_CASE(INTERP)
++ NODE_NAME_CASE(INTERP_P0)
++ NODE_NAME_CASE(EXPORT)
++ NODE_NAME_CASE(CONST_ADDRESS)
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h llvm-r600/lib/Target/R600/AMDGPUISelLowering.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUISelLowering.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUISelLowering.h 2013-01-25 19:43:57.426716388 +0100
+@@ -0,0 +1,145 @@
++//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Interface definition of the TargetLowering class that is common
++/// to all AMD GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUISELLOWERING_H
++#define AMDGPUISELLOWERING_H
++
++#include "llvm/Target/TargetLowering.h"
++
++namespace llvm {
++
++class MachineRegisterInfo;
++
++class AMDGPUTargetLowering : public TargetLowering {
++private:
++ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
++
++protected:
++
++ /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
++ /// MachineFunction.
++ ///
++ /// \returns a RegisterSDNode representing Reg.
++ SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
++ unsigned Reg, EVT VT) const;
++
++ bool isHWTrueValue(SDValue Op) const;
++ bool isHWFalseValue(SDValue Op) const;
++
++public:
++ AMDGPUTargetLowering(TargetMachine &TM);
++
++ virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::InputArg> &Ins,
++ DebugLoc DL, SelectionDAG &DAG,
++ SmallVectorImpl<SDValue> &InVals) const;
++
++ virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
++ bool isVarArg,
++ const SmallVectorImpl<ISD::OutputArg> &Outs,
++ const SmallVectorImpl<SDValue> &OutVals,
++ DebugLoc DL, SelectionDAG &DAG) const;
++
++ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
++ virtual const char* getTargetNodeName(unsigned Opcode) const;
++
++// Functions defined in AMDILISelLowering.cpp
++public:
++
++ /// \brief Determine which of the bits specified in \p Mask are known to be
++ /// either zero or one and return them in the \p KnownZero and \p KnownOne
++ /// bitsets.
++ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
++ APInt &KnownZero,
++ APInt &KnownOne,
++ const SelectionDAG &DAG,
++ unsigned Depth = 0) const;
++
++ virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
++ const CallInst &I, unsigned Intrinsic) const;
++
++ /// We want to mark f32/f64 floating point values as legal.
++ bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
++
++ /// We don't want to shrink f64/f32 constants.
++ bool ShouldShrinkFPConstant(EVT VT) const;
++
++private:
++ void InitAMDILLowering();
++ SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
++ EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
++ SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
++ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
++};
++
++namespace AMDGPUISD {
++
++enum {
++ // AMDIL ISD Opcodes
++ FIRST_NUMBER = ISD::BUILTIN_OP_END,
++ MAD, // 32bit Fused Multiply Add instruction
++ CALL, // Function call based on a single integer
++ UMUL, // 32bit unsigned multiplication
++ DIV_INF, // Divide with infinity returned on zero divisor
++ RET_FLAG,
++ BRANCH_COND,
++ // End AMDIL ISD Opcodes
++ BITALIGN,
++ DWORDADDR,
++ FRACT,
++ FMAX,
++ SMAX,
++ UMAX,
++ FMIN,
++ SMIN,
++ UMIN,
++ URECIP,
++ INTERP,
++ INTERP_P0,
++ EXPORT,
++ CONST_ADDRESS,
++ LAST_AMDGPU_ISD_NUMBER
++};
++
++
++} // End namespace AMDGPUISD
++
++namespace SIISD {
++
++enum {
++ SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
++ VCC_AND,
++ VCC_BITCAST
++};
++
++} // End namespace SIISD
++
++} // End namespace llvm
++
++#endif // AMDGPUISELLOWERING_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.cpp 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,83 @@
++//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst -----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Code to lower AMDGPU MachineInstrs to their corresponding MCInst.
++//
++//===----------------------------------------------------------------------===//
++//
++
++#include "AMDGPUMCInstLower.h"
++#include "AMDGPUAsmPrinter.h"
++#include "R600InstrInfo.h"
++#include "llvm/CodeGen/MachineBasicBlock.h"
++#include "llvm/CodeGen/MachineInstr.h"
++#include "llvm/Constants.h"
++#include "llvm/MC/MCInst.h"
++#include "llvm/MC/MCStreamer.h"
++#include "llvm/MC/MCExpr.h"
++#include "llvm/Support/ErrorHandling.h"
++
++using namespace llvm;
++
++AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
++ Ctx(ctx)
++{ }
++
++void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
++ OutMI.setOpcode(MI->getOpcode());
++
++ for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
++ const MachineOperand &MO = MI->getOperand(i);
++
++ MCOperand MCOp;
++ switch (MO.getType()) {
++ default:
++ llvm_unreachable("unknown operand type");
++ case MachineOperand::MO_FPImmediate: {
++ const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
++ assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
++ "Only floating point immediates are supported at the moment.");
++ MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
++ break;
++ }
++ case MachineOperand::MO_Immediate:
++ MCOp = MCOperand::CreateImm(MO.getImm());
++ break;
++ case MachineOperand::MO_Register:
++ MCOp = MCOperand::CreateReg(MO.getReg());
++ break;
++ case MachineOperand::MO_MachineBasicBlock:
++ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
++ MO.getMBB()->getSymbol(), Ctx));
++ }
++ OutMI.addOperand(MCOp);
++ }
++}
++
++void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
++ AMDGPUMCInstLower MCInstLowering(OutContext);
++
++ if (MI->isBundle()) {
++ const MachineBasicBlock *MBB = MI->getParent();
++ MachineBasicBlock::const_instr_iterator I = MI;
++ ++I;
++ while (I != MBB->end() && I->isInsideBundle()) {
++ MCInst MCBundleInst;
++ const MachineInstr *BundledInst = I;
++ MCInstLowering.lower(BundledInst, MCBundleInst);
++ OutStreamer.EmitInstruction(MCBundleInst);
++ ++I;
++ }
++ } else {
++ MCInst TmpInst;
++ MCInstLowering.lower(MI, TmpInst);
++ OutStreamer.EmitInstruction(TmpInst);
++ }
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUMCInstLower.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUMCInstLower.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,34 @@
++//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_MCINSTLOWER_H
++#define AMDGPU_MCINSTLOWER_H
++
++namespace llvm {
++
++class MCInst;
++class MCContext;
++class MachineInstr;
++
++class AMDGPUMCInstLower {
++
++ MCContext &Ctx;
++
++public:
++ AMDGPUMCInstLower(MCContext &ctx);
++
++ /// \brief Lower a MachineInstr to an MCInst
++ void lower(const MachineInstr *MI, MCInst &OutMI) const;
++
++};
++
++} // End namespace llvm
++
++#endif //AMDGPU_MCINSTLOWER_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.cpp 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,51 @@
++//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information -------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Parent TargetRegisterInfo class common to all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPURegisterInfo.h"
++#include "AMDGPUTargetMachine.h"
++
++using namespace llvm;
++
++AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
++ const TargetInstrInfo &tii)
++: AMDGPUGenRegisterInfo(0),
++ TM(tm),
++ TII(tii)
++ { }
++
++//===----------------------------------------------------------------------===//
++// Function handling callbacks - Functions are a seldom used feature of GPUs, so
++// they are not supported at this time.
++//===----------------------------------------------------------------------===//
++
++const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
++
++const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
++ const {
++ return &CalleeSavedReg;
++}
++
++void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
++ int SPAdj,
++ RegScavenger *RS) const {
++ assert(!"Subroutines not supported yet");
++}
++
++unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF) const {
++ assert(!"Subroutines not supported yet");
++ return 0;
++}
++
++#define GET_REGINFO_TARGET_DESC
++#include "AMDGPUGenRegisterInfo.inc"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.h 2013-01-25 19:43:57.430049721 +0100
+@@ -0,0 +1,63 @@
++//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++ -*-----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief TargetRegisterInfo interface that is implemented by all hw codegen
++/// targets.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUREGISTERINFO_H
++#define AMDGPUREGISTERINFO_H
++
++#include "llvm/ADT/BitVector.h"
++#include "llvm/Target/TargetRegisterInfo.h"
++
++#define GET_REGINFO_HEADER
++#define GET_REGINFO_ENUM
++#include "AMDGPUGenRegisterInfo.inc"
++
++namespace llvm {
++
++class AMDGPUTargetMachine;
++class TargetInstrInfo;
++
++struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
++ TargetMachine &TM;
++ const TargetInstrInfo &TII;
++ static const uint16_t CalleeSavedReg;
++
++ AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
++
++ virtual BitVector getReservedRegs(const MachineFunction &MF) const {
++ assert(!"Unimplemented"); return BitVector();
++ }
++
++ /// \param RC is an AMDIL reg class.
++ ///
++ /// \returns The ISA reg class that is equivalent to \p RC.
++ virtual const TargetRegisterClass * getISARegClass(
++ const TargetRegisterClass * RC) const {
++ assert(!"Unimplemented"); return NULL;
++ }
++
++ virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
++ assert(!"Unimplemented"); return NULL;
++ }
++
++ const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
++ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
++ RegScavenger *RS) const;
++ unsigned getFrameRegister(const MachineFunction &MF) const;
++
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUREGISTERINFO_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPURegisterInfo.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPURegisterInfo.td 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,22 @@
++//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// Tablegen register definitions common to all hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++let Namespace = "AMDGPU" in {
++ def sel_x : SubRegIndex;
++ def sel_y : SubRegIndex;
++ def sel_z : SubRegIndex;
++ def sel_w : SubRegIndex;
++}
++
++include "R600RegisterInfo.td"
++include "SIRegisterInfo.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUStructurizeCFG.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUStructurizeCFG.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,714 @@
++//===-- AMDGPUStructurizeCFG.cpp - ------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// The pass implemented in this file transforms the programs control flow
++/// graph into a form that's suitable for code generation on hardware that
++/// implements control flow by execution masking. This currently includes all
++/// AMD GPUs but may as well be useful for other types of hardware.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPU.h"
++#include "llvm/Module.h"
++#include "llvm/ADT/SCCIterator.h"
++#include "llvm/Analysis/RegionIterator.h"
++#include "llvm/Analysis/RegionInfo.h"
++#include "llvm/Analysis/RegionPass.h"
++#include "llvm/Transforms/Utils/SSAUpdater.h"
++
++using namespace llvm;
++
++namespace {
++
++// Definition of the complex types used in this pass.
++
++typedef std::pair<BasicBlock *, Value *> BBValuePair;
++typedef ArrayRef<BasicBlock*> BBVecRef;
++
++typedef SmallVector<RegionNode*, 8> RNVector;
++typedef SmallVector<BasicBlock*, 8> BBVector;
++typedef SmallVector<BBValuePair, 2> BBValueVector;
++
++typedef DenseMap<PHINode *, BBValueVector> PhiMap;
++typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
++typedef DenseMap<BasicBlock *, Value *> BBPredicates;
++typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
++typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
++
++// The name for newly created blocks.
++
++static const char *FlowBlockName = "Flow";
++
++/// @brief Transforms the control flow graph on one single entry/exit region
++/// at a time.
++///
++/// After the transform all "If"/"Then"/"Else" style control flow looks like
++/// this:
++///
++/// \verbatim
++/// 1
++/// ||
++/// | |
++/// 2 |
++/// | /
++/// |/
++/// 3
++/// || Where:
++/// | | 1 = "If" block, calculates the condition
++/// 4 | 2 = "Then" subregion, runs if the condition is true
++/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
++/// |/ 4 = "Else" optional subregion, runs if the condition is false
++/// 5 5 = "End" block, also rejoins the control flow
++/// \endverbatim
++///
++/// Control flow is expressed as a branch where the true exit goes into the
++/// "Then"/"Else" region, while the false exit skips the region
++/// The condition for the optional "Else" region is expressed as a PHI node.
++/// The incoming values of the PHI node are true for the "If" edge and false
++/// for the "Then" edge.
++///
++/// Additionally to that even complicated loops look like this:
++///
++/// \verbatim
++/// 1
++/// ||
++/// | |
++/// 2 ^ Where:
++/// | / 1 = "Entry" block
++/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
++/// 3 3 = "Flow" block, with back edge to entry block
++/// |
++/// \endverbatim
++///
++/// The back edge of the "Flow" block is always on the false side of the branch
++/// while the true side continues the general flow. So the loop condition
++/// consists of a network of PHI nodes where the true incoming values express
++/// breaks and the false values express continue states.
++class AMDGPUStructurizeCFG : public RegionPass {
++
++ static char ID;
++
++ Type *Boolean;
++ ConstantInt *BoolTrue;
++ ConstantInt *BoolFalse;
++ UndefValue *BoolUndef;
++
++ Function *Func;
++ Region *ParentRegion;
++
++ DominatorTree *DT;
++
++ RNVector Order;
++ VisitedMap Visited;
++ PredMap Predicates;
++ BBPhiMap DeletedPhis;
++ BBVector FlowsInserted;
++
++ BasicBlock *LoopStart;
++ BasicBlock *LoopEnd;
++ BBPredicates LoopPred;
++
++ void orderNodes();
++
++ void buildPredicate(BranchInst *Term, unsigned Idx,
++ BBPredicates &Pred, bool Invert);
++
++ void analyzeBlock(BasicBlock *BB);
++
++ void analyzeLoop(BasicBlock *BB, unsigned &LoopIdx);
++
++ void collectInfos();
++
++ bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
++
++ void killTerminator(BasicBlock *BB);
++
++ RegionNode *skipChained(RegionNode *Node);
++
++ void delPhiValues(BasicBlock *From, BasicBlock *To);
++
++ void addPhiValues(BasicBlock *From, BasicBlock *To);
++
++ BasicBlock *getNextFlow(BasicBlock *Prev);
++
++ bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
++
++ BasicBlock *wireFlowBlock(BasicBlock *Prev, RegionNode *Node);
++
++ void createFlow();
++
++ void insertConditions();
++
++ void rebuildSSA();
++
++public:
++ AMDGPUStructurizeCFG():
++ RegionPass(ID) {
++
++ initializeRegionInfoPass(*PassRegistry::getPassRegistry());
++ }
++
++ virtual bool doInitialization(Region *R, RGPassManager &RGM);
++
++ virtual bool runOnRegion(Region *R, RGPassManager &RGM);
++
++ virtual const char *getPassName() const {
++ return "AMDGPU simplify control flow";
++ }
++
++ void getAnalysisUsage(AnalysisUsage &AU) const {
++
++ AU.addRequired<DominatorTree>();
++ AU.addPreserved<DominatorTree>();
++ RegionPass::getAnalysisUsage(AU);
++ }
++
++};
++
++} // end anonymous namespace
++
++char AMDGPUStructurizeCFG::ID = 0;
++
++/// \brief Initialize the types and constants used in the pass
++bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
++ LLVMContext &Context = R->getEntry()->getContext();
++
++ Boolean = Type::getInt1Ty(Context);
++ BoolTrue = ConstantInt::getTrue(Context);
++ BoolFalse = ConstantInt::getFalse(Context);
++ BoolUndef = UndefValue::get(Boolean);
++
++ return false;
++}
++
++/// \brief Build up the general order of nodes
++void AMDGPUStructurizeCFG::orderNodes() {
++ scc_iterator<Region *> I = scc_begin(ParentRegion),
++ E = scc_end(ParentRegion);
++ for (Order.clear(); I != E; ++I) {
++ std::vector<RegionNode *> &Nodes = *I;
++ Order.append(Nodes.begin(), Nodes.end());
++ }
++}
++
++/// \brief Build blocks and loop predicates
++void AMDGPUStructurizeCFG::buildPredicate(BranchInst *Term, unsigned Idx,
++ BBPredicates &Pred, bool Invert) {
++ Value *True = Invert ? BoolFalse : BoolTrue;
++ Value *False = Invert ? BoolTrue : BoolFalse;
++
++ RegionInfo *RI = ParentRegion->getRegionInfo();
++ BasicBlock *BB = Term->getParent();
++
++ // Handle the case where multiple regions start at the same block
++ Region *R = BB != ParentRegion->getEntry() ?
++ RI->getRegionFor(BB) : ParentRegion;
++
++ if (R == ParentRegion) {
++ // It's a top level block in our region
++ Value *Cond = True;
++ if (Term->isConditional()) {
++ BasicBlock *Other = Term->getSuccessor(!Idx);
++
++ if (Visited.count(Other)) {
++ if (!Pred.count(Other))
++ Pred[Other] = False;
++
++ if (!Pred.count(BB))
++ Pred[BB] = True;
++ return;
++ }
++ Cond = Term->getCondition();
++
++ if (Idx != Invert)
++ Cond = BinaryOperator::CreateNot(Cond, "", Term);
++ }
++
++ Pred[BB] = Cond;
++
++ } else if (ParentRegion->contains(R)) {
++ // It's a block in a sub region
++ while(R->getParent() != ParentRegion)
++ R = R->getParent();
++
++ Pred[R->getEntry()] = True;
++
++ } else {
++ // It's a branch from outside into our parent region
++ Pred[BB] = True;
++ }
++}
++
++/// \brief Analyze the successors of each block and build up predicates
++void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
++ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
++ BBPredicates &Pred = Predicates[BB];
++
++ for (; PI != PE; ++PI) {
++ BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
++
++ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
++ BasicBlock *Succ = Term->getSuccessor(i);
++ if (Succ != BB)
++ continue;
++ buildPredicate(Term, i, Pred, false);
++ }
++ }
++}
++
++/// \brief Analyze the conditions leading to a loop back to a previous block
++void AMDGPUStructurizeCFG::analyzeLoop(BasicBlock *BB, unsigned &LoopIdx) {
++ BranchInst *Term = cast<BranchInst>(BB->getTerminator());
++
++ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
++ BasicBlock *Succ = Term->getSuccessor(i);
++
++ // Ignore it if it's not a back edge
++ if (!Visited.count(Succ))
++ continue;
++
++ buildPredicate(Term, i, LoopPred, true);
++
++ LoopEnd = BB;
++ if (Visited[Succ] < LoopIdx) {
++ LoopIdx = Visited[Succ];
++ LoopStart = Succ;
++ }
++ }
++}
++
++/// \brief Collect various loop and predicate infos
++void AMDGPUStructurizeCFG::collectInfos() {
++ unsigned Number = 0, LoopIdx = ~0;
++
++ // Reset predicate
++ Predicates.clear();
++
++ // and loop infos
++ LoopStart = LoopEnd = 0;
++ LoopPred.clear();
++
++ RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
++ for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
++
++ // Analyze all the conditions leading to a node
++ analyzeBlock((*OI)->getEntry());
++
++ if ((*OI)->isSubRegion())
++ continue;
++
++ // Find the first/last loop nodes and loop predicates
++ analyzeLoop((*OI)->getNodeAs<BasicBlock>(), LoopIdx);
++ }
++}
++
++/// \brief Does A dominate all the predicates of B ?
++bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
++ BBPredicates &Preds = Predicates[B];
++ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
++ PI != PE; ++PI) {
++
++ if (!DT->dominates(A, PI->first))
++ return false;
++ }
++ return true;
++}
++
++/// \brief Remove phi values from all successors and then remove the terminator.
++void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
++ TerminatorInst *Term = BB->getTerminator();
++ if (!Term)
++ return;
++
++ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
++ SI != SE; ++SI) {
++
++ delPhiValues(BB, *SI);
++ }
++
++ Term->eraseFromParent();
++}
++
++/// First: Skip forward to the first region node that either isn't a subregion or not
++/// dominating its exit, remove all the skipped nodes from the node order.
++///
++/// Second: Handle the first successor directly if the resulting nodes successor
++/// predicates are still dominated by the original entry
++RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
++ BasicBlock *Entry = Node->getEntry();
++
++ // Skip forward as long as it is just a linear flow
++ while (true) {
++ BasicBlock *Entry = Node->getEntry();
++ BasicBlock *Exit;
++
++ if (Node->isSubRegion()) {
++ Exit = Node->getNodeAs<Region>()->getExit();
++ } else {
++ TerminatorInst *Term = Entry->getTerminator();
++ if (Term->getNumSuccessors() != 1)
++ break;
++ Exit = Term->getSuccessor(0);
++ }
++
++ // It's a back edge, break here so we can insert a loop node
++ if (!Visited.count(Exit))
++ return Node;
++
++ // More than node edges are pointing to exit
++ if (!DT->dominates(Entry, Exit))
++ return Node;
++
++ RegionNode *Next = ParentRegion->getNode(Exit);
++ RNVector::iterator I = std::find(Order.begin(), Order.end(), Next);
++ assert(I != Order.end());
++
++ Visited.erase(Next->getEntry());
++ Order.erase(I);
++ Node = Next;
++ }
++
++ BasicBlock *BB = Node->getEntry();
++ TerminatorInst *Term = BB->getTerminator();
++ if (Term->getNumSuccessors() != 2)
++ return Node;
++
++  // Our node has exactly two successors, check if we can handle
++ // any of them directly
++ BasicBlock *Succ = Term->getSuccessor(0);
++ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ)) {
++ Succ = Term->getSuccessor(1);
++ if (!Visited.count(Succ) || !dominatesPredicates(Entry, Succ))
++ return Node;
++ } else {
++ BasicBlock *Succ2 = Term->getSuccessor(1);
++ if (Visited.count(Succ2) && Visited[Succ] > Visited[Succ2] &&
++ dominatesPredicates(Entry, Succ2))
++ Succ = Succ2;
++ }
++
++ RegionNode *Next = ParentRegion->getNode(Succ);
++ RNVector::iterator E = Order.end();
++ RNVector::iterator I = std::find(Order.begin(), E, Next);
++ assert(I != E);
++
++ killTerminator(BB);
++ FlowsInserted.push_back(BB);
++ Visited.erase(Succ);
++ Order.erase(I);
++ return ParentRegion->getNode(wireFlowBlock(BB, Next));
++}
++
++/// \brief Remove all PHI values coming from "From" into "To" and remember
++/// them in DeletedPhis
++void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
++ PhiMap &Map = DeletedPhis[To];
++ for (BasicBlock::iterator I = To->begin(), E = To->end();
++ I != E && isa<PHINode>(*I);) {
++
++ PHINode &Phi = cast<PHINode>(*I++);
++ while (Phi.getBasicBlockIndex(From) != -1) {
++ Value *Deleted = Phi.removeIncomingValue(From, false);
++ Map[&Phi].push_back(std::make_pair(From, Deleted));
++ }
++ }
++}
++
++/// \brief Add the PHI values back once we knew the new predecessor
++void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
++ if (!DeletedPhis.count(To))
++ return;
++
++ PhiMap &Map = DeletedPhis[To];
++ SSAUpdater Updater;
++
++ for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
++
++ PHINode *Phi = I->first;
++ Updater.Initialize(Phi->getType(), "");
++ BasicBlock *Fallback = To;
++ bool HaveFallback = false;
++
++ for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
++ VI != VE; ++VI) {
++
++ Updater.AddAvailableValue(VI->first, VI->second);
++ BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
++ if (Dom == VI->first)
++ HaveFallback = true;
++ else if (Dom != Fallback)
++ HaveFallback = false;
++ Fallback = Dom;
++ }
++ if (!HaveFallback) {
++ Value *Undef = UndefValue::get(Phi->getType());
++ Updater.AddAvailableValue(Fallback, Undef);
++ }
++
++ Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
++ }
++ DeletedPhis.erase(To);
++}
++
++/// \brief Create a new flow node and update dominator tree and region info
++BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
++ LLVMContext &Context = Func->getContext();
++ BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
++ Order.back()->getEntry();
++ BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
++ Func, Insert);
++ DT->addNewBlock(Flow, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
++ FlowsInserted.push_back(Flow);
++ return Flow;
++}
++
++/// \brief Can we predict that this node will always be called?
++bool AMDGPUStructurizeCFG::isPredictableTrue(BasicBlock *Prev,
++ BasicBlock *Node) {
++ BBPredicates &Preds = Predicates[Node];
++ bool Dominated = false;
++
++ for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
++ I != E; ++I) {
++
++ if (I->second != BoolTrue)
++ return false;
++
++ if (!Dominated && DT->dominates(I->first, Prev))
++ Dominated = true;
++ }
++ return Dominated;
++}
++
++/// \brief Wire up the new control flow by inserting or updating the branch
++/// instructions at node exits
++BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
++ RegionNode *Node) {
++ BasicBlock *Entry = Node->getEntry();
++
++ if (LoopStart == Entry) {
++ LoopStart = Prev;
++ LoopPred[Prev] = BoolTrue;
++ }
++
++  // Wire it up temporarily, skipChained may recurse into us
++ BranchInst::Create(Entry, Prev);
++ DT->changeImmediateDominator(Entry, Prev);
++ addPhiValues(Prev, Entry);
++
++ Node = skipChained(Node);
++
++ BasicBlock *Next = getNextFlow(Prev);
++ if (!isPredictableTrue(Prev, Entry)) {
++ // Let Prev point to entry and next block
++ Prev->getTerminator()->eraseFromParent();
++ BranchInst::Create(Entry, Next, BoolUndef, Prev);
++ } else {
++ DT->changeImmediateDominator(Next, Entry);
++ }
++
++ // Let node exit(s) point to next block
++ if (Node->isSubRegion()) {
++ Region *SubRegion = Node->getNodeAs<Region>();
++ BasicBlock *Exit = SubRegion->getExit();
++
++ // Find all the edges from the sub region to the exit
++ BBVector ToDo;
++ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
++ if (SubRegion->contains(*I))
++ ToDo.push_back(*I);
++ }
++
++ // Modify the edges to point to the new flow block
++ for (BBVector::iterator I = ToDo.begin(), E = ToDo.end(); I != E; ++I) {
++ delPhiValues(*I, Exit);
++ TerminatorInst *Term = (*I)->getTerminator();
++ Term->replaceUsesOfWith(Exit, Next);
++ }
++
++ // Update the region info
++ SubRegion->replaceExit(Next);
++
++ } else {
++ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
++ killTerminator(BB);
++ BranchInst::Create(Next, BB);
++
++ if (BB == LoopEnd)
++ LoopEnd = 0;
++ }
++
++ return Next;
++}
++
++/// Destroy node order and visited map, build up flow order instead.
++/// After this function control flow looks like it should be, but
++/// branches only have undefined conditions.
++void AMDGPUStructurizeCFG::createFlow() {
++ DeletedPhis.clear();
++
++ BasicBlock *Prev = Order.pop_back_val()->getEntry();
++ assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
++ Visited.erase(Prev);
++
++ if (LoopStart == Prev) {
++ // Loop starts at entry, split entry so that we can predicate it
++ BasicBlock::iterator Insert = Prev->getFirstInsertionPt();
++ BasicBlock *Split = Prev->splitBasicBlock(Insert, FlowBlockName);
++ DT->addNewBlock(Split, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
++ Predicates[Split] = Predicates[Prev];
++ Order.push_back(ParentRegion->getBBNode(Split));
++ LoopPred[Prev] = BoolTrue;
++
++ } else if (LoopStart == Order.back()->getEntry()) {
++ // Loop starts behind entry, split entry so that we can jump to it
++ Instruction *Term = Prev->getTerminator();
++ BasicBlock *Split = Prev->splitBasicBlock(Term, FlowBlockName);
++ DT->addNewBlock(Split, Prev);
++ ParentRegion->getRegionInfo()->setRegionFor(Split, ParentRegion);
++ Prev = Split;
++ }
++
++ killTerminator(Prev);
++ FlowsInserted.clear();
++ FlowsInserted.push_back(Prev);
++
++ while (!Order.empty()) {
++ RegionNode *Node = Order.pop_back_val();
++ Visited.erase(Node->getEntry());
++ Prev = wireFlowBlock(Prev, Node);
++ if (LoopStart && !LoopEnd) {
++ // Create an extra loop end node
++ LoopEnd = Prev;
++ Prev = getNextFlow(LoopEnd);
++ BranchInst::Create(Prev, LoopStart, BoolUndef, LoopEnd);
++ addPhiValues(LoopEnd, LoopStart);
++ }
++ }
++
++ BasicBlock *Exit = ParentRegion->getExit();
++ BranchInst::Create(Exit, Prev);
++ addPhiValues(Prev, Exit);
++ if (DT->dominates(ParentRegion->getEntry(), Exit))
++ DT->changeImmediateDominator(Exit, Prev);
++
++ if (LoopStart && LoopEnd) {
++ BBVector::iterator FI = std::find(FlowsInserted.begin(),
++ FlowsInserted.end(),
++ LoopStart);
++ for (; *FI != LoopEnd; ++FI) {
++ addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
++ }
++ }
++
++ assert(Order.empty());
++ assert(Visited.empty());
++ assert(DeletedPhis.empty());
++}
++
++/// \brief Insert the missing branch conditions
++void AMDGPUStructurizeCFG::insertConditions() {
++ SSAUpdater PhiInserter;
++
++ for (BBVector::iterator FI = FlowsInserted.begin(), FE = FlowsInserted.end();
++ FI != FE; ++FI) {
++
++ BranchInst *Term = cast<BranchInst>((*FI)->getTerminator());
++ if (Term->isUnconditional())
++ continue;
++
++ PhiInserter.Initialize(Boolean, "");
++ PhiInserter.AddAvailableValue(&Func->getEntryBlock(), BoolFalse);
++
++ BasicBlock *Succ = Term->getSuccessor(0);
++ BBPredicates &Preds = (*FI == LoopEnd) ? LoopPred : Predicates[Succ];
++ for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
++ PI != PE; ++PI) {
++
++ PhiInserter.AddAvailableValue(PI->first, PI->second);
++ }
++
++ Term->setCondition(PhiInserter.GetValueAtEndOfBlock(*FI));
++ }
++}
++
++/// Handle a rare case where the disintegrated node's instructions
++/// no longer dominate all their uses. Not sure if this is really necessary
++void AMDGPUStructurizeCFG::rebuildSSA() {
++ SSAUpdater Updater;
++ for (Region::block_iterator I = ParentRegion->block_begin(),
++ E = ParentRegion->block_end();
++ I != E; ++I) {
++
++ BasicBlock *BB = *I;
++ for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
++ II != IE; ++II) {
++
++ bool Initialized = false;
++ for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
++
++ Next = I->getNext();
++
++ Instruction *User = cast<Instruction>(I->getUser());
++ if (User->getParent() == BB) {
++ continue;
++
++ } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
++ if (UserPN->getIncomingBlock(*I) == BB)
++ continue;
++ }
++
++ if (DT->dominates(II, User))
++ continue;
++
++ if (!Initialized) {
++ Value *Undef = UndefValue::get(II->getType());
++ Updater.Initialize(II->getType(), "");
++ Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
++ Updater.AddAvailableValue(BB, II);
++ Initialized = true;
++ }
++ Updater.RewriteUseAfterInsertions(*I);
++ }
++ }
++ }
++}
++
++/// \brief Run the transformation for each region found
++bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
++ if (R->isTopLevelRegion())
++ return false;
++
++ Func = R->getEntry()->getParent();
++ ParentRegion = R;
++
++ DT = &getAnalysis<DominatorTree>();
++
++ orderNodes();
++ collectInfos();
++ createFlow();
++ insertConditions();
++ rebuildSSA();
++
++ Order.clear();
++ Visited.clear();
++ Predicates.clear();
++ DeletedPhis.clear();
++ FlowsInserted.clear();
++
++ return true;
++}
++
++/// \brief Create the pass
++Pass *llvm::createAMDGPUStructurizeCFGPass() {
++ return new AMDGPUStructurizeCFG();
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,87 @@
++//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information ----------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUSubtarget.h"
++
++using namespace llvm;
++
++#define GET_SUBTARGETINFO_ENUM
++#define GET_SUBTARGETINFO_TARGET_DESC
++#define GET_SUBTARGETINFO_CTOR
++#include "AMDGPUGenSubtargetInfo.inc"
++
++AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
++ AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
++ InstrItins = getInstrItineraryForCPU(CPU);
++
++ memset(CapsOverride, 0, sizeof(*CapsOverride)
++ * AMDGPUDeviceInfo::MaxNumberCapabilities);
++ // Default card
++ StringRef GPU = CPU;
++ Is64bit = false;
++ DefaultSize[0] = 64;
++ DefaultSize[1] = 1;
++ DefaultSize[2] = 1;
++ ParseSubtargetFeatures(GPU, FS);
++ DevName = GPU;
++ Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
++}
++
++AMDGPUSubtarget::~AMDGPUSubtarget() {
++ delete Device;
++}
++
++bool
++AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
++ assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
++ "Caps index is out of bounds!");
++ return CapsOverride[caps];
++}
++bool
++AMDGPUSubtarget::is64bit() const {
++ return Is64bit;
++}
++bool
++AMDGPUSubtarget::isTargetELF() const {
++ return false;
++}
++size_t
++AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
++ if (dim > 3) {
++ return 1;
++ } else {
++ return DefaultSize[dim];
++ }
++}
++
++std::string
++AMDGPUSubtarget::getDataLayout() const {
++ if (!Device) {
++ return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
++ "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
++ "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
++ "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
++ "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
++ }
++ return Device->getDataLayout();
++}
++
++std::string
++AMDGPUSubtarget::getDeviceName() const {
++ return DevName;
++}
++const AMDGPUDevice *
++AMDGPUSubtarget::device() const {
++ return Device;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h llvm-r600/lib/Target/R600/AMDGPUSubtarget.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUSubtarget.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUSubtarget.h 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,65 @@
++//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++ -*-====//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief AMDGPU specific subclass of TargetSubtarget.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPUSUBTARGET_H
++#define AMDGPUSUBTARGET_H
++#include "AMDILDevice.h"
++#include "llvm/ADT/StringExtras.h"
++#include "llvm/ADT/StringRef.h"
++#include "llvm/Target/TargetSubtargetInfo.h"
++
++#define GET_SUBTARGETINFO_HEADER
++#include "AMDGPUGenSubtargetInfo.inc"
++
++#define MAX_CB_SIZE (1 << 16)
++
++namespace llvm {
++
++class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
++private:
++ bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
++ const AMDGPUDevice *Device;
++ size_t DefaultSize[3];
++ std::string DevName;
++ bool Is64bit;
++ bool Is32on64bit;
++ bool DumpCode;
++ bool R600ALUInst;
++
++ InstrItineraryData InstrItins;
++
++public:
++ AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
++ virtual ~AMDGPUSubtarget();
++
++ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
++ virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);
++
++ bool isOverride(AMDGPUDeviceInfo::Caps) const;
++ bool is64bit() const;
++
++ // Helper functions to simplify if statements
++ bool isTargetELF() const;
++ const AMDGPUDevice* device() const;
++ std::string getDataLayout() const;
++ std::string getDeviceName() const;
++ virtual size_t getDefaultSize(uint32_t dim) const;
++ bool dumpCode() const { return DumpCode; }
++ bool r600ALUEncoding() const { return R600ALUInst; }
++
++};
++
++} // End namespace llvm
++
++#endif // AMDGPUSUBTARGET_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,148 @@
++//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief The AMDGPU target machine contains all of the hardware specific
++/// information needed to emit code for R600 and SI GPUs.
++//
++//===----------------------------------------------------------------------===//
++
++#include "AMDGPUTargetMachine.h"
++#include "AMDGPU.h"
++#include "R600ISelLowering.h"
++#include "R600InstrInfo.h"
++#include "SIISelLowering.h"
++#include "SIInstrInfo.h"
++#include "llvm/Analysis/Passes.h"
++#include "llvm/Analysis/Verifier.h"
++#include "llvm/CodeGen/MachineFunctionAnalysis.h"
++#include "llvm/CodeGen/MachineModuleInfo.h"
++#include "llvm/CodeGen/Passes.h"
++#include "llvm/MC/MCAsmInfo.h"
++#include "llvm/PassManager.h"
++#include "llvm/Support/TargetRegistry.h"
++#include "llvm/Support/raw_os_ostream.h"
++#include "llvm/Transforms/IPO.h"
++#include "llvm/Transforms/Scalar.h"
++#include <llvm/CodeGen/Passes.h>
++
++using namespace llvm;
++
++extern "C" void LLVMInitializeR600Target() {
++ // Register the target
++ RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
++}
++
++AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
++ StringRef CPU, StringRef FS,
++ TargetOptions Options,
++ Reloc::Model RM, CodeModel::Model CM,
++ CodeGenOpt::Level OptLevel
++)
++:
++ LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
++ Subtarget(TT, CPU, FS),
++ Layout(Subtarget.getDataLayout()),
++ FrameLowering(TargetFrameLowering::StackGrowsUp,
++ Subtarget.device()->getStackAlignment(), 0),
++ IntrinsicInfo(this),
++ InstrItins(&Subtarget.getInstrItineraryData()) {
++ // TLInfo uses InstrInfo so it must be initialized after.
++ if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
++ InstrInfo = new R600InstrInfo(*this);
++ TLInfo = new R600TargetLowering(*this);
++ } else {
++ InstrInfo = new SIInstrInfo(*this);
++ TLInfo = new SITargetLowering(*this);
++ }
++}
++
++AMDGPUTargetMachine::~AMDGPUTargetMachine() {
++}
++
++namespace {
++class AMDGPUPassConfig : public TargetPassConfig {
++public:
++ AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
++ : TargetPassConfig(TM, PM) {}
++
++ AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
++ return getTM<AMDGPUTargetMachine>();
++ }
++
++ virtual bool addPreISel();
++ virtual bool addInstSelector();
++ virtual bool addPreRegAlloc();
++ virtual bool addPostRegAlloc();
++ virtual bool addPreSched2();
++ virtual bool addPreEmitPass();
++};
++} // End of anonymous namespace
++
++TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase &PM) {
++ return new AMDGPUPassConfig(this, PM);
++}
++
++bool
++AMDGPUPassConfig::addPreISel() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createAMDGPUStructurizeCFGPass());
++ addPass(createSIAnnotateControlFlowPass());
++ }
++ return false;
++}
++
++bool AMDGPUPassConfig::addInstSelector() {
++ addPass(createAMDGPUPeepholeOpt(*TM));
++ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreRegAlloc() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createSIAssignInterpRegsPass(*TM));
++ }
++ addPass(createAMDGPUConvertToISAPass(*TM));
++ return false;
++}
++
++bool AMDGPUPassConfig::addPostRegAlloc() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++
++ if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createSIInsertWaits(*TM));
++ }
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreSched2() {
++
++ addPass(&IfConverterID);
++ return false;
++}
++
++bool AMDGPUPassConfig::addPreEmitPass() {
++ const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
++ if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
++ addPass(createAMDGPUCFGPreparationPass(*TM));
++ addPass(createAMDGPUCFGStructurizerPass(*TM));
++ addPass(createR600ExpandSpecialInstrsPass(*TM));
++ addPass(createR600LowerConstCopy(*TM));
++ addPass(&FinalizeMachineBundlesID);
++ } else {
++ addPass(createSILowerLiteralConstantsPass(*TM));
++ addPass(createSILowerControlFlowPass(*TM));
++ }
++
++ return false;
++}
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h
+--- llvm-3.2.src/lib/Target/R600/AMDGPUTargetMachine.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPUTargetMachine.h 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,70 @@
++//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++ -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++/// \file
++/// \brief The AMDGPU TargetMachine interface definition for hw codegen targets.
++//
++//===----------------------------------------------------------------------===//
++
++#ifndef AMDGPU_TARGET_MACHINE_H
++#define AMDGPU_TARGET_MACHINE_H
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDGPUSubtarget.h"
++#include "AMDILFrameLowering.h"
++#include "AMDILIntrinsicInfo.h"
++#include "R600ISelLowering.h"
++#include "llvm/ADT/OwningPtr.h"
++#include "llvm/DataLayout.h"
++
++namespace llvm {
++
++MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
++
++class AMDGPUTargetMachine : public LLVMTargetMachine {
++
++ AMDGPUSubtarget Subtarget;
++ const DataLayout Layout;
++ AMDGPUFrameLowering FrameLowering;
++ AMDGPUIntrinsicInfo IntrinsicInfo;
++ const AMDGPUInstrInfo * InstrInfo;
++ AMDGPUTargetLowering * TLInfo;
++ const InstrItineraryData* InstrItins;
++
++public:
++ AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
++ StringRef CPU,
++ TargetOptions Options,
++ Reloc::Model RM, CodeModel::Model CM,
++ CodeGenOpt::Level OL);
++ ~AMDGPUTargetMachine();
++ virtual const AMDGPUFrameLowering* getFrameLowering() const {
++ return &FrameLowering;
++ }
++ virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
++ return &IntrinsicInfo;
++ }
++ virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
++ virtual const AMDGPUSubtarget *getSubtargetImpl() const {return &Subtarget; }
++ virtual const AMDGPURegisterInfo *getRegisterInfo() const {
++ return &InstrInfo->getRegisterInfo();
++ }
++ virtual AMDGPUTargetLowering * getTargetLowering() const {
++ return TLInfo;
++ }
++ virtual const InstrItineraryData* getInstrItineraryData() const {
++ return InstrItins;
++ }
++ virtual const DataLayout* getDataLayout() const { return &Layout; }
++ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
++};
++
++} // End namespace llvm
++
++#endif // AMDGPU_TARGET_MACHINE_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDGPU.td llvm-r600/lib/Target/R600/AMDGPU.td
+--- llvm-3.2.src/lib/Target/R600/AMDGPU.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDGPU.td 2013-01-25 19:43:57.423383055 +0100
+@@ -0,0 +1,40 @@
++//===-- AMDIL.td - AMDIL Tablegen files --*- tablegen -*-------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++
++// Include AMDIL TD files
++include "AMDILBase.td"
++
++
++def AMDGPUInstrInfo : InstrInfo {
++ let guessInstructionProperties = 1;
++}
++
++//===----------------------------------------------------------------------===//
++// Declare the target which we are implementing
++//===----------------------------------------------------------------------===//
++def AMDGPUAsmWriter : AsmWriter {
++ string AsmWriterClassName = "InstPrinter";
++ int Variant = 0;
++ bit isMCAsmWriter = 1;
++}
++
++def AMDGPU : Target {
++ // Pull in Instruction Info:
++ let InstructionSet = AMDGPUInstrInfo;
++ let AssemblyWriters = [AMDGPUAsmWriter];
++}
++
++// Include AMDGPU TD files
++include "R600Schedule.td"
++include "SISchedule.td"
++include "Processors.td"
++include "AMDGPUInstrInfo.td"
++include "AMDGPUIntrinsics.td"
++include "AMDGPURegisterInfo.td"
++include "AMDGPUInstructions.td"
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.cpp 2013-01-25 19:43:57.433383055 +0100
+@@ -0,0 +1,115 @@
++//===-- AMDIL7XXDevice.cpp - Device Info for 7XX GPUs ---------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++// \file
++//==-----------------------------------------------------------------------===//
++#include "AMDIL7XXDevice.h"
++#include "AMDGPUSubtarget.h"
++#include "AMDILDevice.h"
++
++using namespace llvm;
++
++AMDGPU7XXDevice::AMDGPU7XXDevice(AMDGPUSubtarget *ST) : AMDGPUDevice(ST) {
++ setCaps();
++ std::string name = mSTM->getDeviceName();
++ if (name == "rv710") {
++ DeviceFlag = OCL_DEVICE_RV710;
++ } else if (name == "rv730") {
++ DeviceFlag = OCL_DEVICE_RV730;
++ } else {
++ DeviceFlag = OCL_DEVICE_RV770;
++ }
++}
++
++AMDGPU7XXDevice::~AMDGPU7XXDevice() {
++}
++
++void AMDGPU7XXDevice::setCaps() {
++ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
++}
++
++size_t AMDGPU7XXDevice::getMaxLDSSize() const {
++ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
++ return MAX_LDS_SIZE_700;
++ }
++ return 0;
++}
++
++size_t AMDGPU7XXDevice::getWavefrontSize() const {
++ return AMDGPUDevice::HalfWavefrontSize;
++}
++
++uint32_t AMDGPU7XXDevice::getGeneration() const {
++ return AMDGPUDeviceInfo::HD4XXX;
++}
++
++uint32_t AMDGPU7XXDevice::getResourceID(uint32_t DeviceID) const {
++ switch (DeviceID) {
++ default:
++ assert(0 && "ID type passed in is unknown!");
++ break;
++ case GLOBAL_ID:
++ case CONSTANT_ID:
++ case RAW_UAV_ID:
++ case ARENA_UAV_ID:
++ break;
++ case LDS_ID:
++ if (usesHardware(AMDGPUDeviceInfo::LocalMem)) {
++ return DEFAULT_LDS_ID;
++ }
++ break;
++ case SCRATCH_ID:
++ if (usesHardware(AMDGPUDeviceInfo::PrivateMem)) {
++ return DEFAULT_SCRATCH_ID;
++ }
++ break;
++ case GDS_ID:
++ assert(0 && "GDS UAV ID is not supported on this chip");
++ if (usesHardware(AMDGPUDeviceInfo::RegionMem)) {
++ return DEFAULT_GDS_ID;
++ }
++ break;
++ };
++
++ return 0;
++}
++
++uint32_t AMDGPU7XXDevice::getMaxNumUAVs() const {
++ return 1;
++}
++
++AMDGPU770Device::AMDGPU770Device(AMDGPUSubtarget *ST): AMDGPU7XXDevice(ST) {
++ setCaps();
++}
++
++AMDGPU770Device::~AMDGPU770Device() {
++}
++
++void AMDGPU770Device::setCaps() {
++ if (mSTM->isOverride(AMDGPUDeviceInfo::DoubleOps)) {
++ mSWBits.set(AMDGPUDeviceInfo::FMA);
++ mHWBits.set(AMDGPUDeviceInfo::DoubleOps);
++ }
++ mSWBits.set(AMDGPUDeviceInfo::BarrierDetect);
++ mHWBits.reset(AMDGPUDeviceInfo::LongOps);
++ mSWBits.set(AMDGPUDeviceInfo::LongOps);
++ mSWBits.set(AMDGPUDeviceInfo::LocalMem);
++}
++
++size_t AMDGPU770Device::getWavefrontSize() const {
++ return AMDGPUDevice::WavefrontSize;
++}
++
++AMDGPU710Device::AMDGPU710Device(AMDGPUSubtarget *ST) : AMDGPU7XXDevice(ST) {
++}
++
++AMDGPU710Device::~AMDGPU710Device() {
++}
++
++size_t AMDGPU710Device::getWavefrontSize() const {
++ return AMDGPUDevice::QuarterWavefrontSize;
++}
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h llvm-r600/lib/Target/R600/AMDIL7XXDevice.h
+--- llvm-3.2.src/lib/Target/R600/AMDIL7XXDevice.h 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDIL7XXDevice.h 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,72 @@
++//==-- AMDIL7XXDevice.h - Define 7XX Devices for AMDIL --------*- C++ -*--===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//==-----------------------------------------------------------------------===//
++/// \file
++/// \brief Interface for the subtarget data classes.
++///
++/// This file will define the interface that each generation needs to
++/// implement in order to correctly answer queries on the capabilities of the
++/// specific hardware.
++//===----------------------------------------------------------------------===//
++#ifndef AMDIL7XXDEVICEIMPL_H
++#define AMDIL7XXDEVICEIMPL_H
++#include "AMDILDevice.h"
++
++namespace llvm {
++class AMDGPUSubtarget;
++
++//===----------------------------------------------------------------------===//
++// 7XX generation of devices and their respective sub classes
++//===----------------------------------------------------------------------===//
++
++/// \brief The AMDGPU7XXDevice class represents the generic 7XX device.
++///
++/// All 7XX devices are derived from this class. The AMDGPU7XX device will only
++/// support the minimal features that are required to be considered OpenCL 1.0
++/// compliant and nothing more.
++class AMDGPU7XXDevice : public AMDGPUDevice {
++public:
++ AMDGPU7XXDevice(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU7XXDevice();
++ virtual size_t getMaxLDSSize() const;
++ virtual size_t getWavefrontSize() const;
++ virtual uint32_t getGeneration() const;
++ virtual uint32_t getResourceID(uint32_t DeviceID) const;
++ virtual uint32_t getMaxNumUAVs() const;
++
++protected:
++ virtual void setCaps();
++};
++
++/// \brief The AMDGPU770Device class represents the RV770 chip and its
++/// derivative cards.
++///
++/// The difference between this device and the base class is that this device
++/// adds support for double precision and has a larger wavefront size.
++class AMDGPU770Device : public AMDGPU7XXDevice {
++public:
++ AMDGPU770Device(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU770Device();
++ virtual size_t getWavefrontSize() const;
++private:
++ virtual void setCaps();
++};
++
++/// \brief The AMDGPU710Device class derives from the 7XX base class.
++///
++/// This class is a smaller derivative, so we need to overload some of the
++/// functions in order to correctly specify this information.
++class AMDGPU710Device : public AMDGPU7XXDevice {
++public:
++ AMDGPU710Device(AMDGPUSubtarget *ST);
++ virtual ~AMDGPU710Device();
++ virtual size_t getWavefrontSize() const;
++};
++
++} // namespace llvm
++#endif // AMDIL7XXDEVICEIMPL_H
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILBase.td llvm-r600/lib/Target/R600/AMDILBase.td
+--- llvm-3.2.src/lib/Target/R600/AMDILBase.td 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDILBase.td 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,85 @@
++//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++// Target-independent interfaces which we are implementing
++//===----------------------------------------------------------------------===//
++
++include "llvm/Target/Target.td"
++
++// Dummy Instruction itineraries for pseudo instructions
++def ALU_NULL : FuncUnit;
++def NullALU : InstrItinClass;
++
++//===----------------------------------------------------------------------===//
++// AMDIL Subtarget features.
++//===----------------------------------------------------------------------===//
++def FeatureFP64 : SubtargetFeature<"fp64",
++ "CapsOverride[AMDGPUDeviceInfo::DoubleOps]",
++ "true",
++ "Enable 64bit double precision operations">;
++def FeatureByteAddress : SubtargetFeature<"byte_addressable_store",
++ "CapsOverride[AMDGPUDeviceInfo::ByteStores]",
++ "true",
++ "Enable byte addressable stores">;
++def FeatureBarrierDetect : SubtargetFeature<"barrier_detect",
++ "CapsOverride[AMDGPUDeviceInfo::BarrierDetect]",
++ "true",
++ "Enable duplicate barrier detection(HD5XXX or later).">;
++def FeatureImages : SubtargetFeature<"images",
++ "CapsOverride[AMDGPUDeviceInfo::Images]",
++ "true",
++ "Enable image functions">;
++def FeatureMultiUAV : SubtargetFeature<"multi_uav",
++ "CapsOverride[AMDGPUDeviceInfo::MultiUAV]",
++ "true",
++ "Generate multiple UAV code(HD5XXX family or later)">;
++def FeatureMacroDB : SubtargetFeature<"macrodb",
++ "CapsOverride[AMDGPUDeviceInfo::MacroDB]",
++ "true",
++ "Use internal macrodb, instead of macrodb in driver">;
++def FeatureNoAlias : SubtargetFeature<"noalias",
++ "CapsOverride[AMDGPUDeviceInfo::NoAlias]",
++ "true",
++ "assert that all kernel argument pointers are not aliased">;
++def FeatureNoInline : SubtargetFeature<"no-inline",
++ "CapsOverride[AMDGPUDeviceInfo::NoInline]",
++ "true",
++ "specify whether to not inline functions">;
++
++def Feature64BitPtr : SubtargetFeature<"64BitPtr",
++ "Is64bit",
++ "false",
++ "Specify if 64bit addressing should be used.">;
++
++def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr",
++ "Is32on64bit",
++ "false",
++ "Specify if 64bit sized pointers with 32bit addressing should be used.">;
++def FeatureDebug : SubtargetFeature<"debug",
++ "CapsOverride[AMDGPUDeviceInfo::Debug]",
++ "true",
++ "Debug mode is enabled, so disable hardware accelerated address spaces.">;
++def FeatureDumpCode : SubtargetFeature <"DumpCode",
++ "DumpCode",
++ "true",
++ "Dump MachineInstrs in the CodeEmitter">;
++
++def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
++ "R600ALUInst",
++ "false",
++ "Older version of ALU instructions encoding.">;
++
++
++//===----------------------------------------------------------------------===//
++// Register File, Calling Conv, Instruction Descriptions
++//===----------------------------------------------------------------------===//
++
++
++include "AMDILRegisterInfo.td"
++include "AMDILInstrInfo.td"
++
+diff -Nur -x .git llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp
+--- llvm-3.2.src/lib/Target/R600/AMDILCFGStructurizer.cpp 1970-01-01 01:00:00.000000000 +0100
++++ llvm-r600/lib/Target/R600/AMDILCFGStructurizer.cpp 2013-01-25 19:43:57.436716388 +0100
+@@ -0,0 +1,3045 @@
++//===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++/// \file
++//==-----------------------------------------------------------------------===//
++
++#define DEBUGME 0
++#define DEBUG_TYPE "structcfg"
++
++#include "AMDGPUInstrInfo.h"
++#include "AMDIL.h"
++#include "llvm/ADT/SCCIterator.h"
++#include "llvm/ADT/SmallVector.h"
++#include "llvm/ADT/Statistic.h"
++#include "llvm/Analysis/DominatorInternals.h"
++#include "llvm/Analysis/Dominators.h"
++#include "llvm/CodeGen/MachinePostDominators.h"
++#include "llvm/CodeGen/MachineDominators.h"
++#include "llvm/CodeGen/MachineFunction.h"
++#include "llvm/CodeGen/MachineFunctionAnalysis.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineJumpTableInfo.h"
++#include "llvm/CodeGen/MachineLoopInfo.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/Target/TargetInstrInfo.h"
++
++using namespace llvm;
++
++// TODO: move-begin.
++
++//===----------------------------------------------------------------------===//
++//
++// Statistics for CFGStructurizer.
++//
++//===----------------------------------------------------------------------===//
++
++STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
++ "matched");
++STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
++ "matched");
++STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
++ "pattern matched");
++STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue "
++ "pattern matched");
++STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern "
++ "matched");
++STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
++STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
++
++//===----------------------------------------------------------------------===//
++//
++// Miscellaneous utility for CFGStructurizer.
++//
++//===----------------------------------------------------------------------===//
++namespace llvmCFGStruct {
++#define SHOWNEWINSTR(i) \
++ if (DEBUGME) errs() << "New instr: " << *i << "\n"
++
++#define SHOWNEWBLK(b, msg) \
++if (DEBUGME) { \
++ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
++ errs() << "\n"; \
++}
++
++#define SHOWBLK_DETAIL(b, msg) \
++if (DEBUGME) { \
++ if (b) { \
++ errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
++ b->print(errs()); \
++ errs() << "\n"; \
++ } \
++}
++
++#define INVALIDSCCNUM -1
++#define INVALIDREGNUM 0
++
++template<class LoopinfoT>
++void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
++ for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
++ iterEnd = LoopInfo.end();
++ iter != iterEnd; ++iter) {
++ (*iter)->print(OS, 0);
++ }
++}
++
++template<class NodeT>
++void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
++ size_t sz = Src.size();
++ for (size_t i = 0; i < sz/2; ++i) {
++ NodeT *t = Src[i];
++ Src[i] = Src[sz - i - 1];
++ Src[sz - i - 1] = t;
++ }
++}
++
++} //end namespace llvmCFGStruct
++
++//===----------------------------------------------------------------------===//
++//
++// supporting data structure for CFGStructurizer
++//
++//===----------------------------------------------------------------------===//
++
++namespace llvmCFGStruct {
++template<class PassT>
++struct CFGStructTraits {
++};
++
++template <class InstrT>
++class BlockInformation {
++public:
++ bool isRetired;
++ int sccNum;
++ //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
++ //Instructions defining the corresponding successor.
++ BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
++};
++
++template <class BlockT, class InstrT, class RegiT>
++class LandInformation {
++public:
++ BlockT *landBlk;
++ std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
++ //WHILELOOP(thisloop) init before entering
++ //thisloop.
++ std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
++ //WHILELOOP(thisloop) init after entering
++ //thisloop.
++ std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
++ //land block, branch cond on this reg.
++ std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
++ //endif" after ENDLOOP(thisloop) break
++ //outerLoopOf(thisLoop).
++ std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
++ //endif" after ENDLOOP(thisloop) continue on
++ //outerLoopOf(thisLoop).
++ LandInformation() : landBlk(NULL) {}
++};
++
++} //end of namespace llvmCFGStruct
++
++//===----------------------------------------------------------------------===//
++//
++// CFGStructurizer
++//
++//===----------------------------------------------------------------------===//
++
++namespace llvmCFGStruct {
++// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
++template<class PassT>
++class CFGStructurizer {
++public:
++ typedef enum {
++ Not_SinglePath = 0,
++ SinglePath_InPath = 1,
++ SinglePath_NotInPath = 2
++ } PathToKind;
++
++public:
++ typedef typename PassT::InstructionType InstrT;
++ typedef typename PassT::FunctionType FuncT;
++ typedef typename PassT::DominatortreeType DomTreeT;
++ typedef typename PassT::PostDominatortreeType PostDomTreeT;
++ typedef typename PassT::DomTreeNodeType DomTreeNodeT;
++ typedef typename PassT::LoopinfoType LoopInfoT;
++
++ typedef GraphTraits<FuncT *> FuncGTraits;
++ //typedef FuncGTraits::nodes_iterator BlockIterator;
++ typedef typename FuncT::iterator BlockIterator;
++
++ typedef typename FuncGTraits::NodeType BlockT;
++ typedef GraphTraits<BlockT *> BlockGTraits;
++ typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
++ //typedef BlockGTraits::succ_iterator InstructionIterator;
++ typedef typename BlockT::iterator InstrIterator;
++
++ typedef CFGStructTraits<PassT> CFGTraits;
++ typedef BlockInformation<InstrT> BlockInfo;
++ typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
++
++ typedef int RegiT;
++ typedef typename PassT::LoopType LoopT;
++ typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
++ typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
++ //landing info for loop break
++ typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
++
++public:
++ CFGStructurizer();
++ ~CFGStructurizer();
++
++ /// Perform the CFG structurization
++ bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
++
++ /// Perform the CFG preparation
++ bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
++
++private:
++ void reversePredicateSetter(typename BlockT::iterator);
++ void orderBlocks();
++ void printOrderedBlocks(llvm::raw_ostream &OS);
++ int patternMatch(BlockT *CurBlock);
++ int patternMatchGroup(BlockT *CurBlock);
++
++ int serialPatternMatch(BlockT *CurBlock);
++ int ifPatternMatch(BlockT *CurBlock);
++ int switchPatternMatch(BlockT *CurBlock);
++ int loopendPatternMatch(BlockT *CurBlock);
++ int loopPatternMatch(BlockT *CurBlock);
++
++ int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
++ int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
++ //int loopWithoutBreak(BlockT *);
++
++ void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
++ BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
++ void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
++ BlockT *ContBlock, LoopT *contLoop);
++ bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
++ int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock);
++ int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock);
++ int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock, BlockT **LandBlockPtr);
++ void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
++ BlockT *FalseBlock, BlockT *LandBlock,
++ bool Detail = false);
++ PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
++ bool AllowSideEntry = true);
++ BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
++ bool AllowSideEntry = true);
++ int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
++ void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
++
++ void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
++ BlockT *TrueBlock, BlockT *FalseBlock,
++ BlockT *LandBlock);
++ void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
++ void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
++ BlockT *ExitLandBlock, RegiT SetReg);
++ void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
++ RegiT SetReg);
++ BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
++ std::set<BlockT*> &ExitBlockSet,
++ BlockT *ExitLandBlk);
++ BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
++ BlockTSmallerVector &ExitingBlocks,
++ BlockTSmallerVector &ExitBlocks);
++ BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
++ void removeUnconditionalBranch(BlockT *SrcBlock);
++ void removeRedundantConditionalBranch(BlockT *SrcBlock);
++ void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
++
++ void removeSuccessor(BlockT *SrcBlock);
++ BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
++ BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
++
++ void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
++ InstrIterator InsertPos);
++
++ void recordSccnum(BlockT *SrcBlock, int SCCNum);
++ int getSCCNum(BlockT *srcBlk);
++
++ void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
++ bool isRetiredBlock(BlockT *SrcBlock);
++ bool isActiveLoophead(BlockT *CurBlock);
++ bool needMigrateBlock(BlockT *Block);
++
++ BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
++ BlockTSmallerVector &exitBlocks,
++ std::set<BlockT*> &ExitBlockSet);
++ void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
++ BlockT *getLoopLandBlock(LoopT *LoopRep);
++ LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
++
++ void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
++ void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
++
++ bool hasBackEdge(BlockT *curBlock);
++ unsigned getLoopDepth (LoopT *LoopRep);
++ int countActiveBlock(
++ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
++ typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
++ BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
++ BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
++
++private:
++ DomTreeT *domTree;
++ PostDomTreeT *postDomTree;
++ LoopInfoT *loopInfo;
++ PassT *passRep;
++ FuncT *funcRep;
++
++ BlockInfoMap blockInfoMap;
++ LoopLandInfoMap loopLandInfoMap;
++ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
++ const AMDGPURegisterInfo *TRI;
++
++}; //template class CFGStructurizer
++
++template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
++ : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
++}
++
++template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
++ for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
++ E = blockInfoMap.end(); I != E; ++I) {
++ delete I->second;
++ }
++}
++
++template<class PassT>
++bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
++ const AMDGPURegisterInfo * tri) {
++ passRep = &pass;
++ funcRep = &func;
++ TRI = tri;
++
++ bool changed = false;
++
++ //FIXME: if not reducible flow graph, make it so ???
++
++ if (DEBUGME) {
++ errs() << "AMDGPUCFGStructurizer::prepare\n";
++ }
++
++ loopInfo = CFGTraits::getLoopInfo(pass);
++ if (DEBUGME) {
++ errs() << "LoopInfo:\n";
++ PrintLoopinfo(*loopInfo, errs());
++ }
++
++ orderBlocks();
++ if (DEBUGME) {
++ errs() << "Ordered blocks:\n";
++ printOrderedBlocks(errs());
++ }
++
++ SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
++
++ for (typename LoopInfoT::iterator iter = loopInfo->begin(),
++ iterEnd = loopInfo->end();
++ iter != iterEnd; ++iter) {
++ LoopT* loopRep = (*iter);
++ BlockTSmallerVector exitingBlks;
++ loopRep->getExitingBlocks(exitingBlks);
++
++ if (exitingBlks.size() == 0) {
++ BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
++ if (dummyExitBlk != NULL)
++ retBlks.push_back(dummyExitBlk);
++ }
++ }
++
++ // Remove unconditional branch instr.
++ // Add dummy exit block iff there are multiple returns.
++
++ for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
++ iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
++ iterBlk != iterEndBlk;
++ ++iterBlk) {
++ BlockT *curBlk = *iterBlk;
++ removeUnconditionalBranch(curBlk);
++ removeRedundantConditionalBranch(curBlk);
++ if (CFGTraits::isReturnBlock(curBlk)) {
++ retBlks.push_back(curBlk);
++ }
++ assert(curBlk->succ_size() <= 2);
++ } //for
++
++ if (retBlks.size() >= 2) {
++ addDummyExitBlock(retBlks);
++ changed = true;
++ }
++
++ return changed;
++} //CFGStructurizer::prepare
<Skipped 19082 lines>
================================================================
---- gitweb:
http://git.pld-linux.org/gitweb.cgi/packages/llvm.git/commitdiff/251890425981d4d0076266858a1b1ef0ea2b617a
More information about the pld-cvs-commit
mailing list