Index: lib/Target/X86/X86ATTAsmPrinter.cpp
===================================================================
--- lib/Target/X86/X86ATTAsmPrinter.cpp	(revision 40568)
+++ lib/Target/X86/X86ATTAsmPrinter.cpp	(working copy)
@@ -576,38 +576,6 @@
 ///
 void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
-
-  // See if a truncate instruction can be turned into a nop.
-  switch (MI->getOpcode()) {
-  default: break;
-  case X86::TRUNC_64to32:
-  case X86::TRUNC_64to16:
-  case X86::TRUNC_32to16:
-  case X86::TRUNC_32to8:
-  case X86::TRUNC_16to8:
-  case X86::TRUNC_32_to8:
-  case X86::TRUNC_16_to8: {
-    const MachineOperand &MO0 = MI->getOperand(0);
-    const MachineOperand &MO1 = MI->getOperand(1);
-    unsigned Reg0 = MO0.getReg();
-    unsigned Reg1 = MO1.getReg();
-    unsigned Opc = MI->getOpcode();
-    if (Opc == X86::TRUNC_64to32)
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
-    else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
-    else
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
-    O << TAI->getCommentString() << " TRUNCATE ";
-    if (Reg0 != Reg1)
-      O << "\n\t";
-    break;
-  }
-  case X86::PsMOVZX64rr32:
-    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
-    break;
-  }
-
   // Call the autogenerated instruction printer routines.
   printInstruction(MI);
 }
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td	(revision 40568)
+++ lib/Target/X86/X86InstrInfo.td	(working copy)
@@ -388,14 +388,6 @@
 // Nop
 def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
 
-// Truncate
-def TRUNC_32_to8 : I<0x88, MRMDestReg, (outs GR8:$dst), (ins GR32_:$src),
-                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}", []>;
-def TRUNC_16_to8 : I<0x88, MRMDestReg, (outs GR8:$dst), (ins GR16_:$src),
-                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}", []>;
-def TRUNC_32to16 : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR32:$src),
-                     "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
-                     [(set GR16:$dst, (trunc GR32:$src))]>;
 
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions...
Index: lib/Target/X86/X86RegisterInfo.cpp
===================================================================
--- lib/Target/X86/X86RegisterInfo.cpp	(revision 40568)
+++ lib/Target/X86/X86RegisterInfo.cpp	(working copy)
@@ -700,7 +700,8 @@
       { X86::PSHUFDri,        X86::PSHUFDmi },
       { X86::PSHUFHWri,       X86::PSHUFHWmi },
       { X86::PSHUFLWri,       X86::PSHUFLWmi },
-      { X86::PsMOVZX64rr32,   X86::PsMOVZX64rm32 },
+      // PsMOVZX64rr32 is now INSERT_SUBREG(in, 3); folding it is more complex:
+      // { X86::PsMOVZX64rr32,   X86::PsMOVZX64rm32 },
       { X86::TEST16rr,        X86::TEST16rm },
       { X86::TEST32rr,        X86::TEST32rm },
       { X86::TEST64rr,        X86::TEST64rm },
Index: lib/Target/X86/X86CodeEmitter.cpp
===================================================================
--- lib/Target/X86/X86CodeEmitter.cpp	(revision 40568)
+++ lib/Target/X86/X86CodeEmitter.cpp	(working copy)
@@ -448,12 +448,6 @@
   return false;
 }
 
-inline static bool isX86_64TruncToByte(unsigned oc) {
-  return (oc == X86::TRUNC_64to8 || oc == X86::TRUNC_32to8 ||
-          oc == X86::TRUNC_16to8);
-}
-
-
 inline static bool isX86_64NonExtLowByteReg(unsigned reg) {
   return (reg == X86::SPL || reg == X86::BPL ||
           reg == X86::SIL || reg == X86::DIL);
@@ -465,7 +459,6 @@
 unsigned Emitter::determineREX(const MachineInstr &MI) {
   unsigned REX = 0;
   const TargetInstrDescriptor *Desc = MI.getInstrDescriptor();
-  unsigned Opcode = Desc->Opcode;
 
   // Pseudo instructions do not need REX prefix byte.
   if ((Desc->TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -479,16 +472,11 @@
       Desc->getOperandConstraint(1, TOI::TIED_TO) != -1;
 
     // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
-    bool isTrunc8 = isX86_64TruncToByte(Opcode);
     unsigned i = isTwoAddr ? 1 : 0;
     for (unsigned e = NumOps; i != e; ++i) {
       const MachineOperand& MO = MI.getOperand(i);
       if (MO.isRegister()) {
         unsigned Reg = MO.getReg();
-        // Trunc to byte are actually movb. The real source operand is the low
-        // byte of the register.
-        if (isTrunc8 && i == 1)
-          Reg = getX86SubSuperRegister(Reg, MVT::i8);
         if (isX86_64NonExtLowByteReg(Reg))
           REX |= 0x40;
       }
Index: lib/Target/X86/X86InstrX86-64.td
===================================================================
--- lib/Target/X86/X86InstrX86-64.td	(revision 40568)
+++ lib/Target/X86/X86InstrX86-64.td	(working copy)
@@ -1005,33 +1005,8 @@
 // Alias Instructions
 //===----------------------------------------------------------------------===//
 
-// Truncate
-// In 64-mode, each 64-bit and 32-bit registers has a low 8-bit sub-register.
-def TRUNC_64to8  : I<0x88, MRMDestReg, (outs GR8:$dst), (ins GR64:$src),
-                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}",
-                     [(set GR8:$dst, (trunc GR64:$src))]>;
-def TRUNC_32to8  : I<0x88, MRMDestReg, (outs GR8:$dst), (ins GR32:$src),
-                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}",
-                     [(set GR8:$dst, (trunc GR32:$src))]>,
-                   Requires<[In64BitMode]>;
-def TRUNC_16to8  : I<0x88, MRMDestReg, (outs GR8:$dst), (ins GR16:$src),
-                     "mov{b} {${src:subreg8}, $dst|$dst, ${src:subreg8}}",
-                     [(set GR8:$dst, (trunc GR16:$src))]>,
-                   Requires<[In64BitMode]>;
-
-def TRUNC_64to16 : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR64:$src),
-                     "mov{w} {${src:subreg16}, $dst|$dst, ${src:subreg16}}",
-                     [(set GR16:$dst, (trunc GR64:$src))]>;
-
-def TRUNC_64to32 : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR64:$src),
-                     "mov{l} {${src:subreg32}, $dst|$dst, ${src:subreg32}}",
-                     [(set GR32:$dst, (trunc GR64:$src))]>;
-
 // Zero-extension
 // TODO: Remove this after proper i32 -> i64 zext support.
-def PsMOVZX64rr32: I<0x89, MRMDestReg, (outs GR64:$dst), (ins GR32:$src),
-                     "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
-                     [(set GR64:$dst, (zext GR32:$src))]>;
 def PsMOVZX64rm32: I<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
                      "mov{l} {$src, ${dst:subreg32}|${dst:subreg32}, $src}",
                      [(set GR64:$dst, (zextloadi64i32 addr:$src))]>;
@@ -1108,7 +1083,7 @@
 // anyext -> zext
 def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8  GR8 :$src)>;
 def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16:$src)>;
-def : Pat<(i64 (anyext GR32:$src)), (PsMOVZX64rr32 GR32:$src)>;
+// (i64 (anyext GR32:$src)) handled in C++ isel code
 def : Pat<(i64 (anyext (loadi8  addr:$src))), (MOVZX64rm8  addr:$src)>;
 def : Pat<(i64 (anyext (loadi16 addr:$src))), (MOVZX64rm16 addr:$src)>;
 def : Pat<(i64 (anyext (loadi32 addr:$src))), (PsMOVZX64rm32 addr:$src)>;
Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp	(revision 40568)
+++ lib/Target/X86/X86ISelDAGToDAG.cpp	(working copy)
@@ -1259,38 +1259,74 @@
       return NULL;
     }
 
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+      // NOTE(review): the two-operand INSERT_SUBREG only supplies the low 32
+      // bits; confirm the upper half is really zero before relying on this
+      // for ZERO_EXTEND (it is trivially fine for ANY_EXTEND).
+      if (NVT == MVT::i64 &&
+          Node->getOperand(0).getValueType() == MVT::i32) {
+        // Queue the operand for selection, as the TRUNCATE case does for its
+        // input.
+        AddToISelQueue(Node->getOperand(0));
+        // Use 2 operand insert
+        SDOperand Tmp = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
+        SDNode *ResNode = CurDAG->getTargetNode(X86::INSERT_SUBREG, NVT,
+                                                Node->getOperand(0), Tmp);
+#ifndef NDEBUG
+        DOUT << std::string(Indent-2, ' ') << "=> ";
+        DEBUG(ResNode->dump(CurDAG));
+        DOUT << "\n";
+        Indent -= 2;
+#endif
+        return ResNode;
+      }
+      break;
+
     case ISD::TRUNCATE: {
-      if (!Subtarget->is64Bit() && NVT == MVT::i8) {
-        unsigned Opc2;
-        MVT::ValueType VT;
-        switch (Node->getOperand(0).getValueType()) {
-        default: assert(0 && "Unknown truncate!");
-        case MVT::i16:
-          Opc = X86::MOV16to16_;
-          VT = MVT::i16;
-          Opc2 = X86::TRUNC_16_to8;
-          break;
-        case MVT::i32:
-          Opc = X86::MOV32to32_;
-          VT = MVT::i32;
-          Opc2 = X86::TRUNC_32_to8;
-          break;
+      SDOperand Tmp;
+      SDOperand Input = Node->getOperand(0);
+      AddToISelQueue(Node->getOperand(0));
+      switch (NVT) {
+      case MVT::i8:
+        Tmp = CurDAG->getTargetConstant(1, MVT::i32); // SubRegSet 1
+        // Ensure that the source register has an 8-bit subreg on 32-bit targets
+        if (!Subtarget->is64Bit()) {
+          unsigned Opc;
+          MVT::ValueType VT;
+          switch (Node->getOperand(0).getValueType()) {
+          default: assert(0 && "Unknown truncate!");
+          case MVT::i16:
+            Opc = X86::MOV16to16_;
+            VT = MVT::i16;
+            break;
+          case MVT::i32:
+            Opc = X86::MOV32to32_;
+            VT = MVT::i32;
+            break;
+          }
+          Input =
+            SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
         }
-
-        AddToISelQueue(Node->getOperand(0));
-        SDOperand Tmp =
-          SDOperand(CurDAG->getTargetNode(Opc, VT, Node->getOperand(0)), 0);
-        SDNode *ResNode = CurDAG->getTargetNode(Opc2, NVT, Tmp);
-      
+        break;
+      case MVT::i16:
+        Tmp = CurDAG->getTargetConstant(2, MVT::i32); // SubRegSet 2
+        break;
+      case MVT::i32:
+        Tmp = CurDAG->getTargetConstant(3, MVT::i32); // SubRegSet 3
+        break;
+      default: assert(0 && "Unknown truncate!");
+      }
+      SDNode *ResNode = CurDAG->getTargetNode(X86::EXTRACT_SUBREG,
+                                              NVT,
+                                              Input, Tmp);
 #ifndef NDEBUG
         DOUT << std::string(Indent-2, ' ') << "=> ";
         DEBUG(ResNode->dump(CurDAG));
         DOUT << "\n";
         Indent -= 2;
 #endif
-        return ResNode;
-      }
-
+      return ResNode;
       break;
     }
   }
Index: lib/Target/X86/X86IntelAsmPrinter.cpp
===================================================================
--- lib/Target/X86/X86IntelAsmPrinter.cpp	(revision 40568)
+++ lib/Target/X86/X86IntelAsmPrinter.cpp	(working copy)
@@ -302,38 +302,6 @@
 ///
 void X86IntelAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
   ++EmittedInsts;
-
-  // See if a truncate instruction can be turned into a nop.
-  switch (MI->getOpcode()) {
-  default: break;
-  case X86::TRUNC_64to32:
-  case X86::TRUNC_64to16:
-  case X86::TRUNC_32to16:
-  case X86::TRUNC_32to8:
-  case X86::TRUNC_16to8:
-  case X86::TRUNC_32_to8:
-  case X86::TRUNC_16_to8: {
-    const MachineOperand &MO0 = MI->getOperand(0);
-    const MachineOperand &MO1 = MI->getOperand(1);
-    unsigned Reg0 = MO0.getReg();
-    unsigned Reg1 = MO1.getReg();
-    unsigned Opc = MI->getOpcode();
-    if (Opc == X86::TRUNC_64to32)
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i32);
-    else if (Opc == X86::TRUNC_32to16 || Opc == X86::TRUNC_64to16)
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i16);
-    else
-      Reg1 = getX86SubSuperRegister(Reg1, MVT::i8);
-    O << TAI->getCommentString() << " TRUNCATE ";
-    if (Reg0 != Reg1)
-      O << "\n\t";
-    break;
-  }
-  case X86::PsMOVZX64rr32:
-    O << TAI->getCommentString() << " ZERO-EXTEND " << "\n\t";
-    break;
-  }
-
   // Call the autogenerated instruction printer routines.
   printInstruction(MI);
 }
Index: lib/Target/X86/X86RegisterInfo.td
===================================================================
--- lib/Target/X86/X86RegisterInfo.td	(revision 40568)
+++ lib/Target/X86/X86RegisterInfo.td	(working copy)
@@ -163,7 +163,49 @@
   def ST7 : Register<"ST(7)">, DwarfRegNum<18>; 
 }
 
+
 //===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// sub registers for each register.
+//
+
+def : SubRegSet<1, [AX, CX, DX, BX, SP,  BP,  SI,  DI,  
+                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
+                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
+                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+// It's unclear if this subreg set is safe, given that not all registers
+// in the class have an 'H' subreg.
+// def : SubRegSet<2, [AX, CX, DX, BX],
+//                    [AH, CH, DH, BH]>;
+
+def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,  
+                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
+                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,  
+                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
+                   [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
+                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+
+
+def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
+                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                   [AL, CL, DL, BL, SPL, BPL, SIL, DIL, 
+                    R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
+
+def : SubRegSet<2, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
+                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                   [AX,  CX,  DX,  BX,  SP,  BP,  SI,  DI, 
+                    R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
+                    
+def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,  
+                    R8,  R9,  R10, R11, R12, R13, R14, R15],
+                   [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, 
+                    R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+
+//===----------------------------------------------------------------------===//
 // Register Class Definitions... now that we have all of the pieces, define the
 // top-level register classes.  The order specified in the register list is
 // implicitly defined to be the register allocation order.
@@ -229,6 +271,7 @@
 def GR16 : RegisterClass<"X86", [i16], 16,
                          [AX, CX, DX, SI, DI, BX, BP, SP,
                           R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]> {
+  let SubRegClassList = [GR8];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -292,6 +335,7 @@
 def GR32 : RegisterClass<"X86", [i32], 32, 
                          [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
                           R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]> {
+  let SubRegClassList = [GR8, GR16];
   let MethodProtos = [{
     iterator allocation_order_begin(const MachineFunction &MF) const;
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -355,6 +399,7 @@
 def GR64 : RegisterClass<"X86", [i64], 64, 
                          [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
                           RBX, R14, R15, R12, R13, RBP, RSP]> {
+  let SubRegClassList = [GR8, GR16, GR32];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
   }];
@@ -374,8 +419,12 @@
 
 // GR16, GR32 subclasses which contain registers that have R8 sub-registers.
 // These should only be used for 32-bit mode.
-def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]>;
-def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]>;
+def GR16_ : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+  let SubRegClassList = [GR8];
+}
+def GR32_ : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+  let SubRegClassList = [GR8, GR16];
+}
 
 // Scalar SSE2 floating point registers.
 def FR32 : RegisterClass<"X86", [f32], 32,
