diff --git a/Makefile b/Makefile
index 3a932c7..0fe28d1 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 23
-EXTRAVERSION = .16
+EXTRAVERSION = .17
 NAME = Arr Matey! A Hairy Bilge Rat!
 
 # *DOCUMENTATION*
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index ba931be..5169ecc 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void)
 
        /* Locate core99 Uni-N */
        uninorth_node = of_find_node_by_name(NULL, "uni-n");
+       uninorth_maj = 1;
+
        /* Locate G5 u3 */
        if (uninorth_node == NULL) {
                uninorth_node = of_find_node_by_name(NULL, "u3");
@@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void)
                uninorth_node = of_find_node_by_name(NULL, "u4");
                uninorth_maj = 4;
        }
-       if (uninorth_node == NULL)
+       if (uninorth_node == NULL) {
+               uninorth_maj = 0;
                return;
+       }
 
        addrp = of_get_property(uninorth_node, "reg", NULL);
        if (addrp == NULL)
@@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev)
        pmac_agp_resume(pmac_agp_bridge);
 }
 EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+       return uninorth_maj;
+}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index eff3b22..7770e10 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -207,7 +207,7 @@ int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
                if (__pa(address) < KERNEL_TEXT_SIZE) {
                        unsigned long addr2;
                        pgprot_t prot2;
-                       addr2 = __START_KERNEL_map + __pa(address);
+                       addr2 = __START_KERNEL_map + __pa(address) - phys_base;
                        /* Make sure the kernel mappings stay executable */
                        prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
                        err = __change_page_attr(addr2, pfn, prot2,
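
For reference, the corrected addr2 computation is just the inverse of the
kernel-text mapping; a short sketch of the arithmetic (illustrative only,
not part of the patch):

/*
 * The kernel text lives at __START_KERNEL_map, but with a relocatable
 * kernel that virtual range is backed by physical memory starting at
 * phys_base rather than 0.  Mapping a physical address back into the
 * text alias therefore has to undo that offset:
 *
 *	text_alias = __START_KERNEL_map + __pa(address) - phys_base
 *
 * The old code omitted the "- phys_base" term, so addr2 pointed at the
 * wrong page whenever the kernel was loaded away from its default
 * physical address.
 */
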
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index d409f67..1ebe7a3 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -85,6 +85,7 @@ struct smu_device {
        u32                     cmd_buf_abs;    /* command buffer absolute */
        struct list_head        cmd_list;
        struct smu_cmd          *cmd_cur;       /* pending command */
+       int                     broken_nap;
        struct list_head        cmd_i2c_list;
        struct smu_i2c_cmd      *cmd_i2c_cur;   /* pending i2c command */
        struct timer_list       i2c_timer;
@@ -135,6 +136,19 @@ static void smu_start_cmd(void)
        fend = faddr + smu->cmd_buf->length + 2;
        flush_inval_dcache_range(faddr, fend);
 
+
+       /* We also disable NAP mode for the duration of the command
+        * on U3 based machines.
+        * This is slightly racy as it can be written back to 1 by a sysctl
+        * but that never happens in practice. There seems to be an issue with
+        * U3 based machines such as the iMac G5 where napping for the
+        * whole duration of the command prevents the SMU from fetching it
+        * from memory. This might be related to the strange i2c based
+        * mechanism the SMU uses to access memory.
+        */
+       if (smu->broken_nap)
+               powersave_nap = 0;
+
        /* This isn't exactly a DMA mapping here, I suspect
         * the SMU is actually communicating with us via i2c to the
         * northbridge or the CPU to access RAM.
@@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
        misc = cmd->misc;
        mb();
        cmd->status = rc;
+
+       /* Re-enable NAP mode */
+       if (smu->broken_nap)
+               powersave_nap = 1;
  bail:
        /* Start next command if any */
        smu_start_cmd();
@@ -461,7 +479,7 @@ int __init smu_init (void)
         if (np == NULL)
                return -ENODEV;
 
-       printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+       printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
 
        if (smu_cmdbuf_abs == 0) {
                printk(KERN_ERR "SMU: Command buffer not allocated !\n");
@@ -533,6 +551,11 @@ int __init smu_init (void)
                goto fail;
        }
 
+       /* U3 has an issue with NAP mode when issuing SMU commands */
+       smu->broken_nap = pmac_get_uninorth_variant() < 4;
+       if (smu->broken_nap)
+               printk(KERN_INFO "SMU: using NAP mode workaround\n");
+
        sys_ctrler = SYS_CTRLER_SMU;
        return 0;
 
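
The shape of the workaround, condensed from the two hunks above (timeline
only, not meant to compile):

/*
 *   smu_start_cmd():  if (smu->broken_nap) powersave_nap = 0;
 *                     ... flush command buffer, ring the doorbell ...
 *   smu_db_intr():    ... read back the command status ...
 *                     if (smu->broken_nap) powersave_nap = 1;
 *
 * NAP stays disabled from the moment the command is handed to the SMU
 * until the completion interrupt, which is exactly the window in which
 * a napping U3 appears to prevent the SMU from fetching the buffer.
 */
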
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2c6116f..2b28a24 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -901,6 +901,7 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
        unsigned int xfer_size = SCpnt->request_bufflen;
        unsigned int good_bytes = result ? 0 : xfer_size;
        u64 start_lba = SCpnt->request->sector;
+       u64 end_lba = SCpnt->request->sector + (xfer_size / 512);
        u64 bad_lba;
        struct scsi_sense_hdr sshdr;
        int sense_valid = 0;
@@ -939,26 +940,23 @@ static void sd_rw_intr(struct scsi_cmnd * SCpnt)
                        goto out;
                if (xfer_size <= SCpnt->device->sector_size)
                        goto out;
-               switch (SCpnt->device->sector_size) {
-               case 256:
+               if (SCpnt->device->sector_size < 512) {
+                       /* only legitimate sector_size here is 256 */
                        start_lba <<= 1;
-                       break;
-               case 512:
-                       break;
-               case 1024:
-                       start_lba >>= 1;
-                       break;
-               case 2048:
-                       start_lba >>= 2;
-                       break;
-               case 4096:
-                       start_lba >>= 3;
-                       break;
-               default:
-                       /* Print something here with limiting frequency. */
-                       goto out;
-                       break;
+                       end_lba <<= 1;
+               } else {
+                       /* be careful ... don't want any overflows */
+                       u64 factor = SCpnt->device->sector_size / 512;
+                       do_div(start_lba, factor);
+                       do_div(end_lba, factor);
                }
+
+               if (bad_lba < start_lba  || bad_lba >= end_lba)
+                       /* the bad lba was reported incorrectly, we have
+                        * no idea where the error is
+                        */
+                       goto out;
+
                /* This computation should always be done in terms of
                 * the resolution of the device's medium.
                 */
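
A worked example of the unit conversion, with made-up numbers:

/*
 * READ of 32 KiB starting at request->sector = 800 on a drive with
 * 4096-byte sectors, where the drive reports bad_lba = 104:
 *
 *   start_lba = 800                       (512-byte units)
 *   end_lba   = 800 + 32768 / 512 = 864   (512-byte units)
 *   factor    = 4096 / 512 = 8
 *   start_lba /= factor  ->  100          (device LBAs)
 *   end_lba   /= factor  ->  108          (device LBAs)
 *
 * bad_lba = 104 falls inside [100, 108), so the bytes preceding it,
 * (104 - 100) * 4096 here, can be counted as successfully transferred.
 * Had the drive reported an LBA outside that window, the new range
 * check bails out rather than trusting a bogus sense value.
 */
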
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index a2a4865..331a5bb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -717,6 +717,17 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
 }
 
 /*
+ * If the page cache is marked as unsafe or invalid, then we can't rely on
+ * the PageUptodate() flag. In this case, we will need to turn off
+ * write optimisations that depend on the page contents being correct.
+ */
+static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
+{
+       return PageUptodate(page) &&
+               !(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
+}
+
+/*
  * Update and possibly write a cached page of an NFS file.
  *
  * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
@@ -737,10 +748,13 @@ int nfs_updatepage(struct file *file, struct page *page,
                (long long)(page_offset(page) +offset));
 
        /* If we're not using byte range locks, and we know the page
-        * is entirely in cache, it may be more efficient to avoid
-        * fragmenting write requests.
+        * is up to date, it may be more efficient to extend the write
+        * to cover the entire page in order to avoid fragmentation
+        * inefficiencies.
         */
-       if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
+       if (nfs_write_pageuptodate(page, inode) &&
+                       inode->i_flock == NULL &&
+                       !(file->f_mode & O_SYNC)) {
                count = max(count + offset, nfs_page_length(page));
                offset = 0;
        }
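
To make the effect of the new check concrete (illustrative numbers, not
taken from the patch):

/*
 * A 100-byte write at offset 300 into a fully cached 4096-byte page of a
 * large file, no byte-range locks, no O_SYNC:
 *
 *   count  = max(count + offset, nfs_page_length(page))
 *          = max(100 + 300, 4096) = 4096
 *   offset = 0
 *
 * i.e. the request is widened to the whole page so the server sees one
 * aligned WRITE instead of a fragmented one.  With nfs_write_pageuptodate()
 * the widening is skipped whenever the inode carries
 * NFS_INO_REVAL_PAGECACHE or NFS_INO_INVALID_DATA, because the cached page
 * contents can no longer be trusted to fill the untouched bytes.
 */
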
diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h
index 26bcb0a..877c35a 100644
--- a/include/asm-powerpc/pmac_feature.h
+++ b/include/asm-powerpc/pmac_feature.h
@@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base;
 #define UN_BIS(r,v)    (UN_OUT((r), UN_IN(r) | (v)))
 #define UN_BIC(r,v)    (UN_OUT((r), UN_IN(r) & ~(v)))
 
+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);
 
 #endif /* __ASM_POWERPC_PMAC_FEATURE_H */
 #endif /* __KERNEL__ */
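
A minimal caller sketch for the new accessor (the function below is made
up; the real consumer added by this patch is smu_init(), which treats
anything below U4 as affected):

#include <asm/pmac_feature.h>

static int running_on_u3(void)
{
	/* 0 = not uninorth, 1 = U1.x/U2.x, 3 = U3, 4 = U4 */
	return pmac_get_uninorth_variant() == 3;
}
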
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index dae7143..15a0229 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -289,6 +289,8 @@ static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
        return ktime_add_ns(kt, usec * 1000);
 }
 
+extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs);
+
 /*
  * The resolution of the clocks. The resolution value is returned in
  * the clock_getres() system call to give application programmers an
diff --git a/kernel/futex.c b/kernel/futex.c
index b658a9a..0c55a58 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2063,7 +2063,7 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
 
                t = timespec_to_ktime(ts);
                if (cmd == FUTEX_WAIT)
-                       t = ktime_add(ktime_get(), t);
+                       t = ktime_add_safe(ktime_get(), t);
                tp = &t;
        }
        /*
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index f938c23..bba74b6 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -175,7 +175,7 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 
                t = timespec_to_ktime(ts);
                if (cmd == FUTEX_WAIT)
-                       t = ktime_add(ktime_get(), t);
+                       t = ktime_add_safe(ktime_get(), t);
                tp = &t;
        }
        if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE)
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index ee8d0ac..2ee0497 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -301,6 +301,24 @@ unsigned long ktime_divns(const ktime_t kt, s64 div)
 }
 #endif /* BITS_PER_LONG >= 64 */
 
+/*
+ * Add two ktime values and do a safety check for overflow:
+ */
+
+ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
+{
+       ktime_t res = ktime_add(lhs, rhs);
+
+       /*
+        * We use KTIME_SEC_MAX here, the maximum timeout which we can
+        * return to user space in a timespec:
+        */
+       if (res.tv64 < 0 || res.tv64 < lhs.tv64 || res.tv64 < rhs.tv64)
+               res = ktime_set(KTIME_SEC_MAX, 0);
+
+       return res;
+}
+
 /* High resolution timer related functions */
 #ifdef CONFIG_HIGH_RES_TIMERS
 
@@ -658,13 +676,7 @@ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
                 */
                orun++;
        }
-       timer->expires = ktime_add(timer->expires, interval);
-       /*
-        * Make sure, that the result did not wrap with a very large
-        * interval.
-        */
-       if (timer->expires.tv64 < 0)
-               timer->expires = ktime_set(KTIME_SEC_MAX, 0);
+       timer->expires = ktime_add_safe(timer->expires, interval);
 
        return orun;
 }
@@ -815,7 +827,7 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
        new_base = switch_hrtimer_base(timer, base);
 
        if (mode == HRTIMER_MODE_REL) {
-               tim = ktime_add(tim, new_base->get_time());
+               tim = ktime_add_safe(tim, new_base->get_time());
                /*
                 * CONFIG_TIME_LOW_RES is a temporary way for architectures
                 * to signal that they simply return xtime in
@@ -824,16 +836,8 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
                 * timeouts. This will go away with the GTOD framework.
                 */
 #ifdef CONFIG_TIME_LOW_RES
-               tim = ktime_add(tim, base->resolution);
+               tim = ktime_add_safe(tim, base->resolution);
 #endif
-               /*
-                * Careful here: User space might have asked for a
-                * very long sleep, so the add above might result in a
-                * negative number, which enqueues the timer in front
-                * of the queue.
-                */
-               if (tim.tv64 < 0)
-                       tim.tv64 = KTIME_MAX;
        }
        timer->expires = tim;
 
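
A quick illustration of what ktime_add_safe() buys over plain ktime_add()
(standalone sketch, not part of the patch):

static void ktime_add_safe_demo(void)
{
	ktime_t now  = ktime_get();
	/* user space asks for a relative timeout near the end of time */
	ktime_t huge = ktime_set(KTIME_SEC_MAX, 0);
	ktime_t sum;

	/* plain ktime_add() would wrap tv64 negative here, which enqueues
	 * the timer in front of the queue and fires it immediately */
	sum = ktime_add_safe(now, huge);

	/* ktime_add_safe() detects the wrap and clamps the result to
	 * ktime_set(KTIME_SEC_MAX, 0), the largest value that can still be
	 * returned to user space in a timespec */
	BUG_ON(sum.tv64 < 0);
}
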
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index f1a73f0..7279484 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -246,6 +246,17 @@ static unsigned int default_startup(unsigned int irq)
 }
 
 /*
+ * default shutdown function
+ */
+static void default_shutdown(unsigned int irq)
+{
+       struct irq_desc *desc = irq_desc + irq;
+
+       desc->chip->mask(irq);
+       desc->status |= IRQ_MASKED;
+}
+
+/*
  * Fixup enable/disable function pointers
  */
 void irq_chip_set_defaults(struct irq_chip *chip)
@@ -256,8 +267,15 @@ void irq_chip_set_defaults(struct irq_chip *chip)
                chip->disable = default_disable;
        if (!chip->startup)
                chip->startup = default_startup;
+       /*
+        * We use chip->disable when the user provided their own. When
+        * chip->disable is still default_disable, we need to use
+        * default_shutdown instead, otherwise the irq line is not
+        * disabled on free_irq():
+        */
        if (!chip->shutdown)
-               chip->shutdown = chip->disable;
+               chip->shutdown = chip->disable != default_disable ?
+                       chip->disable : default_shutdown;
        if (!chip->name)
                chip->name = chip->typename;
        if (!chip->end)
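
The resulting default wiring, summarised (description only, not part of
the patch):

/*
 *   chip->shutdown provided by the driver  ->  used unchanged
 *   chip->disable  provided by the driver  ->  shutdown = chip->disable
 *   neither provided                       ->  shutdown = default_shutdown
 *                                               (mask the line, set IRQ_MASKED)
 *
 * Previously the last case fell back to default_disable, which is a
 * no-op, so the interrupt line was left unmasked after free_irq().
 */
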
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7a15afb..00c9e25 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -765,9 +765,11 @@ common_timer_set(struct k_itimer *timr, int flags,
        /* SIGEV_NONE timers are not queued ! See common_timer_get */
        if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
                /* Setup correct expiry time for relative timers */
-               if (mode == HRTIMER_MODE_REL)
-                       timer->expires = ktime_add(timer->expires,
-                                                  timer->base->get_time());
+               if (mode == HRTIMER_MODE_REL) {
+                       timer->expires =
+                               ktime_add_safe(timer->expires,
+                                              timer->base->get_time());
+               }
                return 0;
        }
 
diff --git a/mm/memory.c b/mm/memory.c
index f82b359..51a8691 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -981,6 +981,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
        int i;
        unsigned int vm_flags;
 
+       if (len <= 0)
+               return 0;
        /* 
         * Require read or write permissions.
         * If 'force' is set, we only require the "MAY" flags.
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70c5b7d..09b902d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -135,7 +135,7 @@ enum tcp_bit_set {
  * CLOSE_WAIT: ACK seen (after FIN)
  * LAST_ACK:   FIN seen (after FIN)
  * TIME_WAIT:  last ACK seen
- * CLOSE:      closed connection
+ * CLOSE:      closed connection (RST)
  *
  * LISTEN state is not used.
  *
@@ -834,8 +834,21 @@ static int tcp_packet(struct nf_conn *conntrack,
        case TCP_CONNTRACK_SYN_SENT:
                if (old_state < TCP_CONNTRACK_TIME_WAIT)
                        break;
-               if ((conntrack->proto.tcp.seen[!dir].flags &
-                       IP_CT_TCP_FLAG_CLOSE_INIT)
+               /* RFC 1122: "When a connection is closed actively,
+                * it MUST linger in TIME-WAIT state for a time 2xMSL
+                * (Maximum Segment Lifetime). However, it MAY accept
+                * a new SYN from the remote TCP to reopen the connection
+                * directly from TIME-WAIT state, if..."
+                * We ignore the conditions because we are in the
+                * TIME-WAIT state anyway.
+                *
+                * Handle aborted connections: we and the server
+                * think there is an existing connection but the client
+                * aborts it and starts a new one.
+                */
+               if (((conntrack->proto.tcp.seen[dir].flags
+                     | conntrack->proto.tcp.seen[!dir].flags)
+                    & IP_CT_TCP_FLAG_CLOSE_INIT)
                    || (conntrack->proto.tcp.last_dir == dir
                        && conntrack->proto.tcp.last_index == TCP_RST_SET)) {
                        /* Attempt to reopen a closed/aborted connection.
@@ -850,16 +863,23 @@ static int tcp_packet(struct nf_conn *conntrack,
        case TCP_CONNTRACK_IGNORE:
                /* Ignored packets:
                 *
+                * Our connection entry may be out of sync, so ignore
+                * packets which may signal the real connection between
+                * the client and the server.
+                *
                 * a) SYN in ORIGINAL
                 * b) SYN/ACK in REPLY
                 * c) ACK in reply direction after initial SYN in original.
+                *
+                * If the ignored packet is invalid, the receiver will send
+                * a RST we'll catch below.
                 */
                if (index == TCP_SYNACK_SET
                    && conntrack->proto.tcp.last_index == TCP_SYN_SET
                    && conntrack->proto.tcp.last_dir != dir
                    && ntohl(th->ack_seq) ==
                             conntrack->proto.tcp.last_end) {
-                       /* This SYN/ACK acknowledges a SYN that we earlier
+                       /* b) This SYN/ACK acknowledges a SYN that we earlier
                         * ignored as invalid. This means that the client and
                         * the server are both in sync, while the firewall is
                         * not. We kill this session and block the SYN/ACK so
@@ -884,7 +904,7 @@ static int tcp_packet(struct nf_conn *conntrack,
                write_unlock_bh(&tcp_lock);
                if (LOG_INVALID(IPPROTO_TCP))
                        nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
-                                 "nf_ct_tcp: invalid packed ignored ");
+                                 "nf_ct_tcp: invalid packet ignored ");
                return NF_ACCEPT;
        case TCP_CONNTRACK_MAX:
                /* Invalid packet */
@@ -938,8 +958,7 @@ static int tcp_packet(struct nf_conn *conntrack,
 
        conntrack->proto.tcp.state = new_state;
        if (old_state != new_state
-           && (new_state == TCP_CONNTRACK_FIN_WAIT
-               || new_state == TCP_CONNTRACK_CLOSE))
+           && new_state == TCP_CONNTRACK_FIN_WAIT)
                conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
        timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
                  && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
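
Illustrative trace of the reopen case the widened CLOSE_INIT test is after
(hosts and arrows are ours, not from the patch):

/*
 *   client                    conntrack                      server
 *   ------                    ---------                      ------
 *   FIN      -------------->  seen[ORIG] gets CLOSE_INIT
 *            <--------------  FIN/ACK exchange; entry ends up
 *                             in TIME_WAIT
 *   SYN      -------------->  attempt to reopen the same tuple
 *
 * The old test only consulted seen[!dir], i.e. the side that did NOT
 * send the new SYN, so a client that had itself initiated the close
 * could not reopen the connection through the firewall.  OR-ing the
 * flags of both directions (together with the existing RST check)
 * lets either end reopen from TIME-WAIT, as RFC 1122 allows.
 */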