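Revert the atomic (cmpxchg-based) page table operations and go back to
taking mm->page_table_lock in the page fault path.

  * mm/memory.c: handle_mm_fault() takes page_table_lock again and uses
    pud_alloc()/pmd_alloc()/pte_alloc_map() instead of the open-coded
    allocate + p?d_test_and_populate() sequences.  handle_pte_fault(),
    do_anonymous_page(), do_no_page(), do_file_page() and the swap-in
    path release the lock with spin_unlock() instead of calling
    page_table_atomic_stop().  The orig_entry/entry arguments that only
    existed to feed ptep_cmpxchg() are removed from do_anonymous_page(),
    do_no_page() and do_file_page().
  * include/asm-generic/pgtable.h, pgtable-nopmd.h, pgtable-nopud.h:
    remove page_table_atomic_start()/page_table_atomic_stop() and the
    pgd/pud/pmd_test_and_populate() fallbacks and stubs.
  * include/linux/page-flags.h, mm/page_alloc.c: remove the
    cmpxchg_fail_* page_state counters and the matching name strings.

Note: the spurious_page_faults counter itself is kept, but this patch
removes the inc_page_state(spurious_page_faults) call from
handle_pte_fault().

Index: linux-2.6.12/include/linux/page-flags.h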
===================================================================
--- linux-2.6.12.orig/include/linux/page-flags.h	2005-08-07 12:45:21.000000000 +0100
+++ linux-2.6.12/include/linux/page-flags.h	2005-08-07 12:45:27.000000000 +0100
@@ -133,11 +133,6 @@
 	unsigned long pgrotated;	/* pages rotated to tail of the LRU */
 	unsigned long nr_bounce;	/* pages for bounce buffers */
 	unsigned long spurious_page_faults;	/* Faults with no ops */
-	unsigned long cmpxchg_fail_flag_update;	/* cmpxchg failures for pte flag update */
-	unsigned long cmpxchg_fail_flag_reuse;	/* cmpxchg failures when cow reuse of pte */
-
-	unsigned long cmpxchg_fail_anon_read;	/* cmpxchg failures on anonymous read */
-	unsigned long cmpxchg_fail_anon_write;	/* cmpxchg failures on anonymous write */
 };
 
 extern void get_page_state(struct page_state *ret);
Index: linux-2.6.12/include/asm-generic/pgtable-nopmd.h
===================================================================
--- linux-2.6.12.orig/include/asm-generic/pgtable-nopmd.h	2005-08-07 12:45:20.000000000 +0100
+++ linux-2.6.12/include/asm-generic/pgtable-nopmd.h	2005-08-07 12:45:27.000000000 +0100
@@ -31,11 +31,6 @@
 #define pmd_ERROR(pmd)				(pud_ERROR((pmd).pud))
 
 #define pud_populate(mm, pmd, pte)		do { } while (0)
-#define __ARCH_HAVE_PUD_TEST_AND_POPULATE
-static inline int pud_test_and_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
-{
-	return 1;
-}
 
 /*
  * (pmds are folded into puds so this doesn't get actually called,
Index: linux-2.6.12/include/asm-generic/pgtable.h
===================================================================
--- linux-2.6.12.orig/include/asm-generic/pgtable.h	2005-08-07 12:45:20.000000000 +0100
+++ linux-2.6.12/include/asm-generic/pgtable.h	2005-08-07 12:45:27.000000000 +0100
@@ -157,65 +157,6 @@
 })
 #endif
 
-/*
- * page_table_atomic_start and page_table_atomic_stop may be used to
- * define special measures that an arch needs to guarantee atomic
- * operations outside of a spinlock. In the case that an arch does
- * not support atomic page table operations we will fall back to the
- * page table lock.
- */
-#ifndef __HAVE_ARCH_PAGE_TABLE_ATOMIC_START
-#define page_table_atomic_start(mm) do { } while (0)
-#endif
-
-#ifndef __HAVE_ARCH_PAGE_TABLE_ATOMIC_START
-#define page_table_atomic_stop(mm) do { } while (0)
-#endif
-
-/*
- * Fallback functions for atomic population of higher page table
- * structures. These simply acquire the page_table_lock for
- * synchronization. An architecture may override these generic
- * functions to provide atomic populate functions to make these
- * more effective.
- */
-
-#ifndef __HAVE_ARCH_PGD_TEST_AND_POPULATE
-#define pgd_test_and_populate(__mm, __pgd, __pud)			\
-({									\
-	int __rc;							\
-	spin_lock(&mm->page_table_lock);				\
-	__rc = pgd_none(*(__pgd));					\
-	if (__rc) pgd_populate(__mm, __pgd, __pud);			\
-	spin_unlock(&mm->page_table_lock);				\
-	__rc;								\
-})
-#endif
-
-#ifndef __HAVE_ARCH_PUD_TEST_AND_POPULATE
-#define pud_test_and_populate(__mm, __pud, __pmd)			\
-({									\
-	int __rc;							\
-	spin_lock(&mm->page_table_lock);				\
-	__rc = pud_none(*(__pud));					\
-	if (__rc) pud_populate(__mm, __pud, __pmd);			\
-	spin_unlock(&mm->page_table_lock);				\
-	__rc;								\
-})
-#endif
-
-#ifndef __HAVE_ARCH_PMD_TEST_AND_POPULATE
-#define pmd_test_and_populate(__mm, __pmd, __page)			\
-({									\
-	int __rc;							\
-	spin_lock(&mm->page_table_lock);				\
-	__rc = !pmd_present(*(__pmd));					\
-	if (__rc) pmd_populate(__mm, __pmd, __page);			\
-	spin_unlock(&mm->page_table_lock);				\
-	__rc;								\
-})
-#endif
-
 #else
 
 /*
Index: linux-2.6.12/mm/page_alloc.c
===================================================================
--- linux-2.6.12.orig/mm/page_alloc.c	2005-08-07 12:45:23.000000000 +0100
+++ linux-2.6.12/mm/page_alloc.c	2005-08-07 12:45:27.000000000 +0100
@@ -2272,11 +2272,6 @@
 	"pgrotated",
 	"nr_bounce",
 	"spurious_page_faults",
-	"cmpxchg_fail_flag_update",
-	"cmpxchg_fail_flag_reuse",
-
-	"cmpxchg_fail_anon_read",
-	"cmpxchg_fail_anon_write",
 };
 
 static void *vmstat_start(struct seq_file *m, loff_t *pos)
Index: linux-2.6.12/mm/memory.c
===================================================================
--- linux-2.6.12.orig/mm/memory.c	2005-08-07 12:45:23.000000000 +0100
+++ linux-2.6.12/mm/memory.c	2005-08-07 12:48:31.000000000 +0100
@@ -1663,7 +1663,7 @@
 	int ret = VM_FAULT_MINOR;
 
 	pte_unmap(page_table);
-	page_table_atomic_stop(mm);
+	spin_unlock(&mm->page_table_lock);
 	page = lookup_swap_cache(entry);
 	if (!page) {
  		swapin_readahead(entry, address, vma);
@@ -1748,78 +1748,65 @@
 }
 
 /*
- * We are called with atomic operations started and the
- * value of the pte that was read in orig_entry.
+ * We are called with the MM semaphore and the page_table_lock
+ * spinlock held to protect against concurrent faults in
+ * multithreaded programs; page_table_lock is dropped on return.
  */
 static int
 do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pte_t *page_table, pmd_t *pmd, int write_access,
-		unsigned long addr, pte_t orig_entry)
+		unsigned long addr)
 {
 	pte_t entry;
-	struct page * page;
+	struct page * page = ZERO_PAGE(addr);
 
-	if (unlikely(!write_access)) {
+	/* Read-only mapping of ZERO_PAGE. */
+	entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
 
-		/* Read-only mapping of ZERO_PAGE. */
-		entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr),
-					vma->vm_page_prot));
+	/* ..except if it's a write access */
+	if (write_access) {
+		/* Allocate our own private page. */
+		pte_unmap(page_table);
+		spin_unlock(&mm->page_table_lock);
 
-		/*
-		 * If the cmpxchg fails then another cpu may
-		 * already have populated the entry
-		 */
-		if (ptep_cmpxchg(mm, addr, page_table, orig_entry, entry)) {
-			update_mmu_cache(vma, addr, entry);
-			lazy_mmu_prot_update(entry);
-		} else {
-			inc_page_state(cmpxchg_fail_anon_read);
+		if (unlikely(anon_vma_prepare(vma)))
+			goto no_mem;
+		page = alloc_zeroed_user_highpage(vma, addr);
+		if (!page)
+			goto no_mem;
+
+		spin_lock(&mm->page_table_lock);
+		page_table = pte_offset_map(pmd, addr);
+
+		if (!pte_none(*page_table)) {
+			pte_unmap(page_table);
+			page_cache_release(page);
+			spin_unlock(&mm->page_table_lock);
+			goto out;
 		}
-		goto minor_fault;
+		inc_mm_counter(mm, rss);
+		entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
+							 vma->vm_page_prot)),
+				      vma);
+		lru_cache_add_active(page);
+		SetPageReferenced(page);
+		page_add_anon_rmap(page, vma, addr);
 	}
 
-	/* This leaves the write case */
-	page_table_atomic_stop(mm);
+	set_pte_at(mm, addr, page_table, entry);
 	pte_unmap(page_table);
-	if (unlikely(anon_vma_prepare(vma)))
-		goto oom;
 
-	page = alloc_zeroed_user_highpage(vma, addr);
-	if (!page)
-		goto oom;
-
-	entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
-						vma->vm_page_prot)),
-				vma);
-	page_table = pte_offset_map(pmd, addr);
-	page_table_atomic_start(mm);
-
-	if (!ptep_cmpxchg(mm, addr, page_table, orig_entry, entry)) {
-		page_cache_release(page);
-		inc_page_state(cmpxchg_fail_anon_write);
-		goto minor_fault;
-        }
-
-	/*
-	 * These two functions must come after the cmpxchg
-	 * because if the page is on the LRU then try_to_unmap may come
-	 * in and unmap the pte.
-	 */
-	page_add_anon_rmap(page, vma, addr);
-	lru_cache_add_active(page);
-	inc_mm_counter(mm, rss);
+	/* No need to invalidate - it was non-present before */
 	update_mmu_cache(vma, addr, entry);
 	lazy_mmu_prot_update(entry);
-
-minor_fault:
-	page_table_atomic_stop(mm);
-	pte_unmap(page_table);
+	spin_unlock(&mm->page_table_lock);
+out:
 	return VM_FAULT_MINOR;
-
-oom:
+no_mem:
 	return VM_FAULT_OOM;
 }
 
+
 /*
  * do_no_page() tries to create a new page mapping. It aggressively
  * tries to share with existing pages, but makes a separate copy if
@@ -1834,7 +1821,7 @@
 static int
 do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long address, int write_access, pte_t *page_table,
-        pmd_t *pmd, pte_t orig_entry)
+        pmd_t *pmd)
 {
 	struct page * new_page;
 	struct address_space *mapping = NULL;
@@ -1845,9 +1832,9 @@
 
 	if (!vma->vm_ops || !vma->vm_ops->nopage)
 		return do_anonymous_page(mm, vma, page_table,
-					pmd, write_access, address, orig_entry);
+					pmd, write_access, address);
 	pte_unmap(page_table);
-	page_table_atomic_stop(mm);
+	spin_unlock(&mm->page_table_lock);
 
 	if (vma->vm_file) {
 		mapping = vma->vm_file->f_mapping;
@@ -1954,7 +1941,7 @@
  * nonlinear vmas.
  */
 static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
-	unsigned long address, int write_access, pte_t *pte, pmd_t *pmd, pte_t entry)
+	unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
 {
 	unsigned long pgoff;
 	int err;
@@ -1967,13 +1954,13 @@
 	if (!vma->vm_ops || !vma->vm_ops->populate || 
 			(write_access && !(vma->vm_flags & VM_SHARED))) {
 		pte_clear(mm, address, pte);
-		return do_no_page(mm, vma, address, write_access, pte, pmd, entry);
+		return do_no_page(mm, vma, address, write_access, pte, pmd);
 	}
 
-	pgoff = pte_to_pgoff(entry);
+	pgoff = pte_to_pgoff(*pte);
 
 	pte_unmap(pte);
-	page_table_atomic_stop(mm);
+	spin_unlock(&mm->page_table_lock);
 
 	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
 	if (err == -ENOMEM)
@@ -2001,7 +1988,6 @@
 	int write_access, pte_t *pte, pmd_t *pmd)
 {
 	pte_t entry;
-	pte_t new_entry;
 
 	entry = *pte;
 	if (!pte_present(entry)) {
@@ -2012,60 +1998,26 @@
 		 */
 		if (pte_none(entry))
 			return do_no_page(mm, vma, address, write_access,
-						pte, pmd, entry);
+						pte, pmd);
 		if (pte_file(entry))
 			return do_file_page(mm, vma, address, write_access,
-						pte, pmd, entry);
+						pte, pmd);
 		return do_swap_page(mm, vma, address, pte, pmd,
 						entry, write_access);
 	}
 
-	new_entry = pte_mkyoung(entry);
 	if (write_access) {
-		if (!pte_write(entry)) {
-#ifdef CONFIG_ATOMIC_TABLE_OPS
-			/*
-			 * do_wp_page modifies a pte. We can add a pte without
-			 * the page_table_lock but not modify a pte since a
-			 * cmpxchg does not allow us to verify that the page
-			 * was not changed under us. So acquire the page table
-			 * lock.
-			 */
-			spin_lock(&mm->page_table_lock);
-			if (pte_same(entry, *pte))
-				return do_wp_page(mm, vma, address, pte,
-							pmd, entry);
-			/*
-			 * pte was changed under us. Another processor may have
-			 * done what we needed to do.
-			 */
-			pte_unmap(pte);
-			spin_unlock(&mm->page_table_lock);
-			return VM_FAULT_MINOR;
-#else
+		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, pmd, entry);
-#endif
-		}
 		entry = pte_mkdirty(entry);
 	}
-
-	/*
-	 * If the cmpxchg fails then another processor may have done
-	 * the changes for us. If not then another fault will bring
-	 * another chance to do this again.
-	*/
-	if (ptep_cmpxchg(mm, address, pte, entry, new_entry)) {
-		flush_tlb_page(vma, address);
-		update_mmu_cache(vma, address, entry);
-		lazy_mmu_prot_update(entry);
-	} else {
-		inc_page_state(cmpxchg_fail_flag_update);
-	}
+	entry = pte_mkyoung(entry);
+	ptep_set_access_flags(vma, address, pte, entry, write_access);
+	update_mmu_cache(vma, address, entry);
+	lazy_mmu_prot_update(entry);
 
 	pte_unmap(pte);
-	page_table_atomic_stop(mm);
-	if (pte_val(new_entry) == pte_val(entry))
-		inc_page_state(spurious_page_faults);
+	spin_unlock(&mm->page_table_lock);
 	return VM_FAULT_MINOR;
 }
 
@@ -2084,8 +2036,8 @@
 
 	inc_page_state(pgfault);
 
-	if (unlikely(is_vm_hugetlb_page(vma)))
-		goto sigbus;		/* mapping truncation does this. */
+	if (is_vm_hugetlb_page(vma))
+		return VM_FAULT_SIGBUS;	/* mapping truncation does this. */
 
 	/*
 	 * We try to rely on the mmap_sem and the SMP-safe atomic PTE updates.
@@ -2097,67 +2049,26 @@
 	 * duplicate the functionality of pmd_alloc, pud_alloc and
 	 * pte_alloc_map here.
 	 */
-	page_table_atomic_start(mm);
 	pgd = pgd_offset(mm, address);
-#ifndef __PAGETABLE_PUD_FOLDED
-	if (unlikely(pgd_none(*pgd))) {
-		pud_t *new;
-
-		page_table_atomic_stop(mm);
-		new = pud_alloc_one(mm, address);
-
-		if (!new)
-			goto oom;
-
-		page_table_atomic_start(mm);
-		if (!pgd_test_and_populate(mm, pgd, new))
-			pud_free(new);
-	}
-#endif
-
-	pud = pud_offset(pgd, address);
-	if (unlikely(pud_none(*pud))) {
-		pmd_t *new;
-
-		page_table_atomic_stop(mm);
-		new = pmd_alloc_one(mm, address);
-
-		if (!new)
-			goto oom;
-
-		page_table_atomic_start(mm);
-
-		if (!pud_test_and_populate(mm, pud, new))
-			pmd_free(new);
-	}
-
-	pmd = pmd_offset(pud, address);
-	if (unlikely(!pmd_present(*pmd))) {
-		struct page *new;
-
-		page_table_atomic_stop(mm);
-		new = pte_alloc_one(mm, address);
-
-		if (!new)
-			goto oom;
+	spin_lock(&mm->page_table_lock);
 
-		page_table_atomic_start(mm);
+	pud = pud_alloc(mm, pgd, address);
+	if (!pud)
+		goto oom;
 
-		if (!pmd_test_and_populate(mm, pmd, new))
-			pte_free(new);
-		else {
-			inc_page_state(nr_page_table_pages);
-			mm->nr_ptes++;
-		}
-	}
+	pmd = pmd_alloc(mm, pud, address);
+	if (!pmd)
+		goto oom;
 
-	pte = pte_offset_map(pmd, address);
+	pte = pte_alloc_map(mm, pmd, address);
+	if (!pte)
+		goto oom;
+	
 	return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
-oom:
-	return VM_FAULT_OOM;
 
-sigbus:
-	return VM_FAULT_SIGBUS;
+ oom:
+	spin_unlock(&mm->page_table_lock);
+	return VM_FAULT_OOM;
 }
 
 #ifndef __PAGETABLE_PUD_FOLDED
Index: linux-2.6.12/include/asm-generic/pgtable-nopud.h
===================================================================
--- linux-2.6.12.orig/include/asm-generic/pgtable-nopud.h	2005-08-07 12:45:20.000000000 +0100
+++ linux-2.6.12/include/asm-generic/pgtable-nopud.h	2005-08-07 12:45:27.000000000 +0100
@@ -29,12 +29,6 @@
 #define pud_ERROR(pud)				(pgd_ERROR((pud).pgd))
 #define pgd_populate(mm, pgd, pud)		do { } while (0)
 
-#define __HAVE_ARCH_PGD_TEST_AND_POPULATE
-static inline int pgd_test_and_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
-{
-	return 1;
-}
-
 /*
  * (puds are folded into pgds so this doesn't get actually called,
  * but the define is needed for a generic inline function.)

