[Astfin-commits] Commit in software/astfin/trunk/misc (4 files)

Astfin Subversion Commits svn-commit at astfin.org
Sun Jun 24 22:42:10 EDT 2007


    Date: Sunday, June 24, 2007 @ 22:42:07
  Author: david
Revision: 897

added test for page cache problems

Added:
  software/astfin/trunk/misc/test_pagecache/
  software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff
  software/astfin/trunk/misc/test_pagecache/README.txt
  software/astfin/trunk/misc/test_pagecache/test_page_cache.sh

Added: software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff
===================================================================
--- software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff	                        (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff	2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,176 @@
+Index: kernel/sysctl.c
+===================================================================
+--- kernel/sysctl.c	(revision 3262)
++++ kernel/sysctl.c	(revision 3263)
+@@ -1022,6 +1022,17 @@
+ 		.extra2		= &one_hundred,
+ 	},
+ #endif
++	{
++		.ctl_name       = VM_PAGECACHE_RATIO,
++		.procname       = "pagecache_ratio",
++		.data           = &sysctl_pagecache_ratio,
++		.maxlen         = sizeof(sysctl_pagecache_ratio),
++		.mode           = 0644,
++		.proc_handler   = &sysctl_pagecache_ratio_sysctl_handler,
++		.strategy       = &sysctl_intvec,
++		.extra1         = &zero,
++		.extra2         = &one_hundred,
++	},
+ #ifdef CONFIG_X86_32
+ 	{
+ 		.ctl_name	= VM_VDSO_ENABLED,
+Index: include/linux/sysctl.h
+===================================================================
+--- include/linux/sysctl.h	(revision 3262)
++++ include/linux/sysctl.h	(revision 3263)
+@@ -202,6 +202,7 @@
+ 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
+ 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+ 	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
++	VM_PAGECACHE_RATIO=36,	/* percent of RAM to use as page cache */
+ };
+ 
+ 
+Index: include/linux/mmzone.h
+===================================================================
+--- include/linux/mmzone.h	(revision 3262)
++++ include/linux/mmzone.h	(revision 3263)
+@@ -166,6 +166,7 @@
+ 	 * sysctl_lowmem_reserve_ratio sysctl changes.
+ 	 */
+ 	unsigned long		lowmem_reserve[MAX_NR_ZONES];
++	unsigned long		max_pagecache_pages;
+ 
+ #ifdef CONFIG_NUMA
+ 	int node;
+@@ -463,6 +464,8 @@
+ 			struct file *, void __user *, size_t *, loff_t *);
+ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+ 			struct file *, void __user *, size_t *, loff_t *);
++int sysctl_pagecache_ratio_sysctl_handler(struct ctl_table *, int,
++			struct file *, void __user *, size_t *, loff_t *);
+ 
+ #include <linux/topology.h>
+ /* Returns the number of the current Node. */
+Index: include/linux/pagemap.h
+===================================================================
+--- include/linux/pagemap.h	(revision 3262)
++++ include/linux/pagemap.h	(revision 3263)
+@@ -63,12 +63,13 @@
+ 
+ static inline struct page *page_cache_alloc(struct address_space *x)
+ {
+-	return __page_cache_alloc(mapping_gfp_mask(x));
++	return __page_cache_alloc(mapping_gfp_mask(x) | __GFP_PAGECACHE);
+ }
+ 
+ static inline struct page *page_cache_alloc_cold(struct address_space *x)
+ {
+-	return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
++	return __page_cache_alloc(mapping_gfp_mask(x) |
++				__GFP_COLD | __GFP_PAGECACHE);
+ }
+ 
+ typedef int filler_t(void *, struct page *);
+Index: include/linux/gfp.h
+===================================================================
+--- include/linux/gfp.h	(revision 3262)
++++ include/linux/gfp.h	(revision 3263)
+@@ -46,6 +46,7 @@
+ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+ #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+ #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
++#define __GFP_PAGECACHE ((__force gfp_t)0x80000u) /* Page cache allocation */
+ 
+ #define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+Index: include/linux/swap.h
+===================================================================
+--- include/linux/swap.h	(revision 3262)
++++ include/linux/swap.h	(revision 3263)
+@@ -191,6 +191,7 @@
+ extern int vm_swappiness;
+ extern int remove_mapping(struct address_space *mapping, struct page *page);
+ extern long vm_total_pages;
++extern int sysctl_pagecache_ratio;
+ 
+ #ifdef CONFIG_NUMA
+ extern int zone_reclaim_mode;
+Index: mm/vmscan.c
+===================================================================
+--- mm/vmscan.c	(revision 3262)
++++ mm/vmscan.c	(revision 3263)
+@@ -923,6 +923,15 @@
+ 		zone->nr_scan_inactive = 0;
+ 	else
+ 		nr_inactive = 0;
++	
++	/*
++	 * If the page cache is too big then focus on page cache
++	 * and ignore anonymous pages
++	 */
++	if (sc->may_swap && (zone_page_state(zone, NR_FILE_PAGES) -
++			zone_page_state(zone, NR_FILE_MAPPED))
++			> zone->max_pagecache_pages)
++		sc->may_swap = 0;
+ 
+ 	while (nr_active || nr_inactive) {
+ 		if (nr_active) {
+Index: mm/page_alloc.c
+===================================================================
+--- mm/page_alloc.c	(revision 3262)
++++ mm/page_alloc.c	(revision 3263)
+@@ -58,6 +58,8 @@
+ long nr_swap_pages;
+ int percpu_pagelist_fraction;
+ 
++int sysctl_pagecache_ratio = 100;
++
+ static void __free_pages_ok(struct page *page, unsigned int order);
+ 
+ /*
+@@ -953,6 +955,11 @@
+ 		if ((alloc_flags & ALLOC_CPUSET) &&
+ 				!cpuset_zone_allowed(zone, gfp_mask))
+ 			continue;
++		if ((gfp_mask & __GFP_PAGECACHE) &&
++				(zone_page_state(zone, NR_FILE_PAGES) -
++				zone_page_state(zone, NR_FILE_MAPPED)) >
++					zone->max_pagecache_pages)
++			continue;
+ 
+ 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+ 			unsigned long mark;
+@@ -2403,6 +2410,8 @@
+ 						/ 100;
+ 		zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+ #endif
++		zone->max_pagecache_pages =
++			(realsize * sysctl_pagecache_ratio) / 100;
+ 		zone->name = zone_names[j];
+ 		spin_lock_init(&zone->lock);
+ 		spin_lock_init(&zone->lru_lock);
+@@ -2980,6 +2989,22 @@
+ }
+ #endif
+ 
++int sysctl_pagecache_ratio_sysctl_handler(ctl_table *table, int write,
++	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
++{
++	struct zone *zone;
++	int rc;
++
++	rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
++	if (rc)
++		return rc;
++	
++	for_each_zone(zone)
++		zone->max_pagecache_pages = (zone->present_pages *
++				sysctl_pagecache_ratio) / 100;
++	return 0;
++}
++
+ /*
+  * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
+  *	proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()

Added: software/astfin/trunk/misc/test_pagecache/README.txt
===================================================================
--- software/astfin/trunk/misc/test_pagecache/README.txt	                        (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/README.txt	2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,84 @@
+README.txt for test_pagecache
+David Rowe 25 June 2007
+
+THE PROBLEM
+-----------
+
+I stumbled across a problem with page cache when simulating voicemail
+files being written to NAND flash.  Linux uses spare memory to buffer
+files.  For example if you create a file, Linux buffers that file in
+memory, presumably so it can be accessed quickly.  In regular (MMU)
+Linux, this physical memory is effectively freed when required by
+other applications by paging it to disk.  
+
+So you get the best of both worlds (i) efficient use of free RAM to
+speed disk access and (ii) availability of that RAM should it be required
+elsewhere in the system (say by a new application loading).
+
+However I found that with Blackfin uClinux, page cache doesn't work so
+well.  For example when I tried to create about 40 500k files, MemFree
+was reduced down to zero, while Cached (page cache) increased to use
+all memory.  This left no memory free for any other applications,
+leading to Out Of Memory (OOM) dumps from the kernel.
+
+Now 40 500k files is an estimated daily load of voicemail for a
+typical 8 user PBX.  500k is 60 seconds of ulaw samples; if each user
+has 5 voicemail messages, that's 40 files a day.  It is not acceptable
+to have the system run out of memory with this load.
+
+Unlike regular (MMU) Linux with hard disk paging, it looks like
+uClinux can't (ever) return memory from the page-cache to general
+usage.  However Aubrey from the ADI Blackfin team seems to have come
+up with a solution, a way to limit memory used by page-cache.
+
+I posted some questions about the problem to this thread on
+blackfin.uclinux:
+
+http://blackfin.uclinux.org/gf/project/uclinux-dist/forum/?action=ForumBrowse&forum_id=39&_forum_action=ForumMessageBrowse&thread_id=17900
+
+Here is the key conclusion:
+
+Hi Aubrey,
+
+1/ Based on your 2007R1 branch r3263 page cache tunable mods, I
+generated a patch (attached) that can be applied to the 2007R1-RC3
+release.  It works well, thanks.  For example after booting I tried:
+
+echo 30 > /proc/sys/vm/pagecache_ratio
+
+then I ran my test script that generates 200 500k files.  On my system
+I have MemFree = 48M.  When Cached reached about 15M (approx 30% of
+48M), it stopped increasing.  This left about 15M free for other stuff
+to start.
+
+2/ I also tried the CONFIG_NP2 allocator (without the page cache
+tunable mods).  This worked a little better than the default
+CONFIG_BUDDY allocator, for example it left about 5M of free memory
+after copying 200 files.  However this was not enough to start
+something like Asterisk.
+
+I still haven't worked out if memory in page cache _ever_ gets
+returned to the system.  I suspect it doesn't.  Perhaps this is an
+artifact of uClinux compared to regular Linux.  In most uClinux
+systems where the files being copied are small compared to RAM I guess
+this is OK.  However in my application NAND flash is much larger than
+RAM (256M compared to 64M) and it is possible that 40 500k voicemail
+files may be generated every day.  So it's possible page cache could
+cause the system to run out of memory.
+
+Anyway, with your tunable mod it should be OK - thanks.
+
+FILES
+-----
+
+1/ test_page_cache.sh - a script to stress the system by creating 200
+                        500k files.  This will cause MemFree to reduce
+                        to near zero if the page cache tunable feature
+                        is not present.
+
+2/ 2007R1_page_cache.diff - patch containing Aubrey's page cache 
+                            tunable feature.
+
+
+
+

Added: software/astfin/trunk/misc/test_pagecache/test_page_cache.sh
===================================================================
--- software/astfin/trunk/misc/test_pagecache/test_page_cache.sh	                        (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/test_page_cache.sh	2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,20 @@
+#!/bin/sh
+# test_page_cache.sh
+# Script to test Blackfin page cache
+# Created David Rowe 19 June 2007
+
+# Start from an empty test directory: remove the files, not the directory.
+mkdir -p /tmp/yaffstest
+rm -f /tmp/yaffstest/*
+
+loops=0
+
+# Make 200 copies of the demo sound file, reporting progress after each copy.
+while [ "$loops" -lt 200 ]
+do
+  cp /var/lib/asterisk/sounds/demo-instruct.ulaw "/tmp/yaffstest/file$loops.ulaw"
+  loops=`expr "$loops" + 1`
+  echo "loops $loops"
+done
+
+


Property changes on: software/astfin/trunk/misc/test_pagecache/test_page_cache.sh
___________________________________________________________________
Name: svn:executable
   + *




More information about the Astfin-commits mailing list