[Astfin-commits] Commit in software/astfin/trunk/misc (4 files)
Astfin Subversion Commits
svn-commit at astfin.org
Sun Jun 24 22:42:10 EDT 2007
Date: Sunday, June 24, 2007 @ 22:42:07
Author: david
Revision: 897
added test for page cache problems
Added:
software/astfin/trunk/misc/test_pagecache/
software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff
software/astfin/trunk/misc/test_pagecache/README.txt
software/astfin/trunk/misc/test_pagecache/test_page_cache.sh
Added: software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff
===================================================================
--- software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/2007R1_page_cache.diff 2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,176 @@
+Index: kernel/sysctl.c
+===================================================================
+--- kernel/sysctl.c (revision 3262)
++++ kernel/sysctl.c (revision 3263)
+@@ -1022,6 +1022,17 @@
+ .extra2 = &one_hundred,
+ },
+ #endif
++ {
++ .ctl_name = VM_PAGECACHE_RATIO,
++ .procname = "pagecache_ratio",
++ .data = &sysctl_pagecache_ratio,
++ .maxlen = sizeof(sysctl_pagecache_ratio),
++ .mode = 0644,
++ .proc_handler = &sysctl_pagecache_ratio_sysctl_handler,
++ .strategy = &sysctl_intvec,
++ .extra1 = &zero,
++ .extra2 = &one_hundred,
++ },
+ #ifdef CONFIG_X86_32
+ {
+ .ctl_name = VM_VDSO_ENABLED,
+Index: include/linux/sysctl.h
+===================================================================
+--- include/linux/sysctl.h (revision 3262)
++++ include/linux/sysctl.h (revision 3263)
+@@ -202,6 +202,7 @@
+ VM_PANIC_ON_OOM=33, /* panic at out-of-memory */
+ VM_VDSO_ENABLED=34, /* map VDSO into new processes? */
+ VM_MIN_SLAB=35, /* Percent pages ignored by zone reclaim */
++ VM_PAGECACHE_RATIO=36, /* percent of RAM to use as page cache */
+ };
+
+
+Index: include/linux/mmzone.h
+===================================================================
+--- include/linux/mmzone.h (revision 3262)
++++ include/linux/mmzone.h (revision 3263)
+@@ -166,6 +166,7 @@
+ * sysctl_lowmem_reserve_ratio sysctl changes.
+ */
+ unsigned long lowmem_reserve[MAX_NR_ZONES];
++ unsigned long max_pagecache_pages;
+
+ #ifdef CONFIG_NUMA
+ int node;
+@@ -463,6 +464,8 @@
+ struct file *, void __user *, size_t *, loff_t *);
+ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
+ struct file *, void __user *, size_t *, loff_t *);
++int sysctl_pagecache_ratio_sysctl_handler(struct ctl_table *, int,
++ struct file *, void __user *, size_t *, loff_t *);
+
+ #include <linux/topology.h>
+ /* Returns the number of the current Node. */
+Index: include/linux/pagemap.h
+===================================================================
+--- include/linux/pagemap.h (revision 3262)
++++ include/linux/pagemap.h (revision 3263)
+@@ -63,12 +63,13 @@
+
+ static inline struct page *page_cache_alloc(struct address_space *x)
+ {
+- return __page_cache_alloc(mapping_gfp_mask(x));
++ return __page_cache_alloc(mapping_gfp_mask(x) | __GFP_PAGECACHE);
+ }
+
+ static inline struct page *page_cache_alloc_cold(struct address_space *x)
+ {
+- return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD);
++ return __page_cache_alloc(mapping_gfp_mask(x) |
++ __GFP_COLD | __GFP_PAGECACHE);
+ }
+
+ typedef int filler_t(void *, struct page *);
+Index: include/linux/gfp.h
+===================================================================
+--- include/linux/gfp.h (revision 3262)
++++ include/linux/gfp.h (revision 3263)
+@@ -46,6 +46,7 @@
+ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+ #define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+ #define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
++#define __GFP_PAGECACHE ((__force gfp_t)0x80000u) /* Page cache allocation */
+
+ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+Index: include/linux/swap.h
+===================================================================
+--- include/linux/swap.h (revision 3262)
++++ include/linux/swap.h (revision 3263)
+@@ -191,6 +191,7 @@
+ extern int vm_swappiness;
+ extern int remove_mapping(struct address_space *mapping, struct page *page);
+ extern long vm_total_pages;
++extern int sysctl_pagecache_ratio;
+
+ #ifdef CONFIG_NUMA
+ extern int zone_reclaim_mode;
+Index: mm/vmscan.c
+===================================================================
+--- mm/vmscan.c (revision 3262)
++++ mm/vmscan.c (revision 3263)
+@@ -923,6 +923,15 @@
+ zone->nr_scan_inactive = 0;
+ else
+ nr_inactive = 0;
++
++ /*
++ * If the page cache is too big then focus on page cache
++ * and ignore anonymous pages
++ */
++ if (sc->may_swap && (zone_page_state(zone, NR_FILE_PAGES) -
++ zone_page_state(zone, NR_FILE_MAPPED))
++ > zone->max_pagecache_pages)
++ sc->may_swap = 0;
+
+ while (nr_active || nr_inactive) {
+ if (nr_active) {
+Index: mm/page_alloc.c
+===================================================================
+--- mm/page_alloc.c (revision 3262)
++++ mm/page_alloc.c (revision 3263)
+@@ -58,6 +58,8 @@
+ long nr_swap_pages;
+ int percpu_pagelist_fraction;
+
++int sysctl_pagecache_ratio = 100;
++
+ static void __free_pages_ok(struct page *page, unsigned int order);
+
+ /*
+@@ -953,6 +955,11 @@
+ if ((alloc_flags & ALLOC_CPUSET) &&
+ !cpuset_zone_allowed(zone, gfp_mask))
+ continue;
++ if ((gfp_mask & __GFP_PAGECACHE) &&
++ (zone_page_state(zone, NR_FILE_PAGES) -
++ zone_page_state(zone, NR_FILE_MAPPED)) >
++ zone->max_pagecache_pages)
++ continue;
+
+ if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+ unsigned long mark;
+@@ -2403,6 +2410,8 @@
+ / 100;
+ zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
+ #endif
++ zone->max_pagecache_pages =
++ (realsize * sysctl_pagecache_ratio) / 100;
+ zone->name = zone_names[j];
+ spin_lock_init(&zone->lock);
+ spin_lock_init(&zone->lru_lock);
+@@ -2980,6 +2989,22 @@
+ }
+ #endif
+
++int sysctl_pagecache_ratio_sysctl_handler(ctl_table *table, int write,
++ struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
++{
++ struct zone *zone;
++ int rc;
++
++ rc = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
++ if (rc)
++ return rc;
++
++ for_each_zone(zone)
++ zone->max_pagecache_pages = (zone->present_pages *
++ sysctl_pagecache_ratio) / 100;
++ return 0;
++}
++
+ /*
+ * lowmem_reserve_ratio_sysctl_handler - just a wrapper around
+ * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve()
Added: software/astfin/trunk/misc/test_pagecache/README.txt
===================================================================
--- software/astfin/trunk/misc/test_pagecache/README.txt (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/README.txt 2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,84 @@
+README.txt for test_pagecache
+David Rowe 25 June 2007
+
+THE PROBLEM
+-----------
+
+I stumbled across a problem with page cache when simulating voicemail
+files being written to NAND flash. Linux uses spare memory to buffer
+files. For example if you create a file, Linux buffers that file in
+memory, presumably so it can be accessed quickly. In regular (MMU)
+Linux, this physical memory is effectively freed when required by
+other applications by paging it to disk.
+
+So you get the best of both worlds (i) efficient use of free RAM to
+speed disk access and (ii) availability of that RAM should it be required
+elsewhere in the system (say by a new application loading).
+
+However I found that with Blackfin uClinux, page cache doesn't work so
+well. For example when I tried to create about 40 500k files, MemFree
+was reduced down to zero, while Cached (page cache) increased to use
+all memory. This left no memory free for any other applications,
+leading to Out Of Memory (OOM) dumps from the kernel.
+
+Now 40 500k files is an estimated daily load of voicemail for a
+typical 8 user PBX: 500k is about 60 seconds of ulaw samples, and if
+each user has 5 voicemail messages that's 40 files a day. It is not
+acceptable to have the system run out of memory with this load.
+
+Unlike regular (MMU) Linux with hard disk paging, it looks like
+uClinux can't (ever) return memory from the page-cache to general
+usage. However Aubrey from the ADI Blackfin team seems to have come
+up with a solution, a way to limit memory used by page-cache.
+
+I posted some questions about the problem to this thread on
+blackfin.uclinux:
+
+http://blackfin.uclinux.org/gf/project/uclinux-dist/forum/?action=ForumBrowse&forum_id=39&_forum_action=ForumMessageBrowse&thread_id=17900
+
+Here is the key conclusion:
+
+Hi Aubrey,
+
+1/ Based on your 2007R1 branch r3263 page cache tunable mods, I
+generated a patch (attached) that can be applied to the 2007R1-RC3
+release. It works well, thanks. For example after booting I tried:
+
+echo 30 > /proc/sys/vm/pagecache_ratio
+
+then I ran my test script that generates 200 500k files. On my system
+I have MemFree = 48M. When Cached reached about 15M (approx 30% of
+48M), it stopped increasing. This left about 15M free for other stuff
+to start.
+
+2/ I also tried the CONFIG_NP2 allocator (without the page cache
+tunable mods). This worked a little better than the default
+CONFIG_BUDDY allocator, for example it left about 5M of free memory
+after copying 200 files. However this was not enough to start
+something like Asterisk.
+
+I still haven't worked out if memory in page cache _ever_ gets
+returned to the system. I suspect it doesn't. Perhaps this is an
+artifact of uClinux compared to regular Linux. In most uClinux
+systems where the files being copied are small compared to RAM I guess
+this is OK. However in my application NAND flash is much larger than
+RAM (256M compared to 64M) and it is possible that 40 500k voicemail
+files may be generated every day. So it's possible page cache could
+cause the system to run out of memory.
+
+Anyway, with your tunable mod it should be OK - thanks.
+
+FILES
+-----
+
+1/ test_page_cache.sh - a script to stress the system by creating 200
+ 500k files. This will cause MemFree to reduce
+ to near zero if the page cache tunable feature
+ is not present.
+
+2/ 2007R1_page_cache.diff - patch containing Aubrey's page cache
+ tunable feature.
+
+
+
+
Added: software/astfin/trunk/misc/test_pagecache/test_page_cache.sh
===================================================================
--- software/astfin/trunk/misc/test_pagecache/test_page_cache.sh (rev 0)
+++ software/astfin/trunk/misc/test_pagecache/test_page_cache.sh 2007-06-25 02:42:07 UTC (rev 897)
@@ -0,0 +1,20 @@
+#!/bin/sh
+# test_page_cache.sh
+# Script to test Blackfin page cache: makes 200 copies of a sound file
+# Created David Rowe 19 June 2007
+
+mkdir -p /tmp/yaffstest
+rm -f /tmp/yaffstest/*   # empty the directory; rm -f on the directory itself removes nothing
+
+loops=0
+
+while [ "$loops" -lt 200 ]
+do
+ # one more copy per pass: file0.ulaw ... file199.ulaw
+ cp /var/lib/asterisk/sounds/demo-instruct.ulaw "/tmp/yaffstest/file$loops.ulaw"
+
+ loops=`expr $loops + 1`
+ echo "loops $loops"
+done
+
+
Property changes on: software/astfin/trunk/misc/test_pagecache/test_page_cache.sh
___________________________________________________________________
Name: svn:executable
+ *
More information about the Astfin-commits
mailing list