[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[patches] malloc & madvise



Dear Eglibc developers,

The malloc implementation in eglibc is great, but, unfortunately, it never uses the madvise(MADV_DONTNEED) system call to indicate which parts of the allocated memory are actually unused.

This is imho a significant shortcoming. While it is true that using the madvise syscall would make free() slightly slower, it might still be worth it. If the kernel knows which parts of the memory are unused, it can immediately reuse this memory, e.g. as a cache, which improves the overall system performance. On the other hand, if the kernel has no information about unused memory, performance might suffer greatly: when the system is low on memory, the kernel needs to swap out the rubbish data from the unused but allocated memory. Furthermore, if the application decides to reuse this memory, the kernel has to swap the rubbish data back in. Since the cost of the madvise() syscall is significantly lower than the cost of swapping, and using madvise makes it possible to avoid two swapping operations, the total effect of using madvise can be very positive.

As a proof of concept, I have implemented the madvise() calls in eglibc (the draft patch against eglibc 2.10.1 is attached). Some simple tests (running ghostscript on large pdfs, see the attached test script) showed that enabling the madvise() calls decreases the average resident memory by 14%, at the cost of a 2.4% increase in running time. Hence, it is imho reasonable to give the user the possibility to enable the madvise() calls in free().

Would it be possible to include such a functionality in eglibc?

Best regards
	Richard

P.S. Even if this is not exactly embedded stuff, I'm asking here because one of the goals of eglibc is to be more friendly to contributors :-). On the other hand, embedded devices usually cannot afford to waste too much RAM...
diff -ur eglibc-2.10.1.old/malloc/arena.c eglibc-2.10.1/malloc/arena.c
--- eglibc-2.10.1.old/malloc/arena.c	2009-04-30 23:37:18.000000000 +0200
+++ eglibc-2.10.1/malloc/arena.c	2009-10-20 11:25:06.110115426 +0200
@@ -598,6 +598,10 @@
 		    mALLOPt(M_MMAP_THRESHOLD, atoi(&envline[16]));
 		}
 	      break;
+            case 19:
+              if ( memcmp (envline, "AGGRESSIVE_MADVISE_", 19) == 0)
+                mALLOPt(M_AGGRESSIVE_MADVISE, atoi(&envline[20]));
+              break;
 	    default:
 	      break;
 	    }
@@ -616,6 +620,8 @@
 	mALLOPt(M_MMAP_THRESHOLD, atoi(s));
       if((s = getenv("MALLOC_MMAP_MAX_")))
 	mALLOPt(M_MMAP_MAX, atoi(s));
+      if((s = getenv("MALLOC_AGGRESSIVE_MADVISE_")))
+        mALLOPt(M_AGGRESSIVE_MADVISE, atoi(s));
     }
   s = getenv("MALLOC_CHECK_");
 #endif
diff -ur eglibc-2.10.1.old/malloc/malloc.c eglibc-2.10.1/malloc/malloc.c
--- eglibc-2.10.1.old/malloc/malloc.c	2009-07-29 18:29:54.000000000 +0200
+++ eglibc-2.10.1/malloc/malloc.c	2009-10-20 11:31:12.939106253 +0200
@@ -2540,6 +2540,28 @@
 #define alloc_perturb(p, n) memset (p, (perturb_byte ^ 0xff) & 0xff, n)
 #define free_perturb(p, n) memset (p, perturb_byte & 0xff, n)
 
+/* ------------------ Support for extensive MADV_DONTNEED ------------- */
+
+static int aggressive_madvise = 0;  /* set through mallopt(M_AGGRESSIVE_MADVISE, value); non-zero enables the madvise calls on free */
+
+static void madvise_free_chunk(mchunkptr p, void *orig_start, void *orig_end) {
+  const size_t ps = mp_.pagesize;
+  const size_t psm1 = ps - 1;
+
+  /* Interior of p - the area that should be madvised: start is rounded up to a page boundary past one struct malloc_chunk, end is rounded down to a page boundary at the end of the chunk */
+  char *start = (char *)(((uintptr_t) p + sizeof(struct malloc_chunk) + psm1) & ~psm1);
+  char *end = (char *)(((uintptr_t) p + chunksize(p)) & ~psm1);
+
+  /* Widen the caller-supplied [orig_start, orig_end) range outward to page boundaries; start/end are clamped to it below, presumably because memory outside it was already madvised when the neighbouring chunks were freed */
+  orig_start = (void *)(((uintptr_t)orig_start - 1) & ~psm1);
+  orig_end = (void *)(((uintptr_t)orig_end + sizeof(struct malloc_chunk) + psm1) & ~psm1);
+
+  if ((char *)orig_start > start) start = orig_start;  /* clamp the lower bound */
+  if ((char *)orig_end < end) end = orig_end;  /* clamp the upper bound */
+
+  if (end > start)  /* nothing to do unless at least one whole page remains */
+    madvise( start, end-start, MADV_DONTNEED);
+}
 
 /* ------------------- Support for multiple arenas -------------------- */
 #include "arena.c"
@@ -4844,6 +4866,9 @@
   */
 
   else if (!chunk_is_mmapped(p)) {
+    void *orig_start = p;
+    void *orig_end = (void *)p + size;
+
 #ifdef ATOMIC_FASTBINS
     if (! have_lock) {
 # if THREAD_STATS
@@ -4950,6 +4975,8 @@
       check_chunk(av, p);
     }
 
+    if (aggressive_madvise) madvise_free_chunk(p, orig_start, orig_end);
+
     /*
       If freeing a large space, consolidate possibly-surrounding
       chunks. Then, if the total unused topmost memory exceeds trim
@@ -5080,6 +5107,9 @@
 	*fb = 0;
 #endif
         do {
+          void *orig_start;
+          void *orig_end;
+
           check_inuse_chunk(av, p);
           nextp = p->fd;
 
@@ -5087,6 +5117,7 @@
           size = p->size & ~(PREV_INUSE|NON_MAIN_ARENA);
           nextchunk = chunk_at_offset(p, size);
           nextsize = chunksize(nextchunk);
+          orig_start = p; orig_end = (void *)p + size;
 
           if (!prev_inuse(p)) {
             prevsize = p->prev_size;
@@ -5125,6 +5156,8 @@
             av->top = p;
           }
 
+          if (aggressive_madvise) madvise_free_chunk(p, orig_start, orig_end);
+
         } while ( (p = nextp) != 0);
 
       }
@@ -6040,6 +6073,10 @@
     perturb_byte = value;
     break;
 
+  case M_AGGRESSIVE_MADVISE:
+    aggressive_madvise = value;
+    break;
+
 #ifdef PER_THREAD
   case M_ARENA_TEST:
     if (value > 0)
diff -ur eglibc-2.10.1.old/malloc/malloc.h eglibc-2.10.1/malloc/malloc.h
--- eglibc-2.10.1.old/malloc/malloc.h	2009-04-14 21:46:20.000000000 +0200
+++ eglibc-2.10.1/malloc/malloc.h	2009-10-20 11:31:45.580357358 +0200
@@ -130,6 +130,7 @@
 #define M_PERTURB	    -6
 #define M_ARENA_TEST	    -7
 #define M_ARENA_MAX	    -8
+#define M_AGGRESSIVE_MADVISE  -9
 
 /* General SVID/XPG interface to tunable parameters. */
 extern int mallopt __MALLOC_P ((int __param, int __val));
#!/bin/bash

# PDF document that doit() feeds to ghostscript.
FILE=main.pdf


# doit LABEL
#
# Runs ghostscript once on $FILE and writes three files suffixed with LABEL:
#   time.LABEL    - what `time` reports for the gs run
#   mem.LABEL     - every `ps aux` line containing "gs", sampled in a busy
#                   loop for as long as exactly one such line is found
#   avgmem.LABEL  - arithmetic mean of column 6 of mem.LABEL (the RSS column
#                   of `ps aux`), computed by bc with scale=5
#
# All expansions are quoted so that a LABEL or $FILE containing whitespace or
# glob characters is passed through as a single word.
doit() {
  local label=$1

  # Samples are appended with `tee -a`, so start from an empty file.
  rm -f "mem.$label"

  # Left side of the pipe: the timed gs run. Right side: the sampler, which
  # polls ps and stops as soon as the number of matching lines is not 1.
  # NOTE: `grep gs` matches any process whose ps line contains "gs", not only
  # ghostscript itself.
  ( (time gs -dBATCH -dNODISPLAY "$FILE") 2>"time.$label" ) |
    ( while [ "$(ps aux | grep gs | grep -v grep | tee -a "mem.$label" | wc -l)" -eq 1 ]; do
        /bin/true
      done )

  # Build the expression "(v1+v2+...+0)/<number of samples>" and let bc
  # evaluate it. The awk format string is a literal so that field contents
  # are never interpreted as printf directives.
  ( echo -ne "scale=5\n("
    awk '{printf "%s+", $6}' "mem.$label"
    echo -n "0)/"
    wc -l <"mem.$label" ) | bc >"avgmem.$label"
}


# First run: MALLOC_AGGRESSIVE_MADVISE_ is the environment variable the
# attached patch reads to switch on its madvise() calls; results are written
# with the suffix "madvise".
export MALLOC_AGGRESSIVE_MADVISE_=1
doit madvise
# Second run: same workload with the madvise() calls switched off, as the
# baseline; results are written with the suffix "nomadvise".
export MALLOC_AGGRESSIVE_MADVISE_=0
doit nomadvise