[Mono-dev] RFC: GC precise scanning of stacks
Hi all We think some of our 'leak' issues can be attributed to libgc's false-positive identification of pointers. Attached is a proof-of-concept patch to libgc (and a simple demonstration program) that I hope will be the start of GC precise stack scanning. The code should apply easily to sgen as well. It basically adds an extra variable to the stack which contains specific markers and references to all the pointers that will contain GC-alloced memory. There is an optional failsafe mode that will fall back to the current 'all stack is scanned' code if the markers are not seen. This code will cover objects on unmanaged stacks but I don't know what will be needed for managed code. I presume the JIT can add the same sort of marker to the stack? So, comments? Is this technique going to be workable? - Dick diff -urBbw gc6.6-orig/include/gc.h gc6.6/include/gc.h --- gc6.6-orig/include/gc.h 2005-05-20 18:50:58.0 +0100 +++ gc6.6/include/gc.h 2009-09-28 11:17:45.0 +0100 @@ -859,6 +859,20 @@ # define GC_PTR_STORE(p, q) *((p) = (q)) #endif +#ifdef GC_PRECISE_STACK +/* Keep the least significant bit set so these don't look like pointers */ +# define GC_PRECISE_STACK_BEGIN_MARKER 0xF00FF00F +# define GC_PRECISE_STACK_END_MARKER 0xD00DD00D +/* This macro is very gcc specific, with the 'unused' attribute to + * shut up the unused-variable warning, the volatile placement to foil the + * optimiser, and the '##vars' to suppress the leading comma when the args + * list is empty + */ +# define GC_STACK_REFERENCE(x, vars...) __attribute__((unused)) void * volatile __GC_stack_references_ ## x[] = {(void *)GC_PRECISE_STACK_BEGIN_MARKER, ##vars, (void *)GC_PRECISE_STACK_END_MARKER}; +#else +# define GC_STACK_REFERENCE(x, vars...) 
+#endif + /* Functions called to report pointer checking errors */ GC_API void (*GC_same_obj_print_proc) GC_PROTO((GC_PTR p, GC_PTR q)); diff -urBbw gc6.6-orig/pthread_stop_world.c gc6.6/pthread_stop_world.c --- gc6.6-orig/pthread_stop_world.c 2005-09-09 18:54:32.0 +0100 +++ gc6.6/pthread_stop_world.c 2009-09-28 11:11:48.0 +0100 @@ -256,6 +256,10 @@ /* On IA64, we also need to scan the register backing store. */ IF_IA64(ptr_t bs_lo; ptr_t bs_hi;) pthread_t me = pthread_self(); +#if GC_PRECISE_STACK +ptr_t stack_ptr; +int found_markers; +#endif if (!GC_thr_initialized) GC_thr_init(); #if DEBUG_THREADS @@ -284,7 +288,7 @@ hi = GC_stackbottom; IF_IA64(bs_lo = BACKING_STORE_BASE;) } -#if DEBUG_THREADS +#if defined(DEBUG_THREADS) || defined(DEBUG_PRECISE_STACK) GC_printf3(Stack for thread 0x%lx = [%lx,%lx)\n, (unsigned long) p - id, (unsigned long) lo, (unsigned long) hi); @@ -292,10 +296,70 @@ if (0 == lo) ABORT(GC_push_all_stacks: sp not set!\n); # ifdef STACK_GROWS_UP /* We got them backwards! 
*/ +# ifdef GC_PRECISE_STACK + found_markers = 0; + + for (stack_ptr = hi; stack_ptr = lo; stack_ptr += sizeof(ptr_t)) { + word content = *(word *)stack_ptr; + if (content == GC_PRECISE_STACK_BEGIN_MARKER) { + ptr_t stack_ptr_end = stack_ptr; +# ifdef DEBUG_PRECISE_STACK + GC_printf1(Found precise begin marker at 0x%lx\n, stack_ptr); +# endif + found_markers = 1; + do { +stack_ptr_end += sizeof(ptr_t); +content = *(word *)stack_ptr_end; +if (content != GC_PRECISE_STACK_END_MARKER) { + GC_push_all_stack ((ptr_t)content, (ptr_t)(content + sizeof(ptr_t))); +} + } while (content != GC_PRECISE_STACK_END_MARKER +stack_ptr_end lo); + stack_ptr = stack_ptr_end; + } + } + +# ifdef GC_PRECISE_STACK_FAILSAFE + if (!found_markers) { + GC_push_all_stack(hi, lo); + } +# endif +# else GC_push_all_stack(hi, lo); +# endif +# else +# ifdef GC_PRECISE_STACK + found_markers = 0; + + for (stack_ptr = lo; stack_ptr = hi; stack_ptr += sizeof(ptr_t)) { + word content = *(word *)stack_ptr; + if (content == GC_PRECISE_STACK_BEGIN_MARKER) { + ptr_t stack_ptr_end = stack_ptr; +# ifdef DEBUG_PRECISE_STACK + GC_printf1(Found precise begin marker at 0x%lx\n, stack_ptr); +# endif + found_markers = 1; + do { +stack_ptr_end += sizeof(ptr_t); +content = *(word *)stack_ptr_end; +if (content != GC_PRECISE_STACK_END_MARKER) { + GC_push_all_stack ((ptr_t)content, (ptr_t)(content + sizeof(ptr_t))); +} + } while (content != GC_PRECISE_STACK_END_MARKER +stack_ptr_end hi); + stack_ptr = stack_ptr_end; + } + } + +# ifdef GC_PRECISE_STACK_FAILSAFE + if (!found_markers) { + GC_push_all_stack(lo, hi); + } +# endif # else GC_push_all_stack(lo, hi); # endif +# endif # ifdef IA64 # if DEBUG_THREADS GC_printf3(Reg stack for thread 0x%lx = [%lx,%lx)\n, #define GC_PRECISE_STACK #include gc.h #include stdio.h #include stdlib.h typedef struct { int val; void *ptr; } Object; void
[Mono-dev] Ping throwing exception
I'm getting an exception from an application that is repeatedly running pings. The Ping class looks like it'll switch between invoking /bin/ping and sending an ICMP directly depending on the user. I'm running as a non-privileged user, therefore it's exec'ing ping. Every once in a while I get an unknown response, and have tracked it back to Ping.cs -- the code snippet below. The only way I can see this failing is if WaitForSingleObjectEx is broken for processes. The timeout being used is 4000, which is the default. Does anyone know if there are any outstanding issues with WaitForSingleObjectEx and processes? System.InvalidOperationException: The process must exit before getting the requested information. at System.Diagnostics.Process.get_ExitCode () [0x0] at (wrapper remoting-invoke-with-check) System.Diagnostics.Process:get_ExitCode () at MassPinger.Ping.SendUnprivileged (System.Net.IPAddress address, Int32 timeout, System.Byte[] buffer, MassPinger.PingOptions options) [0x0] Ping.cs:273 if (!ping.WaitForExit (timeout) || ping.ExitCode == 2) { return new PingReply (address, buffer, options, trip_time, System.Net.NetworkInformation.IPStatus.TimedOut); } IMPORTANT: The information contained in this email and/or its attachments is confidential. If you are not the intended recipient, please notify the sender immediately by reply and immediately delete this message and all its attachments. Any review, use, reproduction, disclosure or dissemination of this message or any attachment by an unintended recipient is strictly prohibited. Neither this message nor any attachment is intended as or should be construed as an offer, solicitation or recommendation to buy or sell any security or other financial instrument. Neither the sender, his or her employer nor any of their respective affiliates makes any warranties as to the completeness or accuracy of any of the information contained herein or that this message or any of its attachments is free of viruses. 
___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
[Mono-dev] Mono 2.6 release notes.
Hello, Since we are getting ready for 2.6, I would like to ask everyone on the team to update the release notes with the work that we have done since 2.4 was published, as it will not do any justice if I update it myself: http://mono-project.com/Release_Notes_Mono_2.6 ___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
Re: [Mono-dev] Mono 2.6 release notes.
Hi, Since we are getting ready for 2.6, I would like to ask everyone on the team to update the release notes with the work that we have done since 2.4 was published, as it will not do any justice if I update it myself: Must have missed it, but have the preview tarballs been uploaded anywhere? TTFN Paul -- Sie können mich aufreizen und wirklich heiß machen! signature.asc Description: This is a digitally signed message part ___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
[Mono-dev] Issues with GC due to libgc
Hi all, After several weeks working on a bunch of mem issues related to the libgc based garbage collector, we've identified the following issue and a possible solution (Dick already sent some workarounds to the list): The libgc garbage collector has a really hard time identifying pointers to objects since it guesses what is a pointer instead of actually knowing by using data passed by the mono runtime. It means something as simple as introducing a long on the stack (for instance something like array = new int[100]) will block (forever) the memory at address 100. Yes, as incredible as it sounds, it can cause important mem problems on long living apps (typically servers). (As a side note, this exact problem is present on sgen, since it also scans the stack conservatively). A small improvement could be made in the current GC with little effort: supplying more class refmaps to libgc. Libgc is very hard to modify, it contains too many hacks and optimizations that have made the code a nightmare to understand and modify, so we don't find it useful to do anything here beyond very small patches. That said, mono currently can provide reference bitmaps for objects, it's a matter of providing the right descriptor to the garbage collector. Libgc supports this kind of descriptors and mono already generates them for the sgen gc, so it's just a matter of joining those together (which should be easy to do). This should improve a great number of scans in the marking process, leaving only stacks and several minor objects without precise marking. (Should become similar to the current sgen idea, where stacks and other roots are scanned conservatively, although not compacting). Attached is the sample code we use to reproduce the issue on 32 bit based Linux/Mono systems. Some notes about the test app below: === the program accepts commands like gc, mem, exit, 2, or 1 2 n m creates n arrays of ints with m elements, and puts them in an arraylist. 
After the call completes, they are no longer referenced. 1 n m same, but waiting for a key press after each new array gc n performs n gcs exit exits So, the case: mono test.exe 2 200 70creates 2 million int arrays of 70 elements each (virtual goes up to 777MB) gc 10should free everything, but around 33MB remain allocated acording to pmap: ... bf4b5000 32K 0K 0K ---p [anon] bfc9e000 88K 32K 28K rwxp [stack] e000 4K 0K 0K r-xp [vdso] Total: 777820K 33852K 29336K 2 20 2500 creates 20 int arrays of 25 million elements each (2.7GB) gc 10 now pmap shows everything is screwed up: ... b7f2b000 8K 8K 8K rwxp /lib/ld-2.6.1.so bf4b5000 32K 0K 0K ---p [anon] bfc9e000 88K 32K 28K rwxp [stack] e000 4K 0K 0K r-xp [vdso] Total: 2764356K 1696132K 1691616K Trying with smaller sizes lets you see that segments are joined and split, but seems that there is some inability to free everything. == Regards, pablo using System; using System.Collections; namespace test { class Program { static void Main(string[] args) { WaitForEnter(); } private static void WaitForEnter() { Console.WriteLine(Command:); while (true) { Console.Write( ); string line = Console.ReadLine(); string[] args = line.Split(' '); if (args.Length = 0) continue; switch (args[0].ToLower()) { case exit: return; case 1: Case1(args); break; case 2: Case2(args); break; case mem: Console.WriteLine(Memory now: {0}, GC.GetTotalMemory(false)); break; case gc: Gcs(args); break; default: Console.WriteLine(Unknown command); break; } } } private static void Gcs(string[] args) { int loop = (args.Length == 2) ? Int32.Parse(args[1]) : 1; for (int i = 0; i loop; ++i) { Console.WriteLine(Memory {1} now : {0}, GC.GetTotalMemory(false), i); Console.WriteLine(Memory {1} after GC: {0}, GC.GetTotalMemory(true), i); } } private const int OneMeg = 1024 * 1024; private static void Case1(string[] args) { int loop = (args.Length = 2) ? Int32.Parse(args[1]) : 5; int size = (args.Length = 3) ? Int32.Parse(args[2]) : 10 * OneMeg; ArrayList
Re: [Mono-dev] Mono 2.6 release notes.
I should be able to get the first preview published today. ___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
Re: [Mono-dev] Issues with GC due to libgc
Hello, Libgc supports this kind of descriptors and mono already generates them for the sgen gc, so it's just a matter of joining those together (which should be easy to do). This should improve a great number of scans in the marking process, leaving only stacks and several minor objects without precise marking. (Should become similar to the current sgen idea, where stacks and other roots are scanned conservatively, although not compacting). Mono already uses those descriptors for the heap; There are only two cases when it does not use that: * Scanning the stack, this is done with the conservative collector. * Any AppDomains that are not the root appdomain. The problem with scanning the stack precisely is that it requires the JIT and the GC to work as a team to be able, at any point of the execution, to determine which values on the stack are pointers and which values are not. This is not trivial. The problem with AppDomains is that upon unloading there is a potential for leaking vtables, something that I do not particularly think is as important as being able to scan the AppDomains precisely. We should bring Ben's patch into Mono and just default to this. There are ways of minimizing the problems that you are experiencing today, some techniques might work better than others, but: * Do not allocate large blocks of data, as they tend to fragment your heap; Instead use smaller allocations, or use unmanaged buffers if you need to. This technique is used in Mono's ASP.NET precisely for that reason. See System.Web/HttpResponseStream.cs * Make your stacks shallower. Miguel. ___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
Re: [Mono-dev] Issues with GC due to libgc
Hello, I tried your sample, on my machine the memory usage with Case1 does go up very quickly, but then it tends to stay stable around 600 megs. Later, I modified the program to not allocate 1 meg blocks, but instead to allocate 1k blocks 1024 times (so that it allocates the same amount of memory). The program stays stable at around 200 megs of ram in this case. I am sure your program is more complicated, but the difference between these two patterns of memory usage is, in my opinion, caused by memory fragmentation, not really conservative heap scanning. My suggestion is to change the code in your server to use either unmanaged buffers for large allocations, or to do buffering with smaller blocks of memory instead of 10 megabyte blobs. Hi all, After several weeks working on a bunch of mem issues related to the libgc based garbage collector, we've identified the following issue and a possible solution (Dick already sent some workarounds to the list): The libgc garbage collector has a really hard time identifying pointers to objects since it guesses what is a pointer instead of actually knowing by using data passed by the mono runtime. It means something as simple as introducing a long on the stack (for instance something like array = new int[100]) will block (forever) the memory at address 100. Yes, as incredible as it sounds, it can cause important mem problems on long living apps (typically servers). (As a side note, this exact problem is present on sgen, since it also scans the stack conservatively). A small improvement could be made in the current GC with little effort, and is supplying more class refmaps to libgc. Libgc is very hard to modify, it contains too many hacks and optimizations that have made the code a nightmare to understand and modify, so we don't find useful to make anything here beyond very small patches. That said, mono currently can provide reference bitmaps for objects, it's a matter of providing the right descriptor to the garbage collector. 
Libgc supports this kind of descriptors and mono already generates them for the sgen gc, so it's just a matter of joining those together (which should beeasy to do). This should improve a great number of scans in the arking process, leaving only stacks and several minor objects without precise marking. (Should become similar to the current sgen idea, where stacks and other roots are scanned conservatively, although not compacting). Attached is the sample code we use to reproduce the issue on 32 bit based Linux/Mono systems. Some notes about the test app below: === the program accepts commands like gc, mem, exit, 2, or 1 2 n m creates n arrays of ints with m elements, and put them in an arraylist. After the call completes, they are no longer referenced. 1 n m same, but waiting for a key press after each new array gc n performs n gcs exit exits So, the case: mono test.exe 2 200 70creates 2 million int arrays of 70 elements each (virtual goes up to 777MB) gc 10should free everything, but around 33MB remain allocated acording to pmap: ... bf4b5000 32K 0K 0K ---p [anon] bfc9e000 88K 32K 28K rwxp [stack] e000 4K 0K 0K r-xp [vdso] Total: 777820K 33852K 29336K 2 20 2500 creates 20 int arrays of 25 million elements each (2.7GB) gc 10 now pmap shows everything is screwed up: ... b7f2b000 8K 8K 8K rwxp /lib/ld-2.6.1.so bf4b5000 32K 0K 0K ---p [anon] bfc9e000 88K 32K 28K rwxp [stack] e000 4K 0K 0K r-xp [vdso] Total: 2764356K 1696132K 1691616K Trying with smaller sizes lets you see that segments are joined and split, but seems that there is some inability to free everything. 
== Regards, pablo plain text document attachment (Program.cs) using System; using System.Collections; namespace test { class Program { static void Main(string[] args) { WaitForEnter(); } private static void WaitForEnter() { Console.WriteLine(Command:); while (true) { Console.Write( ); string line = Console.ReadLine(); string[] args = line.Split(' '); if (args.Length = 0) continue; switch (args[0].ToLower()) { case exit: return; case 1: Case1(args); break; case 2: Case2(args); break; case mem: Console.WriteLine(Memory now: {0}, GC.GetTotalMemory(false));
Re: [Mono-dev] RFC: GC precise scanning of stacks
Hello Dick, Attached is a proof-of-concept patch to libgc (and a simple demonstration program) that I hope will be the start of GC precise stack scanning. The code should apply easily to sgen as well. Thanks; this is a nice start. I think there should be a bit more checking for the markers, something along the lines of having a size argument and checking that mem[start + size] == end_marker as well, just for the sake of avoiding false positives, give or take more checks. The challenge is to make the JIT compiler group all of the managed object references in a contiguous space and then decorate that block with this. I like the idea myself, it will not be 100% precise, but it will get us very very close. The VM team of course needs to weigh in. ___ Mono-devel-list mailing list Mono-devel-list@lists.ximian.com http://lists.ximian.com/mailman/listinfo/mono-devel-list
[Mono-dev] [PATCH] Network Performance Counters
The attached patch implements 3 Network Interface performance category counters. The counters are Bytes Received/sec, Bytes Sent/sec, and Bytes Total/sec. A sample program using these counters and sample output is attached as well. This information is obtained by reading /proc/net/dev. The patch changes the following files: mono/metadata/ChangeLog mono/metadata/mono-perfcounters-def.h mono/metadata/mono-perfcounters.c mono/utils/ChangeLog mono/utils/mono-proclib.c mono/utils/mono-proclib.h jr diff --git a/mono/metadata/ChangeLog b/mono/metadata/ChangeLog index a954bee..4094799 100644 --- a/mono/metadata/ChangeLog +++ b/mono/metadata/ChangeLog @@ -1,3 +1,9 @@ +2009-09-25 Joel W. Reed joelwr...@gmail.com + + * mono-perfcounters.c, mono-perfcounters-def.h: Add + network performance counters for bytes sent per second, bytes + received per second, and bytes total per second. + 2009-09-22 Zoltan Varga var...@gmail.com * image.c (mono_image_close): Atomically decrement the reference count and diff --git a/mono/metadata/mono-perfcounters-def.h b/mono/metadata/mono-perfcounters-def.h index 1a7cbab..367d12e 100644 --- a/mono/metadata/mono-perfcounters-def.h +++ b/mono/metadata/mono-perfcounters-def.h @@ -119,3 +119,7 @@ PERFCTR_COUNTER(SECURITY_LCHECKS, # Link Time Checks, , NumberOfItems32, sec PERFCTR_COUNTER(SECURITY_PERCTIME, % Time in RT checks, , RawFraction, security_time) PERFCTR_COUNTER(SECURITY_SWDEPTH, Stack Walk Depth, , NumberOfItems32, security_depth) +PERFCTR_CAT(NETWORK, Network Interface, , MultiInstance, NetworkInterface, NETWORK_BYTESRECSEC) +PERFCTR_COUNTER(NETWORK_BYTESRECSEC, Bytes Received/sec, , RateOfCountsPerSecond64, unused) +PERFCTR_COUNTER(NETWORK_BYTESSENTSEC, Bytes Sent/sec, , RateOfCountsPerSecond64, unused) +PERFCTR_COUNTER(NETWORK_BYTESTOTALSEC, Bytes Total/sec, , RateOfCountsPerSecond64, unused) diff --git a/mono/metadata/mono-perfcounters.c b/mono/metadata/mono-perfcounters.c index 8451091..e5ccca2 100644 --- 
a/mono/metadata/mono-perfcounters.c +++ b/mono/metadata/mono-perfcounters.c @@ -98,6 +98,7 @@ enum { ThreadInstance, CPUInstance, MonoInstance, + NetworkInterfaceInstance, CustomInstance }; @@ -283,6 +284,11 @@ struct _ImplVtable { }; typedef struct { + int id; + char *name; +} NetworkVtableArg; + +typedef struct { ImplVtable vtable; MonoPerfCounters *counters; int pid; @@ -686,6 +692,60 @@ cpu_get_impl (MonoString* counter, MonoString* instance, int *type, MonoBoolean } static MonoBoolean +get_network_counter (ImplVtable *vtable, MonoBoolean only_value, MonoCounterSample *sample) +{ + MonoNetworkError error = MONO_NETWORK_ERROR_OTHER; + NetworkVtableArg *narg = (NetworkVtableArg*) vtable-arg; + if (!only_value) { + fill_sample (sample); + } + + sample-counterType = predef_counters [predef_categories [CATEGORY_NETWORK].first_counter + narg-id].type; + switch (narg-id) { + case COUNTER_NETWORK_BYTESRECSEC: + sample-rawValue = mono_network_get_data (narg-name, MONO_NETWORK_BYTESREC, error); + break; + case COUNTER_NETWORK_BYTESSENTSEC: + sample-rawValue = mono_network_get_data (narg-name, MONO_NETWORK_BYTESSENT, error); + break; + case COUNTER_NETWORK_BYTESTOTALSEC: + sample-rawValue = mono_network_get_data (narg-name, MONO_NETWORK_BYTESTOTAL, error); + break; + } + + if (error == MONO_NETWORK_ERROR_NONE) + return TRUE; + else + return FALSE; +} + +static void +network_cleanup (ImplVtable *vtable) +{ + if (vtable-arg) + g_free(vtable-arg); +} + +static void* +network_get_impl (MonoString* counter, MonoString* instance, int *type, MonoBoolean *custom) +{ + const CounterDesc *cdesc; + NetworkVtableArg *narg; + ImplVtable *vtable; + *custom = FALSE; + if ((cdesc = get_counter_in_category (predef_categories [CATEGORY_NETWORK], counter))) { + narg = g_new0 (NetworkVtableArg, 1); + narg-id = cdesc-id; + narg-name = mono_string_to_utf8 (instance); + *type = cdesc-type; + vtable = create_vtable (narg, get_network_counter, NULL); + vtable-cleanup = network_cleanup; + return 
vtable; + } + return NULL; +} + +static MonoBoolean get_process_counter (ImplVtable *vtable, MonoBoolean only_value, MonoCounterSample *sample) { int id = GPOINTER_TO_INT (vtable-arg); @@ -1033,6 +1093,8 @@ mono_perfcounter_get_impl (MonoString* category, MonoString* counter, MonoString return process_get_impl (counter, instance, type, custom); case CATEGORY_MONO_MEM: return mono_mem_get_impl (counter, instance, type, custom); + case CATEGORY_NETWORK: + return network_get_impl (counter, instance, type, custom); case CATEGORY_JIT: case CATEGORY_EXC: case CATEGORY_GC: @@ -1343,6 +1405,21 @@ get_string_array (void **array, int count, gboolean is_process) } static MonoArray* +get_string_array_of_strings (void **array, int count) +{ + int i; + MonoDomain *domain = mono_domain_get (); + MonoArray * res = mono_array_new (mono_domain_get (), mono_get_string_class (), count); + for (i = 0; i count; ++i) { + char* p = array[i]; +