From 7f1dcb2a810f4fd621e4404ac3c07cd15b8ace16 Mon Sep 17 00:00:00 2001 From: Przemyslaw Czerpak Date: Sun, 29 Mar 2009 17:13:52 +0000 Subject: [PATCH] 2009-03-29 19:20 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl) * harbour/include/hbapi.h * harbour/include/hbstack.h * harbour/source/vm/estack.c * harbour/source/vm/fm.c + added support for thread local memory allocators by using MSPACE mechanism in DLALLOC. It can be enabled by compiling Harbour with HB_FM_DLMT_ALLOC macro. By default up to 16 (HB_MSPACE_COUNT macro) independent memory allocators are dynamically created for new threads. When more threads are used then allocators are shared between them and HVM tries to balance used allocators to reduce number of threads using the same allocator. It should give noticeable scalability improvement in some OS-es where default memory manager has poor MT performance. Please try to compile speedtst.prg with Harbour compiled with and without HB_FM_DLMT_ALLOC and compare results in this test: speedtst --thread=2 --scale using real multiCPU machine. * harbour/source/vm/dlmalloc.c * pacified warning when FOOTERS are enabled * harbour/source/vm/garbage.c * cleanup comment --- harbour/ChangeLog | 26 +++++ harbour/include/hbapi.h | 4 + harbour/include/hbstack.h | 1 + harbour/source/vm/dlmalloc.c | 1 + harbour/source/vm/estack.c | 3 +- harbour/source/vm/fm.c | 184 ++++++++++++++++++++++++++++------- harbour/source/vm/garbage.c | 4 +- 7 files changed, 184 insertions(+), 39 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index 4786224162..2d26c8538f 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -8,6 +8,32 @@ 2009-12-31 13:59 UTC+0100 Foo Bar (foo.bar foobar.org) */ +2009-03-29 19:20 UTC+0200 Przemyslaw Czerpak (druzus/at/priv.onet.pl) + * harbour/include/hbapi.h + * harbour/include/hbstack.h + * harbour/source/vm/estack.c + * harbour/source/vm/fm.c + + added support for thread local memory allocators by using MSPACE + mechanism in DLALLOC. 
+ It can be enabled by compiling Harbour with HB_FM_DLMT_ALLOC macro. + By default up to 16 (HB_MSPACE_COUNT macro) independent memory + allocators are dynamically created for new threads. When more threads + are used then allocators are shared between them and HVM tries to + balance used allocators to reduce number of threads using the + same allocator. + It should give noticeable scalability improvement in some OS-es + where default memory manager has poor MT performance. + Please try to compile speedtst.prg with Harbour compiled with + and without HB_FM_DLMT_ALLOC and compare results in this test: + speedtst --thread=2 --scale + using real multiCPU machine. + + * harbour/source/vm/dlmalloc.c + * pacified warning when FOOTERS are enabled + + * harbour/source/vm/garbage.c + * cleanup comment + 2009-03-29 09:03 UTC-0800 Pritpal Bedi (pritpal@vouchcac.com) * harbour/contrib/gtwvg/wvgsink.c ! Fixed remaining non-ANSI C comments. Just had been overlooked! diff --git a/harbour/include/hbapi.h b/harbour/include/hbapi.h index 4251629f53..241d18956d 100644 --- a/harbour/include/hbapi.h +++ b/harbour/include/hbapi.h @@ -493,6 +493,10 @@ extern HB_EXPORT ULONG hb_xsize( void * pMem ); /* returns t extern HB_EXPORT ULONG hb_xquery( USHORT uiMode ); /* Query different types of memory information */ extern HB_EXPORT void hb_xsetfilename( char * szValue ); extern HB_EXPORT void hb_xsetinfo( char * szValue ); +#ifdef _HB_API_INTERNAL_ +extern void hb_xinit_thread( void ); +extern void hb_xexit_thread( void ); +#endif extern HB_EXPORT HB_VMHANDLE hb_xvalloc( ULONG nSize, USHORT nFlags ); extern HB_EXPORT void hb_xvfree( HB_VMHANDLE h ); diff --git a/harbour/include/hbstack.h b/harbour/include/hbstack.h index 80e60c3606..d727807780 100644 --- a/harbour/include/hbstack.h +++ b/harbour/include/hbstack.h @@ -184,6 +184,7 @@ typedef struct void * pStackLst; /* this stack entry in stack linked list */ HB_IOERRORS IOErrors; /* MT safe buffer for IO errors */ BYTE * byDirBuffer; /* MT 
safe buffer for hb_fsCurDir() results */ + void * allocator; /* memory manager global struct pointer */ #endif } HB_STACK, * PHB_STACK; diff --git a/harbour/source/vm/dlmalloc.c b/harbour/source/vm/dlmalloc.c index 09338558e8..e7653e7e4b 100644 --- a/harbour/source/vm/dlmalloc.c +++ b/harbour/source/vm/dlmalloc.c @@ -4566,6 +4566,7 @@ void mspace_free(mspace msp, void* mem) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); + ( void ) msp; #else /* FOOTERS */ mstate fm = (mstate)msp; #endif /* FOOTERS */ diff --git a/harbour/source/vm/estack.c b/harbour/source/vm/estack.c index 318fb820e2..4d2f446836 100644 --- a/harbour/source/vm/estack.c +++ b/harbour/source/vm/estack.c @@ -351,6 +351,7 @@ void hb_stackInit( void ) { HB_STACK_TLS_PRELOAD hb_stack_init( &hb_stack ); + hb_xinit_thread(); } } @@ -361,7 +362,7 @@ void hb_stackFree( void ) HB_TRACE(HB_TR_DEBUG, ("hb_stackFree()")); hb_stack_free( &hb_stack ); - + hb_xexit_thread(); #if defined( HB_MT_VM ) hb_stack_dealloc(); #endif diff --git a/harbour/source/vm/fm.c b/harbour/source/vm/fm.c index 460172c604..cf137cfdc9 100644 --- a/harbour/source/vm/fm.c +++ b/harbour/source/vm/fm.c @@ -83,6 +83,8 @@ #include */ +#define HB_STACK_PRELOAD + #include "hbvmopt.h" #include "hbapi.h" #include "hbapiitm.h" @@ -101,7 +103,8 @@ #undef HB_FM_DL_ALLOC #undef HB_FM_WIN_ALLOC #elif !defined( HB_FM_DL_ALLOC ) && !defined( HB_FM_WIN_ALLOC ) - #if defined( _MSC_VER ) || defined( __BORLANDC__ ) || defined( __MINGW32__ ) + #if defined( _MSC_VER ) || defined( __BORLANDC__ ) || defined( __MINGW32__ ) || \ + ( defined( HB_FM_DLMT_ALLOC ) && defined( HB_MT_VM ) ) #define HB_FM_DL_ALLOC #else /* #define HB_FM_DL_ALLOC */ @@ -124,6 +127,12 @@ # define REALLOC_ZERO_BYTES_FREES # if defined( HB_MT_VM ) # define USE_LOCKS 1 +# if defined( HB_FM_DLMT_ALLOC ) +# define ONLY_MSPACES 1 +# define FOOTERS 1 +# endif +# else +# undef HB_FM_DLMT_ALLOC # endif # if defined( __BORLANDC__ ) # pragma warn -aus @@ -136,7 +145,7 @@ 
# define ABORT TerminateProcess( GetCurrentProcess(), 0 ) # elif defined( __POCC__ ) && !defined( InterlockedCompareExchangePointer ) # define InterlockedCompareExchangePointer -# elif defined( _MSC_VER ) && !defined( USE_DL_PREFIX ) +# elif defined( _MSC_VER ) && !defined( USE_DL_PREFIX ) && !defined( HB_FM_DLMT_ALLOC ) # define USE_DL_PREFIX # endif # include "dlmalloc.c" @@ -148,27 +157,35 @@ # pragma warn +prc # pragma warn +rch # endif -# if defined( USE_DL_PREFIX ) +# if defined( HB_FM_DLMT_ALLOC ) +# define malloc( n ) mspace_malloc( hb_mspace(), ( n ) ) +# define realloc( p, n ) mspace_realloc( NULL, ( p ), ( n ) ) +# define free( p ) mspace_free( NULL, ( p ) ) +# elif defined( USE_DL_PREFIX ) # define malloc( n ) dlmalloc( ( n ) ) # define realloc( p, n ) dlrealloc( ( p ), ( n ) ) # define free( p ) dlfree( ( p ) ) # endif -#elif defined( HB_FM_WIN_ALLOC ) && defined( HB_OS_WIN ) -# if defined( HB_FM_LOCALALLOC ) -# define malloc( n ) ( void * ) LocalAlloc( LMEM_FIXED, ( n ) ) -# define realloc( p, n ) ( void * ) LocalReAlloc( ( HLOCAL ) ( p ), ( n ), LMEM_MOVEABLE ) -# define free( p ) LocalFree( ( HLOCAL ) ( p ) ) -# else - static HANDLE s_hProcessHeap = NULL; -# define HB_FM_NEED_INIT -# define HB_FM_HEAP_INIT -# define malloc( n ) ( void * ) HeapAlloc( s_hProcessHeap, 0, ( n ) ) -# define realloc( p, n ) ( void * ) HeapReAlloc( s_hProcessHeap, 0, ( void * ) ( p ), ( n ) ) -# define free( p ) HeapFree( s_hProcessHeap, 0, ( void * ) ( p ) ) +#else +# undef HB_FM_DLMT_ALLOC +# if defined( HB_FM_WIN_ALLOC ) && defined( HB_OS_WIN ) +# if defined( HB_FM_LOCALALLOC ) +# define malloc( n ) ( void * ) LocalAlloc( LMEM_FIXED, ( n ) ) +# define realloc( p, n ) ( void * ) LocalReAlloc( ( HLOCAL ) ( p ), ( n ), LMEM_MOVEABLE ) +# define free( p ) LocalFree( ( HLOCAL ) ( p ) ) +# else + static HANDLE s_hProcessHeap = NULL; +# define HB_FM_NEED_INIT +# define HB_FM_HEAP_INIT +# define malloc( n ) ( void * ) HeapAlloc( s_hProcessHeap, 0, ( n ) ) +# define realloc( p, 
n ) ( void * ) HeapReAlloc( s_hProcessHeap, 0, ( void * ) ( p ), ( n ) ) +# define free( p ) HeapFree( s_hProcessHeap, 0, ( void * ) ( p ) ) +# endif # endif #endif #if defined( HB_MT_VM ) && ( defined( HB_FM_STATISTICS ) || \ + defined( HB_FM_DLMT_ALLOC ) || \ !defined( HB_ATOM_INC ) || !defined( HB_ATOM_DEC ) ) static HB_CRITICAL_NEW( s_fmMtx ); @@ -307,6 +324,116 @@ typedef void * PHB_MEMINFO; #endif +#if defined( HB_FM_DLMT_ALLOC ) + +# if !defined( HB_MSPACE_COUNT ) +# define HB_MSPACE_COUNT 16 +# endif + +typedef struct +{ + int count; + mspace * ms; +} HB_MSPACE, * PHB_MSPACE; + +static mspace s_gm = NULL; +static HB_MSPACE s_mspool[ HB_MSPACE_COUNT ]; + +static mspace hb_mspace( void ) +{ + HB_STACK_TLS_PRELOAD + + if( hb_stackId() && hb_stack.allocator ) + return ( ( PHB_MSPACE ) hb_stack.allocator )->ms; + + if( !s_gm ) + s_gm = create_mspace( 0, 1 ); + + return s_gm; +} + +static void hb_mspace_cleanup( void ) +{ + int i; + + s_gm = NULL; + for( i = 0; i < HB_MSPACE_COUNT; ++i ) + { + if( s_mspool[ i ].ms ) + { + destroy_mspace( s_mspool[ i ].ms ); + s_mspool[ i ].ms = NULL; + s_mspool[ i ].count = 0; + } + } +} + +#elif defined( HB_FM_DL_ALLOC ) && defined( USE_DL_PREFIX ) + +static void dlmalloc_destroy( void ) +{ + if( ok_magic(gm) ) + { + msegmentptr sp = &gm->seg; + while(sp != 0 ) + { + char* base = sp->base; + size_t size = sp->size; + flag_t flag = sp->sflags; + sp = sp->next; + if( (flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) ) + CALL_MUNMAP(base, size); + } + } +} + +#endif + +void hb_xinit_thread( void ) +{ +#if defined( HB_FM_DLMT_ALLOC ) + HB_STACK_TLS_PRELOAD + + if( hb_stack.allocator == NULL ) + { + HB_FM_LOCK + if( s_mspool[ 0 ].ms == NULL && s_gm ) + { + s_mspool[ 0 ].count = 1; + s_mspool[ 0 ].ms = s_gm; + hb_stack.allocator = ( void * ) &s_mspool[ 0 ]; + } + else + { + int i, imin = 0; + for( i = 1; i < HB_MSPACE_COUNT; ++i ) + { + if( s_mspool[ i ].count < s_mspool[ imin ].count ) + imin = i; + } + if( s_mspool[ imin ].ms == NULL 
) + s_mspool[ imin ].ms = create_mspace( 0, 1 ); + s_mspool[ imin ].count++; + hb_stack.allocator = ( void * ) &s_mspool[ imin ]; + } + HB_FM_UNLOCK + } +#endif +} + +void hb_xexit_thread( void ) +{ +#if defined( HB_FM_DLMT_ALLOC ) + if( hb_stack.allocator != NULL ) + { + HB_FM_LOCK + ( ( PHB_MSPACE ) hb_stack.allocator )->count--; + hb_stack.allocator = NULL; + HB_FM_UNLOCK + } +#endif +} + void hb_xsetfilename( char * szValue ) { #ifdef HB_FM_STATISTICS @@ -795,25 +922,6 @@ void hb_xinit( void ) /* Initialize fixed memory subsystem */ #endif /* HB_FM_NEED_INIT */ } -#if defined( HB_FM_DL_ALLOC ) && defined( USE_DL_PREFIX ) -static void dlmalloc_destroy( void ) -{ - if( ok_magic(gm) ) - { - msegmentptr sp = &gm->seg; - while(sp != 0 ) - { - char* base = sp->base; - size_t size = sp->size; - flag_t flag = sp->sflags; - sp = sp->next; - if( (flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) ) - CALL_MUNMAP(base, size); - } - } -} -#endif - /* Returns pointer to string containing printable version of pMem memory block */ @@ -931,7 +1039,9 @@ void hb_xexit( void ) /* Deinitialize fixed memory subsystem */ } #if defined( HB_FM_DL_ALLOC ) -# if defined( USE_DL_PREFIX ) +# if defined( HB_FM_DLMT_ALLOC ) + hb_mspace_cleanup(); +# elif defined( USE_DL_PREFIX ) dlmalloc_destroy(); # else malloc_trim( 0 ); @@ -946,7 +1056,9 @@ void hb_xexit( void ) /* Deinitialize fixed memory subsystem */ HB_TRACE(HB_TR_DEBUG, ("hb_xexit()")); #if defined( HB_FM_DL_ALLOC ) -# if defined( USE_DL_PREFIX ) +# if defined( HB_FM_DLMT_ALLOC ) + hb_mspace_cleanup(); +# elif defined( USE_DL_PREFIX ) dlmalloc_destroy(); # else malloc_trim( 0 ); diff --git a/harbour/source/vm/garbage.c b/harbour/source/vm/garbage.c index badd13cf7d..0afb3139a3 100644 --- a/harbour/source/vm/garbage.c +++ b/harbour/source/vm/garbage.c @@ -72,9 +72,9 @@ # include "hbthread.h" # include "hbatomic.h" -/* Use spinlock instead of mutex in OS2 builds */ +/* Use spinlock instead of mutex */ -# if defined( HB_SPINLOCK_INIT ) +# 
if defined( HB_SPINLOCK_INIT ) && 1 HB_SPINLOCK_T s_gcSpinLock = HB_SPINLOCK_INIT; # define HB_GC_LOCK HB_SPINLOCK_ACQUIRE( &s_gcSpinLock );