From ced7a8e61a23a8e514fc8020759f07ded2f327c5 Mon Sep 17 00:00:00 2001 From: Przemyslaw Czerpak Date: Wed, 24 Mar 2010 23:14:30 +0000 Subject: [PATCH] 2010-03-25 00:14 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl) * harbour/include/hbatomic.h * removed unnecessary parentheses * renamed HB_SPINLOCK_INIT_R macro to HB_SPINLOCK_INITVAL_R + added HB_SPINLOCK_INIT_R(l) macro * harbour/src/vm/fm.c * harbour/src/vm/dlmalloc.c + updated to DLMALLOC 2.8.4 * added OS2 support from our previous DLMALLOC version + updated to use our own recursive locks when available * disabled hack which breaks strict aliasing ! added some fixes to new DLMALLOC code TODO: Test it with MSVC win and wince builds and add _MSC_VER based protection for __forceinline usage (I do not know in which MSVC version it was added). Test it with OS2 GCC and OpenWatcom builds. Test it on Darwin and some other *nixes. * harbour/contrib/sddfb/sddfb.c ! fixed missing reference operator in isc_detach_database() ! fixed NULL used to clear connection handles * keep references to connection handles passed to isc_*() functions * minor formatting --- harbour/ChangeLog | 25 + harbour/contrib/sddfb/sddfb.c | 38 +- harbour/include/hbatomic.h | 23 +- harbour/src/vm/dlmalloc.c | 1378 ++++++++++++++++++++++++--------- harbour/src/vm/fm.c | 10 +- 5 files changed, 1062 insertions(+), 412 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index c6f4fb3ba2..0d5c12bd0a 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -17,6 +17,31 @@ past entries belonging to author(s): Viktor Szakats. 
*/ +2010-03-25 00:14 UTC+0100 Przemyslaw Czerpak (druzus/at/priv.onet.pl) + * harbour/include/hbatomic.h + * removed unnecessary parentheses + * renamed HB_SPINLOCK_INIT_R macro to HB_SPINLOCK_INITVAL_R + + added HB_SPINLOCK_INIT_R(l) macro + + * harbour/src/vm/fm.c + * harbour/src/vm/dlmalloc.c + + updated to DLMALLOC 2.8.4 + * added OS2 support from our previous DLMALLOC version + + updated to use our own recursive locks when available + * disabled hack which breaks strict aliasing + ! added some fixes to new DLMALLOC code + TODO: Test it with MSVC win and wince builds and add _MSC_VER + based protection for __forceinline usage (I do not know + in which MSVC version it was added). + Test it with OS2 GCC and OpenWatcom builds. + Test it on Darwin and some other *nixes. + + * harbour/contrib/sddfb/sddfb.c + ! fixed missing reference operator in isc_detach_database() + ! fixed NULL used to clear connection handles + * keep references to connection handles passed to isc_*() functions + * minor formatting + 2010-03-24 21:42 UTC+0100 Viktor Szakats (harbour.01 syenar.hu) * contrib/Makefile + sdd libs ordered alphabetically like other contribs. diff --git a/harbour/contrib/sddfb/sddfb.c b/harbour/contrib/sddfb/sddfb.c index f1af34281e..96fba6420d 100644 --- a/harbour/contrib/sddfb/sddfb.c +++ b/harbour/contrib/sddfb/sddfb.c @@ -104,7 +104,7 @@ static void hb_firebirddd_init( void * cargo ) { HB_SYMBOL_UNUSED( cargo ); - if ( ! hb_sddRegister( & firebirddd ) || ( sizeof( isc_db_handle ) != sizeof( void * ) ) ) + if ( ! 
hb_sddRegister( &firebirddd ) || ( sizeof( isc_db_handle ) != sizeof( void * ) ) ) { hb_errInternal( HB_EI_RDDINVALID, NULL, NULL, NULL ); HB_FUNC_EXEC( SQLBASE ); /* force SQLBASE linking */ @@ -148,7 +148,7 @@ static HB_USHORT hb_errRT_FireBirdDD( HB_ERRCODE errGenCode, HB_ERRCODE errSubCo static HB_ERRCODE fbConnect( SQLDDCONNECTION * pConnection, PHB_ITEM pItem ) { ISC_STATUS_ARRAY status; - isc_db_handle db = ( isc_db_handle ) 0; + isc_db_handle hDb = ( isc_db_handle ) 0; char parambuf[ 520 ]; int i; HB_SIZE ul; @@ -173,14 +173,14 @@ static HB_ERRCODE fbConnect( SQLDDCONNECTION * pConnection, PHB_ITEM pItem ) i += ul; if ( isc_attach_database( status, ( short ) hb_arrayGetCLen( pItem, 5 ), hb_arrayGetCPtr( pItem, 5 ), - & db, ( short ) i, parambuf ) ) + &hDb, ( short ) i, parambuf ) ) { /* TODO: error code in status[1]; */ return HB_FAILURE; } pConnection->pSDDConn = hb_xgrab( sizeof( SDDCONN ) ); - ( ( SDDCONN * ) pConnection->pSDDConn )->hDb = db; -/* HB_TRACE( HB_TR_ALWAYS, ("db=%d", db) ); */ + ( ( SDDCONN * ) pConnection->pSDDConn )->hDb = hDb; +/* HB_TRACE( HB_TR_ALWAYS, ("hDb=%d", hDb) ); */ return HB_SUCCESS; } @@ -189,7 +189,7 @@ static HB_ERRCODE fbDisconnect( SQLDDCONNECTION * pConnection ) { ISC_STATUS_ARRAY status; - isc_detach_database( status, ( ( SDDCONN * ) pConnection->pSDDConn )->hDb ); + isc_detach_database( status, &( ( SDDCONN * ) pConnection->pSDDConn )->hDb ); hb_xfree( pConnection->pSDDConn ); return HB_SUCCESS; } @@ -205,7 +205,7 @@ static HB_ERRCODE fbExecute( SQLDDCONNECTION * pConnection, PHB_ITEM pItem ) static HB_ERRCODE fbOpen( SQLBASEAREAP pArea ) { - isc_db_handle hDb = ( ( SDDCONN * ) pArea->pConnection->pSDDConn )->hDb; + isc_db_handle * phDb = &( ( SDDCONN * ) pArea->pConnection->pSDDConn )->hDb; SDDDATA * pSDDData; ISC_STATUS_ARRAY status; isc_tr_handle hTrans = ( isc_tr_handle ) 0; @@ -225,14 +225,14 @@ static HB_ERRCODE fbOpen( SQLBASEAREAP pArea ) memset( &status, 0, sizeof( status ) ); /* HB_TRACE( HB_TR_ALWAYS, 
("db=%d", hDb) ); */ - if ( isc_start_transaction( status, &hTrans, 1, &hDb, 0, NULL ) ) + if ( isc_start_transaction( status, &hTrans, 1, phDb, 0, NULL ) ) { /* HB_TRACE( HB_TR_ALWAYS, ("hTrans=%d status=%ld %ld %ld %ld", ( int ) hTrans, ( long ) status[0], ( long ) status[1], ( long ) status[2], ( long ) status[3] ) ); */ hb_errRT_FireBirdDD( EG_OPEN, ESQLDD_START, "Start transaction failed", NULL, ( HB_ERRCODE ) isc_sqlcode( status ) ); return HB_FAILURE; } - if ( isc_dsql_allocate_statement( status, &hDb, &hStmt ) ) + if ( isc_dsql_allocate_statement( status, phDb, &hStmt ) ) { hb_errRT_FireBirdDD( EG_OPEN, ESQLDD_STMTALLOC, "Allocate statement failed", NULL, ( HB_ERRCODE ) isc_sqlcode( status ) ); isc_rollback_transaction( status, &hTrans ); @@ -259,7 +259,7 @@ static HB_ERRCODE fbOpen( SQLBASEAREAP pArea ) pSqlda->sqln = uiFields; pSqlda->version = 1; - if ( isc_dsql_describe( status, & hStmt, SQL_DIALECT_V5, pSqlda ) ) + if ( isc_dsql_describe( status, &hStmt, SQL_DIALECT_V5, pSqlda ) ) { hb_errRT_FireBirdDD( EG_OPEN, ESQLDD_STMTDESCR, "Describe statement failed", NULL, ( HB_ERRCODE ) isc_sqlcode( status ) ); isc_dsql_free_statement( status, &hStmt, DSQL_drop ); @@ -440,11 +440,11 @@ static HB_ERRCODE fbClose( SQLBASEAREAP pArea ) } if ( pSDDData->hStmt ) { - isc_dsql_free_statement( status, & pSDDData->hStmt, DSQL_drop ); + isc_dsql_free_statement( status, &pSDDData->hStmt, DSQL_drop ); } if ( pSDDData->hTrans ) { - isc_rollback_transaction( status, & pSDDData->hTrans ); + isc_rollback_transaction( status, &pSDDData->hTrans ); } hb_xfree( pSDDData ); return HB_SUCCESS; @@ -463,10 +463,10 @@ static HB_ERRCODE fbGoTo( SQLBASEAREAP pArea, HB_ULONG ulRecNo ) while ( ulRecNo > pArea->ulRecCount && ! 
pArea->fFetched ) { - isc_stmt_handle stmt = pSDDData->hStmt; - isc_tr_handle tr = pSDDData->hTrans; + isc_stmt_handle * phStmt = &pSDDData->hStmt; + isc_tr_handle * phTr = &pSDDData->hTrans; - lErr = isc_dsql_fetch( status, &stmt, SQL_DIALECT_V5, pSDDData->pSqlda ); + lErr = isc_dsql_fetch( status, phStmt, SQL_DIALECT_V5, pSDDData->pSqlda ); if ( lErr == 0 ) { @@ -528,19 +528,19 @@ static HB_ERRCODE fbGoTo( SQLBASEAREAP pArea, HB_ULONG ulRecNo ) else if ( lErr == 100L ) { pArea->fFetched = HB_TRUE; - if ( isc_dsql_free_statement( status, &stmt, DSQL_drop ) ) + if ( isc_dsql_free_statement( status, phStmt, DSQL_drop ) ) { hb_errRT_FireBirdDD( EG_OPEN, ESQLDD_STMTFREE, "Statement free error", NULL, ( HB_ERRCODE ) isc_sqlcode( status ) ); return HB_FAILURE; } - pSDDData->hStmt = NULL; + pSDDData->hStmt = ( isc_stmt_handle ) 0; - if ( isc_commit_transaction( status, &tr ) ) + if ( isc_commit_transaction( status, phTr ) ) { hb_errRT_FireBirdDD( EG_OPEN, ESQLDD_COMMIT, "Transaction commit error", NULL, ( HB_ERRCODE ) isc_sqlcode( status ) ); return HB_FAILURE; } - pSDDData->hTrans = NULL; + pSDDData->hTrans = ( isc_tr_handle ) 0; hb_xfree( pSDDData->pSqlda ); /* TODO: free is more complex */ pSDDData->pSqlda = NULL; diff --git a/harbour/include/hbatomic.h b/harbour/include/hbatomic.h index 084ad7c78e..b94b73d307 100644 --- a/harbour/include/hbatomic.h +++ b/harbour/include/hbatomic.h @@ -485,20 +485,20 @@ HB_EXTERN_BEGIN static _HB_INLINE_ int hb_spinlock_try_r( struct hb_spinlock_r * sl ) { - HB_SPINLOCK_T * l = &(sl)->lock; + HB_SPINLOCK_T * l = &sl->lock; int r = 0; if( *l != HB_SPINLOCK_INIT ) { - if( (sl)->thid == HB_THREAD_SELF() ) + if( sl->thid == HB_THREAD_SELF() ) { - (sl)->count++; + sl->count++; r = 1; } } else if( HB_SPINLOCK_TRY( l ) ) { - (sl)->thid = HB_THREAD_SELF(); - (sl)->count = 1; + sl->thid = HB_THREAD_SELF(); + sl->count = 1; r = 1; } return r; @@ -540,22 +540,22 @@ HB_EXTERN_BEGIN #else static _HB_INLINE_ void hb_spinlock_acquire_r( struct 
hb_spinlock_r * sl ) { - HB_SPINLOCK_T * l = &(sl)->lock; + HB_SPINLOCK_T * l = &sl->lock; int count = HB_SPINLOCK_REPEAT; for( ;; ) { if( *l != HB_SPINLOCK_INIT ) { - if( (sl)->thid == HB_THREAD_SELF() ) + if( sl->thid == HB_THREAD_SELF() ) { - (sl)->count++; + sl->count++; break; } } else if( HB_SPINLOCK_TRY( l ) ) { - (sl)->thid = HB_THREAD_SELF(); - (sl)->count = 1; + sl->thid = HB_THREAD_SELF(); + sl->count = 1; break; } if( --count == 0 ) @@ -568,7 +568,8 @@ HB_EXTERN_BEGIN #endif # define HB_SPINLOCK_R struct hb_spinlock_r -# define HB_SPINLOCK_INIT_R { 0, 0, 0 } +# define HB_SPINLOCK_INITVAL_R { 0, 0, 0 } +# define HB_SPINLOCK_INIT_R(l) do { (l)->lock = 0; (l)->count = 0; (l)->thid = 0; } while( 0 ) # define HB_SPINLOCK_TRY_R(l) hb_spinlock_try_r(l) # define HB_SPINLOCK_RELEASE_R(l) hb_spinlock_release_r(l) # define HB_SPINLOCK_ACQUIRE_R(l) hb_spinlock_acquire_r(l) diff --git a/harbour/src/vm/dlmalloc.c b/harbour/src/vm/dlmalloc.c index 27c8c8605d..844a0dede9 100644 --- a/harbour/src/vm/dlmalloc.c +++ b/harbour/src/vm/dlmalloc.c @@ -4,7 +4,7 @@ http://creativecommons.org/licenses/publicdomain. Send questions, comments, complaints, performance data, etc to dl@cs.oswego.edu -* Version 2.8.3 Thu Sep 22 11:16:15 2005 Doug Lea (dl at gee) +* Version 2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) Note: There may be an updated version of this malloc obtainable at ftp://gee.cs.oswego.edu/pub/misc/malloc.c @@ -19,7 +19,7 @@ compile-time and dynamic tuning options. For convenience, an include file for code using this malloc is at: - ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.3.h + ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h You don't really need this .h file unless you call functions not defined in your system include files. The .h file contains only the excerpts from this file needed for using this malloc on ANSI C/C++ @@ -111,8 +111,10 @@ can be a major bottleneck. 
It is designed only to provide minimal protection in concurrent environments, and to provide a basis for extensions. If you are using malloc in a concurrent - program, consider instead using ptmalloc, which is derived from - a version of this malloc. (See http://www.malloc.de). + program, consider instead using nedmalloc + (http://www.nedprod.com/programs/portable/nedmalloc/) or + ptmalloc (See http://www.malloc.de), which are derived + from versions of this malloc. System requirements: Any combination of MORECORE and/or MMAP/MUNMAP This malloc can use unix sbrk or any emulation (invoked using @@ -153,7 +155,11 @@ a size_t, not counting any clearing in calloc or copying in realloc, or actions surrounding MORECORE and MMAP that have times proportional to the number of non-contiguous regions returned by - system allocation routines, which is often just 1. + system allocation routines, which is often just 1. In real-time + applications, you can optionally suppress segment traversals using + NO_SEGMENT_TRAVERSAL, which assures bounded execution even when + system allocators return non-contiguous spaces, at the typical + expense of carrying around more memory and increased fragmentation. The implementation is not very modular and seriously overuses macros. Perhaps someday all C compilers will do as good a job @@ -203,11 +209,22 @@ Be careful in setting #define values for numerical constants of type size_t. On some systems, literal values are not automatically extended -to size_t precision unless they are explicitly casted. +to size_t precision unless they are explicitly casted. You can also +use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. WIN32 default: defined if _WIN32 defined Defining WIN32 sets up defaults for MS environment and compilers. - Otherwise defaults are for unix. + Otherwise defaults are for unix. 
Beware that there seem to be some + cases where this malloc might not be a pure drop-in replacement for + Win32 malloc: Random-looking failures from Win32 GDI API's (eg; + SetDIBits()) may be due to bugs in some video driver implementations + when pixel buffers are malloc()ed, and the region spans more than + one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) + default granularity, pixel buffers may straddle virtual allocation + regions more often than when using the Microsoft allocator. You can + avoid this by using VirtualAlloc() and VirtualFree() for all pixel + buffers rather than using malloc(). If this is not possible, + recompile this malloc with a larger DEFAULT_GRANULARITY. MALLOC_ALIGNMENT default: (size_t)8 Controls the minimum alignment for malloc'ed chunks. It must be a @@ -226,7 +243,15 @@ ONLY_MSPACES default: 0 (false) USE_LOCKS default: 0 (false) Causes each call to each public routine to be surrounded with pthread or WIN32 mutex lock/unlock. (If set true, this can be - overridden on a per-mspace basis for mspace versions.) + overridden on a per-mspace basis for mspace versions.) If set to a + non-zero value other than 1, locks are used, but their + implementation is left out, so lock functions must be supplied manually, + as described below. + +USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC + If true, uses custom spin locks for locking. This is currently + supported only for x86 platforms using gcc or recent MS compilers. + Otherwise, posix locks or win32 critical sections are used. FOOTERS default: 0 If true, provide extra checking and dispatching by placing @@ -297,11 +322,9 @@ MORECORE default: sbrk size_t (sometimes declared as "intptr_t"). It doesn't much matter though. Internally, we only call it with arguments less than half the max value of a size_t, which should work across all reasonable - possibilities, although sometimes generating compiler warnings. 
See - near the end of this file for guidelines for creating a custom - version of MORECORE. + possibilities, although sometimes generating compiler warnings. -MORECORE_CONTIGUOUS default: 1 (true) +MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE If true, take advantage of fact that consecutive calls to MORECORE with positive arguments always return contiguous increasing addresses. This is true of unix sbrk. It does not hurt too much to @@ -315,6 +338,12 @@ MORECORE_CANNOT_TRIM default: NOT defined using a hand-crafted MORECORE function that cannot handle negative arguments. +NO_SEGMENT_TRAVERSAL default: 0 + If non-zero, suppresses traversals of memory segments + returned by either MORECORE or CALL_MMAP. This disables + merging of segments that are contiguous, and selectively + releasing them to the OS if unused, but bounds execution times. + HAVE_MMAP default: 1 (true) True if this system supports mmap or an emulation of it. If so, and HAVE_MORECORE is not true, MMAP is used for all system @@ -329,17 +358,17 @@ HAVE_MREMAP default: 1 on linux, else 0 If true realloc() uses mremap() to re-allocate large blocks and extend or shrink allocation spaces. -MMAP_CLEARS default: 1 on unix +MMAP_CLEARS default: 1 except on WINCE. True if mmap clears memory so calloc doesn't need to. This is true - for standard unix mmap using /dev/zero. + for standard unix mmap using /dev/zero and on WIN32 except for WINCE. USE_BUILTIN_FFS default: 0 (i.e., not used) Causes malloc to use the builtin ffs() function to compute indices. Some compilers may recognize and intrinsify ffs to be faster than the supplied C version. Also, the case of x86 using gcc is special-cased to an asm instruction, so is already as fast as it can be, and so - this setting has no effect. (On most x86s, the asm version is only - slightly faster than the C version.) + this setting has no effect. Similarly for Win32 under recent MS compilers. 
+ (On most x86s, the asm version is only slightly faster than the C version.) malloc_getpagesize default: derive from system includes, or 4096. The system page size. To the extent possible, this malloc manages @@ -436,12 +465,40 @@ DEFAULT_MMAP_THRESHOLD default: 256K empirically derived value that works well in most systems. You can disable mmap by setting to MAX_SIZE_T. +MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP + The number of consolidated frees between checks to release + unused segments when freeing. When using non-contiguous segments, + especially with multiple mspaces, checking only for topmost space + doesn't always suffice to trigger trimming. To compensate for this, + free() will, with a period of MAX_RELEASE_CHECK_RATE (or the + current number of segments, if greater) try to release unused + segments to the OS when freeing chunks that result in + consolidation. The best value for this parameter is a compromise + between slowing down frees with relatively costly checks that + rarely trigger versus holding on to unused memory. To effectively + disable, set to MAX_SIZE_T. This may lead to a very slight speed + improvement at the expense of carrying around more memory. 
*/ +/* Version identifier to allow people to support multiple versions */ +#ifndef DLMALLOC_VERSION +#define DLMALLOC_VERSION 20804 +#endif /* DLMALLOC_VERSION */ + +#ifdef HB_OS_WIN_CE +#ifndef _WIN32_WCE +#define _WIN32_WCE +#endif /* _WIN32_WCE */ +#endif /* HB_OS_WIN_CE */ + #ifndef WIN32 #ifdef _WIN32 #define WIN32 1 #endif /* _WIN32 */ +#ifdef _WIN32_WCE +#define LACKS_FCNTL_H +#define WIN32 1 +#endif /* _WIN32_WCE */ #endif /* WIN32 */ #ifdef WIN32 #define WIN32_LEAN_AND_MEAN @@ -455,8 +512,14 @@ DEFAULT_MMAP_THRESHOLD default: 256K #define LACKS_STRINGS_H #define LACKS_SYS_TYPES_H #define LACKS_ERRNO_H +#ifndef MALLOC_FAILURE_ACTION #define MALLOC_FAILURE_ACTION -#define MMAP_CLEARS 0 /* WINCE and some others apparently don't clear */ +#endif /* MALLOC_FAILURE_ACTION */ +#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ +#define MMAP_CLEARS 0 +#else +#define MMAP_CLEARS 1 +#endif /* _WIN32_WCE */ #endif /* WIN32 */ #ifdef HB_OS_OS2 @@ -479,6 +542,10 @@ DEFAULT_MMAP_THRESHOLD default: 256K #ifndef HAVE_MORECORE #define HAVE_MORECORE 0 #define HAVE_MMAP 1 +/* OSX allocators provide 16 byte alignment */ +#ifndef MALLOC_ALIGNMENT +#define MALLOC_ALIGNMENT ((size_t)16U) +#endif #endif /* HAVE_MORECORE */ #endif /* DARWIN */ @@ -486,11 +553,19 @@ DEFAULT_MMAP_THRESHOLD default: 256K #include /* For size_t */ #endif /* LACKS_SYS_TYPES_H */ +#if (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) +#define SPIN_LOCKS_AVAILABLE 1 +#else +#define SPIN_LOCKS_AVAILABLE 0 +#endif + /* The maximum possible size_t value has all bits set */ #define MAX_SIZE_T (~(size_t)0) #ifndef ONLY_MSPACES -#define ONLY_MSPACES 0 +#define ONLY_MSPACES 0 /* define to a value */ +#else +#define ONLY_MSPACES 1 #endif /* ONLY_MSPACES */ #ifndef MSPACES #if ONLY_MSPACES @@ -517,6 +592,13 @@ DEFAULT_MMAP_THRESHOLD default: 256K #ifndef USE_LOCKS #define USE_LOCKS 0 #endif /* USE_LOCKS */ +#ifndef USE_SPIN_LOCKS +#if USE_LOCKS && 
SPIN_LOCKS_AVAILABLE +#define USE_SPIN_LOCKS 1 +#else +#define USE_SPIN_LOCKS 0 +#endif /* USE_LOCKS && SPIN_LOCKS_AVAILABLE. */ +#endif /* USE_SPIN_LOCKS */ #ifndef INSECURE #define INSECURE 0 #endif /* INSECURE */ @@ -528,6 +610,10 @@ DEFAULT_MMAP_THRESHOLD default: 256K #endif /* MMAP_CLEARS */ #ifndef HAVE_MREMAP #ifdef linux +#ifndef _GNU_SOURCE +/* For mremap() function */ +#define _GNU_SOURCE +#endif /* _GNU_SOURCE */ #define HAVE_MREMAP 1 #else /* linux */ #define HAVE_MREMAP 0 @@ -546,15 +632,13 @@ DEFAULT_MMAP_THRESHOLD default: 256K #if !HAVE_MORECORE #define MORECORE_CONTIGUOUS 0 #else /* !HAVE_MORECORE */ -#ifndef MORECORE -#define MORECORE sbrk -#endif /* MORECORE */ +#define MORECORE_DEFAULT sbrk #ifndef MORECORE_CONTIGUOUS #define MORECORE_CONTIGUOUS 1 #endif /* MORECORE_CONTIGUOUS */ #endif /* HAVE_MORECORE */ #ifndef DEFAULT_GRANULARITY -#if MORECORE_CONTIGUOUS +#if (MORECORE_CONTIGUOUS || defined(WIN32)) #define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ #else /* MORECORE_CONTIGUOUS */ #define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) @@ -574,6 +658,13 @@ DEFAULT_MMAP_THRESHOLD default: 256K #define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T #endif /* HAVE_MMAP */ #endif /* DEFAULT_MMAP_THRESHOLD */ +#ifndef MAX_RELEASE_CHECK_RATE +#if HAVE_MMAP +#define MAX_RELEASE_CHECK_RATE 4095 +#else +#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T +#endif /* HAVE_MMAP */ +#endif /* MAX_RELEASE_CHECK_RATE */ #ifndef USE_BUILTIN_FFS #define USE_BUILTIN_FFS 0 #endif /* USE_BUILTIN_FFS */ @@ -586,6 +677,9 @@ DEFAULT_MMAP_THRESHOLD default: 256K #ifndef MALLINFO_FIELD_TYPE #define MALLINFO_FIELD_TYPE size_t #endif /* MALLINFO_FIELD_TYPE */ +#ifndef NO_SEGMENT_TRAVERSAL +#define NO_SEGMENT_TRAVERSAL 0 +#endif /* NO_SEGMENT_TRAVERSAL */ /* mallopt tuning options. 
SVID/XPG defines four standard parameter @@ -628,7 +722,8 @@ DEFAULT_MMAP_THRESHOLD default: 256K #ifdef HAVE_USR_INCLUDE_MALLOC_H #include "/usr/include/malloc.h" #else /* HAVE_USR_INCLUDE_MALLOC_H */ - +#ifndef STRUCT_MALLINFO_DECLARED +#define STRUCT_MALLINFO_DECLARED 1 struct mallinfo { MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ @@ -641,13 +736,49 @@ struct mallinfo { MALLINFO_FIELD_TYPE fordblks; /* total free space */ MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ }; - +#endif /* STRUCT_MALLINFO_DECLARED */ #endif /* HAVE_USR_INCLUDE_MALLOC_H */ #endif /* NO_MALLINFO */ +/* + Try to persuade compilers to inline. The most critical functions for + inlining are defined as macros, so these aren't used for them. +*/ + +#ifndef FORCEINLINE + #if defined(__GNUC__) + #if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 0 )) + #define FORCEINLINE __inline __attribute__ ((always_inline)) + #else + #define FORCEINLINE __inline + #endif + #elif defined(_MSC_VER) && !defined(__XCC__) + #define FORCEINLINE __forceinline + #endif +#endif +#ifndef NOINLINE + #if defined(__GNUC__) + #define NOINLINE __attribute__ ((noinline)) + #elif defined(_MSC_VER) + #define NOINLINE __declspec(noinline) + #else + #define NOINLINE + #endif +#endif + #ifdef __cplusplus extern "C" { +#ifndef FORCEINLINE + #define FORCEINLINE inline +#endif /* FORCEINLINE */ #endif /* __cplusplus */ +#ifndef FORCEINLINE +#ifdef _HB_INLINE_ + #define FORCEINLINE _HB_INLINE_ +#else /* _HB_INLINE_ */ + #define FORCEINLINE +#endif /* _HB_INLINE_ */ +#endif #if !ONLY_MSPACES @@ -757,7 +888,11 @@ void* dlvalloc(size_t); (parameter-number, parameter-value) pair. mallopt then sets the corresponding parameter to the argument value if it can (i.e., so long as the value is meaningful), and returns 1 if successful else - 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, + 0. 
To workaround the fact that mallopt is specified to use int, + not size_t parameters, the value -1 is specially treated as the + maximum unsigned size_t value. + + SVID/XPG/ANSI defines four standard param numbers for mallopt, normally defined in malloc.h. None of these are use in this malloc, so setting them has no effect. But this malloc also supports other options in mallopt. See below for details. Briefly, supported @@ -765,7 +900,7 @@ void* dlvalloc(size_t); configurations). Symbol param # default allowed param values - M_TRIM_THRESHOLD -1 2*1024*1024 any (MAX_SIZE_T disables) + M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) M_GRANULARITY -2 page size any power of 2 >= page size M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) */ @@ -967,22 +1102,6 @@ void* dlpvalloc(size_t); */ int dlmalloc_trim(size_t); -/* - malloc_usable_size(void* p); - - Returns the number of bytes you can actually use in - an allocated chunk, which may be more than you requested (although - often not) due to alignment and minimum size constraints. - You can use this many bytes without worrying about - overwriting other allocated objects. This is not a particularly great - programming practice. malloc_usable_size can be more useful in - debugging and assertions, for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); -*/ -size_t dlmalloc_usable_size(void*); - /* malloc_stats(); Prints on stderr the amount of space obtained from the system (both @@ -1006,6 +1125,23 @@ void dlmalloc_stats(void); #endif /* ONLY_MSPACES */ +/* + malloc_usable_size(void* p); + + Returns the number of bytes you can actually use in + an allocated chunk, which may be more than you requested (although + often not) due to alignment and minimum size constraints. + You can use this many bytes without worrying about + overwriting other allocated objects. This is not a particularly great + programming practice. 
malloc_usable_size can be more useful in + debugging and assertions, for example: + + p = malloc(n); + assert(malloc_usable_size(p) >= 256); +*/ +size_t dlmalloc_usable_size(void*); + + #if MSPACES /* @@ -1046,6 +1182,20 @@ size_t destroy_mspace(mspace msp); */ mspace create_mspace_with_base(void* base, size_t capacity, int locked); +/* + mspace_track_large_chunks controls whether requests for large chunks + are allocated in their own untracked mmapped regions, separate from + others in this mspace. By default large chunks are not tracked, + which reduces fragmentation. However, such chunks are not + necessarily released to the system upon destroy_mspace. Enabling + tracking by setting to true may increase fragmentation, but avoids + leakage when relying on destroy_mspace to release all memory + allocated using this space. The function returns the previous + setting. +*/ +int mspace_track_large_chunks(mspace msp, int enable); + + /* mspace_malloc behaves as malloc, but operates within the given space. @@ -1120,6 +1270,11 @@ size_t mspace_max_footprint(mspace msp); struct mallinfo mspace_mallinfo(mspace msp); #endif /* NO_MALLINFO */ +/* + mspace_usable_size(void* p) behaves the same as malloc_usable_size; +*/ + size_t mspace_usable_size(void* mem); + /* mspace_malloc_stats behaves as malloc_stats, but reports properties of the given space. 
@@ -1166,20 +1321,24 @@ int mspace_mallopt(int, int); #ifndef LACKS_ERRNO_H #include <errno.h> /* for MALLOC_FAILURE_ACTION */ #endif /* LACKS_ERRNO_H */ -#if FOOTERS +#ifndef WIN32 #include <time.h> /* for magic initialization */ -#endif /* FOOTERS */ +#endif /* WIN32 */ #ifndef LACKS_STDLIB_H #include <stdlib.h> /* for abort() */ #endif /* LACKS_STDLIB_H */ #ifdef DEBUG #if ABORT_ON_ASSERT_FAILURE +#undef assert #define assert(x) if(!(x)) ABORT #else /* ABORT_ON_ASSERT_FAILURE */ #include <assert.h> #endif /* ABORT_ON_ASSERT_FAILURE */ #else /* DEBUG */ +#ifndef assert #define assert(x) +#endif /* assert */ +#define DEBUG 0 #endif /* DEBUG */ #ifndef LACKS_STRING_H #include <string.h> /* for memset etc */ @@ -1197,15 +1356,60 @@ int mspace_mallopt(int, int); #include <fcntl.h> #endif /* LACKS_FCNTL_H */ #endif /* HAVE_MMAP */ -#if HAVE_MORECORE #ifndef LACKS_UNISTD_H -#include <unistd.h> /* for sbrk */ +#include <unistd.h> /* for sbrk, sysconf */ #else /* LACKS_UNISTD_H */ -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && \ + !defined(WIN32) && !defined(HB_OS_OS2) extern void* sbrk(ptrdiff_t); #endif /* FreeBSD etc */ #endif /* LACKS_UNISTD_H */ -#endif /* HAVE_MMAP */ + +/* Declarations for locking */ +#if USE_LOCKS == 1 +#ifdef HB_OS_OS2 +#elif !defined(WIN32) +#include <pthread.h> +#if defined (__SVR4) && defined (__sun) /* solaris */ +#include <thread.h> +#endif /* solaris */ +#else +#ifndef _M_AMD64 +/* These are already defined on AMD64 builds */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); +LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif /* _M_AMD64 */ +#pragma intrinsic (_InterlockedCompareExchange) +#pragma intrinsic (_InterlockedExchange) +#define interlockedcompareexchange _InterlockedCompareExchange +#define interlockedexchange _InterlockedExchange +#endif /* Win32 */
+#endif /* USE_LOCKS */ + +/* Declarations for bit scanning on win32 */ +#if defined(_MSC_VER) && _MSC_VER>=1300 +#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ +unsigned char _BitScanForward(unsigned long *index, unsigned long mask); +unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#define BitScanForward _BitScanForward +#define BitScanReverse _BitScanReverse +#pragma intrinsic(_BitScanForward) +#pragma intrinsic(_BitScanReverse) +#endif /* BitScanForward */ +#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ #ifndef WIN32 #ifndef malloc_getpagesize @@ -1254,6 +1458,8 @@ extern void* sbrk(ptrdiff_t); #endif #endif + + /* ------------------- size_t and alignment properties -------------------- */ /* The byte and bit size of a size_t */ @@ -1261,10 +1467,11 @@ extern void* sbrk(ptrdiff_t); #define SIZE_T_BITSIZE (sizeof(size_t) << 3) /* Some constants coerced to size_t */ -/* Annoying but necessary to avoid errors on some plaftorms */ +/* Annoying but necessary to avoid errors on some platforms */ #define SIZE_T_ZERO ((size_t)0) #define SIZE_T_ONE ((size_t)1) #define SIZE_T_TWO ((size_t)2) +#define SIZE_T_FOUR ((size_t)4) #define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) #define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) #define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) @@ -1294,27 +1501,17 @@ extern void* sbrk(ptrdiff_t); #define MFAIL ((void*)(MAX_SIZE_T)) #define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ -#if !HAVE_MMAP -#define IS_MMAPPED_BIT (SIZE_T_ZERO) -#define USE_MMAP_BIT (SIZE_T_ZERO) -#define CALL_MMAP(s) MFAIL -#define CALL_MUNMAP(a, s) (-1) -#define DIRECT_MMAP(s) MFAIL +#if HAVE_MMAP -#else /* HAVE_MMAP */ -#define IS_MMAPPED_BIT (SIZE_T_ONE) -#define USE_MMAP_BIT (SIZE_T_ONE) - -#if !defined( WIN32 ) && !defined( HB_OS_OS2 ) - -#define CALL_MUNMAP(a, s) munmap((a), (s)) +#if !defined(WIN32) && 
!defined(HB_OS_OS2) +#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) #define MMAP_PROT (PROT_READ|PROT_WRITE) #if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) #define MAP_ANONYMOUS MAP_ANON #endif /* MAP_ANON */ #ifdef MAP_ANONYMOUS #define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define CALL_MMAP(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) +#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) #else /* MAP_ANONYMOUS */ /* Nearly all versions of mmap support MAP_ANONYMOUS, so the following @@ -1322,18 +1519,18 @@ extern void* sbrk(ptrdiff_t); */ #define MMAP_FLAGS (MAP_PRIVATE) static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ -#define CALL_MMAP(s) ((dev_zero_fd < 0) ? \ +#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ (dev_zero_fd = open("/dev/zero", O_RDWR), \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) #endif /* MAP_ANONYMOUS */ -#define DIRECT_MMAP(s) CALL_MMAP(s) +#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) -#elif defined( HB_OS_OS2 ) +#elif defined(HB_OS_OS2) /* OS/2 MMAP via DosAllocMem */ -static void* os2mmap(size_t size) { +static FORCEINLINE void* os2mmap(size_t size) { void* ptr; if (DosAllocMem(&ptr, size, PAG_COMMIT|PAG_READ|PAG_WRITE) != 0 ) return MFAIL; @@ -1342,7 +1539,7 @@ static void* os2mmap(size_t size) { #if 0 /* For direct MMAP, use OBJ_TILE to minimize interference */ -static void* os2direct_mmap(size_t size) { +static FORCEINLINE void* os2direct_mmap(size_t size) { void* ptr; if (DosAllocMem(&ptr, size, PAG_COMMIT|PAG_READ|PAG_WRITE|OBJ_TILE) != 0 ) return MFAIL; @@ -1353,7 +1550,7 @@ static void* os2direct_mmap(size_t size) { #endif /* This function supports releasing coalesed segments */ -static int os2munmap(void* ptr, size_t size) { +static FORCEINLINE int os2munmap(void* ptr, size_t size) { while (size) { ULONG ulSize = size, ulFlags = 0; if (DosQueryMem(ptr, &ulSize, &ulFlags) != 0) @@ -1369,29 +1566,29 @@ static int 
os2munmap(void* ptr, size_t size) { return 0; } -#define CALL_MMAP(s) os2mmap(s) -#define CALL_MUNMAP(a, s) os2munmap((a), (s)) -#define DIRECT_MMAP(s) os2direct_mmap(s) +#define MMAP_DEFAULT(s) os2mmap(s) +#define MUNMAP_DEFAULT(a, s) os2munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) os2direct_mmap(s) #else /* WIN32 */ /* Win32 MMAP via VirtualAlloc */ -static void* win32mmap(size_t size) { +static FORCEINLINE void* win32mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static void* win32direct_mmap(size_t size) { +static FORCEINLINE void* win32direct_mmap(size_t size) { void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE); return (ptr != 0)? ptr: MFAIL; } /* This function supports releasing coalesed segments */ -static int win32munmap(void* ptr, size_t size) { +static FORCEINLINE int win32munmap(void* ptr, size_t size) { MEMORY_BASIC_INFORMATION minfo; - char* cptr = (char*)ptr; /* NOTE: Harbour fix for MSVC C++ mode compile error. Also fixed in dlmalloc 2.8.4b. 
[vszakats] */ + char* cptr = (char*)ptr; while (size) { if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) return -1; @@ -1406,24 +1603,77 @@ static int win32munmap(void* ptr, size_t size) { return 0; } -#define CALL_MMAP(s) win32mmap(s) -#define CALL_MUNMAP(a, s) win32munmap((a), (s)) -#define DIRECT_MMAP(s) win32direct_mmap(s) +#define MMAP_DEFAULT(s) win32mmap(s) +#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) +#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) #endif /* WIN32 */ #endif /* HAVE_MMAP */ -#if HAVE_MMAP && HAVE_MREMAP -#define CALL_MREMAP(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) -#else /* HAVE_MMAP && HAVE_MREMAP */ -#define CALL_MREMAP(addr, osz, nsz, mv) MFAIL -#endif /* HAVE_MMAP && HAVE_MREMAP */ +#if HAVE_MREMAP +#ifndef WIN32 +#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) +#endif /* WIN32 */ +#endif /* HAVE_MREMAP */ + +/** + * Define CALL_MORECORE + */ #if HAVE_MORECORE -#define CALL_MORECORE(S) MORECORE(S) + #ifdef MORECORE + #define CALL_MORECORE(S) MORECORE(S) + #else /* MORECORE */ + #define CALL_MORECORE(S) MORECORE_DEFAULT(S) + #endif /* MORECORE */ #else /* HAVE_MORECORE */ -#define CALL_MORECORE(S) MFAIL + #define CALL_MORECORE(S) MFAIL #endif /* HAVE_MORECORE */ +/** + * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP + */ +#if HAVE_MMAP + #define USE_MMAP_BIT (SIZE_T_ONE) + + #ifdef MMAP + #define CALL_MMAP(s) MMAP(s) + #else /* MMAP */ + #define CALL_MMAP(s) MMAP_DEFAULT(s) + #endif /* MMAP */ + #ifdef MUNMAP + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) + #else /* MUNMAP */ + #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) + #endif /* MUNMAP */ + #ifdef DIRECT_MMAP + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #else /* DIRECT_MMAP */ + #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) + #endif /* DIRECT_MMAP */ +#else /* HAVE_MMAP */ + #define USE_MMAP_BIT (SIZE_T_ZERO) + + #define MMAP(s) MFAIL + #define MUNMAP(a, s) (-1) + #define DIRECT_MMAP(s) MFAIL + #define 
CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) + #define CALL_MMAP(s) MMAP(s) + #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) +#endif /* HAVE_MMAP */ + +/** + * Define CALL_MREMAP + */ +#if HAVE_MMAP && HAVE_MREMAP + #ifdef MREMAP + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) + #else /* MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) + #endif /* MREMAP */ +#else /* HAVE_MMAP && HAVE_MREMAP */ + #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL +#endif /* HAVE_MMAP && HAVE_MREMAP */ + /* mstate bit set if continguous morecore disabled or failed */ #define USE_NONCONTIGUOUS_BIT (4U) @@ -1433,126 +1683,320 @@ static int win32munmap(void* ptr, size_t size) { /* --------------------------- Lock preliminaries ------------------------ */ -#if USE_LOCKS - /* - When locks are defined, there are up to two global locks: + When locks are defined, there is one global lock, plus + one per-mspace lock. - * If HAVE_MORECORE, morecore_mutex protects sequences of calls to - MORECORE. In many cases sys_alloc requires two calls, that should - not be interleaved with calls by other threads. This does not - protect against direct calls to MORECORE by other threads not - using this lock, so there is still code to cope the best we can on - interference. + The global lock ensures that mparams.magic and other unique + mparams values are initialized only once. It also protects + sequences of calls to MORECORE. In many cases sys_alloc requires + two calls, that should not be interleaved with calls by other + threads. This does not protect against direct calls to MORECORE + by other threads not using this lock, so there is still code to + cope the best we can on interference. + + Per-mspace locks surround calls to malloc, free, etc. To enable use + in layered extensions, per-mspace locks are reentrant.
+ + Because lock-protected regions generally have bounded times, it is + OK to use the supplied simple spinlocks in the custom versions for + x86. Spinlocks are likely to improve performance for lightly + contended applications, but worsen performance under heavy + contention. + + If USE_LOCKS is > 1, the definitions of lock routines here are + bypassed, in which case you will need to define the type MLOCK_T, + and at least INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly + TRY_LOCK (which is not used in this malloc, but commonly needed in + extensions.) You must also declare a + static MLOCK_T malloc_global_mutex = { initialization values };. - * magic_init_mutex ensures that mparams.magic and other - unique mparams values are initialized only once. */ -#ifdef HB_MT_VM -# ifndef HB_SPINLOCK_USE -# define HB_SPINLOCK_USE -# endif /* HB_SPINLOCK_USE */ -#endif /* HB_MT_VM */ +#if USE_LOCKS == 1 -#ifdef HB_SPINLOCK_USE -# include "hbthread.h" -# include "hbatomic.h" -#endif /* HB_SPINLOCK_USE */ +#if USE_SPIN_LOCKS && SPIN_LOCKS_AVAILABLE +#ifndef WIN32 -#ifndef HB_SPINLOCK_T -# undef HB_SPINLOCK_USE -#endif /* HB_SPINLOCK_T */ +/* Custom pthread-style spin locks on x86 and x64 for gcc */ +struct pthread_mlock_t { + volatile unsigned int l; + unsigned int c; + pthread_t threadid; +}; +#define MLOCK_T struct pthread_mlock_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) +#define RELEASE_LOCK(sl) pthread_release_lock(sl) +#define TRY_LOCK(sl) pthread_try_lock(sl) +#define SPINS_PER_YIELD 63 -#ifdef HB_SPINLOCK_USE -static int spin_acquire_lock( HB_SPINLOCK_T *l ) -{ - HB_SPINLOCK_ACQUIRE( l ); - return 0; -} +static MLOCK_T malloc_global_mutex = { 0, 0, 0}; -#define MLOCK_T HB_SPINLOCK_T -#define INITIAL_LOCK(l) *(l)=HB_SPINLOCK_INIT -#define ACQUIRE_LOCK(l) spin_acquire_lock(l) -#define RELEASE_LOCK(l) HB_SPINLOCK_RELEASE(l) -#define MLOCK_INIT 
HB_SPINLOCK_INIT - -/* -#elif defined( HB_OS_OS2 ) - -#define MLOCK_T HB_RAWCRITICAL_T -#define INITIAL_LOCK(l) HB_CRITICAL_INIT(*(l)) -#define ACQUIRE_LOCK(l) HB_CRITICAL_LOCK(*(l)) -#define RELEASE_LOCK(l) HB_CRITICAL_UNLOCK(*(l)) -#define MLOCK_INIT { 0 } -*/ - -#elif !defined( WIN32 ) - -/* By default use posix locks */ -#include <pthread.h> -#define MLOCK_T pthread_mutex_t -#define INITIAL_LOCK(l) pthread_mutex_init(l, NULL) -#define ACQUIRE_LOCK(l) pthread_mutex_lock(l) -#define RELEASE_LOCK(l) pthread_mutex_unlock(l) -#define MLOCK_INIT PTHREAD_MUTEX_INITIALIZER - -#else /* WIN32 */ -/* - Because lock-protected regions have bounded times, and there - are no recursive lock calls, we can use simple spinlocks. -*/ - -#define MLOCK_T long -static int win32_acquire_lock (MLOCK_T *sl) { + int spins = 0; + volatile unsigned int* lp = &sl->l; for (;;) { -#ifdef InterlockedCompareExchangePointer - if (!InterlockedCompareExchange(sl, 1, 0)) - return 0; -#else /* Use older void* version */ - if (!InterlockedCompareExchange((void**)sl, (void*)1, (void*)0)) - return 0; -#endif /* InterlockedCompareExchangePointer */ - Sleep (0); + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + /* place args to cmpxchgl in locals to evade oddities in some gccs */ + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) { +#if defined (__SVR4) && defined (__sun) /* solaris */ + thr_yield(); +#else +#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + sched_yield(); +#else /* no-op yield on unknown systems */ + ; +#endif /* __linux__ || __FreeBSD__ || __APPLE__ */ +#endif /* solaris */ + } } } -static void win32_release_lock
(MLOCK_T *sl) { - InterlockedExchange (sl, 0); +static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + assert(*lp != 0); + assert(sl->threadid == CURRENT_THREAD); + if (--sl->c == 0) { + sl->threadid = 0; + int prev = 0; + int ret; + __asm__ __volatile__ ("lock; xchgl %0, %1" + : "=r" (ret) + : "m" (*(lp)), "0"(prev) + : "memory"); + } } -#define INITIAL_LOCK(l) *(l)=0 -#define ACQUIRE_LOCK(l) win32_acquire_lock(l) -#define RELEASE_LOCK(l) win32_release_lock(l) -#define MLOCK_INIT 0 +static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { + volatile unsigned int* lp = &sl->l; + if (*lp != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + int cmp = 0; + int val = 1; + int ret; + __asm__ __volatile__ ("lock; cmpxchgl %1, %2" + : "=a" (ret) + : "r" (val), "m" (*(lp)), "0"(cmp) + : "memory", "cc"); + if (!ret) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} + + +#else /* WIN32 */ +/* Custom win32-style spin locks on x86 and x64 for MSC */ +struct win32_mlock_t { + volatile long l; + unsigned int c; + long threadid; +}; + +#define MLOCK_T struct win32_mlock_t +#define CURRENT_THREAD GetCurrentThreadId() +#define INITIAL_LOCK(sl) ((sl)->threadid = 0, (sl)->l = (sl)->c = 0, 0) +#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) +#define RELEASE_LOCK(sl) win32_release_lock(sl) +#define TRY_LOCK(sl) win32_try_lock(sl) +#define SPINS_PER_YIELD 63 + +static MLOCK_T malloc_global_mutex = { 0, 0, 0}; + +static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { + int spins = 0; + for (;;) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 0; + } + } + else { + if (!interlockedexchange(&sl->l, 1)) { + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 0; + } + } + if ((++spins & SPINS_PER_YIELD) == 0) + SleepEx(0, FALSE); + } +} + +static FORCEINLINE void win32_release_lock (MLOCK_T 
*sl) { + assert(sl->threadid == CURRENT_THREAD); + assert(sl->l != 0); + if (--sl->c == 0) { + sl->threadid = 0; + interlockedexchange (&sl->l, 0); + } +} + +static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { + if (sl->l != 0) { + if (sl->threadid == CURRENT_THREAD) { + ++sl->c; + return 1; + } + } + else { + if (!interlockedexchange(&sl->l, 1)){ + assert(!sl->threadid); + sl->threadid = CURRENT_THREAD; + sl->c = 1; + return 1; + } + } + return 0; +} #endif /* WIN32 */ +#else /* USE_SPIN_LOCKS */ -#if HAVE_MORECORE -static MLOCK_T morecore_mutex = MLOCK_INIT; -#endif /* HAVE_MORECORE */ -static MLOCK_T magic_init_mutex = MLOCK_INIT; +#ifndef WIN32 +/* pthreads-based locks */ +#define MLOCK_T pthread_mutex_t +#define CURRENT_THREAD pthread_self() +#define INITIAL_LOCK(sl) pthread_init_lock(sl) +#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl) +#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl) +#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl)) + +static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Cope with old-style linux recursive lock initialization by adding */ +/* skipped internal declaration from pthread.h */ +#ifdef linux +#ifndef PTHREAD_MUTEX_RECURSIVE +extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, + int __kind)); +#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP +#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) +#endif +#endif + +static int pthread_init_lock (MLOCK_T *sl) { + pthread_mutexattr_t attr; + if (pthread_mutexattr_init(&attr)) return 1; + if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; + if (pthread_mutex_init(sl, &attr)) return 1; + if (pthread_mutexattr_destroy(&attr)) return 1; + return 0; +} + +#else /* WIN32 */ +/* Win32 critical sections */ +#define MLOCK_T CRITICAL_SECTION +#define CURRENT_THREAD GetCurrentThreadId() +#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000)) +#define ACQUIRE_LOCK(s) 
(EnterCriticalSection(sl), 0) +#define RELEASE_LOCK(s) LeaveCriticalSection(sl) +#define TRY_LOCK(s) TryEnterCriticalSection(sl) +#define NEED_GLOBAL_LOCK_INIT + +static MLOCK_T malloc_global_mutex; +static volatile long malloc_global_mutex_status; + +/* Use spin loop to initialize global lock */ +static void init_malloc_global_mutex( void ) { + for (;;) { + long stat = malloc_global_mutex_status; + if (stat > 0) + return; + /* transition to < 0 while initializing, then to > 0) */ + if (stat == 0 && + interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) { + InitializeCriticalSection(&malloc_global_mutex); + interlockedexchange(&malloc_global_mutex_status,1); + return; + } + SleepEx(0, FALSE); + } +} + +#endif /* WIN32 */ +#endif /* USE_SPIN_LOCKS */ +#endif /* USE_LOCKS == 1 */ + +/* ----------------------- User-defined locks ------------------------ */ + +#if USE_LOCKS > 1 +/* Define your own lock implementation here */ +/* #define INITIAL_LOCK(sl) ... */ +/* #define ACQUIRE_LOCK(sl) ... */ +/* #define RELEASE_LOCK(sl) ... */ +/* #define TRY_LOCK(sl) ... */ +/* static MLOCK_T malloc_global_mutex = ... 
*/ + +static _HB_INLINE_ int user_acquire_lock( HB_SPINLOCK_R * sl ) +{ + HB_SPINLOCK_ACQUIRE_R( sl ); + return 0; +} + +#define MLOCK_T HB_SPINLOCK_R +#define INITIAL_LOCK(sl) HB_SPINLOCK_INIT_R(sl) +#define ACQUIRE_LOCK(sl) user_acquire_lock(sl) +#define RELEASE_LOCK(sl) HB_SPINLOCK_RELEASE_R(sl) +#define TRY_LOCK(sl) HB_SPINLOCK_TRY_R(sl) + +static MLOCK_T malloc_global_mutex = HB_SPINLOCK_INITVAL_R; + +#endif /* USE_LOCKS > 1 */ + +/* ----------------------- Lock-based state ------------------------ */ + +#if USE_LOCKS #define USE_LOCK_BIT (2U) #else /* USE_LOCKS */ #define USE_LOCK_BIT (0U) #define INITIAL_LOCK(l) #endif /* USE_LOCKS */ -#if USE_LOCKS && HAVE_MORECORE -#define ACQUIRE_MORECORE_LOCK() ACQUIRE_LOCK(&morecore_mutex); -#define RELEASE_MORECORE_LOCK() RELEASE_LOCK(&morecore_mutex); -#else /* USE_LOCKS && HAVE_MORECORE */ -#define ACQUIRE_MORECORE_LOCK() -#define RELEASE_MORECORE_LOCK() -#endif /* USE_LOCKS && HAVE_MORECORE */ - #if USE_LOCKS -#define ACQUIRE_MAGIC_INIT_LOCK() ACQUIRE_LOCK(&magic_init_mutex); -#define RELEASE_MAGIC_INIT_LOCK() RELEASE_LOCK(&magic_init_mutex); +#ifndef ACQUIRE_MALLOC_GLOBAL_LOCK +#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); +#endif +#ifndef RELEASE_MALLOC_GLOBAL_LOCK +#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); +#endif #else /* USE_LOCKS */ -#define ACQUIRE_MAGIC_INIT_LOCK() -#define RELEASE_MAGIC_INIT_LOCK() +#define ACQUIRE_MALLOC_GLOBAL_LOCK() +#define RELEASE_MALLOC_GLOBAL_LOCK() #endif /* USE_LOCKS */ @@ -1581,7 +2025,7 @@ static MLOCK_T magic_init_mutex = MLOCK_INIT; A chunk that's in use looks like: chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk (if P = 1) | + | Size of previous chunk (if P = 0) | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| | Size of this chunk 1| +-+ @@ -1652,8 +2096,9 @@ static MLOCK_T 
magic_init_mutex = MLOCK_INIT; The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of the chunk size redundantly records whether the current chunk is - inuse. This redundancy enables usage checks within free and realloc, - and reduces indirection when freeing and consolidating chunks. + inuse (unless the chunk is mmapped). This redundancy enables usage + checks within free and realloc, and reduces indirection when freeing + and consolidating chunks. Each freshly allocated chunk must have both cinuse and pinuse set. That is, each allocated chunk borders either a previously allocated @@ -1682,9 +2127,8 @@ static MLOCK_T magic_init_mutex = MLOCK_INIT; space is still allocated for it (TOP_FOOT_SIZE) to enable separation or merging when space is extended. - 3. Chunks allocated via mmap, which have the lowest-order bit - (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set - PINUSE_BIT in their head fields. Because they are allocated + 3. Chunks allocated via mmap, have both cinuse and pinuse bits + cleared in their head fields. Because they are allocated one-by-one, each must carry its own prev_foot field, which is also used to hold the offset this chunk has within its mmapped region, which is needed to preserve alignment. Each mmapped @@ -1750,14 +2194,16 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */ /* The head field of a chunk is or'ed with PINUSE_BIT when previous adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in - use. If the chunk was obtained with mmap, the prev_foot field has - IS_MMAPPED_BIT set, otherwise holding the offset of the base of the - mmapped region to the base of the chunk. + use, unless mmapped, in which case both bits are cleared. + + FLAG4_BIT is not used by this malloc, but might be useful in extensions. 
*/ #define PINUSE_BIT (SIZE_T_ONE) #define CINUSE_BIT (SIZE_T_TWO) +#define FLAG4_BIT (SIZE_T_FOUR) #define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) +#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) /* Head value for fenceposts */ #define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) @@ -1765,17 +2211,19 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */ /* extraction of fields from head words */ #define cinuse(p) ((p)->head & CINUSE_BIT) #define pinuse(p) ((p)->head & PINUSE_BIT) -#define chunksize(p) ((p)->head & ~(INUSE_BITS)) +#define is_inuse(p) (((p)->head & INUSE_BITS) != PINUSE_BIT) +#define is_mmapped(p) (((p)->head & INUSE_BITS) == 0) + +#define chunksize(p) ((p)->head & ~(FLAG_BITS)) #define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) -#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) /* Treat space at ptr +/- offset as a chunk */ #define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) #define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) /* Ptr to next or previous physical malloc_chunk. */ -#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~INUSE_BITS))) +#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) #define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) /* extract next chunk's pinuse bit */ @@ -1793,9 +2241,6 @@ typedef unsigned int flag_t; /* The type of various bit flag sets */ #define set_free_with_pinuse(p, s, n)\ (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) -#define is_mmapped(p)\ - (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT)) - /* Get the internal overhead associated with chunk p */ #define overhead_for(p)\ (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) @@ -1966,7 +2411,7 @@ typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ and so should not try to deallocate or merge with others. (This currently holds only for the initial segment passed into create_mspace_with_base.) 
- * If IS_MMAPPED_BIT set, the segment may be merged with + * If USE_MMAP_BIT set, the segment may be merged with other surrounding mmapped segments and trimmed/de-allocated using munmap. * If neither bit is set, then the segment was obtained using @@ -1981,7 +2426,7 @@ struct malloc_segment { flag_t sflags; /* mmap and extern flag */ }; -#define is_mmapped_segment(S) ((S)->sflags & IS_MMAPPED_BIT) +#define is_mmapped_segment(S) ((S)->sflags & USE_MMAP_BIT) #define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) typedef struct malloc_segment msegment; @@ -2057,9 +2502,18 @@ typedef struct malloc_segment* msegmentptr; Each space keeps track of current and maximum system memory obtained via MORECORE or MMAP. + Trim support + Fields holding the amount of unused topmost memory that should trigger + trimming, and a counter to force periodic scanning to release unused + non-topmost segments. + Locking If USE_LOCKS is defined, the "mutex" lock is acquired and released around every public call using this mspace. + + Extension support + A void* pointer and a size_t field that can be used to help implement + extensions to this malloc. */ /* Bin types, widths and sizes */ @@ -2081,6 +2535,7 @@ struct malloc_state { mchunkptr dv; mchunkptr top; size_t trim_check; + size_t release_checks; size_t magic; mchunk smallbins[NSMALLBINS]; tbinptr treebins[NTREEBINS]; @@ -2091,6 +2546,8 @@ struct malloc_state { MLOCK_T mutex; /* locate lock among fields that rarely change */ #endif /* USE_LOCKS */ msegment seg; + void* extp; /* Unused but available for extensions */ + size_t exts; }; typedef struct malloc_state* mstate; @@ -2100,11 +2557,12 @@ typedef struct malloc_state* mstate; /* malloc_params holds global properties, including those that can be dynamically set using mallopt. There is a single instance, mparams, - initialized in init_mparams. + initialized in init_mparams. Note that the non-zeroness of "magic" + also serves as an initialization flag.
*/ struct malloc_params { - size_t magic; + volatile size_t magic; size_t page_size; size_t granularity; size_t mmap_threshold; @@ -2114,10 +2572,18 @@ struct malloc_params { static struct malloc_params mparams; +/* Ensure mparams initialized */ +#define ensure_initialization() (void)(mparams.magic != 0 || init_mparams()) + +#if !ONLY_MSPACES + /* The global malloc_state used for all non-"mspace" calls */ static struct malloc_state _gm_; #define gm (&_gm_) #define is_global(M) ((M) == &_gm_) + +#endif /* !ONLY_MSPACES */ + #define is_initialized(M) ((M)->top != 0) /* -------------------------- system alloc setup ------------------------- */ @@ -2142,11 +2608,23 @@ static struct malloc_state _gm_; /* page-align a size */ #define page_align(S)\ - (((S) + (mparams.page_size)) & ~(mparams.page_size - SIZE_T_ONE)) + (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) /* granularity-align a size */ #define granularity_align(S)\ - (((S) + (mparams.granularity)) & ~(mparams.granularity - SIZE_T_ONE)) + (((S) + (mparams.granularity - SIZE_T_ONE))\ + & ~(mparams.granularity - SIZE_T_ONE)) + + +/* For mmap, use granularity alignment on windows, else page-align */ +#ifdef WIN32 +#define mmap_align(S) granularity_align(S) +#else +#define mmap_align(S) page_align(S) +#endif + +/* For sys_alloc, enough padding to ensure can malloc request on success */ +#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) #define is_page_aligned(S)\ (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) @@ -2204,10 +2682,7 @@ static int has_segment_link(mstate m, msegmentptr ss) { #if USE_LOCKS -/* Ensure locks are initialized */ -#define GLOBALLY_INITIALIZE() (mparams.page_size == 0 && init_mparams()) - -#define PREACTION(M) ((GLOBALLY_INITIALIZE() || use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) +#define PREACTION(M) ((use_lock(M))? 
ACQUIRE_LOCK(&(M)->mutex) : 0) #define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } #else /* USE_LOCKS */ @@ -2296,8 +2771,37 @@ static size_t traverse_and_check(mstate m); #define smallbin_at(M, i) (&((M)->smallbins[(i)])) #define treebin_at(M,i) (&((M)->treebins[i])) -/* assign tree index for size S to variable I */ -#if defined(__GNUC__) && defined(i386) +/* assign tree index for size S to variable I. Use x86 asm if possible */ +#if defined (__INTEL_COMPILER) +#define compute_tree_index(S, I)\ +{\ + size_t X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K = _bit_scan_reverse (X); \ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#define compute_tree_index(S, I)\ +{\ + unsigned int X = S >> TREEBIN_SHIFT;\ + if (X == 0)\ + I = 0;\ + else if (X > 0xFFFF)\ + I = NTREEBINS-1;\ + else {\ + unsigned int K;\ + __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "g" (X));\ + I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ + }\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ @@ -2307,11 +2811,12 @@ static size_t traverse_and_check(mstate m); I = NTREEBINS-1;\ else {\ unsigned int K;\ - __asm__("bsrl %1,%0\n\t" : "=r" (K) : "rm" (X));\ + _BitScanReverse((DWORD *) &K, X);\ I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ }\ } -#else /* GNUC */ + +#else /* __INTEL_COMPILER */ #define compute_tree_index(S, I)\ {\ size_t X = S >> TREEBIN_SHIFT;\ @@ -2329,7 +2834,7 @@ static size_t traverse_and_check(mstate m); I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ }\ } -#endif /* GNUC */ +#endif /* __INTEL_COMPILER */ /* Bit representing maximum resolved size in a treebin at i */ #define bit_for_tree_index(i) \ @@ -2360,21 +2865,45 @@ static size_t traverse_and_check(mstate m); #define 
clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) #define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) -/* index corresponding to given bit */ +/* isolate the least set bit of a bitmap */ +#define least_bit(x) ((x) & -(x)) -#if defined(__GNUC__) && defined(i386) +/* mask with all bits to left of least bit of x on */ +#define left_bits(x) ((x<<1) | -(x<<1)) + +/* mask with all bits to left of or equal to least bit of x on */ +#define same_or_left_bits(x) ((x) | -(x)) + +/* index corresponding to given bit. Use x86 asm if possible */ + +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) #define compute_bit2idx(X, I)\ {\ unsigned int J;\ - __asm__("bsfl %1,%0\n\t" : "=r" (J) : "rm" (X));\ + __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "g" (X));\ I = (bindex_t)J;\ } -#else /* GNUC */ -#if USE_BUILTIN_FFS +#elif defined (__INTEL_COMPILER) +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + J = _bit_scan_forward (X); \ + I = (bindex_t)J;\ +} + +#elif defined(_MSC_VER) && _MSC_VER>=1300 +#define compute_bit2idx(X, I)\ +{\ + unsigned int J;\ + _BitScanForward((DWORD *) &J, X);\ + I = (bindex_t)J;\ +} + +#elif USE_BUILTIN_FFS #define compute_bit2idx(X, I) I = ffs(X)-1 -#else /* USE_BUILTIN_FFS */ +#else #define compute_bit2idx(X, I)\ {\ unsigned int Y = X - 1;\ @@ -2386,18 +2915,8 @@ static size_t traverse_and_check(mstate m); N += K = Y >> (1-0) & 1; Y >>= K;\ I = (bindex_t)(N + Y);\ } -#endif /* USE_BUILTIN_FFS */ #endif /* GNUC */ -/* isolate the least set bit of a bitmap */ -#define least_bit(x) ((x) & -(x)) - -/* mask with all bits to left of least bit of x on */ -#define left_bits(x) ((x<<1) | -(x<<1)) - -/* mask with all bits to left of or equal to least bit of x on */ -#define same_or_left_bits(x) ((x) | -(x)) - /* ----------------------- Runtime Check Support ------------------------- */ @@ -2432,15 +2951,15 @@ static size_t traverse_and_check(mstate m); #define ok_address(M, a) ((char*)(a) >= (M)->least_addr) /* Check if address of next chunk 
n is higher than base chunk p */ #define ok_next(p, n) ((char*)(p) < (char*)(n)) -/* Check if p has its cinuse bit on */ -#define ok_cinuse(p) cinuse(p) +/* Check if p has inuse status */ +#define ok_inuse(p) is_inuse(p) /* Check if p has its pinuse bit on */ #define ok_pinuse(p) pinuse(p) #else /* !INSECURE */ #define ok_address(M, a) (1) #define ok_next(b, n) (1) -#define ok_cinuse(p) (1) +#define ok_inuse(p) (1) #define ok_pinuse(p) (1) #endif /* !INSECURE */ @@ -2469,6 +2988,8 @@ static size_t traverse_and_check(mstate m); #define mark_inuse_foot(M,p,s) +/* Macros for setting head/foot of non-mmapped chunks */ + /* Set cinuse bit and pinuse bit of next chunk */ #define set_inuse(M,p,s)\ ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ @@ -2513,58 +3034,27 @@ static size_t traverse_and_check(mstate m); /* Initialize mparams */ static int init_mparams(void) { - if (mparams.page_size == 0) { - size_t s; +#ifdef NEED_GLOBAL_LOCK_INIT + if (malloc_global_mutex_status <= 0) + init_malloc_global_mutex(); +#endif - mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; - mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; -#if MORECORE_CONTIGUOUS - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; -#else /* MORECORE_CONTIGUOUS */ - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; -#endif /* MORECORE_CONTIGUOUS */ - -#if (FOOTERS && !INSECURE) - { -#if USE_DEV_RANDOM - int fd; - unsigned char buf[sizeof(size_t)]; - /* Try to use /dev/urandom, else fall back on using time */ - if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && - read(fd, buf, sizeof(buf)) == sizeof(buf)) { - s = *((size_t *) buf); - close(fd); - } - else -#endif /* USE_DEV_RANDOM */ - s = (size_t)(time(0) ^ (size_t)0x55555555U); - - s |= (size_t)8U; /* ensure nonzero */ - s &= ~(size_t)7U; /* improve chances of fault for bad values */ - - } -#else /* (FOOTERS && !INSECURE) */ - s = (size_t)0x58585858U; -#endif /* (FOOTERS && !INSECURE) */ - ACQUIRE_MAGIC_INIT_LOCK(); - if 
(mparams.magic == 0) { - mparams.magic = s; - /* Set up lock for main malloc area */ - INITIAL_LOCK(&gm->mutex); - gm->mflags = mparams.default_mflags; - } - RELEASE_MAGIC_INIT_LOCK(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); + if (mparams.magic == 0) { + size_t magic; + size_t psize; + size_t gsize; #ifndef WIN32 - mparams.page_size = malloc_getpagesize; - mparams.granularity = ((DEFAULT_GRANULARITY != 0)? - DEFAULT_GRANULARITY : mparams.page_size); + psize = malloc_getpagesize; + gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); #else /* WIN32 */ { SYSTEM_INFO system_info; GetSystemInfo(&system_info); - mparams.page_size = system_info.dwPageSize; - mparams.granularity = system_info.dwAllocationGranularity; + psize = system_info.dwPageSize; + gsize = ((DEFAULT_GRANULARITY != 0)? + DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); } #endif /* WIN32 */ @@ -2578,19 +3068,60 @@ static int init_mparams(void) { (MAX_SIZE_T < MIN_CHUNK_SIZE) || (sizeof(int) < 4) || (MALLOC_ALIGNMENT < (size_t)8U) || - ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || - ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || - ((mparams.granularity & (mparams.granularity-SIZE_T_ONE)) != 0) || - ((mparams.page_size & (mparams.page_size-SIZE_T_ONE)) != 0)) + ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || + ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || + ((gsize & (gsize-SIZE_T_ONE)) != 0) || + ((psize & (psize-SIZE_T_ONE)) != 0)) ABORT; + + mparams.granularity = gsize; + mparams.page_size = psize; + mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; + mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; +#if MORECORE_CONTIGUOUS + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; +#else /* MORECORE_CONTIGUOUS */ + mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; +#endif /* MORECORE_CONTIGUOUS */ + +#if !ONLY_MSPACES + /* Set up lock for main malloc area */ + gm->mflags = mparams.default_mflags; + INITIAL_LOCK(&gm->mutex); 
+#endif + + { +#if USE_DEV_RANDOM + int fd; + unsigned char buf[sizeof(size_t)]; + /* Try to use /dev/urandom, else fall back on using time */ + if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && + read(fd, buf, sizeof(buf)) == sizeof(buf)) { + magic = *((size_t *) buf); + close(fd); + } + else +#endif /* USE_DEV_RANDOM */ +#ifdef WIN32 + magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); +#else + magic = (size_t)(time(0) ^ (size_t)0x55555555U); +#endif + magic |= (size_t)8U; /* ensure nonzero */ + magic &= ~(size_t)7U; /* improve chances of fault for bad values */ + mparams.magic = magic; + } } - return 0; + + RELEASE_MALLOC_GLOBAL_LOCK(); + return 1; } /* support for mallopt */ static int change_mparam(int param_number, int value) { - size_t val = (size_t)value; - init_mparams(); + size_t val; + ensure_initialization(); + val = (value == -1)? MAX_SIZE_T : (size_t)value; switch(param_number) { case M_TRIM_THRESHOLD: mparams.trim_threshold = val; @@ -2622,7 +3153,7 @@ static void do_check_any_chunk(mstate m, mchunkptr p) { /* Check properties of top chunk */ static void do_check_top_chunk(mstate m, mchunkptr p) { msegmentptr sp = segment_holding(m, (char*)p); - size_t sz = chunksize(p); + size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ assert(sp != 0); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); assert(ok_address(m, p)); @@ -2630,13 +3161,13 @@ static void do_check_top_chunk(mstate m, mchunkptr p) { assert(sz > 0); assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); assert(pinuse(p)); - assert(!next_pinuse(p)); + assert(!pinuse(chunk_plus_offset(p, sz))); } /* Check properties of (inuse) mmapped chunks */ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { size_t sz = chunksize(p); - size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD); + size_t len = (sz + (p->prev_foot) + MMAP_FOOT_PAD); assert(is_mmapped(p)); assert(use_mmap(m)); assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); @@ -2650,7 +3181,7 @@ static void do_check_mmapped_chunk(mstate m, mchunkptr p) { /* Check properties of inuse chunks */ static void do_check_inuse_chunk(mstate m, mchunkptr p) { do_check_any_chunk(m, p); - assert(cinuse(p)); + assert(is_inuse(p)); assert(next_pinuse(p)); /* If not pinuse and not mmapped, previous chunk has OK offset */ assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); @@ -2660,10 +3191,10 @@ static void do_check_inuse_chunk(mstate m, mchunkptr p) { /* Check properties of free chunks */ static void do_check_free_chunk(mstate m, mchunkptr p) { - size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); + size_t sz = chunksize(p); mchunkptr next = chunk_plus_offset(p, sz); do_check_any_chunk(m, p); - assert(!cinuse(p)); + assert(!is_inuse(p)); assert(!next_pinuse(p)); assert (!is_mmapped(p)); if (p != m->dv && p != m->top) { @@ -2672,7 +3203,7 @@ static void do_check_free_chunk(mstate m, mchunkptr p) { assert(is_aligned(chunk2mem(p))); assert(next->prev_foot == sz); assert(pinuse(p)); - assert (next == m->top || cinuse(next)); + assert (next == m->top || is_inuse(next)); assert(p->fd->bk == p); assert(p->bk->fd == p); } @@ -2685,7 +3216,7 @@ static void do_check_free_chunk(mstate m, mchunkptr p) { static void 
do_check_malloced_chunk(mstate m, void* mem, size_t s) { if (mem != 0) { mchunkptr p = mem2chunk(mem); - size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); + size_t sz = p->head & ~INUSE_BITS; do_check_inuse_chunk(m, p); assert((sz & CHUNK_ALIGN_MASK) == 0); assert(sz >= MIN_CHUNK_SIZE); @@ -2712,7 +3243,7 @@ static void do_check_tree(mstate m, tchunkptr t) { do_check_any_chunk(m, ((mchunkptr)u)); assert(u->index == tindex); assert(chunksize(u) == tsize); - assert(!cinuse(u)); + assert(!is_inuse(u)); assert(!next_pinuse(u)); assert(u->fd->bk == u); assert(u->bk->fd == u); @@ -2830,13 +3361,13 @@ static size_t traverse_and_check(mstate m) { while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { sum += chunksize(q); - if (cinuse(q)) { + if (is_inuse(q)) { assert(!bin_find(m, q)); do_check_inuse_chunk(m, q); } else { assert(q == m->dv || bin_find(m, q)); - assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */ + assert(lastq == 0 || is_inuse(lastq)); /* Not 2 consecutive free */ do_check_free_chunk(m, q); } lastq = q; @@ -2867,7 +3398,7 @@ static void do_check_malloc_state(mstate m) { if (m->top != 0) { /* check top chunk */ do_check_top_chunk(m, m->top); - assert(m->topsize == chunksize(m->top)); + /*assert(m->topsize == chunksize(m->top)); redundant */ assert(m->topsize > 0); assert(bin_find(m, m->top) == 0); } @@ -2883,6 +3414,7 @@ static void do_check_malloc_state(mstate m) { #if !NO_MALLINFO static struct mallinfo internal_mallinfo(mstate m) { struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + ensure_initialization(); if (!PREACTION(m)) { check_malloc_state(m); if (is_initialized(m)) { @@ -2896,7 +3428,7 @@ static struct mallinfo internal_mallinfo(mstate m) { q != m->top && q->head != FENCEPOST_HEAD) { size_t sz = chunksize(q); sum += sz; - if (!cinuse(q)) { + if (!is_inuse(q)) { mfree += sz; ++nfree; } @@ -2921,6 +3453,7 @@ static struct mallinfo internal_mallinfo(mstate m) { #endif /* !NO_MALLINFO */ static void 
internal_malloc_stats(mstate m) { + ensure_initialization(); if (!PREACTION(m)) { size_t maxfp = 0; size_t fp = 0; @@ -2936,7 +3469,7 @@ static void internal_malloc_stats(mstate m) { mchunkptr q = align_as_chunk(s->base); while (segment_holds(s, q) && q != m->top && q->head != FENCEPOST_HEAD) { - if (!cinuse(q)) + if (!is_inuse(q)) used -= chunksize(q); q = next_chunk(q); } @@ -3017,6 +3550,8 @@ static void internal_malloc_stats(mstate m) { }\ } + + /* Replace dv node, binning the old one */ /* Used only when dvsize known to be small */ #define replace_dv(M, P, S) {\ @@ -3207,27 +3742,25 @@ static void internal_malloc_stats(mstate m) { the mmapped region stored in the prev_foot field of the chunk. This allows reconstruction of the required argument to MUNMAP when freed, and also allows adjustment of the returned chunk to meet alignment - requirements (especially in memalign). There is also enough space - allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain - the PINUSE bit so frees can be checked. + requirements (especially in memalign). 
*/ /* Malloc using mmap */ static void* mmap_alloc(mstate m, size_t nb) { - size_t mmsize = granularity_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); + size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); if (mmsize > nb) { /* Check for wrap around 0 */ - char* mm = (char*)(DIRECT_MMAP(mmsize)); + char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); if (mm != CMFAIL) { size_t offset = align_offset(chunk2mem(mm)); size_t psize = mmsize - offset - MMAP_FOOT_PAD; mchunkptr p = (mchunkptr)(mm + offset); - p->prev_foot = offset | IS_MMAPPED_BIT; - (p)->head = (psize|CINUSE_BIT); + p->prev_foot = offset; + p->head = psize; mark_inuse_foot(m, p, psize); chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; - if (mm < m->least_addr) + if (m->least_addr == 0 || mm < m->least_addr) m->least_addr = mm; if ((m->footprint += mmsize) > m->max_footprint) m->max_footprint = m->footprint; @@ -3249,16 +3782,15 @@ static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { (oldsize - nb) <= (mparams.granularity << 1)) return oldp; else { - size_t offset = oldp->prev_foot & ~IS_MMAPPED_BIT; + size_t offset = oldp->prev_foot; size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; - size_t newmmsize = granularity_align(nb + SIX_SIZE_T_SIZES + - CHUNK_ALIGN_MASK); + size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); char* cp = (char*)CALL_MREMAP((char*)oldp - offset, oldmmsize, newmmsize, 1); if (cp != CMFAIL) { mchunkptr newp = (mchunkptr)(cp + offset); size_t psize = newmmsize - offset - MMAP_FOOT_PAD; - newp->head = (psize|CINUSE_BIT); + newp->head = psize; mark_inuse_foot(m, newp, psize); chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; @@ -3347,7 +3879,7 @@ static void* prepend_alloc(mstate m, char* newbase, char* oldbase, set_size_and_pinuse_of_free_chunk(q, dsize); } else { - if (!cinuse(oldfirst)) { + if (!is_inuse(oldfirst)) { 
size_t nsize = chunksize(oldfirst); unlink_chunk(m, oldfirst, nsize); oldfirst = chunk_plus_offset(oldfirst, nsize); @@ -3362,7 +3894,6 @@ static void* prepend_alloc(mstate m, char* newbase, char* oldbase, return chunk2mem(p); } - /* Add a segment to hold a new noncontiguous region */ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { /* Determine locations and sizes of segment, fenceposts, old top */ @@ -3424,10 +3955,10 @@ static void* sys_alloc(mstate m, size_t nb) { size_t tsize = 0; flag_t mmap_flag = 0; - init_mparams(); + ensure_initialization(); - /* Directly map large chunks */ - if (use_mmap(m) && nb >= mparams.mmap_threshold) { + /* Directly map large chunks, but only if already initialized */ + if (use_mmap(m) && nb >= mparams.mmap_threshold && m->topsize != 0) { void* mem = mmap_alloc(m, nb); if (mem != 0) return mem; @@ -3448,18 +3979,23 @@ static void* sys_alloc(mstate m, size_t nb) { find space. 3. A call to MORECORE that cannot usually contiguously extend memory. (disabled if not HAVE_MORECORE) + + In all cases, we need to request enough bytes from system to ensure + we can malloc nb bytes upon success, so pad with enough space for + top_foot, plus alignment-pad to make sure we don't lose bytes if + not on boundary, and round this up to a granularity unit. */ if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { char* br = CMFAIL; msegmentptr ss = (m->top == 0)? 
0 : segment_holding(m, (char*)m->top); size_t asize = 0; - ACQUIRE_MORECORE_LOCK(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); if (ss == 0) { /* First time through or recovery */ char* base = (char*)CALL_MORECORE(0); if (base != CMFAIL) { - asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE); + asize = granularity_align(nb + SYS_ALLOC_PADDING); /* Adjust to end on a page boundary */ if (!is_page_aligned(base)) asize += (page_align((size_t)base) - (size_t)base); @@ -3473,7 +4009,7 @@ static void* sys_alloc(mstate m, size_t nb) { } else { /* Subtract out existing available top space from MORECORE request. */ - asize = granularity_align(nb - m->topsize + TOP_FOOT_SIZE + SIZE_T_ONE); + asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); /* Use mem here only if it did continuously extend old space */ if (asize < HALF_MAX_SIZE_T && (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { @@ -3485,8 +4021,8 @@ static void* sys_alloc(mstate m, size_t nb) { if (tbase == CMFAIL) { /* Cope with partial failure */ if (br != CMFAIL) { /* Try to use/extend the space we did get */ if (asize < HALF_MAX_SIZE_T && - asize < nb + TOP_FOOT_SIZE + SIZE_T_ONE) { - size_t esize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE - asize); + asize < nb + SYS_ALLOC_PADDING) { + size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize); if (esize < HALF_MAX_SIZE_T) { char* end = (char*)CALL_MORECORE(esize); if (end != CMFAIL) @@ -3506,31 +4042,30 @@ static void* sys_alloc(mstate m, size_t nb) { disable_contiguous(m); /* Don't try contiguous path in the future */ } - RELEASE_MORECORE_LOCK(); + RELEASE_MALLOC_GLOBAL_LOCK(); } if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ - size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE; - size_t rsize = granularity_align(req); + size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING); if (rsize > nb) { /* Fail if wraps around zero */ char* mp = (char*)(CALL_MMAP(rsize)); if (mp != CMFAIL) { tbase = mp; tsize = rsize; - mmap_flag = 
IS_MMAPPED_BIT; + mmap_flag = USE_MMAP_BIT; } } } if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ - size_t asize = granularity_align(nb + TOP_FOOT_SIZE + SIZE_T_ONE); + size_t asize = granularity_align(nb + SYS_ALLOC_PADDING); if (asize < HALF_MAX_SIZE_T) { char* br = CMFAIL; char* end = CMFAIL; - ACQUIRE_MORECORE_LOCK(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); br = (char*)(CALL_MORECORE(asize)); end = (char*)(CALL_MORECORE(0)); - RELEASE_MORECORE_LOCK(); + RELEASE_MALLOC_GLOBAL_LOCK(); if (br != CMFAIL && end != CMFAIL && br < end) { size_t ssize = end - br; if (ssize > nb + TOP_FOOT_SIZE) { @@ -3547,14 +4082,20 @@ static void* sys_alloc(mstate m, size_t nb) { m->max_footprint = m->footprint; if (!is_initialized(m)) { /* first-time initialization */ - m->seg.base = m->least_addr = tbase; + if (m->least_addr == 0 || tbase < m->least_addr) + m->least_addr = tbase; + m->seg.base = tbase; m->seg.size = tsize; m->seg.sflags = mmap_flag; m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; init_bins(m); +#if !ONLY_MSPACES if (is_global(m)) init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - else { + else +#endif + { /* Offset top by embedded malloc_state */ mchunkptr mn = next_chunk(mem2chunk(m)); init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); @@ -3564,11 +4105,12 @@ static void* sys_alloc(mstate m, size_t nb) { else { /* Try to merge with an existing segment */ msegmentptr sp = &m->seg; + /* Only consider most recent segment if traversal suppressed */ while (sp != 0 && tbase != sp->base + sp->size) - sp = sp->next; + sp = (NO_SEGMENT_TRAVERSAL) ? 
0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && - (sp->sflags & IS_MMAPPED_BIT) == mmap_flag && + (sp->sflags & USE_MMAP_BIT) == mmap_flag && segment_holds(sp, m->top)) { /* append */ sp->size += tsize; init_top(m, m->top, m->topsize + tsize); @@ -3578,10 +4120,10 @@ static void* sys_alloc(mstate m, size_t nb) { m->least_addr = tbase; sp = &m->seg; while (sp != 0 && sp->base != tbase + tsize) - sp = sp->next; + sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; if (sp != 0 && !is_extern_segment(sp) && - (sp->sflags & IS_MMAPPED_BIT) == mmap_flag) { + (sp->sflags & USE_MMAP_BIT) == mmap_flag) { char* oldbase = sp->base; sp->base = tbase; sp->size += tsize; @@ -3613,17 +4155,19 @@ static void* sys_alloc(mstate m, size_t nb) { /* Unmap and unlink any mmapped segments that don't contain used chunks */ static size_t release_unused_segments(mstate m) { size_t released = 0; + int nsegs = 0; msegmentptr pred = &m->seg; msegmentptr sp = pred->next; while (sp != 0) { char* base = sp->base; size_t size = sp->size; msegmentptr next = sp->next; + ++nsegs; if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { mchunkptr p = align_as_chunk(base); size_t psize = chunksize(p); /* Can unmap if first chunk holds entire segment and not pinned */ - if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { + if (!is_inuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { tchunkptr tp = (tchunkptr)p; assert(segment_holds(sp, (char*)sp)); if (p == m->dv) { @@ -3645,14 +4189,20 @@ static size_t release_unused_segments(mstate m) { } } } + if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ + break; pred = sp; sp = next; } + /* Reset check counter */ + m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)? 
+ nsegs : MAX_RELEASE_CHECK_RATE); return released; } static int sys_trim(mstate m, size_t pad) { size_t released = 0; + ensure_initialization(); if (pad < MAX_REQUEST && is_initialized(m)) { pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ @@ -3679,7 +4229,7 @@ static int sys_trim(mstate m, size_t pad) { else if (HAVE_MORECORE) { if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; - ACQUIRE_MORECORE_LOCK(); + ACQUIRE_MALLOC_GLOBAL_LOCK(); { /* Make sure end of memory is where we last set it. */ char* old_br = (char*)(CALL_MORECORE(0)); @@ -3690,7 +4240,7 @@ static int sys_trim(mstate m, size_t pad) { released = old_br - new_br; } } - RELEASE_MORECORE_LOCK(); + RELEASE_MALLOC_GLOBAL_LOCK(); } } @@ -3707,13 +4257,14 @@ static int sys_trim(mstate m, size_t pad) { released += release_unused_segments(m); /* On failure, disable autotrim to avoid repeated failed future calls */ - if (released == 0) + if (released == 0 && m->topsize > m->trim_check) m->trim_check = MAX_SIZE_T; } return (released != 0)? 
1 : 0; } + /* ---------------------------- malloc support --------------------------- */ /* allocate a large request from the best fitting chunk in a treebin */ @@ -3723,7 +4274,6 @@ static void* tmalloc_large(mstate m, size_t nb) { tchunkptr t; bindex_t idx; compute_tree_index(nb, idx); - if ((t = *treebin_at(m, idx)) != 0) { /* Traverse tree for this bin looking for node with size == nb */ size_t sizebits = nb << leftshift_for_tree_index(idx); @@ -3747,7 +4297,6 @@ static void* tmalloc_large(mstate m, size_t nb) { sizebits <<= 1; } } - if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; if (leftbits != 0) { @@ -3796,7 +4345,6 @@ static void* tmalloc_small(mstate m, size_t nb) { bindex_t i; binmap_t leastbit = least_bit(m->treemap); compute_bit2idx(leastbit, i); - v = t = *treebin_at(m, i); rsize = chunksize(t) - nb; @@ -3844,7 +4392,7 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { /* Try to either shrink or extend into top. 
Else malloc-copy-free */ - if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) && + if (RTCHECK(ok_address(m, oldp) && ok_inuse(oldp) && ok_next(oldp, next) && ok_pinuse(next))) { size_t nb = request2size(bytes); if (is_mmapped(oldp)) @@ -3855,7 +4403,7 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { if (rsize >= MIN_CHUNK_SIZE) { mchunkptr remainder = chunk_plus_offset(newp, nb); set_inuse(m, newp, nb); - set_inuse(m, remainder, rsize); + set_inuse_and_pinuse(m, remainder, rsize); extra = chunk2mem(remainder); } } @@ -3876,6 +4424,11 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { POSTACTION(m); return 0; } +#if DEBUG + if (newp != 0) { + check_inuse_chunk(m, newp); /* Check requires lock */ + } +#endif POSTACTION(m); @@ -3883,7 +4436,6 @@ static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { if (extra != 0) { internal_free(m, extra); } - check_inuse_chunk(m, newp); return chunk2mem(newp); } else { @@ -3920,7 +4472,7 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { else { size_t nb = request2size(bytes); size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; - char* mem = (char*)(internal_malloc(m, req)); + char* mem = (char*)internal_malloc(m, req); if (mem != 0) { void* leader = 0; void* trailer = 0; @@ -3948,7 +4500,7 @@ static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ newp->prev_foot = p->prev_foot + leadsize; - newp->head = (newsize|CINUSE_BIT); + newp->head = newsize; } else { /* Otherwise, give back leader, use the rest */ set_inuse(m, newp, newsize); @@ -4014,6 +4566,7 @@ static void** ialloc(mstate m, size_t size; size_t i; + ensure_initialization(); /* compute array length, if needed */ if (chunks != 0) { if (n_elements == 0) @@ -4024,7 +4577,7 @@ static void** ialloc(mstate m, else { /* if empty req, must still return chunk representing empty array */ if 
(n_elements == 0) - return (void**)(internal_malloc(m, 0)); + return (void**)internal_malloc(m, 0); marray = 0; array_size = request2size(n_elements * (sizeof(void*))); } @@ -4143,6 +4696,10 @@ void* dlmalloc(size_t bytes) { The ugly goto's here ensure that postaction occurs along all paths. */ +#if USE_LOCKS + ensure_initialization(); /* initialize in sys_alloc if not using locks */ +#endif + if (!PREACTION(gm)) { void* mem; size_t nb; @@ -4271,13 +4828,12 @@ void dlfree(void* mem) { #endif /* FOOTERS */ if (!PREACTION(fm)) { check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; - if ((prevsize & IS_MMAPPED_BIT) != 0) { - prevsize &= ~IS_MMAPPED_BIT; + if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; @@ -4335,8 +4891,18 @@ void dlfree(void* mem) { } else set_free_with_pinuse(p, psize, next); - insert_chunk(fm, p, psize); - check_free_chunk(fm, p); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } goto postaction; } } @@ -4406,20 +4972,21 @@ void** dlindependent_comalloc(size_t n_elements, size_t sizes[], void* dlvalloc(size_t bytes) { size_t pagesz; - init_mparams(); + ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, bytes); } void* dlpvalloc(size_t bytes) { size_t pagesz; - init_mparams(); + ensure_initialization(); pagesz = mparams.page_size; return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); } int dlmalloc_trim(size_t pad) { int result = 0; + ensure_initialization(); if (!PREACTION(gm)) { result = sys_trim(gm, 
pad); POSTACTION(gm); @@ -4441,25 +5008,25 @@ struct mallinfo dlmallinfo(void) { } #endif /* NO_MALLINFO */ -void dlmalloc_stats() { +void dlmalloc_stats( void ) { internal_malloc_stats(gm); } -size_t dlmalloc_usable_size(void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (cinuse(p)) - return chunksize(p) - overhead_for(p); - } - return 0; -} - int dlmallopt(int param_number, int value) { return change_mparam(param_number, value); } #endif /* !ONLY_MSPACES */ +size_t dlmalloc_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + /* ----------------------------- user mspaces ---------------------------- */ #if MSPACES @@ -4471,11 +5038,14 @@ static mstate init_user_mstate(char* tbase, size_t tsize) { mstate m = (mstate)(chunk2mem(msp)); memset(m, 0, msize); INITIAL_LOCK(&m->mutex); - msp->head = (msize|PINUSE_BIT|CINUSE_BIT); + msp->head = (msize|INUSE_BITS); m->seg.base = m->least_addr = tbase; m->seg.size = m->footprint = m->max_footprint = tsize; m->magic = mparams.magic; + m->release_checks = MAX_RELEASE_CHECK_RATE; m->mflags = mparams.default_mflags; + m->extp = 0; + m->exts = 0; disable_contiguous(m); init_bins(m); mn = next_chunk(mem2chunk(m)); @@ -4486,9 +5056,9 @@ static mstate init_user_mstate(char* tbase, size_t tsize) { mspace create_mspace(size_t capacity, int locked) { mstate m = 0; - size_t msize = pad_request(sizeof(struct malloc_state)); - init_mparams(); /* Ensure pagesize etc initialized */ - + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { size_t rs = ((capacity == 0)? 
mparams.granularity : (capacity + TOP_FOOT_SIZE + msize)); @@ -4496,7 +5066,7 @@ mspace create_mspace(size_t capacity, int locked) { char* tbase = (char*)(CALL_MMAP(tsize)); if (tbase != CMFAIL) { m = init_user_mstate(tbase, tsize); - m->seg.sflags = IS_MMAPPED_BIT; + m->seg.sflags = USE_MMAP_BIT; set_lock(m, locked); } } @@ -4505,9 +5075,9 @@ mspace create_mspace(size_t capacity, int locked) { mspace create_mspace_with_base(void* base, size_t capacity, int locked) { mstate m = 0; - size_t msize = pad_request(sizeof(struct malloc_state)); - init_mparams(); /* Ensure pagesize etc initialized */ - + size_t msize; + ensure_initialization(); + msize = pad_request(sizeof(struct malloc_state)); if (capacity > msize + TOP_FOOT_SIZE && capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { m = init_user_mstate((char*)base, capacity); @@ -4517,6 +5087,21 @@ mspace create_mspace_with_base(void* base, size_t capacity, int locked) { return (mspace)m; } +int mspace_track_large_chunks(mspace msp, int enable) { + int ret = 0; + mstate ms = (mstate)msp; + if (!PREACTION(ms)) { + if (!use_mmap(ms)) + ret = 1; + if (!enable) + enable_mmap(ms); + else + disable_mmap(ms); + POSTACTION(ms); + } + return ret; +} + size_t destroy_mspace(mspace msp) { size_t freed = 0; mstate ms = (mstate)msp; @@ -4527,7 +5112,7 @@ size_t destroy_mspace(mspace msp) { size_t size = sp->size; flag_t flag = sp->sflags; sp = sp->next; - if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) && + if ((flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) && CALL_MUNMAP(base, size) == 0) freed += size; } @@ -4663,7 +5248,7 @@ void mspace_free(mspace msp, void* mem) { mchunkptr p = mem2chunk(mem); #if FOOTERS mstate fm = get_mstate_for(p); - ( void ) msp; + msp = msp; /* placate people compiling -Wunused */ #else /* FOOTERS */ mstate fm = (mstate)msp; #endif /* FOOTERS */ @@ -4673,13 +5258,12 @@ void mspace_free(mspace msp, void* mem) { } if (!PREACTION(fm)) { check_inuse_chunk(fm, p); - if 
(RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { + if (RTCHECK(ok_address(fm, p) && ok_inuse(p))) { size_t psize = chunksize(p); mchunkptr next = chunk_plus_offset(p, psize); if (!pinuse(p)) { size_t prevsize = p->prev_foot; - if ((prevsize & IS_MMAPPED_BIT) != 0) { - prevsize &= ~IS_MMAPPED_BIT; + if (is_mmapped(p)) { psize += prevsize + MMAP_FOOT_PAD; if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) fm->footprint -= psize; @@ -4737,8 +5321,18 @@ void mspace_free(mspace msp, void* mem) { } else set_free_with_pinuse(p, psize, next); - insert_chunk(fm, p, psize); - check_free_chunk(fm, p); + + if (is_small(psize)) { + insert_small_chunk(fm, p, psize); + check_free_chunk(fm, p); + } + else { + tchunkptr tp = (tchunkptr)p; + insert_large_chunk(fm, tp, psize); + check_free_chunk(fm, p); + if (--fm->release_checks == 0) + release_unused_segments(fm); + } goto postaction; } } @@ -4850,29 +5444,27 @@ void mspace_malloc_stats(mspace msp) { } size_t mspace_footprint(mspace msp) { - size_t result; + size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { result = ms->footprint; } else { - result = 0; + USAGE_ERROR_ACTION(ms,ms); } - USAGE_ERROR_ACTION(ms,ms); return result; } size_t mspace_max_footprint(mspace msp) { - size_t result; + size_t result = 0; mstate ms = (mstate)msp; if (ok_magic(ms)) { result = ms->max_footprint; } else { - result = 0; + USAGE_ERROR_ACTION(ms,ms); } - USAGE_ERROR_ACTION(ms,ms); return result; } @@ -4887,12 +5479,22 @@ struct mallinfo mspace_mallinfo(mspace msp) { } #endif /* NO_MALLINFO */ +size_t mspace_usable_size(void* mem) { + if (mem != 0) { + mchunkptr p = mem2chunk(mem); + if (is_inuse(p)) + return chunksize(p) - overhead_for(p); + } + return 0; +} + int mspace_mallopt(int param_number, int value) { return change_mparam(param_number, value); } #endif /* MSPACES */ + /* -------------------- Alternative MORECORE functions ------------------- */ /* @@ -4987,6 +5589,21 @@ int mspace_mallopt(int param_number, int value) { /* 
----------------------------------------------------------------------- History: + V2.8.4 Wed May 27 09:56:23 2009 Doug Lea (dl at gee) + * Use zeros instead of prev foot for is_mmapped + * Add mspace_track_large_chunks; thanks to Jean Brouwers + * Fix set_inuse in internal_realloc; thanks to Jean Brouwers + * Fix insufficient sys_alloc padding when using 16byte alignment + * Fix bad error check in mspace_footprint + * Adaptations for ptmalloc; thanks to Wolfram Gloger. + * Reentrant spin locks; thanks to Earl Chew and others + * Win32 improvements; thanks to Niall Douglas and Earl Chew + * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options + * Extension hook in malloc_state + * Various small adjustments to reduce warnings on some compilers + * Various configuration extensions/changes for more platforms. Thanks + to all who contributed these. + V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) * Add max_footprint functions * Ensure all appropriate literals are size_t @@ -5165,3 +5782,4 @@ History: structure of old version, but most details differ.) 
*/ + diff --git a/harbour/src/vm/fm.c b/harbour/src/vm/fm.c index ad33a5e83c..2395588f29 100644 --- a/harbour/src/vm/fm.c +++ b/harbour/src/vm/fm.c @@ -141,7 +141,11 @@ /* # define USE_DL_PREFIX */ # define REALLOC_ZERO_BYTES_FREES # if defined( HB_MT_VM ) -# define USE_LOCKS 1 +# if defined( HB_SPINLOCK_R ) +# define USE_LOCKS 2 +# else +# define USE_LOCKS 1 +# endif # if defined( HB_FM_DLMT_ALLOC ) # define ONLY_MSPACES 1 # define FOOTERS 1 @@ -156,6 +160,7 @@ # pragma warn -ngu # pragma warn -prc # pragma warn -rch +# pragma warn -inl # elif defined( HB_OS_WIN_CE ) && defined( __POCC__ ) # define ABORT TerminateProcess( GetCurrentProcess(), 0 ) # elif defined( __POCC__ ) && !defined( InterlockedCompareExchangePointer ) @@ -193,6 +198,7 @@ # pragma warn +ngu # pragma warn +prc # pragma warn +rch +# pragma warn +inl # elif defined( __WATCOMC__ ) # pragma warning 13 2 # pragma warning 367 2 @@ -462,7 +468,7 @@ static void dlmalloc_destroy( void ) size_t size = sp->size; flag_t flag = sp->sflags; sp = sp->next; - if( (flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) ) + if( (flag & USE_MMAP_BIT) && !(flag & EXTERN_BIT) ) CALL_MUNMAP(base, size); } }