From 0181a0f48fd0ddfb631690da68f46392ac34e661 Mon Sep 17 00:00:00 2001 From: Aleksander Czajczynski Date: Mon, 26 Jun 2017 08:56:14 +0200 Subject: [PATCH] 2017-06-26 08:55 UTC+0200 Aleksander Czajczynski (hb fki.pl) * contrib/hbexpat/3rd/expat/* ! updated to 2.2.1 (from 2.1.0) using 3rdpatch.hb, expat.dif(f) from Viktor's 3.4 fork was used - but adapted for DOS 8.3 naming scheme. According to issue #157, previous revisions of expat have security vulnerabilities, for more information refer to: https://github.com/libexpat/libexpat/blob/master/expat/Changes Many thanks to Sebastian Pipping for the information. * contrib/hbexpat/hbexpat.ch + new constant HB_XML_ERROR_INVALID_ARGUMENT added --- ChangeLog.txt | 12 + contrib/hbexpat/3rd/expat/COPYING | 5 +- contrib/hbexpat/3rd/expat/amigacon.h | 32 -- contrib/hbexpat/3rd/expat/expat.dif | 179 ++----- contrib/hbexpat/3rd/expat/expat.h | 35 +- contrib/hbexpat/3rd/expat/expat.hbp | 9 +- contrib/hbexpat/3rd/expat/expat_ex.h | 19 +- contrib/hbexpat/3rd/expat/internal.h | 22 + contrib/hbexpat/3rd/expat/macconfi.h | 53 -- contrib/hbexpat/3rd/expat/winconfi.h | 10 + contrib/hbexpat/3rd/expat/xmlparse.c | 712 +++++++++++++++++++++------ contrib/hbexpat/3rd/expat/xmlrole.c | 230 +++++---- contrib/hbexpat/3rd/expat/xmltok.c | 253 ++++++---- contrib/hbexpat/3rd/expat/xmltok.h | 10 +- contrib/hbexpat/3rd/expat/xmltok_i.c | 226 +++++---- contrib/hbexpat/hbexpat.ch | 1 + 16 files changed, 1092 insertions(+), 716 deletions(-) delete mode 100644 contrib/hbexpat/3rd/expat/amigacon.h delete mode 100644 contrib/hbexpat/3rd/expat/macconfi.h diff --git a/ChangeLog.txt b/ChangeLog.txt index 731297e3d5..7308ba448b 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -10,6 +10,18 @@ * Change, ! Fix, % Optimization, + Addition, - Removal, ; Comment */ +2017-06-26 08:55 UTC+0200 Aleksander Czajczynski (hb fki.pl) + * contrib/hbexpat/3rd/expat/* + ! updated to 2.2.1 (from 2.1.0) using 3rdpatch.hb, expat.dif(f) from + Viktor's 3.4 fork was used - but adapted for DOS 8.3 naming + scheme. According to issue #157, previous revisions of expat have + security vulnerabilities, for more information refer to: + https://github.com/libexpat/libexpat/blob/master/expat/Changes + Many thanks to Sebastian Pipping for the information. + + * contrib/hbexpat/hbexpat.ch + + new constant HB_XML_ERROR_INVALID_ARGUMENT added + 2017-05-20 02:25 UTC Viktor Szakats (vszakats users.noreply.github.com) * utils/hbmk2/hbmk2.prg * remove `_HBSHELL_EXEC_PRE` macro diff --git a/contrib/hbexpat/3rd/expat/COPYING b/contrib/hbexpat/3rd/expat/COPYING index dcb4506429..8d288f0f28 100644 --- a/contrib/hbexpat/3rd/expat/COPYING +++ b/contrib/hbexpat/3rd/expat/COPYING @@ -1,6 +1,5 @@ -Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd - and Clark Cooper -Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers. +Copyright (c) 1998-2000 Thai Open Source Software Center Ltd and Clark Cooper +Copyright (c) 2001-2017 Expat maintainers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/contrib/hbexpat/3rd/expat/amigacon.h b/contrib/hbexpat/3rd/expat/amigacon.h deleted file mode 100644 index 86c6115040..0000000000 --- a/contrib/hbexpat/3rd/expat/amigacon.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef AMIGACONFIG_H -#define AMIGACONFIG_H - -/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ -#define BYTEORDER 4321 - -/* Define to 1 if you have the `bcopy' function. */ -#define HAVE_BCOPY 1 - -/* Define to 1 if you have the header file. */ -#undef HAVE_CHECK_H - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the header file. */ -#define HAVE_UNISTD_H 1 - -/* whether byteorder is bigendian */ -#define WORDS_BIGENDIAN - -/* Define to specify how much context to retain around the current parse - point. */ -#define XML_CONTEXT_BYTES 1024 - -/* Define to make parameter entity parsing functionality available. */ -#define XML_DTD - -/* Define to make XML Namespaces functionality available. */ -#define XML_NS - -#endif /* AMIGACONFIG_H */ diff --git a/contrib/hbexpat/3rd/expat/expat.dif b/contrib/hbexpat/3rd/expat/expat.dif index a8cf12f546..f1af7f551f 100644 --- a/contrib/hbexpat/3rd/expat/expat.dif +++ b/contrib/hbexpat/3rd/expat/expat.dif @@ -1,168 +1,51 @@ -diff -urN expat.orig\xmlparse.c expat\xmlparse.c ---- expat.orig\xmlparse.c Wed Sep 12 12:57:25 2012 -+++ expat\xmlparse.c Wed Sep 12 12:57:25 2012 -@@ -10,7 +10,9 @@ +diff -urN expat.orig/xmlparse.c expat/xmlparse.c +--- expat.orig/xmlparse.c 2017-06-26 08:22:50.000000000 +0200 ++++ expat/xmlparse.c 2017-06-26 08:22:50.000000000 +0200 +@@ -23,7 +23,9 @@ #define XML_BUILDING_EXPAT 1 --#ifdef COMPILED_FROM_DSP +-#ifdef _WIN32 +#ifdef HARBOUR_CONF +#include "_hbconf.h" -+#elif defined(COMPILED_FROM_DSP) ++#elif defined(_WIN32) #include "winconfi.h" - #elif defined(MACOS_CLASSIC) - #include "macconfi.h" -diff -urN expat.orig\xmlrole.c expat\xmlrole.c ---- expat.orig\xmlrole.c Wed Sep 12 12:57:25 2012 -+++ expat\xmlrole.c Wed Sep 12 12:57:25 2012 + #elif defined(HAVE_EXPAT_CONFIG_H) + #include +@@ -749,7 +751,7 @@ + static int + writeRandomBytes_RtlGenRandom(void * target, size_t count) { + int success = 0; /* full count bytes written? */ +- const HMODULE advapi32 = LoadLibrary("ADVAPI32.DLL"); ++ const HMODULE advapi32 = LoadLibrary(TEXT("ADVAPI32.DLL")); + + if (advapi32) { + const RTLGENRANDOM_FUNC RtlGenRandom +diff -urN expat.orig/xmlrole.c expat/xmlrole.c +--- expat.orig/xmlrole.c 2017-06-26 08:22:50.000000000 +0200 ++++ expat/xmlrole.c 2017-06-26 08:22:50.000000000 +0200 @@ -4,7 +4,9 @@ #include --#ifdef COMPILED_FROM_DSP +-#ifdef _WIN32 +#ifdef HARBOUR_CONF +#include "_hbconf.h" -+#elif defined(COMPILED_FROM_DSP) ++#elif defined(_WIN32) #include "winconfi.h" - #elif defined(MACOS_CLASSIC) - #include "macconfi.h" -diff -urN expat.orig\xmltok.c expat\xmltok.c ---- expat.orig\xmltok.c Wed Sep 12 12:57:25 2012 -+++ expat\xmltok.c Wed Sep 12 12:57:25 2012 + #else + #ifdef HAVE_EXPAT_CONFIG_H +diff -urN expat.orig/xmltok.c expat/xmltok.c +--- expat.orig/xmltok.c 2017-06-26 08:22:50.000000000 +0200 ++++ expat/xmltok.c 2017-06-26 08:22:50.000000000 +0200 @@ -4,7 +4,9 @@ #include --#ifdef COMPILED_FROM_DSP +-#ifdef _WIN32 +#ifdef HARBOUR_CONF +#include "_hbconf.h" -+#elif defined(COMPILED_FROM_DSP) ++#elif defined(_WIN32) #include "winconfi.h" - #elif defined(MACOS_CLASSIC) - #include "macconfi.h" -@@ -222,6 +224,17 @@ - E ## isInvalid3, \ - E ## isInvalid4 - -+#define NULL_VTABLE() \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL, \ -+ NULL -+ - static int FASTCALL checkCharRefNumber(int); - - #include "xmltok_i.h" -@@ -467,7 +480,7 @@ - #include "asciitab.h" - #include "latin1ta.h" - }, -- STANDARD_VTABLE(sb_) -+ STANDARD_VTABLE(sb_) NULL_VTABLE() - }; - - #endif -@@ -480,7 +493,7 @@ - #undef BT_COLON - #include "latin1ta.h" - }, -- STANDARD_VTABLE(sb_) -+ STANDARD_VTABLE(sb_) NULL_VTABLE() - }; - - static void PTRCALL -@@ -500,7 +513,7 @@ - #include "asciitab.h" - /* BT_NONXML == 0 */ - }, -- STANDARD_VTABLE(sb_) -+ STANDARD_VTABLE(sb_) NULL_VTABLE() - }; - - #endif -@@ -513,7 +526,7 @@ - #undef BT_COLON - /* BT_NONXML == 0 */ - }, -- STANDARD_VTABLE(sb_) -+ STANDARD_VTABLE(sb_) NULL_VTABLE() - }; - - static int PTRFASTCALL -@@ -726,7 +739,7 @@ - #include "asciitab.h" - #include "latin1ta.h" - }, -- STANDARD_VTABLE(little2_) -+ STANDARD_VTABLE(little2_) NULL_VTABLE() - }; - - #endif -@@ -745,7 +758,7 @@ - #undef BT_COLON - #include "latin1ta.h" - }, -- STANDARD_VTABLE(little2_) -+ STANDARD_VTABLE(little2_) NULL_VTABLE() - }; - - #if BYTEORDER != 4321 -@@ -758,7 +771,7 @@ - #include "iasciita.h" - #include "latin1ta.h" - }, -- STANDARD_VTABLE(little2_) -+ STANDARD_VTABLE(little2_) NULL_VTABLE() - }; - - #endif -@@ -771,7 +784,7 @@ - #undef BT_COLON - #include "latin1ta.h" - }, -- STANDARD_VTABLE(little2_) -+ STANDARD_VTABLE(little2_) NULL_VTABLE() - }; - - #endif -@@ -867,7 +880,7 @@ - #include "asciitab.h" - #include "latin1ta.h" - }, -- STANDARD_VTABLE(big2_) -+ STANDARD_VTABLE(big2_) NULL_VTABLE() - }; - - #endif -@@ -886,7 +899,7 @@ - #undef BT_COLON - #include "latin1ta.h" - }, -- STANDARD_VTABLE(big2_) -+ STANDARD_VTABLE(big2_) NULL_VTABLE() - }; - - #if BYTEORDER != 1234 -@@ -899,7 +912,7 @@ - #include "iasciita.h" - #include "latin1ta.h" - }, -- STANDARD_VTABLE(big2_) -+ STANDARD_VTABLE(big2_) NULL_VTABLE() - }; - - #endif -@@ -912,7 +925,7 @@ - #undef BT_COLON - #include "latin1ta.h" - }, -- STANDARD_VTABLE(big2_) -+ STANDARD_VTABLE(big2_) NULL_VTABLE() - }; - - #endif + #else + #ifdef HAVE_EXPAT_CONFIG_H diff --git a/contrib/hbexpat/3rd/expat/expat.h b/contrib/hbexpat/3rd/expat/expat.h index d393e96b79..8bdde93a3c 100644 --- a/contrib/hbexpat/3rd/expat/expat.h +++ b/contrib/hbexpat/3rd/expat/expat.h @@ -95,7 +95,9 @@ enum XML_Error { /* Added in 2.0. */ XML_ERROR_RESERVED_PREFIX_XML, XML_ERROR_RESERVED_PREFIX_XMLNS, - XML_ERROR_RESERVED_NAMESPACE_URI + XML_ERROR_RESERVED_NAMESPACE_URI, + /* Added in 2.2.1. */ + XML_ERROR_INVALID_ARGUMENT }; enum XML_Content_Type { @@ -342,7 +344,7 @@ XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler); /* OBSOLETE -- OBSOLETE -- OBSOLETE - This handler has been superceded by the EntityDeclHandler above. + This handler has been superseded by the EntityDeclHandler above. It is provided here for backward compatibility. This is called for a declaration of an unparsed (NDATA) entity. @@ -706,6 +708,7 @@ XML_UseParserAsHandlerArg(XML_Parser parser); be called, despite an external subset being parsed. Note: If XML_DTD is not defined when Expat is compiled, returns XML_ERROR_FEATURE_REQUIRES_XML_DTD. + Note: If parser == NULL, returns XML_ERROR_INVALID_ARGUMENT. */ XMLPARSEAPI(enum XML_Error) XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); @@ -729,15 +732,16 @@ XML_GetBase(XML_Parser parser); to the XML_StartElementHandler that were specified in the start-tag rather than defaulted. Each attribute/value pair counts as 2; thus this correspondds to an index into the atts array passed to the - XML_StartElementHandler. + XML_StartElementHandler. Returns -1 if parser == NULL. */ XMLPARSEAPI(int) XML_GetSpecifiedAttributeCount(XML_Parser parser); /* Returns the index of the ID attribute passed in the last call to - XML_StartElementHandler, or -1 if there is no ID attribute. Each - attribute/value pair counts as 2; thus this correspondds to an - index into the atts array passed to the XML_StartElementHandler. + XML_StartElementHandler, or -1 if there is no ID attribute or + parser == NULL. Each attribute/value pair counts as 2; thus this + correspondds to an index into the atts array passed to the + XML_StartElementHandler. */ XMLPARSEAPI(int) XML_GetIdAttributeIndex(XML_Parser parser); @@ -901,6 +905,7 @@ enum XML_ParamEntityParsing { entities is requested; otherwise it will return non-zero. Note: If XML_SetParamEntityParsing is called after XML_Parse or XML_ParseBuffer, then it has no effect and will always return 0. + Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetParamEntityParsing(XML_Parser parser, @@ -910,6 +915,7 @@ XML_SetParamEntityParsing(XML_Parser parser, Helps in preventing DoS attacks based on predicting hash function behavior. This must be called before parsing is started. Returns 1 if successful, 0 when called after parsing has started. + Note: If parser == NULL, the function will do nothing and return 0. */ XMLPARSEAPI(int) XML_SetHashSalt(XML_Parser parser, @@ -936,6 +942,10 @@ XML_GetErrorCode(XML_Parser parser); the location is the location of the character at which the error was detected; otherwise the location is the location of the last parse event, as described above. + + Note: XML_GetCurrentLineNumber and XML_GetCurrentColumnNumber + return 0 to indicate an error. + Note: XML_GetCurrentByteIndex returns -1 to indicate an error. */ XMLPARSEAPI(XML_Size) XML_GetCurrentLineNumber(XML_Parser parser); XMLPARSEAPI(XML_Size) XML_GetCurrentColumnNumber(XML_Parser parser); @@ -973,9 +983,12 @@ XML_FreeContentModel(XML_Parser parser, XML_Content *model); /* Exposing the memory handling functions used in Expat */ XMLPARSEAPI(void *) +XML_ATTR_MALLOC +XML_ATTR_ALLOC_SIZE(2) XML_MemMalloc(XML_Parser parser, size_t size); XMLPARSEAPI(void *) +XML_ATTR_ALLOC_SIZE(3) XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); XMLPARSEAPI(void) @@ -1031,14 +1044,12 @@ XMLPARSEAPI(const XML_Feature *) XML_GetFeatureList(void); -/* Expat follows the GNU/Linux convention of odd number minor version for - beta/development releases and even number minor version for stable - releases. Micro is bumped with each release, and set to 0 with each - change to major or minor version. +/* Expat follows the semantic versioning convention. + See http://semver.org. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 1 -#define XML_MICRO_VERSION 0 +#define XML_MINOR_VERSION 2 +#define XML_MICRO_VERSION 1 #ifdef __cplusplus } diff --git a/contrib/hbexpat/3rd/expat/expat.hbp b/contrib/hbexpat/3rd/expat/expat.hbp index 57e40cd21c..2644027ab5 100644 --- a/contrib/hbexpat/3rd/expat/expat.hbp +++ b/contrib/hbexpat/3rd/expat/expat.hbp @@ -20,9 +20,9 @@ xmltok.c {bcc}-cflag=-w-8008 {bcc}-cflag=-w-8066 -# ORIGIN http://expat.sourceforge.net/ -# VER 2.1.0 -# URL http://sourceforge.net/projects/expat/files/expat/2.1.0/expat-2.1.0.tar.gz/download +# ORIGIN https://expat.sourceforge.net/ +# VER 2.2.1 +# URL https://downloads.sourceforge.net/project/expat/expat/2.2.1/expat-2.2.1.tar.bz2 # DIFF expat.dif # # MAP COPYING @@ -31,7 +31,6 @@ xmltok.c # MAP lib/xmltok.c xmltok.c # MAP lib/xmltok_impl.c xmltok_i.c # MAP lib/xmltok_ns.c xmltok_n.c -# MAP lib/amigaconfig.h amigacon.h # MAP lib/ascii.h ascii.h # MAP lib/asciitab.h asciitab.h # MAP lib/expat.h expat.h @@ -39,8 +38,8 @@ xmltok.c # MAP lib/iasciitab.h iasciita.h # MAP lib/internal.h internal.h # MAP lib/latin1tab.h latin1ta.h -# MAP lib/macconfig.h macconfi.h # MAP lib/nametab.h nametab.h +# MAP lib/siphash.h siphash.h # MAP lib/utf8tab.h utf8tab.h # MAP lib/winconfig.h winconfi.h # MAP lib/xmlrole.h xmlrole.h diff --git a/contrib/hbexpat/3rd/expat/expat_ex.h b/contrib/hbexpat/3rd/expat/expat_ex.h index 2c03284ea2..892eb4bbed 100644 --- a/contrib/hbexpat/3rd/expat/expat_ex.h +++ b/contrib/hbexpat/3rd/expat/expat_ex.h @@ -65,12 +65,26 @@ #endif #endif /* not defined XML_STATIC */ +#if !defined(XMLIMPORT) && defined(__GNUC__) && (__GNUC__ >= 4) +#define XMLIMPORT __attribute__ ((visibility ("default"))) +#endif /* If we didn't define it above, define it away: */ #ifndef XMLIMPORT #define XMLIMPORT #endif +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)) +#define XML_ATTR_MALLOC __attribute__((__malloc__)) +#else +#define XML_ATTR_MALLOC +#endif + +#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +#define XML_ATTR_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) +#else +#define XML_ATTR_ALLOC_SIZE(x) +#endif #define XMLPARSEAPI(type) XMLIMPORT type XMLCALL @@ -79,7 +93,10 @@ extern "C" { #endif #ifdef XML_UNICODE_WCHAR_T -#define XML_UNICODE +# define XML_UNICODE +# if defined(__SIZEOF_WCHAR_T__) && (__SIZEOF_WCHAR_T__ != 2) +# error "sizeof(wchar_t) != 2; Need -fshort-wchar for both Expat and libc" +# endif #endif #ifdef XML_UNICODE /* Information is UTF-16 encoded. */ diff --git a/contrib/hbexpat/3rd/expat/internal.h b/contrib/hbexpat/3rd/expat/internal.h index dd5454831d..94cb98e15c 100644 --- a/contrib/hbexpat/3rd/expat/internal.h +++ b/contrib/hbexpat/3rd/expat/internal.h @@ -71,3 +71,25 @@ #define inline #endif #endif + +#ifndef UNUSED_P +# ifdef __GNUC__ +# define UNUSED_P(p) UNUSED_ ## p __attribute__((__unused__)) +# else +# define UNUSED_P(p) UNUSED_ ## p +# endif +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + + +void +align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef); + + +#ifdef __cplusplus +} +#endif diff --git a/contrib/hbexpat/3rd/expat/macconfi.h b/contrib/hbexpat/3rd/expat/macconfi.h deleted file mode 100644 index 2725caaf54..0000000000 --- a/contrib/hbexpat/3rd/expat/macconfi.h +++ /dev/null @@ -1,53 +0,0 @@ -/*================================================================ -** Copyright 2000, Clark Cooper -** All rights reserved. -** -** This is free software. You are permitted to copy, distribute, or modify -** it under the terms of the MIT/X license (contained in the COPYING file -** with this distribution.) -** -*/ - -#ifndef MACCONFIG_H -#define MACCONFIG_H - - -/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */ -#define BYTEORDER 4321 - -/* Define to 1 if you have the `bcopy' function. */ -#undef HAVE_BCOPY - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE - -/* Define to 1 if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Define to 1 if you have the header file. */ -#undef HAVE_UNISTD_H - -/* whether byteorder is bigendian */ -#define WORDS_BIGENDIAN - -/* Define to specify how much context to retain around the current parse - point. */ -#undef XML_CONTEXT_BYTES - -/* Define to make parameter entity parsing functionality available. */ -#define XML_DTD - -/* Define to make XML Namespaces functionality available. */ -#define XML_NS - -/* Define to empty if `const' does not conform to ANSI C. */ -#undef const - -/* Define to `long' if does not define. */ -#define off_t long - -/* Define to `unsigned' if does not define. */ -#undef size_t - - -#endif /* ifndef MACCONFIG_H */ diff --git a/contrib/hbexpat/3rd/expat/winconfi.h b/contrib/hbexpat/3rd/expat/winconfi.h index c1b791d62d..9bf014d7fb 100644 --- a/contrib/hbexpat/3rd/expat/winconfi.h +++ b/contrib/hbexpat/3rd/expat/winconfi.h @@ -17,6 +17,12 @@ #include #include + +#if defined(HAVE_EXPAT_CONFIG_H) /* e.g. MinGW */ +# include +#else /* !defined(HAVE_EXPAT_CONFIG_H) */ + + #define XML_NS 1 #define XML_DTD 1 #define XML_CONTEXT_BYTES 1024 @@ -27,4 +33,8 @@ /* Windows has memmove() available. */ #define HAVE_MEMMOVE + +#endif /* !defined(HAVE_EXPAT_CONFIG_H) */ + + #endif /* ndef WINCONFIG_H */ diff --git a/contrib/hbexpat/3rd/expat/xmlparse.c b/contrib/hbexpat/3rd/expat/xmlparse.c index 44a473bf52..2b37e42c7f 100644 --- a/contrib/hbexpat/3rd/expat/xmlparse.c +++ b/contrib/hbexpat/3rd/expat/xmlparse.c @@ -1,31 +1,39 @@ /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd See the file COPYING for copying permission. + + 77fea421d361dca90041d0040ecf1dca651167fadf2af79e990e35168d70d933 (2.2.1+) */ +#define _GNU_SOURCE /* syscall prototype */ + #include #include /* memset(), memcpy() */ #include #include /* UINT_MAX */ -#include /* time() */ +#include /* fprintf */ +#include /* getenv */ + +#ifdef _WIN32 +#define getpid GetCurrentProcessId +#else +#include /* gettimeofday() */ +#include /* getpid() */ +#include /* getpid() */ +#endif #define XML_BUILDING_EXPAT 1 #ifdef HARBOUR_CONF #include "_hbconf.h" -#elif defined(COMPILED_FROM_DSP) +#elif defined(_WIN32) #include "winconfi.h" -#elif defined(MACOS_CLASSIC) -#include "macconfi.h" -#elif defined(__amigaos__) -#include "amigacon.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" #elif defined(HAVE_EXPAT_CONFIG_H) #include -#endif /* ndef COMPILED_FROM_DSP */ +#endif /* ndef _WIN32 */ #include "ascii.h" #include "expat.h" +#include "siphash.h" #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX @@ -104,17 +112,11 @@ typedef struct { const XML_Memory_Handling_Suite *mem; } HASH_TABLE; -/* Basic character hash algorithm, taken from Python's string hash: - h = h * 1000003 ^ character, the constant being a prime number. +static size_t +keylen(KEY s); -*/ -#ifdef XML_UNICODE -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned short)(c)) -#else -#define CHAR_HASH(h, c) \ - (((h) * 0xF4243) ^ (unsigned char)(c)) -#endif +static void +copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key); /* For probing (after a collision) we need a step size relative prime to the hash table size, which is a power of 2. We use double-hashing, @@ -350,6 +352,8 @@ doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ +static void +freeBindings(XML_Parser parser, BINDING *bindings); static enum XML_Error storeAtts(XML_Parser parser, const ENCODING *, const char *s, TAG_NAME *tagNamePtr, BINDING **bindingsPtr); @@ -434,7 +438,7 @@ static ELEMENT_TYPE * getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); -static unsigned long generate_hash_secret_salt(void); +static unsigned long generate_hash_secret_salt(XML_Parser parser); static XML_Bool startParsing(XML_Parser parser); static XML_Parser @@ -692,12 +696,155 @@ static const XML_Char implicitContext[] = { ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0' }; + +#if defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) +# include + +# if defined(HAVE_GETRANDOM) +# include /* getrandom */ +# else +# include /* syscall */ +# include /* SYS_getrandom */ +# endif + +/* Obtain entropy on Linux 3.17+ */ +static int +writeRandomBytes_getrandom(void * target, size_t count) { + int success = 0; /* full count bytes written? */ + size_t bytesWrittenTotal = 0; + const unsigned int getrandomFlags = 0; + + do { + void * const currentTarget = (void*)((char*)target + bytesWrittenTotal); + const size_t bytesToWrite = count - bytesWrittenTotal; + + const int bytesWrittenMore = +#if defined(HAVE_GETRANDOM) + getrandom(currentTarget, bytesToWrite, getrandomFlags); +#else + syscall(SYS_getrandom, currentTarget, bytesToWrite, getrandomFlags); +#endif + + if (bytesWrittenMore > 0) { + bytesWrittenTotal += bytesWrittenMore; + if (bytesWrittenTotal >= count) + success = 1; + } + } while (! success && (errno == EINTR || errno == EAGAIN)); + + return success; +} + +#endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ + + +#ifdef _WIN32 + +typedef BOOLEAN (APIENTRY *RTLGENRANDOM_FUNC)(PVOID, ULONG); + +/* Obtain entropy on Windows XP / Windows Server 2003 and later. + * Hint on RtlGenRandom and the following article from libsodioum. + * + * Michael Howard: Cryptographically Secure Random number on Windows without using CryptoAPI + * https://blogs.msdn.microsoft.com/michael_howard/2005/01/14/cryptographically-secure-random-number-on-windows-without-using-cryptoapi/ + */ +static int +writeRandomBytes_RtlGenRandom(void * target, size_t count) { + int success = 0; /* full count bytes written? */ + const HMODULE advapi32 = LoadLibrary(TEXT("ADVAPI32.DLL")); + + if (advapi32) { + const RTLGENRANDOM_FUNC RtlGenRandom + = (RTLGENRANDOM_FUNC)GetProcAddress(advapi32, "SystemFunction036"); + if (RtlGenRandom) { + if (RtlGenRandom((PVOID)target, (ULONG)count) == TRUE) { + success = 1; + } + } + FreeLibrary(advapi32); + } + + return success; +} + +#endif /* _WIN32 */ + + static unsigned long -generate_hash_secret_salt(void) +gather_time_entropy(void) { - unsigned int seed = time(NULL) % UINT_MAX; - srand(seed); - return rand(); +#ifdef _WIN32 + FILETIME ft; + GetSystemTimeAsFileTime(&ft); /* never fails */ + return ft.dwHighDateTime ^ ft.dwLowDateTime; +#else + struct timeval tv; + int gettimeofday_res; + + gettimeofday_res = gettimeofday(&tv, NULL); + assert (gettimeofday_res == 0); + + /* Microseconds time is <20 bits entropy */ + return tv.tv_usec; +#endif +} + +#if defined(HAVE_ARC4RANDOM_BUF) && defined(HAVE_LIBBSD) +# include +#endif + +static unsigned long +ENTROPY_DEBUG(const char * label, unsigned long entropy) { + const char * const EXPAT_ENTROPY_DEBUG = getenv("EXPAT_ENTROPY_DEBUG"); + if (EXPAT_ENTROPY_DEBUG && ! strcmp(EXPAT_ENTROPY_DEBUG, "1")) { + fprintf(stderr, "Entropy: %s --> 0x%0*lx (%lu bytes)\n", + label, + (int)sizeof(entropy) * 2, entropy, + (unsigned long)sizeof(entropy)); + } + return entropy; +} + +static unsigned long +generate_hash_secret_salt(XML_Parser parser) +{ + unsigned long entropy; + (void)parser; +#if defined(HAVE_ARC4RANDOM_BUF) || defined(__CloudABI__) + (void)gather_time_entropy; + arc4random_buf(&entropy, sizeof(entropy)); + return ENTROPY_DEBUG("arc4random_buf", entropy); +#else + /* Try high quality providers first .. */ +#ifdef _WIN32 + if (writeRandomBytes_RtlGenRandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("RtlGenRandom", entropy); + } +#elif defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) + if (writeRandomBytes_getrandom((void *)&entropy, sizeof(entropy))) { + return ENTROPY_DEBUG("getrandom", entropy); + } +#endif + /* .. and self-made low quality for backup: */ + + /* Process ID is 0 bits entropy if attacker has local access */ + entropy = gather_time_entropy() ^ getpid(); + + /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ + if (sizeof(unsigned long) == 4) { + return ENTROPY_DEBUG("fallback(4)", entropy * 2147483647); + } else { + return ENTROPY_DEBUG("fallback(8)", + entropy * (unsigned long)2305843009213693951); + } +#endif +} + +static unsigned long +get_hash_secret_salt(XML_Parser parser) { + if (parser->m_parentParser != NULL) + return get_hash_secret_salt(parser->m_parentParser); + return parser->m_hash_secret_salt; } static XML_Bool /* only valid for root parser */ @@ -705,7 +852,7 @@ startParsing(XML_Parser parser) { /* hash functions must be initialized before setContext() is called */ if (hash_secret_salt == 0) - hash_secret_salt = generate_hash_secret_salt(); + hash_secret_salt = generate_hash_secret_salt(parser); if (ns) { /* implicit context only set for root parser, since child parsers (i.e. external entity parsers) will inherit it @@ -928,6 +1075,10 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; OPEN_INTERNAL_ENTITY *openEntityList; + + if (parser == NULL) + return XML_FALSE; + if (parentParser) return XML_FALSE; /* move tagStack to freeTagList */ @@ -962,6 +1113,8 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { + if (parser == NULL) + return XML_STATUS_ERROR; /* Block after XML_Parse()/XML_ParseBuffer() has been called. XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. @@ -985,52 +1138,88 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, { XML_Parser parser = oldParser; DTD *newDtd = NULL; - DTD *oldDtd = _dtd; - XML_StartElementHandler oldStartElementHandler = startElementHandler; - XML_EndElementHandler oldEndElementHandler = endElementHandler; - XML_CharacterDataHandler oldCharacterDataHandler = characterDataHandler; - XML_ProcessingInstructionHandler oldProcessingInstructionHandler - = processingInstructionHandler; - XML_CommentHandler oldCommentHandler = commentHandler; - XML_StartCdataSectionHandler oldStartCdataSectionHandler - = startCdataSectionHandler; - XML_EndCdataSectionHandler oldEndCdataSectionHandler - = endCdataSectionHandler; - XML_DefaultHandler oldDefaultHandler = defaultHandler; - XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler - = unparsedEntityDeclHandler; - XML_NotationDeclHandler oldNotationDeclHandler = notationDeclHandler; - XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler - = startNamespaceDeclHandler; - XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler - = endNamespaceDeclHandler; - XML_NotStandaloneHandler oldNotStandaloneHandler = notStandaloneHandler; - XML_ExternalEntityRefHandler oldExternalEntityRefHandler - = externalEntityRefHandler; - XML_SkippedEntityHandler oldSkippedEntityHandler = skippedEntityHandler; - XML_UnknownEncodingHandler oldUnknownEncodingHandler - = unknownEncodingHandler; - XML_ElementDeclHandler oldElementDeclHandler = elementDeclHandler; - XML_AttlistDeclHandler oldAttlistDeclHandler = attlistDeclHandler; - XML_EntityDeclHandler oldEntityDeclHandler = entityDeclHandler; - XML_XmlDeclHandler oldXmlDeclHandler = xmlDeclHandler; - ELEMENT_TYPE * oldDeclElementType = declElementType; + DTD *oldDtd; + XML_StartElementHandler oldStartElementHandler; + XML_EndElementHandler oldEndElementHandler; + XML_CharacterDataHandler oldCharacterDataHandler; + XML_ProcessingInstructionHandler oldProcessingInstructionHandler; + XML_CommentHandler oldCommentHandler; + XML_StartCdataSectionHandler oldStartCdataSectionHandler; + XML_EndCdataSectionHandler oldEndCdataSectionHandler; + XML_DefaultHandler oldDefaultHandler; + XML_UnparsedEntityDeclHandler oldUnparsedEntityDeclHandler; + XML_NotationDeclHandler oldNotationDeclHandler; + XML_StartNamespaceDeclHandler oldStartNamespaceDeclHandler; + XML_EndNamespaceDeclHandler oldEndNamespaceDeclHandler; + XML_NotStandaloneHandler oldNotStandaloneHandler; + XML_ExternalEntityRefHandler oldExternalEntityRefHandler; + XML_SkippedEntityHandler oldSkippedEntityHandler; + XML_UnknownEncodingHandler oldUnknownEncodingHandler; + XML_ElementDeclHandler oldElementDeclHandler; + XML_AttlistDeclHandler oldAttlistDeclHandler; + XML_EntityDeclHandler oldEntityDeclHandler; + XML_XmlDeclHandler oldXmlDeclHandler; + ELEMENT_TYPE * oldDeclElementType; - void *oldUserData = userData; - void *oldHandlerArg = handlerArg; - XML_Bool oldDefaultExpandInternalEntities = defaultExpandInternalEntities; - XML_Parser oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; + void *oldUserData; + void *oldHandlerArg; + XML_Bool oldDefaultExpandInternalEntities; + XML_Parser oldExternalEntityRefHandlerArg; #ifdef XML_DTD - enum XML_ParamEntityParsing oldParamEntityParsing = paramEntityParsing; - int oldInEntityValue = prologState.inEntityValue; + enum XML_ParamEntityParsing oldParamEntityParsing; + int oldInEntityValue; #endif - XML_Bool oldns_triplets = ns_triplets; + XML_Bool oldns_triplets; /* Note that the new parser shares the same hash secret as the old parser, so that dtdCopy and copyEntityTable can lookup values from hash tables associated with either parser without us having to worry which hash secrets each table has. */ - unsigned long oldhash_secret_salt = hash_secret_salt; + unsigned long oldhash_secret_salt; + + /* Validate the oldParser parameter before we pull everything out of it */ + if (oldParser == NULL) + return NULL; + + /* Stash the original parser contents on the stack */ + oldDtd = _dtd; + oldStartElementHandler = startElementHandler; + oldEndElementHandler = endElementHandler; + oldCharacterDataHandler = characterDataHandler; + oldProcessingInstructionHandler = processingInstructionHandler; + oldCommentHandler = commentHandler; + oldStartCdataSectionHandler = startCdataSectionHandler; + oldEndCdataSectionHandler = endCdataSectionHandler; + oldDefaultHandler = defaultHandler; + oldUnparsedEntityDeclHandler = unparsedEntityDeclHandler; + oldNotationDeclHandler = notationDeclHandler; + oldStartNamespaceDeclHandler = startNamespaceDeclHandler; + oldEndNamespaceDeclHandler = endNamespaceDeclHandler; + oldNotStandaloneHandler = notStandaloneHandler; + oldExternalEntityRefHandler = externalEntityRefHandler; + oldSkippedEntityHandler = skippedEntityHandler; + oldUnknownEncodingHandler = unknownEncodingHandler; + oldElementDeclHandler = elementDeclHandler; + oldAttlistDeclHandler = attlistDeclHandler; + oldEntityDeclHandler = entityDeclHandler; + oldXmlDeclHandler = xmlDeclHandler; + oldDeclElementType = declElementType; + + oldUserData = userData; + oldHandlerArg = handlerArg; + oldDefaultExpandInternalEntities = defaultExpandInternalEntities; + oldExternalEntityRefHandlerArg = externalEntityRefHandlerArg; +#ifdef XML_DTD + oldParamEntityParsing = paramEntityParsing; + oldInEntityValue = prologState.inEntityValue; +#endif + oldns_triplets = ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + oldhash_secret_salt = hash_secret_salt; #ifdef XML_DTD if (!context) @@ -1196,12 +1385,15 @@ XML_ParserFree(XML_Parser parser) void XMLCALL XML_UseParserAsHandlerArg(XML_Parser parser) { - handlerArg = parser; + if (parser != NULL) + handlerArg = parser; } enum XML_Error XMLCALL XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) @@ -1216,6 +1408,8 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { + if (parser == NULL) + return; /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return; @@ -1225,6 +1419,8 @@ XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) void XMLCALL XML_SetUserData(XML_Parser parser, void *p) { + if (parser == NULL) + return; if (handlerArg == userData) handlerArg = userData = p; else @@ -1234,6 +1430,8 @@ XML_SetUserData(XML_Parser parser, void *p) enum XML_Status XMLCALL XML_SetBase(XML_Parser parser, const XML_Char *p) { + if (parser == NULL) + return XML_STATUS_ERROR; if (p) { p = poolCopyString(&_dtd->pool, p); if (!p) @@ -1248,18 +1446,24 @@ XML_SetBase(XML_Parser parser, const XML_Char *p) const XML_Char * XMLCALL XML_GetBase(XML_Parser parser) { + if (parser == NULL) + return NULL; return curBase; } int XMLCALL XML_GetSpecifiedAttributeCount(XML_Parser parser) { + if (parser == NULL) + return -1; return nSpecifiedAtts; } int XMLCALL XML_GetIdAttributeIndex(XML_Parser parser) { + if (parser == NULL) + return -1; return idAttIndex; } @@ -1267,6 +1471,8 @@ XML_GetIdAttributeIndex(XML_Parser parser) const XML_AttrInfo * XMLCALL XML_GetAttributeInfo(XML_Parser parser) { + if (parser == NULL) + return NULL; return attInfo; } #endif @@ -1276,6 +1482,8 @@ XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) { + if (parser == NULL) + return; startElementHandler = start; endElementHandler = end; } @@ -1283,34 +1491,39 @@ XML_SetElementHandler(XML_Parser parser, void XMLCALL XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { - startElementHandler = start; + if (parser != NULL) + startElementHandler = start; } void XMLCALL XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { - endElementHandler = end; + if (parser != NULL) + endElementHandler = end; } void XMLCALL XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { - characterDataHandler = handler; + if (parser != NULL) + characterDataHandler = handler; } void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler) { - processingInstructionHandler = handler; + if (parser != NULL) + processingInstructionHandler = handler; } void XMLCALL XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { - commentHandler = handler; + if (parser != NULL) + commentHandler = handler; } void XMLCALL @@ -1318,6 +1531,8 @@ XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end) { + if (parser == NULL) + return; startCdataSectionHandler = start; endCdataSectionHandler = end; } @@ -1325,19 +1540,23 @@ XML_SetCdataSectionHandler(XML_Parser parser, void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { - startCdataSectionHandler = start; + if (parser != NULL) + startCdataSectionHandler = start; } void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { - endCdataSectionHandler = end; + if (parser != NULL) + endCdataSectionHandler = end; } void XMLCALL XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; defaultHandler = handler; defaultExpandInternalEntities = XML_FALSE; } @@ -1346,6 +1565,8 @@ void XMLCALL XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { + if (parser == NULL) + return; defaultHandler = handler; defaultExpandInternalEntities = XML_TRUE; } @@ -1355,6 +1576,8 @@ XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end) { + if (parser == NULL) + return; startDoctypeDeclHandler = start; endDoctypeDeclHandler = end; } @@ -1362,27 +1585,31 @@ XML_SetDoctypeDeclHandler(XML_Parser parser, void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { - startDoctypeDeclHandler = start; + if (parser != NULL) + startDoctypeDeclHandler = start; } void XMLCALL XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { - endDoctypeDeclHandler = end; + if (parser != NULL) + endDoctypeDeclHandler = end; } void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { - unparsedEntityDeclHandler = handler; + if (parser != NULL) + unparsedEntityDeclHandler = handler; } void XMLCALL XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { - notationDeclHandler = handler; + if (parser != NULL) + notationDeclHandler = handler; } void XMLCALL @@ -1390,6 +1617,8 @@ XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end) { + if (parser == NULL) + return; startNamespaceDeclHandler = start; endNamespaceDeclHandler = end; } @@ -1397,32 +1626,38 @@ XML_SetNamespaceDeclHandler(XML_Parser parser, void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { - startNamespaceDeclHandler = start; + if (parser != NULL) + startNamespaceDeclHandler = start; } void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { - endNamespaceDeclHandler = end; + if (parser != NULL) + endNamespaceDeclHandler = end; } void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { - notStandaloneHandler = handler; + if (parser != NULL) + notStandaloneHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler) { - externalEntityRefHandler = handler; + if (parser != NULL) + externalEntityRefHandler = handler; } void XMLCALL XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { + if (parser == NULL) + return; if (arg) externalEntityRefHandlerArg = (XML_Parser)arg; else @@ -1433,7 +1668,8 @@ void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler) { - skippedEntityHandler = handler; + if (parser != NULL) + skippedEntityHandler = handler; } void XMLCALL @@ -1441,6 +1677,8 @@ XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *data) { + if (parser == NULL) + return; unknownEncodingHandler = handler; unknownEncodingHandlerData = data; } @@ -1449,33 +1687,39 @@ void XMLCALL XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { - elementDeclHandler = eldecl; + if (parser != NULL) + elementDeclHandler = eldecl; } void XMLCALL XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { - attlistDeclHandler = attdecl; + if (parser != NULL) + attlistDeclHandler = attdecl; } void XMLCALL XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { - entityDeclHandler = handler; + if (parser != NULL) + entityDeclHandler = handler; } void XMLCALL XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { - xmlDeclHandler = handler; + if (parser != NULL) + xmlDeclHandler = handler; } int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing peParsing) { + if (parser == NULL) + return 0; /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return 0; @@ -1491,6 +1735,10 @@ int XMLCALL XML_SetHashSalt(XML_Parser parser, unsigned long hash_salt) { + if (parser == NULL) + return 0; + if (parser->m_parentParser) + return XML_SetHashSalt(parser->m_parentParser, hash_salt); /* block after XML_Parse()/XML_ParseBuffer() has been called */ if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return 0; @@ -1501,6 +1749,10 @@ XML_SetHashSalt(XML_Parser parser, enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + if ((parser == NULL) || (len < 0) || ((s == NULL) && (len != 0))) { + errorCode = XML_ERROR_INVALID_ARGUMENT; + return XML_STATUS_ERROR; + } switch (ps_parsing) { case XML_SUSPENDED: errorCode = XML_ERROR_SUSPENDED; @@ -1552,7 +1804,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) else if (bufferPtr == bufferEnd) { const char *end; int nLeftOver; - enum XML_Error result; + enum XML_Status result; + /* Detect overflow (a+b > MAX <==> b > MAX-a) */ + if (len > ((XML_Size)-1) / 2 - parseEndByteIndex) { + errorCode = XML_ERROR_NO_MEMORY; + eventPtr = eventEndPtr = NULL; + processor = errorProcessor; + return XML_STATUS_ERROR; + } parseEndByteIndex += len; positionPtr = s; ps_finalBuffer = (XML_Bool)isFinal; @@ -1585,11 +1844,14 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) nLeftOver = s + len - end; if (nLeftOver) { if (buffer == NULL || nLeftOver > bufferLim - buffer) { - /* FIXME avoid integer overflow */ - char *temp; - temp = (buffer == NULL - ? (char *)MALLOC(len * 2) - : (char *)REALLOC(buffer, len * 2)); + /* avoid _signed_ integer overflow */ + char *temp = NULL; + const int bytesToAllocate = (int)((unsigned)len * 2U); + if (bytesToAllocate > 0) { + temp = (buffer == NULL + ? (char *)MALLOC(bytesToAllocate) + : (char *)REALLOC(buffer, bytesToAllocate)); + } if (temp == NULL) { errorCode = XML_ERROR_NO_MEMORY; eventPtr = eventEndPtr = NULL; @@ -1597,7 +1859,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) return XML_STATUS_ERROR; } buffer = temp; - bufferLim = buffer + len * 2; + bufferLim = buffer + bytesToAllocate; } memcpy(buffer, end, nLeftOver); } @@ -1627,6 +1889,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) const char *start; enum XML_Status result = XML_STATUS_OK; + if (parser == NULL) + return XML_STATUS_ERROR; switch (ps_parsing) { case XML_SUSPENDED: errorCode = XML_ERROR_SUSPENDED; @@ -1680,6 +1944,12 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) void * XMLCALL XML_GetBuffer(XML_Parser parser, int len) { + if (parser == NULL) + return NULL; + if (len < 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } switch (ps_parsing) { case XML_SUSPENDED: errorCode = XML_ERROR_SUSPENDED; @@ -1691,11 +1961,17 @@ XML_GetBuffer(XML_Parser parser, int len) } if (len > bufferLim - bufferEnd) { - /* FIXME avoid integer overflow */ - int neededSize = len + (int)(bufferEnd - bufferPtr); #ifdef XML_CONTEXT_BYTES - int keep = (int)(bufferPtr - buffer); - + int keep; +#endif /* defined XML_CONTEXT_BYTES */ + /* Do not invoke signed arithmetic overflow: */ + int neededSize = (int) ((unsigned)len + (unsigned)(bufferEnd - bufferPtr)); + if (neededSize < 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } +#ifdef XML_CONTEXT_BYTES + keep = (int)(bufferPtr - buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; neededSize += keep; @@ -1720,8 +1996,13 @@ XML_GetBuffer(XML_Parser parser, int len) if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { - bufferSize *= 2; - } while (bufferSize < neededSize); + /* Do not invoke signed arithmetic overflow: */ + bufferSize = (int) (2U * (unsigned) bufferSize); + } while (bufferSize < neededSize && bufferSize > 0); + if (bufferSize <= 0) { + errorCode = XML_ERROR_NO_MEMORY; + return NULL; + } newBuf = (char *)MALLOC(bufferSize); if (newBuf == 0) { errorCode = XML_ERROR_NO_MEMORY; @@ -1761,6 +2042,8 @@ XML_GetBuffer(XML_Parser parser, int len) enum XML_Status XMLCALL XML_StopParser(XML_Parser parser, XML_Bool resumable) { + if (parser == NULL) + return XML_STATUS_ERROR; switch (ps_parsing) { case XML_SUSPENDED: if (resumable) { @@ -1793,6 +2076,8 @@ XML_ResumeParser(XML_Parser parser) { enum XML_Status result = XML_STATUS_OK; + if (parser == NULL) + return XML_STATUS_ERROR; if (ps_parsing != XML_SUSPENDED) { errorCode = XML_ERROR_NOT_SUSPENDED; return XML_STATUS_ERROR; @@ -1829,6 +2114,8 @@ XML_ResumeParser(XML_Parser parser) void XMLCALL XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) { + if (parser == NULL) + return; assert(status != NULL); *status = parser->m_parsingStatus; } @@ -1836,20 +2123,26 @@ XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) enum XML_Error XMLCALL XML_GetErrorCode(XML_Parser parser) { + if (parser == NULL) + return XML_ERROR_INVALID_ARGUMENT; return errorCode; } XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { + if (parser == NULL) + return -1; if (eventPtr) - return parseEndByteIndex - (parseEndPtr - eventPtr); + return (XML_Index)(parseEndByteIndex - (parseEndPtr - eventPtr)); return -1; } int XMLCALL XML_GetCurrentByteCount(XML_Parser parser) { + if (parser == NULL) + return 0; if (eventEndPtr && eventPtr) return (int)(eventEndPtr - eventPtr); return 0; @@ -1859,11 +2152,19 @@ const char * XMLCALL XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES + if (parser == NULL) + return NULL; if (eventPtr && buffer) { - *offset = (int)(eventPtr - buffer); - *size = (int)(bufferEnd - buffer); + if (offset != NULL) + *offset = (int)(eventPtr - buffer); + if (size != NULL) + *size = (int)(bufferEnd - buffer); return buffer; } +#else + (void)parser; + (void)offset; + (void)size; #endif /* defined XML_CONTEXT_BYTES */ return (char *) 0; } @@ -1871,6 +2172,8 @@ XML_GetInputContext(XML_Parser parser, int *offset, int *size) XML_Size XMLCALL XML_GetCurrentLineNumber(XML_Parser parser) { + if (parser == NULL) + return 0; if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; @@ -1881,6 +2184,8 @@ XML_GetCurrentLineNumber(XML_Parser parser) XML_Size XMLCALL XML_GetCurrentColumnNumber(XML_Parser parser) { + if (parser == NULL) + return 0; if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; @@ -1891,30 +2196,38 @@ XML_GetCurrentColumnNumber(XML_Parser parser) void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { - FREE(model); + if (parser != NULL) + FREE(model); } void * XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { + if (parser == NULL) + return NULL; return MALLOC(size); } void * XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { + if (parser == NULL) + return NULL; return REALLOC(ptr, size); } void XMLCALL XML_MemFree(XML_Parser parser, void *ptr) { - FREE(ptr); + if (parser != NULL) + FREE(ptr); } void XMLCALL XML_DefaultCurrent(XML_Parser parser) { + if (parser == NULL) + return; if (defaultHandler) { if (openInternalEntities) reportDefault(parser, @@ -2417,11 +2730,11 @@ doContent(XML_Parser parser, for (;;) { int bufSize; int convLen; - XmlConvert(enc, + const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); convLen = (int)(toPtr - (XML_Char *)tag->buf); - if (fromPtr == rawNameEnd) { + if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } @@ -2464,8 +2777,10 @@ doContent(XML_Parser parser, return XML_ERROR_NO_MEMORY; poolFinish(&tempPool); result = storeAtts(parser, enc, s, &name, &bindings); - if (result) + if (result != XML_ERROR_NONE) { + freeBindings(parser, bindings); return result; + } poolFinish(&tempPool); if (startElementHandler) { startElementHandler(handlerArg, name.str, (const XML_Char **)atts); @@ -2480,15 +2795,7 @@ doContent(XML_Parser parser, if (noElmHandlers && defaultHandler) reportDefault(parser, enc, s, next); poolClear(&tempPool); - while (bindings) { - BINDING *b = bindings; - if (endNamespaceDeclHandler) - endNamespaceDeclHandler(handlerArg, b->prefix->name); - bindings = bindings->nextTagBinding; - b->nextTagBinding = freeBindingList; - freeBindingList = b; - b->prefix->binding = b->prevPrefixBinding; - } + freeBindings(parser, bindings); } if (tagLevel == 0) return epilogProcessor(parser, next, end, nextPtr); @@ -2642,11 +2949,11 @@ doContent(XML_Parser parser, if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; charDataHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } @@ -2686,6 +2993,29 @@ doContent(XML_Parser parser, /* not reached */ } +/* This function does not call free() on the allocated memory, merely + * moving it to the parser's freeBindingList where it can be freed or + * reused as appropriate. + */ +static void +freeBindings(XML_Parser parser, BINDING *bindings) +{ + while (bindings) { + BINDING *b = bindings; + + /* startNamespaceDeclHandler will have been called for this + * binding in addBindings(), so call the end handler now. + */ + if (endNamespaceDeclHandler) + endNamespaceDeclHandler(handlerArg, b->prefix->name); + + bindings = bindings->nextTagBinding; + b->nextTagBinding = freeBindingList; + freeBindingList = b; + b->prefix->binding = b->prevPrefixBinding; + } +} + /* Precondition: all arguments must be non-NULL; Purpose: - normalize attributes @@ -2910,29 +3240,41 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; const BINDING *b; - unsigned long uriHash = hash_secret_salt; + unsigned long uriHash; + struct siphash sip_state; + struct sipkey sip_key; + + copy_salt_to_sipkey(parser, &sip_key); + sip24_init(&sip_state, &sip_key); + ((XML_Char *)s)[-1] = 0; /* clear flag */ id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + if (!id || !id->prefix) + return XML_ERROR_NO_MEMORY; b = id->prefix->binding; if (!b) return XML_ERROR_UNBOUND_PREFIX; - /* as we expand the name we also calculate its hash value */ for (j = 0; j < b->uriLen; j++) { const XML_Char c = b->uri[j]; if (!poolAppendChar(&tempPool, c)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } + + sip24_update(&sip_state, b->uri, b->uriLen * sizeof(XML_Char)); + while (*s++ != XML_T(ASCII_COLON)) ; + + sip24_update(&sip_state, s, keylen(s) * sizeof(XML_Char)); + do { /* copies null terminator */ - const XML_Char c = *s; if (!poolAppendChar(&tempPool, *s)) return XML_ERROR_NO_MEMORY; - uriHash = CHAR_HASH(uriHash, c); } while (*s++); + uriHash = (unsigned long)sip24_final(&sip_state); + { /* Check hash table for duplicate of expanded name (uriName). Derived from code in lookup(parser, HASH_TABLE *table, ...). */ @@ -3250,11 +3592,11 @@ doCdataSection(XML_Parser parser, if (MUST_CONVERT(enc, s)) { for (;;) { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + const enum XML_Convert_Result convert_res = XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = next; charDataHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); - if (s == next) + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; *eventPP = s; } @@ -3646,6 +3988,14 @@ entityValueInitProcessor(XML_Parser parser, *nextPtr = next; return XML_ERROR_NONE; } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "internalEventEndPtr = NULL; textStart = (char *)entity->textPtr; textEnd = (char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { @@ -4867,6 +5219,8 @@ internalEntityProcessor(XML_Parser parser, entity = openEntity->entity; textStart = ((char *)entity->textPtr) + entity->processed; textEnd = (char *)(entity->textPtr + entity->textLen); + /* Set a safe default value in case 'next' does not get set */ + next = textStart; #ifdef XML_DTD if (entity->is_param) { @@ -4913,9 +5267,9 @@ internalEntityProcessor(XML_Parser parser, static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, - const char *s, - const char *end, - const char **nextPtr) + const char *UNUSED_P(s), + const char *UNUSED_P(end), + const char **UNUSED_P(nextPtr)) { return errorCode; } @@ -5331,6 +5685,7 @@ reportDefault(XML_Parser parser, const ENCODING *enc, const char *s, const char *end) { if (MUST_CONVERT(enc, s)) { + enum XML_Convert_Result convert_res; const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -5343,11 +5698,11 @@ reportDefault(XML_Parser parser, const ENCODING *enc, } do { ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); + convert_res = XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; - } while (s != end); + } while ((convert_res != XML_CONVERT_COMPLETED) && (convert_res != XML_CONVERT_INPUT_INCOMPLETE)); } else defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); @@ -5477,6 +5832,8 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, return NULL; id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); + if (!id->prefix) + return NULL; if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); else @@ -5824,7 +6181,6 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_H newE->defaultAtts = (DEFAULT_ATTRIBUTE *) ms->malloc_fcn(oldE->nDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (!newE->defaultAtts) { - ms->free_fcn(newE); return 0; } } @@ -5959,13 +6315,32 @@ keyeq(KEY s1, KEY s2) return XML_FALSE; } +static size_t +keylen(KEY s) +{ + size_t len = 0; + for (; *s; s++, len++); + return len; +} + +static void +copy_salt_to_sipkey(XML_Parser parser, struct sipkey * key) +{ + key->k[0] = 0; + key->k[1] = get_hash_secret_salt(parser); +} + static unsigned long FASTCALL hash(XML_Parser parser, KEY s) { - unsigned long h = hash_secret_salt; - while (*s) - h = CHAR_HASH(h, *s++); - return h; + struct siphash state; + struct sipkey key; + (void)sip_tobin; + (void)sip24_valid; + copy_salt_to_sipkey(parser, &key); + sip24_init(&state, &key); + sip24_update(&state, s, keylen(s) * sizeof(XML_Char)); + return (unsigned long)sip24_final(&state); } static NAMED * @@ -6150,8 +6525,8 @@ poolAppend(STRING_POOL *pool, const ENCODING *enc, if (!pool->ptr && !poolGrow(pool)) return NULL; for (;;) { - XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); - if (ptr == end) + const enum XML_Convert_Result convert_res = XmlConvert(enc, &ptr, end, (ICHAR **)&(pool->ptr), (ICHAR *)pool->end); + if ((convert_res == XML_CONVERT_COMPLETED) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) break; if (!poolGrow(pool)) return NULL; @@ -6208,6 +6583,35 @@ poolStoreString(STRING_POOL *pool, const ENCODING *enc, return pool->start; } +static size_t +poolBytesToAllocateFor(int blockSize) +{ + /* Unprotected math would be: + ** return offsetof(BLOCK, s) + blockSize * sizeof(XML_Char); + ** + ** Detect overflow, avoiding _signed_ overflow undefined behavior + ** For a + b * c we check b * c in isolation first, so that addition of a + ** on top has no chance of making us accept a small non-negative number + */ + const size_t stretch = sizeof(XML_Char); /* can be 4 bytes */ + + if (blockSize <= 0) + return 0; + + if (blockSize > (int)(INT_MAX / stretch)) + return 0; + + { + const int stretchedBlockSize = blockSize * (int)stretch; + const int bytesToAllocate = (int)( + offsetof(BLOCK, s) + (unsigned)stretchedBlockSize); + if (bytesToAllocate < 0) + return 0; + + return (size_t)bytesToAllocate; + } +} + static XML_Bool FASTCALL poolGrow(STRING_POOL *pool) { @@ -6235,11 +6639,19 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (int)(pool->end - pool->start)*2; - BLOCK *temp = (BLOCK *) - pool->mem->realloc_fcn(pool->blocks, - (offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char))); + BLOCK *temp; + int blockSize = (int)((unsigned)(pool->end - pool->start)*2U); + size_t bytesToAllocate; + + if (blockSize < 0) + return XML_FALSE; + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + temp = (BLOCK *) + pool->mem->realloc_fcn(pool->blocks, (unsigned)bytesToAllocate); if (temp == NULL) return XML_FALSE; pool->blocks = temp; @@ -6251,12 +6663,26 @@ poolGrow(STRING_POOL *pool) else { BLOCK *tem; int blockSize = (int)(pool->end - pool->start); + size_t bytesToAllocate; + + if (blockSize < 0) + return XML_FALSE; + if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; - else + else { + /* Detect overflow, avoiding _signed_ overflow undefined behavior */ + if ((int)((unsigned)blockSize * 2U) < 0) { + return XML_FALSE; + } blockSize *= 2; - tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char)); + } + + bytesToAllocate = poolBytesToAllocateFor(blockSize); + if (bytesToAllocate == 0) + return XML_FALSE; + + tem = (BLOCK *)pool->mem->malloc_fcn(bytesToAllocate); if (!tem) return XML_FALSE; tem->size = blockSize; diff --git a/contrib/hbexpat/3rd/expat/xmlrole.c b/contrib/hbexpat/3rd/expat/xmlrole.c index 985d356152..e3c4146fc4 100644 --- a/contrib/hbexpat/3rd/expat/xmlrole.c +++ b/contrib/hbexpat/3rd/expat/xmlrole.c @@ -6,19 +6,13 @@ #ifdef HARBOUR_CONF #include "_hbconf.h" -#elif defined(COMPILED_FROM_DSP) +#elif defined(_WIN32) #include "winconfi.h" -#elif defined(MACOS_CLASSIC) -#include "macconfi.h" -#elif defined(__amigaos__) -#include "amigacon.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" #else #ifdef HAVE_EXPAT_CONFIG_H #include #endif -#endif /* ndef COMPILED_FROM_DSP */ +#endif /* ndef _WIN32 */ #include "expat_ex.h" #include "internal.h" @@ -197,9 +191,9 @@ prolog1(PROLOG_STATE *state, static int PTRCALL prolog2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -218,9 +212,9 @@ prolog2(PROLOG_STATE *state, static int PTRCALL doctype0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -266,9 +260,9 @@ doctype1(PROLOG_STATE *state, static int PTRCALL doctype2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -283,9 +277,9 @@ doctype2(PROLOG_STATE *state, static int PTRCALL doctype3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -300,9 +294,9 @@ doctype3(PROLOG_STATE *state, static int PTRCALL doctype4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -320,9 +314,9 @@ doctype4(PROLOG_STATE *state, static int PTRCALL doctype5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -439,9 +433,9 @@ externalSubset1(PROLOG_STATE *state, static int PTRCALL entity0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -459,9 +453,9 @@ entity0(PROLOG_STATE *state, static int PTRCALL entity1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -504,9 +498,9 @@ entity2(PROLOG_STATE *state, static int PTRCALL entity3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -521,9 +515,9 @@ entity3(PROLOG_STATE *state, static int PTRCALL entity4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -561,9 +555,9 @@ entity5(PROLOG_STATE *state, static int PTRCALL entity6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -607,9 +601,9 @@ entity7(PROLOG_STATE *state, static int PTRCALL entity8(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -624,9 +618,9 @@ entity8(PROLOG_STATE *state, static int PTRCALL entity9(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -641,9 +635,9 @@ entity9(PROLOG_STATE *state, static int PTRCALL entity10(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -658,9 +652,9 @@ entity10(PROLOG_STATE *state, static int PTRCALL notation0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -699,9 +693,9 @@ notation1(PROLOG_STATE *state, static int PTRCALL notation2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -716,9 +710,9 @@ notation2(PROLOG_STATE *state, static int PTRCALL notation3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -734,9 +728,9 @@ notation3(PROLOG_STATE *state, static int PTRCALL notation4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -755,9 +749,9 @@ notation4(PROLOG_STATE *state, static int PTRCALL attlist0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -773,9 +767,9 @@ attlist0(PROLOG_STATE *state, static int PTRCALL attlist1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -835,9 +829,9 @@ attlist2(PROLOG_STATE *state, static int PTRCALL attlist3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -854,9 +848,9 @@ attlist3(PROLOG_STATE *state, static int PTRCALL attlist4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -874,9 +868,9 @@ attlist4(PROLOG_STATE *state, static int PTRCALL attlist5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -891,9 +885,9 @@ attlist5(PROLOG_STATE *state, static int PTRCALL attlist6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -908,9 +902,9 @@ attlist6(PROLOG_STATE *state, static int PTRCALL attlist7(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -969,9 +963,9 @@ attlist8(PROLOG_STATE *state, static int PTRCALL attlist9(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -986,9 +980,9 @@ attlist9(PROLOG_STATE *state, static int PTRCALL element0(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1074,9 +1068,9 @@ element2(PROLOG_STATE *state, static int PTRCALL element3(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1099,9 +1093,9 @@ element3(PROLOG_STATE *state, static int PTRCALL element4(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1117,9 +1111,9 @@ element4(PROLOG_STATE *state, static int PTRCALL element5(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1138,9 +1132,9 @@ element5(PROLOG_STATE *state, static int PTRCALL element6(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1168,9 +1162,9 @@ element6(PROLOG_STATE *state, static int PTRCALL element7(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1242,9 +1236,9 @@ condSect0(PROLOG_STATE *state, static int PTRCALL condSect1(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1260,9 +1254,9 @@ condSect1(PROLOG_STATE *state, static int PTRCALL condSect2(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1279,9 +1273,9 @@ condSect2(PROLOG_STATE *state, static int PTRCALL declClose(PROLOG_STATE *state, int tok, - const char *ptr, - const char *end, - const ENCODING *enc) + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { switch (tok) { case XML_TOK_PROLOG_S: @@ -1294,11 +1288,11 @@ declClose(PROLOG_STATE *state, } static int PTRCALL -error(PROLOG_STATE *state, - int tok, - const char *ptr, - const char *end, - const ENCODING *enc) +error(PROLOG_STATE *UNUSED_P(state), + int UNUSED_P(tok), + const char *UNUSED_P(ptr), + const char *UNUSED_P(end), + const ENCODING *UNUSED_P(enc)) { return XML_ROLE_NONE; } diff --git a/contrib/hbexpat/3rd/expat/xmltok.c b/contrib/hbexpat/3rd/expat/xmltok.c index 4adb8fbede..3edb573d0b 100644 --- a/contrib/hbexpat/3rd/expat/xmltok.c +++ b/contrib/hbexpat/3rd/expat/xmltok.c @@ -6,19 +6,13 @@ #ifdef HARBOUR_CONF #include "_hbconf.h" -#elif defined(COMPILED_FROM_DSP) +#elif defined(_WIN32) #include "winconfi.h" -#elif defined(MACOS_CLASSIC) -#include "macconfi.h" -#elif defined(__amigaos__) -#include "amigacon.h" -#elif defined(__WATCOMC__) -#include "watcomconfig.h" #else #ifdef HAVE_EXPAT_CONFIG_H #include #endif -#endif /* ndef COMPILED_FROM_DSP */ +#endif /* ndef _WIN32 */ #include "expat_ex.h" #include "internal.h" @@ -48,7 +42,7 @@ #define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) #define UCS2_GET_NAMING(pages, hi, lo) \ - (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1 << ((lo) & 0x1F))) + (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F))) /* A 2 byte UTF-8 representation splits the characters 11 bits between the bottom 5 and 6 bits of the bytes. We need 8 bits to index into @@ -58,7 +52,7 @@ (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ + ((((byte)[0]) & 3) << 1) \ + ((((byte)[1]) >> 5) & 1)] \ - & (1 << (((byte)[1]) & 0x1F))) + & (1u << (((byte)[1]) & 0x1F))) /* A 3 byte UTF-8 representation splits the characters 16 bits between the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index @@ -71,7 +65,7 @@ << 3) \ + ((((byte)[1]) & 3) << 1) \ + ((((byte)[2]) >> 5) & 1)] \ - & (1 << (((byte)[2]) & 0x1F))) + & (1u << (((byte)[2]) & 0x1F))) #define UTF8_GET_NAMING(pages, p, n) \ ((n) == 2 \ @@ -124,19 +118,19 @@ ((*p) == 0xF4 ? (p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) static int PTRFASTCALL -isNever(const ENCODING *enc, const char *p) +isNever(const ENCODING *UNUSED_P(enc), const char *UNUSED_P(p)) { return 0; } static int PTRFASTCALL -utf8_isName2(const ENCODING *enc, const char *p) +utf8_isName2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isName3(const ENCODING *enc, const char *p) +utf8_isName3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); } @@ -144,13 +138,13 @@ utf8_isName3(const ENCODING *enc, const char *p) #define utf8_isName4 isNever static int PTRFASTCALL -utf8_isNmstrt2(const ENCODING *enc, const char *p) +utf8_isNmstrt2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); } static int PTRFASTCALL -utf8_isNmstrt3(const ENCODING *enc, const char *p) +utf8_isNmstrt3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); } @@ -158,19 +152,19 @@ utf8_isNmstrt3(const ENCODING *enc, const char *p) #define utf8_isNmstrt4 isNever static int PTRFASTCALL -utf8_isInvalid2(const ENCODING *enc, const char *p) +utf8_isInvalid2(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID2((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid3(const ENCODING *enc, const char *p) +utf8_isInvalid3(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID3((const unsigned char *)p); } static int PTRFASTCALL -utf8_isInvalid4(const ENCODING *enc, const char *p) +utf8_isInvalid4(const ENCODING *UNUSED_P(enc), const char *p) { return UTF8_INVALID4((const unsigned char *)p); } @@ -224,16 +218,16 @@ struct normal_encoding { E ## isInvalid3, \ E ## isInvalid4 -#define NULL_VTABLE() \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL +#define NULL_VTABLE \ + /* isName2 */ NULL, \ + /* isName3 */ NULL, \ + /* isName4 */ NULL, \ + /* isNmstrt2 */ NULL, \ + /* isNmstrt3 */ NULL, \ + /* isNmstrt4 */ NULL, \ + /* isInvalid2 */ NULL, \ + /* isInvalid3 */ NULL, \ + /* isInvalid4 */ NULL static int FASTCALL checkCharRefNumber(int); @@ -331,39 +325,89 @@ enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ UTF8_cval4 = 0xf0 }; -static void PTRCALL -utf8_toUtf8(const ENCODING *enc, +void +align_limit_to_full_utf8_characters(const char * from, const char ** fromLimRef) +{ + const char * fromLim = *fromLimRef; + size_t walked = 0; + for (; fromLim > from; fromLim--, walked++) { + const unsigned char prev = (unsigned char)fromLim[-1]; + if ((prev & 0xf8u) == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ + if (walked + 1 >= 4) { + fromLim += 4 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xf0u) == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ + if (walked + 1 >= 3) { + fromLim += 3 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0xe0u) == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ + if (walked + 1 >= 2) { + fromLim += 2 - 1; + break; + } else { + walked = 0; + } + } else if ((prev & 0x80u) == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ + break; + } + } + *fromLimRef = fromLim; +} + +static enum XML_Convert_Result PTRCALL +utf8_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { char *to; const char *from; - if (fromLim - *fromP > toLim - *toP) { - /* Avoid copying partial characters. */ - for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) - if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) - break; - } - for (to = *toP, from = *fromP; from != fromLim; from++, to++) + const char *fromLimInitial = fromLim; + + /* Avoid copying partial characters. */ + align_limit_to_full_utf8_characters(*fromP, &fromLim); + + for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) *to = *from; *fromP = from; *toP = to; + + if (fromLim < fromLimInitial) + return XML_CONVERT_INPUT_INCOMPLETE; + else if ((to == toLim) && (from < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL utf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; unsigned short *to = *toP; const char *from = *fromP; - while (from != fromLim && to != toLim) { + while (from < fromLim && to < toLim) { switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { case BT_LEAD2: + if (fromLim - from < 2) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); from += 2; break; case BT_LEAD3: + if (fromLim - from < 3) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); from += 3; @@ -371,8 +415,14 @@ utf8_toUtf16(const ENCODING *enc, case BT_LEAD4: { unsigned long n; - if (to + 1 == toLim) + if (toLim - to < 2) { + res = XML_CONVERT_OUTPUT_EXHAUSTED; goto after; + } + if (fromLim - from < 4) { + res = XML_CONVERT_INPUT_INCOMPLETE; + goto after; + } n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); n -= 0x10000; @@ -387,9 +437,12 @@ utf8_toUtf16(const ENCODING *enc, break; } } + if (from < fromLim) + res = XML_CONVERT_OUTPUT_EXHAUSTED; after: *fromP = from; *toP = to; + return res; } #ifdef XML_NS @@ -438,38 +491,43 @@ static const struct normal_encoding internal_utf8_encoding = { STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) }; -static void PTRCALL -latin1_toUtf8(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +latin1_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { for (;;) { unsigned char c; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; c = (unsigned char)**fromP; if (c & 0x80) { if (toLim - *toP < 2) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = (char)((c >> 6) | UTF8_cval2); *(*toP)++ = (char)((c & 0x3f) | 0x80); (*fromP)++; } else { if (*toP == toLim) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; *(*toP)++ = *(*fromP)++; } } } -static void PTRCALL -latin1_toUtf16(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +latin1_toUtf16(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = (unsigned char)*(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -480,7 +538,7 @@ static const struct normal_encoding latin1_encoding_ns = { #include "asciitab.h" #include "latin1ta.h" }, - STANDARD_VTABLE(sb_) NULL_VTABLE() + STANDARD_VTABLE(sb_) NULL_VTABLE }; #endif @@ -493,16 +551,21 @@ static const struct normal_encoding latin1_encoding = { #undef BT_COLON #include "latin1ta.h" }, - STANDARD_VTABLE(sb_) NULL_VTABLE() + STANDARD_VTABLE(sb_) NULL_VTABLE }; -static void PTRCALL -ascii_toUtf8(const ENCODING *enc, +static enum XML_Convert_Result PTRCALL +ascii_toUtf8(const ENCODING *UNUSED_P(enc), const char **fromP, const char *fromLim, char **toP, const char *toLim) { - while (*fromP != fromLim && *toP != toLim) + while (*fromP < fromLim && *toP < toLim) *(*toP)++ = *(*fromP)++; + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } #ifdef XML_NS @@ -513,7 +576,7 @@ static const struct normal_encoding ascii_encoding_ns = { #include "asciitab.h" /* BT_NONXML == 0 */ }, - STANDARD_VTABLE(sb_) NULL_VTABLE() + STANDARD_VTABLE(sb_) NULL_VTABLE }; #endif @@ -526,7 +589,7 @@ static const struct normal_encoding ascii_encoding = { #undef BT_COLON /* BT_NONXML == 0 */ }, - STANDARD_VTABLE(sb_) NULL_VTABLE() + STANDARD_VTABLE(sb_) NULL_VTABLE }; static int PTRFASTCALL @@ -549,13 +612,14 @@ unicode_byte_type(char hi, char lo) } #define DEFINE_UTF16_TO_UTF8(E) \ -static void PTRCALL \ -E ## toUtf8(const ENCODING *enc, \ +static enum XML_Convert_Result PTRCALL \ +E ## toUtf8(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ char **toP, const char *toLim) \ { \ - const char *from; \ - for (from = *fromP; from != fromLim; from += 2) { \ + const char *from = *fromP; \ + fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ + for (; from < fromLim; from += 2) { \ int plane; \ unsigned char lo2; \ unsigned char lo = GET_LO(from); \ @@ -565,7 +629,7 @@ E ## toUtf8(const ENCODING *enc, \ if (lo < 0x80) { \ if (*toP == toLim) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = lo; \ break; \ @@ -575,7 +639,7 @@ E ## toUtf8(const ENCODING *enc, \ case 0x4: case 0x5: case 0x6: case 0x7: \ if (toLim - *toP < 2) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ *(*toP)++ = ((lo & 0x3f) | 0x80); \ @@ -583,7 +647,7 @@ E ## toUtf8(const ENCODING *enc, \ default: \ if (toLim - *toP < 3) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ } \ /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ @@ -593,7 +657,11 @@ E ## toUtf8(const ENCODING *enc, \ case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ if (toLim - *toP < 4) { \ *fromP = from; \ - return; \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + } \ + if (fromLim - from < 4) { \ + *fromP = from; \ + return XML_CONVERT_INPUT_INCOMPLETE; \ } \ plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ *(*toP)++ = ((plane >> 2) | UTF8_cval4); \ @@ -609,20 +677,32 @@ E ## toUtf8(const ENCODING *enc, \ } \ } \ *fromP = from; \ + if (from < fromLim) \ + return XML_CONVERT_INPUT_INCOMPLETE; \ + else \ + return XML_CONVERT_COMPLETED; \ } #define DEFINE_UTF16_TO_UTF16(E) \ -static void PTRCALL \ -E ## toUtf16(const ENCODING *enc, \ +static enum XML_Convert_Result PTRCALL \ +E ## toUtf16(const ENCODING *UNUSED_P(enc), \ const char **fromP, const char *fromLim, \ unsigned short **toP, const unsigned short *toLim) \ { \ + enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ + fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ /* Avoid copying first half only of surrogate */ \ if (fromLim - *fromP > ((toLim - *toP) << 1) \ - && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ + && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ fromLim -= 2; \ - for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ + res = XML_CONVERT_INPUT_INCOMPLETE; \ + } \ + for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ + if ((*toP == toLim) && (*fromP < fromLim)) \ + return XML_CONVERT_OUTPUT_EXHAUSTED; \ + else \ + return res; \ } #define SET2(ptr, ch) \ @@ -739,7 +819,7 @@ static const struct normal_encoding little2_encoding_ns = { #include "asciitab.h" #include "latin1ta.h" }, - STANDARD_VTABLE(little2_) NULL_VTABLE() + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -758,7 +838,7 @@ static const struct normal_encoding little2_encoding = { #undef BT_COLON #include "latin1ta.h" }, - STANDARD_VTABLE(little2_) NULL_VTABLE() + STANDARD_VTABLE(little2_) NULL_VTABLE }; #if BYTEORDER != 4321 @@ -771,7 +851,7 @@ static const struct normal_encoding internal_little2_encoding_ns = { #include "iasciita.h" #include "latin1ta.h" }, - STANDARD_VTABLE(little2_) NULL_VTABLE() + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -784,7 +864,7 @@ static const struct normal_encoding internal_little2_encoding = { #undef BT_COLON #include "latin1ta.h" }, - STANDARD_VTABLE(little2_) NULL_VTABLE() + STANDARD_VTABLE(little2_) NULL_VTABLE }; #endif @@ -880,7 +960,7 @@ static const struct normal_encoding big2_encoding_ns = { #include "asciitab.h" #include "latin1ta.h" }, - STANDARD_VTABLE(big2_) NULL_VTABLE() + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -899,7 +979,7 @@ static const struct normal_encoding big2_encoding = { #undef BT_COLON #include "latin1ta.h" }, - STANDARD_VTABLE(big2_) NULL_VTABLE() + STANDARD_VTABLE(big2_) NULL_VTABLE }; #if BYTEORDER != 1234 @@ -912,7 +992,7 @@ static const struct normal_encoding internal_big2_encoding_ns = { #include "iasciita.h" #include "latin1ta.h" }, - STANDARD_VTABLE(big2_) NULL_VTABLE() + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -925,7 +1005,7 @@ static const struct normal_encoding internal_big2_encoding = { #undef BT_COLON #include "latin1ta.h" }, - STANDARD_VTABLE(big2_) NULL_VTABLE() + STANDARD_VTABLE(big2_) NULL_VTABLE }; #endif @@ -951,7 +1031,7 @@ streqci(const char *s1, const char *s2) } static void PTRCALL -initUpdatePosition(const ENCODING *enc, const char *ptr, +initUpdatePosition(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, POSITION *pos) { normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); @@ -1301,7 +1381,7 @@ unknown_isInvalid(const ENCODING *enc, const char *p) return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim) @@ -1312,21 +1392,21 @@ unknown_toUtf8(const ENCODING *enc, const char *utf8; int n; if (*fromP == fromLim) - break; + return XML_CONVERT_COMPLETED; utf8 = uenc->utf8[(unsigned char)**fromP]; n = *utf8++; if (n == 0) { int c = uenc->convert(uenc->userData, *fromP); n = XmlUtf8Encode(c, buf); if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; utf8 = buf; *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] - (BT_LEAD2 - 2)); } else { if (n > toLim - *toP) - break; + return XML_CONVERT_OUTPUT_EXHAUSTED; (*fromP)++; } do { @@ -1335,13 +1415,13 @@ unknown_toUtf8(const ENCODING *enc, } } -static void PTRCALL +static enum XML_Convert_Result PTRCALL unknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, const unsigned short *toLim) { const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); - while (*fromP != fromLim && *toP != toLim) { + while (*fromP < fromLim && *toP < toLim) { unsigned short c = uenc->utf16[(unsigned char)**fromP]; if (c == 0) { c = (unsigned short) @@ -1353,6 +1433,11 @@ unknown_toUtf16(const ENCODING *enc, (*fromP)++; *(*toP)++ = c; } + + if ((*toP == toLim) && (*fromP < fromLim)) + return XML_CONVERT_OUTPUT_EXHAUSTED; + else + return XML_CONVERT_COMPLETED; } ENCODING * @@ -1516,7 +1601,7 @@ initScan(const ENCODING * const *encodingTable, { const ENCODING **encPtr; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; encPtr = enc->encPtr; if (ptr + 1 == end) { diff --git a/contrib/hbexpat/3rd/expat/xmltok.h b/contrib/hbexpat/3rd/expat/xmltok.h index ca867aa6b4..752007e8b9 100644 --- a/contrib/hbexpat/3rd/expat/xmltok.h +++ b/contrib/hbexpat/3rd/expat/xmltok.h @@ -130,6 +130,12 @@ typedef int (PTRCALL *SCANNER)(const ENCODING *, const char *, const char **); +enum XML_Convert_Result { + XML_CONVERT_COMPLETED = 0, + XML_CONVERT_INPUT_INCOMPLETE = 1, + XML_CONVERT_OUTPUT_EXHAUSTED = 2 /* and therefore potentially input remaining as well */ +}; + struct encoding { SCANNER scanners[XML_N_STATES]; SCANNER literalScanners[XML_N_LITERAL_TYPES]; @@ -158,12 +164,12 @@ struct encoding { const char *ptr, const char *end, const char **badPtr); - void (PTRCALL *utf8Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf8Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, char **toP, const char *toLim); - void (PTRCALL *utf16Convert)(const ENCODING *enc, + enum XML_Convert_Result (PTRCALL *utf16Convert)(const ENCODING *enc, const char **fromP, const char *fromLim, unsigned short **toP, diff --git a/contrib/hbexpat/3rd/expat/xmltok_i.c b/contrib/hbexpat/3rd/expat/xmltok_i.c index 9c2895b877..5f779c0571 100644 --- a/contrib/hbexpat/3rd/expat/xmltok_i.c +++ b/contrib/hbexpat/3rd/expat/xmltok_i.c @@ -87,27 +87,45 @@ #define PREFIX(ident) ident #endif + +#define HAS_CHARS(enc, ptr, end, count) \ + (end - ptr >= count * MINBPC(enc)) + +#define HAS_CHAR(enc, ptr, end) \ + HAS_CHARS(enc, ptr, end, 1) + +#define REQUIRE_CHARS(enc, ptr, end, count) \ + { \ + if (! HAS_CHARS(enc, ptr, end, count)) { \ + return XML_TOK_PARTIAL; \ + } \ + } + +#define REQUIRE_CHAR(enc, ptr, end) \ + REQUIRE_CHARS(enc, ptr, end, 1) + + /* ptr points to character following " */ switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) { case BT_S: case BT_CR: case BT_LF: case BT_PERCNT: @@ -175,7 +191,7 @@ PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, +PREFIX(checkPiTarget)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, int *tokPtr) { int upper = 0; @@ -225,15 +241,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, { int tok; const char *target = ptr; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -242,13 +257,12 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_QUEST: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -266,8 +280,7 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, return XML_TOK_INVALID; } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr + MINBPC(enc); return tok; @@ -282,15 +295,14 @@ PREFIX(scanPi)(const ENCODING *enc, const char *ptr, } static int PTRCALL -PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, +PREFIX(scanCdataSection)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end, const char **nextTokPtr) { static const char CDATA_LSQB[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, ASCII_LSQB }; int i; /* CDATA[ */ - if (end - ptr < 6 * MINBPC(enc)) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 6); for (i = 0; i < 6; i++, ptr += MINBPC(enc)) { if (!CHAR_MATCHES(enc, ptr, CDATA_LSQB[i])) { *nextTokPtr = ptr; @@ -305,7 +317,7 @@ static int PTRCALL PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -319,13 +331,11 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, switch (BYTE_TYPE(enc, ptr)) { case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); break; @@ -334,8 +344,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, return XML_TOK_CDATA_SECT_CLOSE; case BT_CR: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); *nextTokPtr = ptr; @@ -348,7 +357,7 @@ PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -383,19 +392,18 @@ static int PTRCALL PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: case BT_LF: break; @@ -432,7 +440,7 @@ static int PTRCALL PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -441,7 +449,7 @@ PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -464,7 +472,7 @@ static int PTRCALL PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr != end) { + if (HAS_CHAR(enc, ptr, end)) { if (CHAR_MATCHES(enc, ptr, ASCII_x)) return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); switch (BYTE_TYPE(enc, ptr)) { @@ -474,7 +482,7 @@ PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, *nextTokPtr = ptr; return XML_TOK_INVALID; } - for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) { + for (ptr += MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: break; @@ -496,8 +504,7 @@ static int PTRCALL PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_NUM: @@ -506,7 +513,7 @@ PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -529,7 +536,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon = 0; #endif - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -540,8 +547,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -555,8 +561,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, int t; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == BT_EQUALS) break; @@ -579,8 +584,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, #endif for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); open = BYTE_TYPE(enc, ptr); if (open == BT_QUOT || open == BT_APOS) break; @@ -598,8 +602,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* in attribute value */ for (;;) { int t; - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); t = BYTE_TYPE(enc, ptr); if (t == open) break; @@ -624,8 +627,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, } } ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_CR: @@ -642,8 +644,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, /* ptr points to closing quote */ for (;;) { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_CR: case BT_LF: @@ -655,8 +656,7 @@ PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -688,13 +688,12 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, #ifdef XML_NS int hadColon; #endif - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_EXCL: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_MINUS: return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -716,7 +715,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, hadColon = 0; #endif /* we have a start-tag */ - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) #ifdef XML_NS @@ -727,8 +726,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, } hadColon = 1; ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: @@ -740,7 +738,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_CR: case BT_LF: { ptr += MINBPC(enc); - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_GT: @@ -765,8 +763,7 @@ PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end, case BT_SOL: sol: ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { *nextTokPtr = ptr; return XML_TOK_INVALID; @@ -785,7 +782,7 @@ static int PTRCALL PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -803,7 +800,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr); case BT_CR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -814,12 +811,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_DATA_NEWLINE; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_RSQB)) break; ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_RSQB; if (!CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr -= MINBPC(enc); @@ -832,7 +829,7 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); break; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -845,12 +842,12 @@ PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end, LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) #undef LEAD_CASE case BT_RSQB: - if (ptr + MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 2)) { if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_RSQB)) { ptr += MINBPC(enc); break; } - if (ptr + 2*MINBPC(enc) != end) { + if (HAS_CHARS(enc, ptr, end, 3)) { if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), ASCII_GT)) { ptr += MINBPC(enc); break; @@ -884,8 +881,7 @@ static int PTRCALL PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) case BT_S: case BT_LF: case BT_CR: case BT_PERCNT: @@ -895,7 +891,7 @@ PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_SEMI: @@ -913,15 +909,14 @@ static int PTRCALL PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) default: *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_CR: case BT_LF: case BT_S: @@ -941,7 +936,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { int t = BYTE_TYPE(enc, ptr); switch (t) { INVALID_CASES(ptr, nextTokPtr) @@ -950,7 +945,7 @@ PREFIX(scanLit)(int open, const ENCODING *enc, ptr += MINBPC(enc); if (t != open) break; - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_LITERAL; *nextTokPtr = ptr; switch (BYTE_TYPE(enc, ptr)) { @@ -973,7 +968,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { int tok; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; if (MINBPC(enc) > 1) { size_t n = end - ptr; @@ -992,8 +987,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_LT: { ptr += MINBPC(enc); - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); switch (BYTE_TYPE(enc, ptr)) { case BT_EXCL: return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr); @@ -1021,7 +1015,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, case BT_S: case BT_LF: for (;;) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) break; switch (BYTE_TYPE(enc, ptr)) { case BT_S: case BT_LF: @@ -1048,11 +1042,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_BRACKET; case BT_RSQB: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_BRACKET; if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if (ptr + MINBPC(enc) == end) - return XML_TOK_PARTIAL; + REQUIRE_CHARS(enc, ptr, end, 2); if (CHAR_MATCHES(enc, ptr + MINBPC(enc), ASCII_GT)) { *nextTokPtr = ptr + 2*MINBPC(enc); return XML_TOK_COND_SECT_CLOSE; @@ -1065,7 +1058,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, return XML_TOK_OPEN_PAREN; case BT_RPAR: ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return -XML_TOK_CLOSE_PAREN; switch (BYTE_TYPE(enc, ptr)) { case BT_AST: @@ -1141,7 +1134,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, *nextTokPtr = ptr; return XML_TOK_INVALID; } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) case BT_GT: case BT_RPAR: case BT_COMMA: @@ -1154,8 +1147,7 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end, ptr += MINBPC(enc); switch (tok) { case XML_TOK_NAME: - if (ptr == end) - return XML_TOK_PARTIAL; + REQUIRE_CHAR(enc, ptr, end); tok = XML_TOK_PREFIXED_NAME; switch (BYTE_TYPE(enc, ptr)) { CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) @@ -1204,10 +1196,12 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) + return XML_TOK_PARTIAL; start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1232,7 +1226,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1262,10 +1256,12 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end, const char **nextTokPtr) { const char *start; - if (ptr == end) + if (ptr >= end) return XML_TOK_NONE; + else if (! HAS_CHAR(enc, ptr, end)) + return XML_TOK_PARTIAL; start = ptr; - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: ptr += n; break; @@ -1294,7 +1290,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, case BT_CR: if (ptr == start) { ptr += MINBPC(enc); - if (ptr == end) + if (! HAS_CHAR(enc, ptr, end)) return XML_TOK_TRAILING_CR; if (BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); @@ -1326,15 +1322,15 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, end = ptr + n; } } - while (ptr != end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { INVALID_CASES(ptr, nextTokPtr) case BT_LT: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_EXCL)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_LSQB)) { ++level; ptr += MINBPC(enc); @@ -1342,11 +1338,11 @@ PREFIX(ignoreSectionTok)(const ENCODING *enc, const char *ptr, } break; case BT_RSQB: - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_RSQB)) { - if ((ptr += MINBPC(enc)) == end) - return XML_TOK_PARTIAL; + ptr += MINBPC(enc); + REQUIRE_CHAR(enc, ptr, end); if (CHAR_MATCHES(enc, ptr, ASCII_GT)) { ptr += MINBPC(enc); if (level == 0) { @@ -1373,7 +1369,7 @@ PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end, { ptr += MINBPC(enc); end -= MINBPC(enc); - for (; ptr != end; ptr += MINBPC(enc)) { + for (; HAS_CHAR(enc, ptr, end); ptr += MINBPC(enc)) { switch (BYTE_TYPE(enc, ptr)) { case BT_DIGIT: case BT_HEX: @@ -1521,7 +1517,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, } static int PTRFASTCALL -PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) +PREFIX(charRefNumber)(const ENCODING *UNUSED_P(enc), const char *ptr) { int result = 0; /* skip &# */ @@ -1565,7 +1561,7 @@ PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr) } static int PTRCALL -PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, +PREFIX(predefinedEntityName)(const ENCODING *UNUSED_P(enc), const char *ptr, const char *end) { switch ((end - ptr)/MINBPC(enc)) { @@ -1683,11 +1679,11 @@ PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2) } static int PTRCALL -PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, +PREFIX(nameMatchesAscii)(const ENCODING *UNUSED_P(enc), const char *ptr1, const char *end1, const char *ptr2) { for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) { - if (ptr1 == end1) + if (end1 - ptr1 < MINBPC(enc)) return 0; if (!CHAR_MATCHES(enc, ptr1, *ptr2)) return 0; @@ -1744,7 +1740,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *end, POSITION *pos) { - while (ptr < end) { + while (HAS_CHAR(enc, ptr, end)) { switch (BYTE_TYPE(enc, ptr)) { #define LEAD_CASE(n) \ case BT_LEAD ## n: \ @@ -1760,7 +1756,7 @@ PREFIX(updatePosition)(const ENCODING *enc, case BT_CR: pos->lineNumber++; ptr += MINBPC(enc); - if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF) + if (HAS_CHAR(enc, ptr, end) && BYTE_TYPE(enc, ptr) == BT_LF) ptr += MINBPC(enc); pos->columnNumber = (XML_Size)-1; break; diff --git a/contrib/hbexpat/hbexpat.ch b/contrib/hbexpat/hbexpat.ch index 1b111b6e09..96668535a8 100644 --- a/contrib/hbexpat/hbexpat.ch +++ b/contrib/hbexpat/hbexpat.ch @@ -94,6 +94,7 @@ #define HB_XML_ERROR_RESERVED_PREFIX_XML 38 #define HB_XML_ERROR_RESERVED_PREFIX_XMLNS 39 #define HB_XML_ERROR_RESERVED_NAMESPACE_URI 40 +#define HB_XML_ERROR_INVALID_ARGUMENT 41 #define HB_XML_INITIALIZED 0 #define HB_XML_PARSING 1