Files
harbour-core/harbour/tests/uc16_gen.prg
Przemyslaw Czerpak 93d3a46d84 2012-04-20 17:52 UTC+0200 Przemyslaw Czerpak (druzus/at/poczta.onet.pl)
* harbour/include/hbdefs.h
    * moved HB_WCHAR definition from hbapicdp.h to hbdefs.h

  * harbour/include/hbapicdp.h
  * harbour/include/hbcdpreg.h
  * harbour/src/rtl/cdpapi.c
  * harbour/src/rtl/cdpapihb.c
    + added support for custom sorting redirected from HVM
    + added support for custom character indexes in strings
    + added support for custom character flags (upper, lower, alpha, digit)
    + added support for custom upper/lower conversions
    + added support for CPs using unicode character values instead of
      ASCII ones
    + added new CP functions which respects custom CP settings:
        hb_cdpUpperWC(), hb_cdpTextLen(), hb_cdpTextPos(),
        hb_cdpTextPosEx(), hb_cdpTextGetU16(), hb_cdpTextPutU16(),
        hb_cdpCharEq(), hb_cdpCharCaseEq()
    + added new conversion functions:
        hb_cdpGetUC(), hb_cdpGetWC(), hb_cdpGetU16Ctrl()
    + added macros to detect codepage parameters:
         HB_CDP_ISBINSORT() - codepage uses simple binary sorting
         HB_CDP_ISCUSTOM()  - codepage uses custom string decoding
         HB_CDP_ISCHARIDX() - codepage use character indexes instead
                              of bytes ones
         HB_CDP_ISCHARUNI() - CHR(), ASC() and similar functions operates
                              on Unicode values instead of bytes
         HB_CDP_ISUTF8()    - codepage uses UTF-8 encoding

  * harbour/include/inkey.ch
    - removed HB_INKEY_EXTENDED - it was not used in Harbour
    + added new flag HB_INKEY_EXT - it allows to used different
      event encoding system with automatic translation to
      standard Clipper values. It will be used in the future
      as base for low level GTs code. Now it's possible to
      selectively switch to the new system. New codes are
      unique and do not interacts with Clipper.
    + added HB_INKEY_ALL macro - it's similar to INKEY_ALL but
      enables also GTEVENT keys.

  * harbour/include/hbgtcore.h
  * harbour/src/rtl/hbgtcore.c
    * changed character values in internal screen buffer to unicode.
      Warning: this modification interacts with SAVESCREEN()/RESTSCREEN()
               data. Now all GTs use the extended definition. If someone
               has code which needs VGA-like screen buffers returned
               by SAVESCREEN() then this can be forced by
                  hb_gtInfo( HB_GTI_COMPATBUFFER, .t. )
               However, this forces the use of only one CP in output.
    + added new GT methods which operated on HB_WCHAR values
    * modified existing methods using HB_WCHAR/HB_USHORT parameters
      to operate on HB_WCHAR values
    + added codepage conversion code directly to core code so it can
      be eliminated from low level GT drivers which do not need some
      special operations on it.
    + added support for extended inkey codes, they allow to encoded
      unicode values, mouse events with flags, keyboard events with
      modifier flags (shift,alt,ctrl,...), etc.
      Now in low level GT code only unicode key values are used but
      I plan to updated all GTs and switch to new code - it should
      resolve many small problems inheriting with Clipper inkey codes.

  * harbour/include/hbapifs.h
    * moved hb_fsNameConvU16() definition from hbapicdp.h to hbapifs.h

  * harbour/include/hbxvm.h
    - removed comment copied by mistake from GT header file

  * harbour/include/hbwinuni.h
    * changed HB_CHARDUP() and HB_CHARDUPN() macros - now they
      operate on functions which can be used without active HVM.
      In such case ANSI CP is used as source encoding.
    + added new macros: HB_OSSTRDUP() and HB_OSSTRDUP2().
      They make conversions from OS encoding to HVM one.
      They can be used without active HVM and in such case ANSI CP
      is used as destination encoding.

  * harbour/include/hbapi.h
  * harbour/src/vm/cmdarg.c
    + added new function hb_cmdargProgName()
      It returns application name with path or NULL if not set,
      caller must free returned value with hb_xfree() if not NULL.
      The string is in HVM encoding so it does not need any additional
      encodings.
    * modified hb_cmdargString() to return strings in HVM encoding.
    % some small code optimizations
    ; NOTE: parameters passed to MS-WINDOWS GUI programs which use
            WinMain() instead of main() as startup entry are translated
            to ANSICP before they can be accessed by application.
            This can be eliminated though not for console programs
            using where C compiler makes such translation to main()
            parameters.
            in all systems parameters are passed to application
            startup and init functions before programmer can set OS CP.
            This can be resolved by adding:
               INIT PROC CLIPINIT()
                  SET( _SET_CODEPAGE, <userCP> )
                  SET( _SET_OSCODEPAGE, <systemCP> )
               RETURN
            to linked code. INIT procedures called CLIPINIT() are
            executed before any other ones.

  * harbour/include/hbapi.h
  * harbour/src/vm/set.c
    + added new functions for CP conversions which can be used with
      and without active HVM:
         hb_osStrEncode(), hb_osStrEncodeN(),
         hb_osStrDecode(), hb_osStrDecode2(),
         hb_osStrU16Encode(), hb_osStrU16EncodeN(),
         hb_osStrU16Decode(), hb_osStrU16Decode2(),

  * harbour/include/hbvm.h
  * harbour/src/vm/hvm.c
    + added new function hb_vmIsReady() - it's similar to hb_vmActive()
      but it also checks if current thread has active HVM stack.
    * moved hb_cdpReleaseAll() to the end of HVM cleanup code so
      CP conversions stay active longer.

  * harbour/include/hbcomp.h
  * harbour/src/compiler/cmdcheck.c
  * harbour/src/compiler/hbusage.c
  * harbour/src/common/expropt2.c
    + added new compiler switch:
         -ku  - strings in user encoding
      Now it informs compiler that strings use custom encoding so some
      optimizations which are byte oriented cannot be used.
      It's possible that in the future we will change above definition
      to sth like: "strings in UTF8 encoding" but now I would like to
      keep more general.

  * harbour/src/vm/macro.c
    * inform macrocompiler about custom CPs using own character indexes
      to disable byte oriented optimizations.

  * harbour/include/hbapigt.h
  * harbour/src/rtl/gtapi.c
  * harbour/src/rtl/inkeyapi.c
    + added HB_B_*_W macros with unicode box character definitions
    + added HB_MBUTTON_* macros
    + added new GT functions: hb_gtHostCP() and hb_gtBoxCP() which
      allows to extract CPs used in translations by GTs.
    + added new function hb_inkeyKeyString() - it converts inkey value
      to corresponding string
    + added new function hb_inkeyKeyStd() - it converts new extended
      key value to standard Clipper one.

  * harbour/include/hbapifs.h
  * harbour/src/common/hbffind.c
    * moved OS codepage translations fully to hb_fsFind*() functions.
      It fixes few problems which existed before, i.e. double CP
      conversions in MS-Windows builds and simplifies upper level code.

  * harbour/src/pp/hbpp.c
  * harbour/src/rtl/direct.c
  * harbour/src/rtl/fssize.c
  * harbour/src/rtl/file.c
  * harbour/contrib/hbct/files.c
    * eliminated not longer necessary CP conversions in code calling
      hb_fsFind*() functions.

  * harbour/src/common/hbgete.c
    * moved OS codepage translations to hb_getenv(), hb_getenv_buffer()
      and hb_setenv() functions. It fixes few problems which existed
      before, i.e. double CP conversions in MS-Windows builds and
      simplifies upper level code.

  * harbour/src/rtl/net.c
  * harbour/src/rtl/gete.c
    * eliminated not longer necessary CP conversions in code calling
      hb_getenv()/hb_setenv() functions.
    ; NOTE: additional parameters in HB_GETENV() and HB_SETENV() which
            disabled CP conversions are not longer supported.
            They were strictly platform dependent and ignored in chosen
            cases (i.e. in MS-Windows UNICODE builds we always have to
            convert strings transferred between HVM and OS. If someone
            needs old functionality for other platform then he should
            temporary disable _SET_OSCODEPAGE.

  * harbour/src/common/hbver.c
  * harbour/src/common/hbfsapi.c
  * harbour/src/rtl/fstemp.c
  * harbour/src/rtl/fslink.c
    * eliminated HB_TCHAR_*() macros

  * harbour/src/common/strwild.c
    + added support for custom CPs using own character indexes in:
         hb_strMatchWild(),
         hb_strMatchWildExact(),
         hb_strMatchCaseWildExact()

  * harbour/src/nortl/nortl.c
    + added new dummy function replacement for binaries which are not
      linked with HVM.

  * harbour/src/rtl/filesys.c
    ! fixed double CP conversions in MS-Windows builds of hb_fsCurDirBuff()
    * use hb_vmIsReady() instead of hb_stackId() in file name conversions.
    * use hb_cmdargProgName() in hb_fsBaseDirBuff()

  * harbour/src/rtl/philes.c
    * use hb_cmdargProgName() in HB_PROGNAME() function.

  * harbour/src/rtl/gtcgi/gtcgi.c
  * harbour/src/rtl/gtstd/gtstd.c
  * harbour/src/rtl/gtpca/gtpca.c
  * harbour/src/rtl/gtdos/gtdos.c
  * harbour/src/rtl/gtos2/gtos2.c
  * harbour/src/rtl/gtwin/gtwin.c
  * harbour/src/rtl/gtwvt/gtwvt.h
  * harbour/src/rtl/gtwvt/gtwvt.c
  * harbour/src/rtl/gttrm/gttrm.c
  * harbour/src/rtl/gtcrs/gtcrs.c
  * harbour/src/rtl/gtsln/gtsln.c
  * harbour/src/rtl/gtsln/kbsln.c
  * harbour/src/rtl/gtsln/gtsln.h
  * harbour/src/rtl/gtxwc/gtxwc.h
  * harbour/src/rtl/gtxwc/gtxwc.c
  * harbour/contrib/gtwvg/gtwvg.c
  * harbour/contrib/gtwvg/gtwvg.h
  * harbour/contrib/gtalleg/gtalleg.c
  * harbour/contrib/hbqt/gtqtc/gtqtc.cpp
  * harbour/contrib/hbqt/gtqtc/gtqtc.h
    * updated to work with new unicode GT API
      please make tests with different GTs - I'm not able to test
      all of them, i.e. I do not have any OS2 machine.

  * harbour/src/rtl/box.c
  * harbour/src/rtl/oldbox.c
  * harbour/src/rtl/scroll.c
  * harbour/src/rtl/console.c
    * updated to work with new unicode GT API and
      CPs using custom character indexes

  * harbour/src/rtl/at.c
  * harbour/src/rtl/ati.c
  * harbour/src/rtl/rat.c
  * harbour/src/rtl/len.c
  * harbour/src/rtl/transfrm.c
  * harbour/src/rtl/left.c
  * harbour/src/rtl/right.c
  * harbour/src/rtl/substr.c
  * harbour/src/rtl/stuff.c
  * harbour/src/rtl/padc.c
  * harbour/src/rtl/padl.c
  * harbour/src/rtl/padr.c
    * updated to work with CPs using custom character indexes

  * harbour/src/rtl/chrasc.c
    + added support for HB_CDP_ISCHARUNI() CPs.

  * harbour/src/rtl/mlcfunc.c
    * rewritten from scratch to work with CPs using custom character
      indexes

  * harbour/src/rtl/accept.c
    * updated to work with unicode inkey values and
      CPs using custom character indexes

  * harbour/src/rtl/strmatch.c
    % small optimization

  * harbour/src/rtl/Makefile
  + harbour/src/rtl/chruni.c
    + added new PRG functions which allows to make byte/binary and
      unicode/character operations on strings:
         HB_UCHAR( <nCode> ) -> <cText>
            return string with U+nCode character in HVM CP encoding
         HB_BCHAR( <nCode> ) -> <cText>
            return 1 byte string with <nCode> value
         HB_UCODE( <cText> ) -> <nCode>
            return unicode value of 1-st character (not byte) in given string
         HB_BCODE( <cText> ) -> <nCode>
            return value of 1-st byte in given string
         HB_ULEN( <cText> ) -> <nChars>
            return string length in characters
         HB_BLEN( <cText> ) -> <nBytes>
            return string length in bytes
         HB_UPEEK( <cText>, <n> ) -> <nCode>
            return unicode value of <n>-th character in given string
         HB_BPEEK( <cText>, <n> ) -> <nCode>
            return value of <n>-th byte in given string
         HB_UPOKE( [@]<cText>, <n>, <nVal> ) -> <cText>
            change <n>-th character in given string to unicode <nVal> one and
            return modified text
         HB_BPOKE( [@]<cText>, <n>, <nVal> ) -> <cText>
            change <n>-th byte in given string to <nVal> and return modified
            text

  * harbour/src/rtl/hbdoc.prg
  * harbour/src/rtl/memvarhb.prg
    * use HB_BCHAR() for binary string definitions

  * harbour/src/rtl/hbi18n2.prg
    * use hb_utf8CHR( 0xFEFF ) instead of hardcoded binary string and
      HB_BLEN() instead of LEN()

  * harbour/src/rtl/inkey.c
    + added new functions:
         HB_KEYCHAR( <nKey> ) -> <cChar>
         HB_KEYSTD( <nExtKey> ) -> <nClipKey>
    ! use HB_INKEY_ALL instead of INKEY_ALL in LASTKEY()

  * harbour/src/rtl/achoice.prg
  * harbour/src/rtl/browse.prg
  * harbour/src/rtl/menuto.prg
  * harbour/src/rtl/tgetlist.prg
  * harbour/src/rtl/teditor.prg
  * harbour/src/rtl/tlabel.prg
  * harbour/src/rtl/tpopup.prg
  * harbour/src/rtl/radiobtn.prg
  * harbour/src/rtl/radiogrp.prg
  * harbour/src/rtl/wait.prg
    * updated to work with unicode inkey() values

  * harbour/src/rtl/listbox.prg
    * use box.ch macros instead of explicit CHR(...) definitions

  * harbour/src/rtl/ttopbar.prg
    * updated to work with different type of CPs

  * harbour/src/rtl/scrollbr.prg
    * formatting

  * harbour/src/rtl/mouse53.c
    * use HB_MBUTTON_* macros instead of local ones

  * harbour/src/codepage/cp_utf8.c
  + harbour/src/codepage/uc16def.c
  + harbour/src/codepage/utf8sort.c
    * replaced UTF8ASC with new CP: UTF8EX
      This CP uses character indexes instead of bytes one
      and operates on unicode characters flags.
      Tables for upper/lower conversions and upper/lower/alpha/digit
      flags were generated automatically from
         http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
      It also uses custom collation rules. It's very simple one
      level sorting based on UTF8 C collation.
      If someone needs some advanced sorting rules, then it's enough
      to create copy of this cp with user custom version of UTF8_cmp()
      and UTF8_cmpi() functions, i.e. they can be redirected to some
      external library like ICU (icu-project.org).

  * harbour/contrib/hbct/ctwin.c
  * harbour/contrib/hbct/ctwin.h
  * harbour/contrib/hbct/ctwfunc.c
    * added support for new unicode GT API

  * harbour/contrib/xhb/xhbfunc.c
    * redirected HB_CMDARGARGV() to HB_PROGNAME()

  * harbour/contrib/hbnf/origin.c
    * redirected FT_ORIGIN() to HB_PROGNAME()

  * harbour/contrib/hbnf/getenvrn.c
    ! windows version of FT_GETE() fully rewritten - it should
      fix well known problems reported to the devel list. Please test.
    % small optimization for other systems

  * harbour/contrib/hbfship/exec.c
    * use hb_cmdargProgName() in EXECNAME()

  + harbour/tests/uc16_gen.prg
    + added code which generates tables with unicode character flags
      from http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
      harbour/src/codepage/uc16def.c was generated by this code.

  * harbour/tests/inkeytst.prg
  * harbour/tests/wvtext.prg
  * harbour/tests/gtkeys.prg
    * use HB_INKEY_ALL

  * harbour/include/harbour.hbx
  * harbour/include/hbcpage.hbx
  * harbour/include/hblang.hbx
    * regenerated

   ; It was quite a big piece of modifications and for sure not everything
     is well tested, so please run tests and report any problems you find.
   ; This is a basic version which introduces CPs with custom encodings to HVM.
     Some contrib code has to be updated to work correctly with it.
     I hope that interested developers will make the necessary updates.
     I haven't touched GTWVW code at all - sorry but it needs very serious
     work to make it production ready and fix all existing problems.
   ; Special thanks to OTC - this firm sponsored adding basic UTF8 support
     to HVM.
2012-04-20 15:55:44 +00:00

491 lines
15 KiB
Plaintext

/*
* $Id$
*/
/*
* Harbour Project source code:
* code to generate C source (uc16def.c) with Unicode upper/lower case
* conversion tables and character flag tables for 16-bit code points,
* using data defined by Unicode, Inc. in UnicodeData.txt
*
* Copyright 2011 Przemyslaw Czerpak <druzus / at / priv.onet.pl>
* www - http://harbour-project.org
*
*/
/*
 * Build-time switch, disabled by default. When DO_START_OPT is defined,
 * min_size16() and min_size04() try every table start offset from 0 up to
 * the given minimum instead of only the given one, and main() does not
 * apply its hard-coded "optimal" start offsets.
 */
//#define DO_START_OPT
/*
 * character flags: one bit per property, combined with hb_bitOR() in
 * main(). All four fit into 4 bits, which lets calc_size04() store the
 * flags of two character codes in a single byte.
 */
#define HB_CDP_DIGIT 0x01
#define HB_CDP_ALPHA 0x02
#define HB_CDP_LOWER 0x04
#define HB_CDP_UPPER 0x08
/*
 * Program entry point.
 *
 * Reads "UnicodeData.txt" from the current directory, collects for every
 * code in the range 0x0001..0xFFFE its upper case mapping (field 13),
 * lower case mapping (field 14) and HB_CDP_* flags derived from the
 * general category (field 3), prints some statistics about the resulting
 * tables and finally writes the C source "uc16def.c" with three two-level
 * lookup tables and their accessor functions:
 *    s_uc_upper() / s_uc_lower() - 16-bit case mappings
 *    s_uc_flags()                - 4-bit character flags
 *
 * Nothing is written when the input file cannot be read.
 */
proc main()

   local cData, cLine, aLine
   local cGenCat
   local nCode, nUpper, nLower, nFlags
   local nUppers, nLowers
   local nMinCh, nMinUp, nMinLo
   local nMaxCh, nMaxUp, nMaxLo
   local nBitCh, nBitUp, nBitLo
   local aLower, aUpper, aFlags
   local cResult
   local aInd, aVal, hVal
   local n, nn
   /* the "... for non upper/lower" diagnostics are shown for values >= 2 */
   local nWarning := 2
   /* .f. - drop case mappings of characters which are not Lu / Ll */
   local lConvAll := .f.

   /* one slot per 16-bit code; slot 0 does not exist (1-based arrays),
      the table builders substitute 0 for it */
   aLower := afill( array( 0x10000 ), 0 )
   aUpper := afill( array( 0x10000 ), 0 )
   aFlags := afill( array( 0x10000 ), 0 )

   nMaxCh := nMaxUp := nMaxLo := 0x0000
   nMinCh := nMinUp := nMinLo := 0xFFFF
   nUppers := nLowers := 0

   cData := hb_memoRead( "UnicodeData.txt" )
   if empty( cData )
      /* without input the generated tables would be empty and the
         resulting C file invalid, so do not overwrite uc16def.c */
      ? "Cannot read UnicodeData.txt"
      return
   endif

   /* Split on LF after removing CR so that the file is parsed the same
      way regardless of the platform EOL returned by hb_eol() */
   for each cLine in hb_aTokens( strtran( cData, chr( 13 ), "" ), chr( 10 ) )
      if !empty( cLine )
         aLine := hb_aTokens( cLine, ";" )
         if len( aLine ) == 15
            nCode := hb_hexToNum( aLine[ 1 ] )
            if nCode > 0 .and. nCode < 0xFFFF
               nUpper := hb_hexToNum( aLine[ 13 ] )
               nLower := hb_hexToNum( aLine[ 14 ] )
               nFlags := 0
               cGenCat := aLine[ 3 ]

               /* upper case letters keep their lower case mapping */
               if "Lu" $ cGenCat
                  nFlags := hb_bitOR( nFlags, HB_CDP_ALPHA, HB_CDP_UPPER )
                  if "Lt" $ cGenCat
                     ? "title + upper, line:", cLine:__enumIndex()
                  endif
               elseif nLower != 0
                  if "Lt" $ cGenCat .or. !lConvAll
                     nLower := 0
                  elseif nWarning >= 2
                     ? "lower for non upper, line:", cLine:__enumIndex()
                  endif
               endif

               /* lower case letters keep their upper case mapping */
               if "Ll" $ cGenCat
                  if "Lt" $ cGenCat
                     ? "title + lower, line:", cLine:__enumIndex()
                  endif
                  nFlags := hb_bitOR( nFlags, HB_CDP_ALPHA, HB_CDP_LOWER )
               elseif nUpper != 0
                  if "Lt" $ cGenCat .or. !lConvAll
                     nUpper := 0
                  elseif nWarning >= 2
                     ? "upper for non lower, line:", cLine:__enumIndex()
                  endif
               endif

               /* only ASCII digits get the digit flag */
               if nCode >= asc( "0" ) .and. nCode <= asc( "9" )
                  nFlags := hb_bitOR( nFlags, HB_CDP_DIGIT )
               endif

               /* mappings have to fit into the 16-bit tables */
               if nUpper >= 0xFFFF
                  ? "Upper out of range, line:", cLine:__enumIndex()
               endif
               if nLower >= 0xFFFF
                  ? "Lower out of range, line:", cLine:__enumIndex()
               endif

               if nFlags > 0
                  aFlags[ nCode ] := nFlags
                  if nMaxCh < nCode
                     nMaxCh := nCode
                  endif
                  if nMinCh > nCode
                     nMinCh := nCode
                  endif
               endif
               if nUpper > 0
                  nUppers++
                  aUpper[ nCode ] := nUpper
                  if nMaxUp < nCode
                     nMaxUp := nCode
                  endif
                  if nMinUp > nCode
                     nMinUp := nCode
                  endif
               endif
               if nLower > 0
                  nLowers++
                  aLower[ nCode ] := nLower
                  if nMaxLo < nCode
                     nMaxLo := nCode
                  endif
                  if nMinLo > nCode
                     nMinLo := nCode
                  endif
               endif
            endif
         else
            ? "Wrong line:", cLine:__enumIndex()
         endif
      endif
   next

   /* statistics and choice of the block size for each table;
      min_size*() update the start offset and block size bits by reference */
   ? "uppers#:", nUppers
   ? "min upper:", nMinUp
   ? "max upper:", nMaxUp
   ? "TOUPPER tables."
   ? "raw size:", hb_ntos( ( nMaxUp - nMinUp + 1 ) * 2 )
#ifndef DO_START_OPT
   nMinUp := min( nMinUp, 48 ) // optimal
#endif
   n := min_size16( aUpper, @nMinUp, nMaxUp, @nBitUp )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBitUp ) ), "byte blocks, (from: " + hb_ntos( nMinUp ) + ")"
   ?

   ? "lowers#:", nLowers
   ? "min lower:", nMinLo
   ? "max lower:", nMaxLo
   ? "TOLOWER tables."
   ? "raw size:", hb_ntos( ( nMaxLo - nMinLo + 1 ) * 2 )
#ifndef DO_START_OPT
   nMinLo := min( nMinLo, 32 ) // optimal
#endif
   n := min_size16( aLower, @nMinLo, nMaxLo, @nBitLo )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBitLo ) ), "byte blocks, (from: " + hb_ntos( nMinLo ) + ")"
   ?

   ? "min char:", nMinCh
   ? "max char:", nMaxCh
   ? "ATTR tables."
   ? "raw size:", hb_ntos( int( ( nMaxCh - nMinCh + 2 ) / 2 ) )
#ifndef DO_START_OPT
   nMinCh := min( nMinCh, 0 ) // optimal
#endif
   n := min_size04( aFlags, @nMinCh, nMaxCh, @nBitCh )
   ? "minimal size:", hb_ntos( n ), ;
     "for", hb_ntos( hb_bitshift( 1, nBitCh ) ), "byte blocks, (from: " + hb_ntos( nMinCh ) + ")"
   ?

   /* header of the generated C file */
   cResult := ;
      "/*" + hb_eol() + ;
      " * $Id$" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol() + ;
      "/*" + hb_eol() + ;
      " * Harbour Project source code:" + hb_eol() + ;
      " * Unicode character tables" + hb_eol() + ;
      " * code generated automatically by tests/uc16_gen.prg" + hb_eol() + ;
      " *" + hb_eol() + ;
      " * Copyright 2012 Przemyslaw Czerpak <druzus / at / priv.onet.pl>" + hb_eol() + ;
      " * www - http://harbour-project.org" + hb_eol() + ;
      " *" + hb_eol() + ;
      " */" + hb_eol() + ;
      hb_eol()
   cResult += '#include "hbapi.h"' + hb_eol()
   cResult += hb_eol()
   cResult += hb_eol()

   /* upper case conversion table; the decoded table is verified against
      the source array before it is emitted */
   cResult += "#define HB_UCUP_FIRST 0x" + hb_numToHex( nMinUp, 4 ) + hb_eol()
   cResult += "#define HB_UCUP_LAST 0x" + hb_numToHex( nMaxUp, 4 ) + hb_eol()
   cResult += "#define HB_UCUP_BITS " + hb_ntos( nBitUp ) + hb_eol()
   cResult += hb_eol()
   calc_size16( aUpper, nMinUp, nMaxUp, nBitUp, @hVal, @aInd, @nn )
   aVal := hash_to_array16( hVal )
   check_conv16( aUpper, aInd, aVal, nMinUp, nMaxUp, nBitUp )
   cResult += array_to_code( aInd, "s_up_idx", nn )
   cResult += hb_eol()
   cResult += array_to_code( aVal, "s_up_val", 2 )
   cResult += hb_eol()
   cResult += index_func16( "s_uc_upper", "s_up_idx", "s_up_val", ;
                            "HB_UCUP_FIRST", "HB_UCUP_LAST", "HB_UCUP_BITS" )
   cResult += hb_eol()

   /* lower case conversion table */
   cResult += "#define HB_UCLO_FIRST 0x" + hb_numToHex( nMinLo, 4 ) + hb_eol()
   cResult += "#define HB_UCLO_LAST 0x" + hb_numToHex( nMaxLo, 4 ) + hb_eol()
   cResult += "#define HB_UCLO_BITS " + hb_ntos( nBitLo ) + hb_eol()
   cResult += hb_eol()
   calc_size16( aLower, nMinLo, nMaxLo, nBitLo, @hVal, @aInd, @nn )
   aVal := hash_to_array16( hVal )
   check_conv16( aLower, aInd, aVal, nMinLo, nMaxLo, nBitLo )
   cResult += array_to_code( aInd, "s_lo_idx", nn )
   cResult += hb_eol()
   cResult += array_to_code( aVal, "s_lo_val", 2 )
   cResult += hb_eol()
   cResult += index_func16( "s_uc_lower", "s_lo_idx", "s_lo_val", ;
                            "HB_UCLO_FIRST", "HB_UCLO_LAST", "HB_UCLO_BITS" )
   cResult += hb_eol()

   /* character flags table (two codes per byte) */
   cResult += "#define HB_UCFL_FIRST 0x" + hb_numToHex( nMinCh, 4 ) + hb_eol()
   cResult += "#define HB_UCFL_LAST 0x" + hb_numToHex( nMaxCh, 4 ) + hb_eol()
   cResult += "#define HB_UCFL_BITS " + hb_ntos( nBitCh ) + hb_eol()
   cResult += hb_eol()
   calc_size04( aFlags, nMinCh, nMaxCh, nBitCh, @hVal, @aInd, @nn )
   aVal := hash_to_array04( hVal )
   check_conv04( aFlags, aInd, aVal, nMinCh, nMaxCh, nBitCh )
   cResult += array_to_code( aInd, "s_ch_idx", nn )
   cResult += hb_eol()
   cResult += array_to_code( aVal, "s_ch_val", 1 )
   cResult += hb_eol()
   cResult += index_func04( "s_uc_flags", "s_ch_idx", "s_ch_val", ;
                            "HB_UCFL_FIRST", "HB_UCFL_LAST", "HB_UCFL_BITS" )

   hb_memowrit( "uc16def.c", cResult )

   return
/*
 * Formats a numeric array as the C definition of a static const array.
 *
 *    aVal  - array of numeric values to emit
 *    cName - name of the generated C array
 *    nn    - size of a single item in bytes: 1 emits HB_BYTE items with
 *            12 values per line, any other value emits HB_USHORT items
 *            with 8 values per line; each value is written as a hex
 *            number with nn * 2 digits
 *
 * Returns the C source text terminated with EOL.
 */
static function array_to_code( aVal, cName, nn )

   local cResult, l, n

   cResult := "static const " + ;
              iif( nn == 1, "HB_BYTE", "HB_USHORT" ) + " " + ;
              cName + "[ " + hb_ntos( len( aVal ) ) + " ] =" + hb_eol()
   cResult += "{" + hb_eol()
   l := 0     /* number of values already placed in the current line */
   for n := 1 to len( aVal )
      if ++l > iif( nn == 1, 12, 8 )
         /* line is full: close it and start a new one */
         l := 1
         cResult += ","
         cResult += hb_eol()
         cResult += " "
      elseif n == 1
         cResult += " "
      else
         cResult += ", "
      endif
      cResult += "0x"
      cResult += hb_numToHex( aVal[ n ], nn * 2 )
   next
   cResult += hb_eol()
   cResult += "};" + hb_eol()

   /* no trailing ';' here: it is the line continuation character and
      would join this statement with the following source line */
   return cResult
/*
 * Flattens the blocks stored in hVal into one array of 16-bit values.
 * Every hash value is a string of 2-byte items which are decoded with
 * bin2w() and appended in hash iteration order.
 */
static function hash_to_array16( hVal )

   local aWords := {}
   local cBlock, nPos

   for each cBlock in hVal
      nPos := 1
      while nPos <= len( cBlock )
         aadd( aWords, bin2w( substr( cBlock, nPos, 2 ) ) )
         nPos += 2
      enddo
   next

   return aWords
/*
 * Flattens the blocks stored in hVal into one array of byte values.
 * Every character of every hash value becomes one array item holding
 * its asc() code, in hash iteration order.
 */
static function hash_to_array04( hVal )

   local aBytes := {}
   local cBlock, nPos

   for each cBlock in hVal
      for nPos := 1 to len( cBlock )
         aadd( aBytes, asc( substr( cBlock, nPos, 1 ) ) )
      next
   next

   return aBytes
/*
 * Builds the C source of a static lookup function returning HB_USHORT.
 *
 *    cName     - name of the generated C function
 *    cNameInd  - name of the C index array
 *    cNameConv - name of the C value array
 *    cMin/cMax - C expressions with the first and last supported code
 *    cBit      - C expression with the block size expressed in bits
 *
 * The generated function subtracts cMin from its argument, picks the
 * block number from the index array and returns the item of that block
 * from the value array; it returns 0 for codes outside cMin..cMax.
 */
static function index_func16( cName, cNameInd, cNameConv, cMin, cMax, cBit )

   local cEol := hb_eol()
   local cCode

   cCode := "static HB_USHORT " + cName + "( int n )" + cEol
   cCode += "{" + cEol
   cCode += " n -= " + cMin + ";" + cEol
   cCode += " if( n >= 0 && n <= ( " + cMax + " - " + cMin + " ) )" + cEol
   cCode += " {" + cEol
   cCode += " return " + cNameConv + "[ ( " + cNameInd + ;
            "[ n >> " + cBit + " ] << " + cBit + " ) +" + cEol
   /* pad the continuation line of the index expression */
   cCode += space( len( cNameConv ) + 15 )
   cCode += "( n & ( ( 1 << " + cBit + " ) - 1 ) ) ];" + cEol
   cCode += " }" + cEol
   cCode += " return 0;" + cEol
   cCode += "}" + cEol

   return cCode
/*
 * Builds the C source of a static lookup function for 4-bit values.
 *
 *    cName     - name of the generated C function
 *    cNameInd  - name of the C index array
 *    cNameConv - name of the C value array (two 4-bit items per byte)
 *    cMin/cMax - C expressions with the first and last supported code
 *    cBit      - C expression with the block size expressed in bits
 *
 * The generated function reads the byte holding the requested item and
 * returns its high nibble for odd and its low nibble for even offsets;
 * it returns 0 for codes outside cMin..cMax.
 */
static function index_func04( cName, cNameInd, cNameConv, cMin, cMax, cBit )

   local cEol := hb_eol()
   local cCode

   cCode := "static int " + cName + "( int n )" + cEol
   cCode += "{" + cEol
   cCode += " n -= " + cMin + ";" + cEol
   cCode += " if( n >= 0 && n <= ( " + cMax + " - " + cMin + " ) )" + cEol
   cCode += " {" + cEol
   cCode += " HB_BYTE v;" + cEol
   cCode += " v = " + cNameConv + "[ ( " + cNameInd + ;
            "[ n >> " + cBit + " ] << ( " + cBit + " - 1 ) ) +" + cEol
   /* pad the continuation line of the index expression */
   cCode += space( len( cNameConv ) + 12 )
   cCode += "( ( n & ( ( 1 << " + cBit + " ) - 1 ) ) >> 1 ) ];" + cEol
   cCode += " return n & 1 ? v >> 4 : v & 0x0F;" + cEol
   cCode += " }" + cEol
   cCode += " return 0;" + cEol
   cCode += "}" + cEol

   return cCode
/*
 * Looks for the block size which gives the smallest 16-bit table.
 *
 *    aVal - array with values indexed by character code
 *    nMin - first code stored in the table; updated with the start
 *           offset of the best result when passed by reference
 *    nMax - last code stored in the table
 *    nBit - receives the best block size in bits (1..16) when passed
 *           by reference
 *
 * calc_size16() is evaluated for every block size from 1 to 16 bits.
 * With DO_START_OPT defined also every start offset from 0 to nMin is
 * tried, otherwise only nMin itself.
 * Returns the smallest size reported by calc_size16().
 */
function min_size16( aVal, nMin, nMax, nBit )

   local nBits, nStart, nFirst, nCost
   local nBest := 0xFFFFFF
   local nBestStart := nMin

#ifdef DO_START_OPT
   nFirst := 0
#else
   nFirst := nMin
#endif

   for nStart := nFirst to nMin
      for nBits := 1 to 16
         nCost := calc_size16( aVal, nStart, nMax, nBits )
         if nCost < nBest
            nBest := nCost
            nBit := nBits
            nBestStart := nStart
         endif
      next
   next

   nMin := nBestStart

   return nBest
/*
 * Splits the values of codes nMin..nMax into blocks of 2 ^ nBit 16-bit
 * items and collects the unique blocks.
 *
 *    aVal - array with values indexed by character code (code 0 is
 *           stored as 0 because arrays are 1-based)
 *    nMin, nMax - range of codes to store
 *    nBit - block size in bits
 *    hVal - receives (by reference) an order keeping hash with the
 *           unique blocks encoded as i2bin() strings
 *    aInd - receives (by reference) the 0-based number of the block
 *           used by each consecutive range of codes
 *    nn   - receives (by reference) the size of an index item in
 *           bytes: 2 when there are more than 256 index items, else 1
 *
 * Returns the total size in bytes: index items plus all unique blocks.
 */
function calc_size16( aVal, nMin, nMax, nBit, hVal, aInd, nn )

   local nBlockLen, nCode, cBlock, cKnown, nTotal

   nBlockLen := int( 2 ^ ( nBit + 1 ) )   /* block length in bytes */
   hVal := {=>}
   aInd := {}
   hb_hKeepOrder( hVal, .t. )

   cBlock := ""
   for nCode := nMin to nMax
      if nCode == 0
         cBlock += i2bin( 0 )
      else
         cBlock += i2bin( aVal[ nCode ] )
      endif
      if len( cBlock ) == nBlockLen
         hVal[ cBlock ] := cBlock
         aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
         cBlock := ""
      endif
   next

   if len( cBlock ) > 0
      /* Incomplete last block: reuse the first stored block which begins
         with it. With the default SET EXACT OFF the '=' operator compares
         strings only up to the length of the right operand. */
      for each cKnown in hVal
         if cKnown = cBlock
            cBlock := cKnown
            exit
         endif
      next
      hVal[ cBlock ] := cBlock
      aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
   endif

   nn := iif( len( aInd ) > 256, 2, 1 )
   nTotal := len( aInd ) * nn
   for each cKnown in hVal
      nTotal += len( cKnown )
   next

   return nTotal
/*
 * Looks for the block size which gives the smallest 4-bit table.
 *
 *    aVal - array with values indexed by character code
 *    nMin - first code stored in the table; updated with the start
 *           offset of the best result when passed by reference
 *    nMax - last code stored in the table
 *    nBit - receives the best block size in bits (1..16) when passed
 *           by reference
 *
 * calc_size04() is evaluated for every block size from 1 to 16 bits.
 * With DO_START_OPT defined also every start offset from 0 to nMin is
 * tried, otherwise only nMin itself.
 * Returns the smallest size reported by calc_size04().
 */
function min_size04( aVal, nMin, nMax, nBit )

   local nBits, nStart, nFirst, nCost
   local nBest := 0xFFFFFF
   local nBestStart := nMin

#ifdef DO_START_OPT
   nFirst := 0
#else
   nFirst := nMin
#endif

   for nStart := nFirst to nMin
      for nBits := 1 to 16
         nCost := calc_size04( aVal, nStart, nMax, nBits )
         if nCost < nBest
            nBest := nCost
            nBit := nBits
            nBestStart := nStart
         endif
      next
   next

   nMin := nBestStart

   return nBest
/*
 * Splits the values of codes nMin..nMax into blocks of 2 ^ nBit 4-bit
 * items and collects the unique blocks. Two consecutive codes share one
 * byte: the first one in the low nibble, the second one in the high
 * nibble.
 *
 *    aVal - array with values indexed by character code (code 0 is
 *           stored as 0 because arrays are 1-based)
 *    nMin, nMax - range of codes to store
 *    nBit - block size in bits
 *    hVal - receives (by reference) an order keeping hash with the
 *           unique blocks encoded as byte strings
 *    aInd - receives (by reference) the 0-based number of the block
 *           used by each consecutive range of codes
 *    nn   - receives (by reference) the size of an index item in
 *           bytes: 2 when there are more than 256 index items, else 1
 *
 * Returns the total size in bytes: index items plus all unique blocks.
 */
function calc_size04( aVal, nMin, nMax, nBit, hVal, aInd, nn )

   local nBlockLen, nCode, nLow, cBlock, cKnown, nTotal

   nBlockLen := int( 2 ^ ( nBit - 1 ) )   /* block length in bytes */
   hVal := {=>}
   aInd := {}
   hb_hKeepOrder( hVal, .t. )

   cBlock := ""
   for nCode := nMin to nMax step 2
      nLow := iif( nCode == 0, 0, aVal[ nCode ] )
      cBlock += chr( nLow + aVal[ nCode + 1 ] * 16 )
      if len( cBlock ) == nBlockLen
         hVal[ cBlock ] := cBlock
         aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
         cBlock := ""
      endif
   next

   if len( cBlock ) > 0
      /* Incomplete last block: reuse the first stored block which begins
         with it. With the default SET EXACT OFF the '=' operator compares
         strings only up to the length of the right operand. */
      for each cKnown in hVal
         if cKnown = cBlock
            cBlock := cKnown
            exit
         endif
      next
      hVal[ cBlock ] := cBlock
      aadd( aInd, hb_hpos( hVal, cBlock ) - 1 )
   endif

   nn := iif( len( aInd ) > 256, 2, 1 )
   nTotal := len( aInd ) * nn
   for each cKnown in hVal
      nTotal += len( cKnown )
   next

   return nTotal
/*
 * Decodes the value of code n from the two-level 16-bit table.
 *
 *    n     - character code to look up
 *    aInd  - array with 0-based block numbers
 *    aVal  - array with the items of all unique blocks
 *    nMin, nMax - range of codes covered by the table
 *    nBit  - block size in bits
 *
 * Returns the stored value or 0 when n is outside nMin..nMax.
 */
static function conv_get16( n, aInd, aVal, nMin, nMax, nBit )

   local nBlockLen, nOffset, nBlock

   if n < nMin .or. n > nMax
      return 0
   endif

   nBlockLen := 2 ^ nBit
   nOffset := n - nMin
   /* number of the block holding the requested item */
   nBlock := aInd[ int( nOffset / nBlockLen ) + 1 ]

   return aVal[ nBlock * nBlockLen + nOffset % nBlockLen + 1 ]
/*
 * Verifies that the two-level 16-bit table decodes to the source data.
 * Every item of aConv is compared with the value returned for its index
 * by conv_get16(); the first difference is reported and BREAK is
 * executed, so no further items are checked.
 * Returns NIL.
 */
static function check_conv16( aConv, aInd, aVal, nMin, nMax, nBit )

   local nExpected, nCode, nDecoded

   for each nExpected in aConv
      nCode := nExpected:__enumIndex()
      nDecoded := conv_get16( nCode, aInd, aVal, nMin, nMax, nBit )
      if nExpected != nDecoded
         ? "Wrong decoding:", nCode, nExpected, nDecoded, len( aConv ), nMax
         break
      endif
   next

   return nil
/*
 * Decodes the 4-bit value of code n from the two-level table in which
 * two consecutive codes share one byte.
 *
 *    n     - character code to look up
 *    aInd  - array with 0-based block numbers
 *    aVal  - array with the bytes of all unique blocks
 *    nMin, nMax - range of codes covered by the table
 *    nBit  - block size in bits
 *
 * Returns the stored value or 0 when n is outside nMin..nMax.
 * It mirrors the C code emitted by index_func04():
 *    v = val[ ( idx[ n >> BITS ] << ( BITS - 1 ) ) +
 *             ( ( n & ( ( 1 << BITS ) - 1 ) ) >> 1 ) ];
 *    return n & 1 ? v >> 4 : v & 0x0F;
 */
static function conv_get04( n, aInd, aVal, nMin, nMax, nBit )

   local nBlockLen, nOffset, nBlock, nPos, nByte

   if n < nMin .or. n > nMax
      return 0
   endif

   nBlockLen := int( 2 ^ nBit )
   nOffset := n - nMin
   nBlock := aInd[ int( nOffset / nBlockLen ) + 1 ]
   /* position of the byte shared by this code and its neighbour */
   nPos := nBlock * nBlockLen / 2 + ( nOffset % nBlockLen ) / 2
   nByte := aVal[ int( nPos ) + 1 ]

   if nOffset % 2 == 1
      return int( nByte / 16 )
   endif

   return hb_bitAnd( nByte, 0x0F )
/*
 * Verifies that the two-level 4-bit table decodes to the source data.
 * Every item of aConv is compared with the value returned for its index
 * by conv_get04() and each difference is reported; unlike
 * check_conv16() the scan continues after a mismatch.
 * Returns NIL.
 */
static function check_conv04( aConv, aInd, aVal, nMin, nMax, nBit )

   local nExpected, nCode, nDecoded

   for each nExpected in aConv
      nCode := nExpected:__enumIndex()
      nDecoded := conv_get04( nCode, aInd, aVal, nMin, nMax, nBit )
      if nExpected != nDecoded
         ? "Wrong decoding:", nCode, nExpected, nDecoded, len( aConv ), nMax
      endif
   next

   return nil