From f8ea903321554c6d7e07f5dcfb440c248d40e518 Mon Sep 17 00:00:00 2001 From: Brian Hays Date: Thu, 14 Jun 2001 07:32:15 +0000 Subject: [PATCH] 2001-06-14 12:39 UTC-0800 Brian Hays --- harbour/ChangeLog | 13 +++++ harbour/contrib/libmisc/hb_f.c | 86 +++++++++++++++++++++++++++++++--- harbour/source/rtl/philes.c | 2 +- 3 files changed, 94 insertions(+), 7 deletions(-) diff --git a/harbour/ChangeLog b/harbour/ChangeLog index a6b0824fef..813118cdd4 100644 --- a/harbour/ChangeLog +++ b/harbour/ChangeLog @@ -1,3 +1,16 @@ +2001-06-14 12:39 UTC-0800 Brian Hays + * source/rtl/philes.c + * changed comment re HB_FEOF TO HB_F_EOF + * contrib/libmisc/hb_f.c + * fixed several bugs, mostly EOF handling, and bumped + record size to 4096 + + added HB_FINFO and HB_FREADANDSKIP, the functions I used to + test and debug. HB_FREADANDSKIP goes a bit beyond the bug-fix + category, but it's well tested. For reading comma-delimited + files, it does a single read to both skip a "record" + and return the buffer. It also recognizes hard-returns embedded + within quotes as data, not a record separator. 
+ 2001-06-12 11:59 UTC-0800 Brian Hays * contrib/rdd_ads/ads1.c diff --git a/harbour/contrib/libmisc/hb_f.c b/harbour/contrib/libmisc/hb_f.c index 9bb593dc7a..4b8bafc5af 100644 --- a/harbour/contrib/libmisc/hb_f.c +++ b/harbour/contrib/libmisc/hb_f.c @@ -50,11 +50,11 @@ * */ -/* please run $(HARBOUR)\tests\working\testhbf.prg for testing */ +/* please run $(HARBOUR)\tests\testhbf.prg for testing */ #include "hbapifs.h" -#define b_size 1024 +#define b_size 4096 #define c_size 4096 static long hb_hbfskip( int recs ); @@ -62,13 +62,13 @@ static long hb_hbfskip( int recs ); static long last_rec[10]; static long recno[10]; static long offset[10]; -static int handles[10]; -static int area = 0; +static int handles[10]; +static int area = 0; static char *b; static char *c; static long last_off[10]; static long lastbyte[10]; -static int isEof[10]; +static int isEof[10]; HB_FUNC( HB_FUSE ) { @@ -90,6 +90,7 @@ HB_FUNC( HB_FUSE ) b = ( char * )hb_xgrab( b_size ); c = ( char * )hb_xgrab( c_size ); lastbyte[area] = hb_fsSeek( handles[area], 0L, SEEK_END ); + isEof[area] = (lastbyte[area] == 0); hb_retni( handles[area] ); } else { @@ -288,21 +289,25 @@ HB_FUNC( HB_FGOTOP ) { offset[area] = 0L; recno[area] = 1L; + isEof[area] = (lastbyte[area] == 0); } HB_FUNC( HB_FLASTREC ) { long old_rec; long old_offset; + int bIsEof; old_rec = recno[area]; old_offset = offset[area]; + bIsEof = isEof[area]; HB_FUNCNAME( HB_FGOBOTTOM )(); hb_retnl( last_rec[area] ); - recno[area] = old_rec; + recno[area] = old_rec; offset[area] = old_offset; + isEof[area] = bIsEof ; } HB_FUNC( HB_FSELECT ) @@ -312,3 +317,72 @@ HB_FUNC( HB_FSELECT ) if ( ISNUM(1) ) area = hb_parni(1) - 1; } + +HB_FUNC( HB_FINFO ) /* used for debugging */ /* Fills the six slots set up by hb_reta( 6 ), in order, with area+1, last_rec, recno, offset, lastbyte and isEof of the currently selected area. */ +{ + hb_reta( 6 ); + hb_storni( area+1, -1, 1); + hb_storni( last_rec[area], -1, 2); /* NOTE(review): last_rec, recno, offset and lastbyte are declared long but are stored through hb_storni; presumably narrowed to int -- confirm */ + hb_storni( recno[area], -1, 3); + hb_storni( offset[area], -1, 4); + hb_storni( lastbyte[area], -1, 5); + hb_storl ( isEof[area], -1, 6); + +} + +HB_FUNC( HB_FREADANDSKIP ) /* Reads one record starting at offset[area], returns its text without the terminating CR/LF pair, then advances offset[area] past the record, increments recno[area] and updates isEof[area]. */ +{ +/* 
------------------------------------------------ + Warning: This is a rogue function! It is a first shot at adding the logic + to read .CSV records that respect CRLF embedded within quotes. + It is very common, especially with Microsoft products, for + comma-separated files to allow a field (usually an address field) + to have hard returns within it. These records appear corrupted to any + reader that presumes all hard returns are record separators. + + This function is useful right now to loop through a CSV file + while !hb_feof(), but it does NOT recognize the same record count + and positioning that the other functions in this file use. + It does its own skip and read, so an entire file can be read + sequentially with just this function. + -BH + --------------------------------------------------*/ + long x = 0; /* index of the byte being examined; after the loop, the number of bytes consumed */ + long read; /* byte count returned by hb_fsRead() */ + BOOL bInField = FALSE, bHasCRLF = FALSE; /* inside a quoted field; a terminator pair was consumed */ + + hb_fsSeek( handles[area], offset[area], SEEK_SET ); + read = hb_fsRead( handles[area], ( BYTE * ) b, b_size ); /* single read of at most b_size bytes; the loop below never refills the buffer, so a record with no terminator inside these bytes ends where the bytes end */ + + while ( x < read ) + { + if ( *(b + x) == '"' ) /* every double quote toggles the in-field state */ + { + bInField = !bInField ; + x++; + continue; + } + if ( bInField ) /* inside quotes every byte, CR and LF included, is data */ + { + x++; + continue; + } + if( ((*(b + x) == 13) && x < read-1 && (*(b + x + 1) == 10)) || + ((*(b + x) == 10) && x < read-1 && (*(b + x + 1) == 13)) ) /* only a CR LF or LF CR pair lying wholly inside the bytes read ends a record; a lone CR or LF is kept as data */ + { + x += 2; bHasCRLF = TRUE; /* remember the pair so it is trimmed from the returned text; previously the flag was never set and the pair was always returned */ + break; + } + x++; + } + + offset[area] = offset[area] + x; + recno[area] += 1; + /* See if there's more to read */ + if ( !isEof[area] ) + isEof[area] = (lastbyte[area] <= offset[area] + 1) ; /* once set, the flag is left alone by this function */ + + hb_retclen( b, x - (bHasCRLF ? 2 : 0) ); /* x is at least 2 whenever bHasCRLF is set, so the length cannot go negative */ + +} + diff --git a/harbour/source/rtl/philes.c b/harbour/source/rtl/philes.c index f8f9fe0cec..c30f7a4dbb 100644 --- a/harbour/source/rtl/philes.c +++ b/harbour/source/rtl/philes.c @@ -41,7 +41,7 @@ * CURDIR() * * Copyright 2000 David G. Holm - * HB_FEOF() + * HB_F_EOF() * * See doc/license.txt for licensing terms. *