mirror of
https://github.com/ultimatepp/ultimatepp.git
synced 2026-06-01 06:12:23 -06:00
1200 lines
42 KiB
C++
1200 lines
42 KiB
C++
/******************************************************************************
|
|
* $Id: cpl_csv.cpp 29330 2015-06-14 12:11:11Z rouault $
|
|
*
|
|
* Project: CPL - Common Portability Library
|
|
* Purpose: CSV (comma separated value) file access.
|
|
* Author: Frank Warmerdam, warmerdam@pobox.com
|
|
*
|
|
******************************************************************************
|
|
* Copyright (c) 1999, Frank Warmerdam
|
|
* Copyright (c) 2009-2012, Even Rouault <even dot rouault at mines-paris dot org>
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included
|
|
* in all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
****************************************************************************/
|
|
|
|
#include "cpl_csv.h"
|
|
#include "cpl_conv.h"
|
|
#include "cpl_multiproc.h"
|
|
#include "gdal_csv.h"
|
|
|
|
CPL_CVSID("$Id: cpl_csv.cpp 29330 2015-06-14 12:11:11Z rouault $");
|
|
|
|
/* ==================================================================== */
|
|
/* The CSVTable is a persistent set of info about an open CSV */
|
|
/* table. While it doesn't currently maintain a record index, */
|
|
/* or in-memory copy of the table, it could be changed to do so */
|
|
/* in the future. */
|
|
/* ==================================================================== */
|
|
typedef struct ctb {
    /* Handle on the open CSV file.  CSVIngest() closes it and resets it
       to NULL once the whole file has been cached in memory. */
    FILE        *fp;

    /* Next table in the list of open tables. */
    struct ctb  *psNext;

    /* Private copy of the name the table was opened with; tables are
       looked up in the list by this name. */
    char        *pszFilename;

    /* Field names parsed from the header record of the file. */
    char       **papszFieldNames;

    /* Fields of the record most recently fetched through the table;
       destroyed and replaced on the next fetch. */
    char       **papszRecFields;

    /* Index into papszLines of the last record read, -1 when a scan
       restarts from the top. */
    int          iLastLine;

    /* TRUE once the key is known (or assumed) not to be unique, which
       disables the "return the current record again" shortcut. */
    int          bNonUniqueKey;

    /* Cache for whole file */
    int          nLineCount;    /* number of entries in papszLines */
    char       **papszLines;    /* record starts, pointing into pszRawData */
    int         *panLineIndex;  /* atoi() of each record, NULL if not ascending */
    char        *pszRawData;    /* the complete file contents, NUL terminated */
} CSVTable;
|
|
|
|
|
|
static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, int bCanUseTLS, const char * pszFilename );
|
|
|
|
/************************************************************************/
|
|
/* CSVFreeTLS() */
|
|
/************************************************************************/
|
|
static void CSVFreeTLS(void* pData)
|
|
{
|
|
CSVDeaccessInternal( (CSVTable **)pData, FALSE, NULL );
|
|
CPLFree(pData);
|
|
}
|
|
|
|
/* It would likely be better to share this list between threads, but
|
|
that will require some rework. */
|
|
|
|
/************************************************************************/
|
|
/* CSVAccess() */
|
|
/* */
|
|
/* This function will fetch a handle to the requested table. */
|
|
/* If not found in the ``open table list'' the table will be */
|
|
/* opened and added to the list. Eventually this function may */
|
|
/* become public with an abstracted return type so that */
|
|
/* applications can set options about the table. For now this */
|
|
/* isn't done. */
|
|
/************************************************************************/
|
|
|
|
/*
 * Return the table entry for pszFilename, opening the file and adding a
 * new entry to the front of the thread-local list when it is not already
 * there.  Returns NULL when the file cannot be opened.
 */
static CSVTable *CSVAccess( const char * pszFilename )
{
    CSVTable **ppsListHead;
    CSVTable  *psEntry;
    FILE      *fpCSV;

    /* Get this thread's list head, creating (and registering for      */
    /* cleanup through CSVFreeTLS) an empty one on first use.          */
    ppsListHead = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
    if( ppsListHead == NULL )
    {
        ppsListHead = (CSVTable **) CPLCalloc(1,sizeof(CSVTable*));
        CPLSetTLSWithFreeFunc( CTLS_CSVTABLEPTR, ppsListHead, CSVFreeTLS );
    }

    /* Already open?  Names are compared with EQUAL().  Matching       */
    /* entries are not promoted to the front of the list.              */
    psEntry = *ppsListHead;
    while( psEntry != NULL )
    {
        if( EQUAL(psEntry->pszFilename,pszFilename) )
            return psEntry;

        psEntry = psEntry->psNext;
    }

    /* Not open yet: try to open it now.                               */
    fpCSV = VSIFOpen( pszFilename, "rb" );
    if( fpCSV == NULL )
        return NULL;

    /* Build the (zero-initialized) entry and link it in at the head.  */
    psEntry = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
    psEntry->fp = fpCSV;
    psEntry->pszFilename = CPLStrdup( pszFilename );
    psEntry->bNonUniqueKey = FALSE;   /* as far as we know now */
    psEntry->psNext = *ppsListHead;
    *ppsListHead = psEntry;

    /* The first record of the file holds the field names.             */
    psEntry->papszFieldNames = CSVReadParseLine( fpCSV );

    return psEntry;
}
|
|
|
|
/************************************************************************/
|
|
/* CSVDeaccess() */
|
|
/************************************************************************/
|
|
|
|
/*
 * Remove the table named pszFilename from the list headed by
 * *ppsCSVTableList and free everything it owns.  A NULL pszFilename
 * releases every table on the list.  bCanUseTLS gates the CPLDebug()
 * message on a miss and the trailing CPLReadLine(NULL) call.
 */
static void CSVDeaccessInternal( CSVTable **ppsCSVTableList, int bCanUseTLS, const char * pszFilename )
{
    CSVTable **ppsLink;
    CSVTable  *psDoomed;

    if( ppsCSVTableList == NULL )
        return;

    /* NULL filename: keep releasing the head entry until none remain. */
    if( pszFilename == NULL )
    {
        while( *ppsCSVTableList != NULL )
            CSVDeaccessInternal( ppsCSVTableList, bCanUseTLS, (*ppsCSVTableList)->pszFilename );

        return;
    }

    /* Walk the chain of "next" links until one points at the table    */
    /* we are looking for (or at the end of the list).                 */
    ppsLink = ppsCSVTableList;
    while( *ppsLink != NULL && !EQUAL((*ppsLink)->pszFilename,pszFilename) )
        ppsLink = &((*ppsLink)->psNext);

    psDoomed = *ppsLink;
    if( psDoomed == NULL )
    {
        if (bCanUseTLS)
            CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
        return;
    }

    /* Unlink: whatever pointed at the table now points past it.       */
    *ppsLink = psDoomed->psNext;

    /* Release the file handle (if still open) and all owned memory.   */
    if( psDoomed->fp != NULL )
        VSIFClose( psDoomed->fp );

    CSLDestroy( psDoomed->papszFieldNames );
    CSLDestroy( psDoomed->papszRecFields );
    CPLFree( psDoomed->pszFilename );
    CPLFree( psDoomed->panLineIndex );
    CPLFree( psDoomed->pszRawData );
    CPLFree( psDoomed->papszLines );

    CPLFree( psDoomed );

    /* NOTE(review): presumably this releases CPLReadLine()'s internal
       line buffer - confirm against its documentation. */
    if (bCanUseTLS)
        CPLReadLine( NULL );
}
|
|
|
|
void CSVDeaccess( const char * pszFilename )
|
|
{
|
|
CSVTable **ppsCSVTableList;
|
|
/* -------------------------------------------------------------------- */
|
|
/* Fetch the table, and allocate the thread-local pointer to it */
|
|
/* if there isn't already one. */
|
|
/* -------------------------------------------------------------------- */
|
|
ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
|
|
|
|
CSVDeaccessInternal(ppsCSVTableList, TRUE, pszFilename);
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVSplitLine() */
|
|
/* */
|
|
/* Tokenize a CSV line into fields in the form of a string */
|
|
/* list. This is used instead of the CPLTokenizeString() */
|
|
/* because it provides correct CSV escaping and quoting */
|
|
/* semantics. */
|
|
/************************************************************************/
|
|
|
|
static char **CSVSplitLine( const char *pszString, char chDelimiter )
|
|
|
|
{
|
|
char **papszRetList = NULL;
|
|
char *pszToken;
|
|
int nTokenMax, nTokenLen;
|
|
|
|
pszToken = (char *) CPLCalloc(10,1);
|
|
nTokenMax = 10;
|
|
|
|
while( pszString != NULL && *pszString != '\0' )
|
|
{
|
|
int bInString = FALSE;
|
|
|
|
nTokenLen = 0;
|
|
|
|
/* Try to find the next delimeter, marking end of token */
|
|
for( ; *pszString != '\0'; pszString++ )
|
|
{
|
|
|
|
/* End if this is a delimeter skip it and break. */
|
|
if( !bInString && *pszString == chDelimiter )
|
|
{
|
|
pszString++;
|
|
break;
|
|
}
|
|
|
|
if( *pszString == '"' )
|
|
{
|
|
if( !bInString || pszString[1] != '"' )
|
|
{
|
|
bInString = !bInString;
|
|
continue;
|
|
}
|
|
else /* doubled quotes in string resolve to one quote */
|
|
{
|
|
pszString++;
|
|
}
|
|
}
|
|
|
|
if( nTokenLen >= nTokenMax-2 )
|
|
{
|
|
nTokenMax = nTokenMax * 2 + 10;
|
|
pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
|
|
}
|
|
|
|
pszToken[nTokenLen] = *pszString;
|
|
nTokenLen++;
|
|
}
|
|
|
|
pszToken[nTokenLen] = '\0';
|
|
papszRetList = CSLAddString( papszRetList, pszToken );
|
|
|
|
/* If the last token is an empty token, then we have to catch
|
|
* it now, otherwise we won't reenter the loop and it will be lost.
|
|
*/
|
|
if ( *pszString == '\0' && *(pszString-1) == chDelimiter )
|
|
{
|
|
papszRetList = CSLAddString( papszRetList, "" );
|
|
}
|
|
}
|
|
|
|
if( papszRetList == NULL )
|
|
papszRetList = (char **) CPLCalloc(sizeof(char *),1);
|
|
|
|
CPLFree( pszToken );
|
|
|
|
return papszRetList;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVFindNextLine() */
|
|
/* */
|
|
/* Find the start of the next line, while at the same time zero */
|
|
/* terminating this line. Take into account that there may be */
|
|
/* newline indicators within quoted strings, and that quotes */
|
|
/* can be escaped with a backslash. */
|
|
/************************************************************************/
|
|
|
|
/*
 * NUL-terminate the record starting at pszThisLine and return a pointer
 * to the start of the following record, or NULL when nothing follows.
 *
 * CR/LF characters inside double quotes do not end the record.  A quote
 * directly preceded by a backslash is not counted as a quote.  Every
 * consecutive CR/LF after the record is overwritten with NUL, so empty
 * lines between records are skipped.
 */
static char *CSVFindNextLine( char *pszThisLine )
{
    char *pszCur = pszThisLine;
    int   bInsideQuotes = 0;

    for( ; *pszCur != '\0'; pszCur++ )
    {
        if( *pszCur == '"'
            && (pszCur == pszThisLine || pszCur[-1] != '\\') )
        {
            bInsideQuotes = !bInsideQuotes;
        }

        if( !bInsideQuotes && (*pszCur == 10 || *pszCur == 13) )
            break;
    }

    /* Wipe the whole run of line terminators. */
    while( *pszCur == 10 || *pszCur == 13 )
    {
        *pszCur = '\0';
        pszCur++;
    }

    if( *pszCur != '\0' )
        return pszCur;

    return NULL;
}
|
|
|
|
/************************************************************************/
|
|
/* CSVIngest() */
|
|
/* */
|
|
/* Load entire file into memory and setup index if possible. */
|
|
/************************************************************************/
|
|
|
|
static void CSVIngest( const char *pszFilename )
|
|
|
|
{
|
|
CSVTable *psTable = CSVAccess( pszFilename );
|
|
int nFileLen, i, nMaxLineCount, iLine = 0;
|
|
char *pszThisLine;
|
|
|
|
if( psTable->pszRawData != NULL )
|
|
return;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Ingest whole file. */
|
|
/* -------------------------------------------------------------------- */
|
|
VSIFSeek( psTable->fp, 0, SEEK_END );
|
|
nFileLen = VSIFTell( psTable->fp );
|
|
VSIRewind( psTable->fp );
|
|
|
|
psTable->pszRawData = (char *) CPLMalloc(nFileLen+1);
|
|
if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp )
|
|
!= nFileLen )
|
|
{
|
|
CPLFree( psTable->pszRawData );
|
|
psTable->pszRawData = NULL;
|
|
|
|
CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.",
|
|
psTable->pszFilename );
|
|
return;
|
|
}
|
|
|
|
psTable->pszRawData[nFileLen] = '\0';
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Get count of newlines so we can allocate line array. */
|
|
/* -------------------------------------------------------------------- */
|
|
nMaxLineCount = 0;
|
|
for( i = 0; i < nFileLen; i++ )
|
|
{
|
|
if( psTable->pszRawData[i] == 10 )
|
|
nMaxLineCount++;
|
|
}
|
|
|
|
psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount);
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Build a list of record pointers into the raw data buffer */
|
|
/* based on line terminators. Zero terminate the line */
|
|
/* strings. */
|
|
/* -------------------------------------------------------------------- */
|
|
/* skip header line */
|
|
pszThisLine = CSVFindNextLine( psTable->pszRawData );
|
|
|
|
while( pszThisLine != NULL && iLine < nMaxLineCount )
|
|
{
|
|
psTable->papszLines[iLine++] = pszThisLine;
|
|
pszThisLine = CSVFindNextLine( pszThisLine );
|
|
}
|
|
|
|
psTable->nLineCount = iLine;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Allocate and populate index array. Ensure they are in */
|
|
/* ascending order so that binary searches can be done on the */
|
|
/* array. */
|
|
/* -------------------------------------------------------------------- */
|
|
psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount);
|
|
for( i = 0; i < psTable->nLineCount; i++ )
|
|
{
|
|
psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
|
|
|
|
if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
|
|
{
|
|
CPLFree( psTable->panLineIndex );
|
|
psTable->panLineIndex = NULL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
psTable->iLastLine = -1;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* We should never need the file handle against, so close it. */
|
|
/* -------------------------------------------------------------------- */
|
|
VSIFClose( psTable->fp );
|
|
psTable->fp = NULL;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVDetectSeperator() */
|
|
/************************************************************************/
|
|
|
|
/** Detect which field separator is used.
|
|
*
|
|
* Currently, it can detect comma, semicolon, space or tabulation. In case of
|
|
* ambiguity or no separator found, comma will be considered as the separator.
|
|
*
|
|
* @return ',', ';', ' ' or '\t'
|
|
*/
|
|
char CSVDetectSeperator (const char* pszLine)
|
|
{
|
|
int bInString = FALSE;
|
|
char chDelimiter = '\0';
|
|
int nCountSpace = 0;
|
|
|
|
for( ; *pszLine != '\0'; pszLine++ )
|
|
{
|
|
if( !bInString && (*pszLine == ',' || *pszLine == ';' || *pszLine == '\t'))
|
|
{
|
|
if (chDelimiter == '\0')
|
|
chDelimiter = *pszLine;
|
|
else if (chDelimiter != *pszLine)
|
|
{
|
|
/* The separator is not consistent on the line. */
|
|
CPLDebug("CSV", "Inconsistent separator. '%c' and '%c' found. Using ',' as default",
|
|
chDelimiter, *pszLine);
|
|
chDelimiter = ',';
|
|
break;
|
|
}
|
|
}
|
|
else if( !bInString && *pszLine == ' ' )
|
|
nCountSpace ++;
|
|
else if( *pszLine == '"' )
|
|
{
|
|
if( !bInString || pszLine[1] != '"' )
|
|
{
|
|
bInString = !bInString;
|
|
continue;
|
|
}
|
|
else /* doubled quotes in string resolve to one quote */
|
|
{
|
|
pszLine++;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (chDelimiter == '\0')
|
|
{
|
|
if( nCountSpace > 0 )
|
|
chDelimiter = ' ';
|
|
else
|
|
chDelimiter = ',';
|
|
}
|
|
|
|
return chDelimiter;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVReadParseLine() */
|
|
/* */
|
|
/* Read one line, and return split into fields. The return */
|
|
/* result is a stringlist, in the sense of the CSL functions. */
|
|
/************************************************************************/
|
|
|
|
char **CSVReadParseLine( FILE * fp )
|
|
{
|
|
return CSVReadParseLine2(fp, ',');
|
|
}
|
|
|
|
char **CSVReadParseLine2( FILE * fp, char chDelimiter )
|
|
|
|
{
|
|
const char *pszLine;
|
|
char *pszWorkLine;
|
|
char **papszReturn;
|
|
|
|
CPLAssert( fp != NULL );
|
|
if( fp == NULL )
|
|
return( NULL );
|
|
|
|
pszLine = CPLReadLine( fp );
|
|
if( pszLine == NULL )
|
|
return( NULL );
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* If there are no quotes, then this is the simple case. */
|
|
/* Parse, and return tokens. */
|
|
/* -------------------------------------------------------------------- */
|
|
if( strchr(pszLine,'\"') == NULL )
|
|
return CSVSplitLine( pszLine, chDelimiter );
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* We must now count the quotes in our working string, and as */
|
|
/* long as it is odd, keep adding new lines. */
|
|
/* -------------------------------------------------------------------- */
|
|
pszWorkLine = CPLStrdup( pszLine );
|
|
|
|
int i = 0, nCount = 0;
|
|
int nWorkLineLength = strlen(pszWorkLine);
|
|
|
|
while( TRUE )
|
|
{
|
|
for( ; pszWorkLine[i] != '\0'; i++ )
|
|
{
|
|
if( pszWorkLine[i] == '\"'
|
|
&& (i == 0 || pszWorkLine[i-1] != '\\') )
|
|
nCount++;
|
|
}
|
|
|
|
if( nCount % 2 == 0 )
|
|
break;
|
|
|
|
pszLine = CPLReadLine( fp );
|
|
if( pszLine == NULL )
|
|
break;
|
|
|
|
int nLineLen = strlen(pszLine);
|
|
|
|
char* pszWorkLineTmp = (char *)
|
|
VSIRealloc(pszWorkLine,
|
|
nWorkLineLength + nLineLen + 2);
|
|
if (pszWorkLineTmp == NULL)
|
|
break;
|
|
pszWorkLine = pszWorkLineTmp;
|
|
strcat( pszWorkLine + nWorkLineLength, "\n" ); // This gets lost in CPLReadLine().
|
|
strcat( pszWorkLine + nWorkLineLength, pszLine );
|
|
|
|
nWorkLineLength += nLineLen + 1;
|
|
}
|
|
|
|
papszReturn = CSVSplitLine( pszWorkLine, chDelimiter );
|
|
|
|
CPLFree( pszWorkLine );
|
|
|
|
return papszReturn;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVCompare() */
|
|
/* */
|
|
/* Compare a field to a search value using a particular */
|
|
/* criteria. */
|
|
/************************************************************************/
|
|
|
|
static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
|
|
CSVCompareCriteria eCriteria )
|
|
|
|
{
|
|
if( eCriteria == CC_ExactString )
|
|
{
|
|
return( strcmp( pszFieldValue, pszTarget ) == 0 );
|
|
}
|
|
else if( eCriteria == CC_ApproxString )
|
|
{
|
|
return( EQUAL( pszFieldValue, pszTarget ) );
|
|
}
|
|
else if( eCriteria == CC_Integer )
|
|
{
|
|
return( atoi(pszFieldValue) == atoi(pszTarget) );
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVScanLines() */
|
|
/* */
|
|
/* Read the file scanline for lines where the key field equals */
|
|
/* the indicated value with the suggested comparison criteria. */
|
|
/* Return the first matching line split into fields. */
|
|
/************************************************************************/
|
|
|
|
char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
|
|
CSVCompareCriteria eCriteria )
|
|
|
|
{
|
|
char **papszFields = NULL;
|
|
int bSelected = FALSE, nTestValue;
|
|
|
|
CPLAssert( pszValue != NULL );
|
|
CPLAssert( iKeyField >= 0 );
|
|
CPLAssert( fp != NULL );
|
|
|
|
nTestValue = atoi(pszValue);
|
|
|
|
while( !bSelected ) {
|
|
papszFields = CSVReadParseLine( fp );
|
|
if( papszFields == NULL )
|
|
return( NULL );
|
|
|
|
if( CSLCount( papszFields ) < iKeyField+1 )
|
|
{
|
|
/* not selected */
|
|
}
|
|
else if( eCriteria == CC_Integer
|
|
&& atoi(papszFields[iKeyField]) == nTestValue )
|
|
{
|
|
bSelected = TRUE;
|
|
}
|
|
else
|
|
{
|
|
bSelected = CSVCompare( papszFields[iKeyField], pszValue,
|
|
eCriteria );
|
|
}
|
|
|
|
if( !bSelected )
|
|
{
|
|
CSLDestroy( papszFields );
|
|
papszFields = NULL;
|
|
}
|
|
}
|
|
|
|
return( papszFields );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVScanLinesIndexed() */
|
|
/* */
|
|
/* Read the file scanline for lines where the key field equals */
|
|
/* the indicated value with the suggested comparison criteria. */
|
|
/* Return the first matching line split into fields. */
|
|
/************************************************************************/
|
|
|
|
static char **
|
|
CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )
|
|
|
|
{
|
|
int iTop, iBottom, iMiddle, iResult = -1;
|
|
|
|
CPLAssert( psTable->panLineIndex != NULL );
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Find target record with binary search. */
|
|
/* -------------------------------------------------------------------- */
|
|
iTop = psTable->nLineCount-1;
|
|
iBottom = 0;
|
|
|
|
while( iTop >= iBottom )
|
|
{
|
|
iMiddle = (iTop + iBottom) / 2;
|
|
if( psTable->panLineIndex[iMiddle] > nKeyValue )
|
|
iTop = iMiddle - 1;
|
|
else if( psTable->panLineIndex[iMiddle] < nKeyValue )
|
|
iBottom = iMiddle + 1;
|
|
else
|
|
{
|
|
iResult = iMiddle;
|
|
// if a key is not unique, select the first instance of it.
|
|
while( iResult > 0
|
|
&& psTable->panLineIndex[iResult-1] == nKeyValue )
|
|
{
|
|
psTable->bNonUniqueKey = TRUE;
|
|
iResult--;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
if( iResult == -1 )
|
|
return NULL;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Parse target line, and update iLastLine indicator. */
|
|
/* -------------------------------------------------------------------- */
|
|
psTable->iLastLine = iResult;
|
|
|
|
return CSVSplitLine( psTable->papszLines[iResult], ',' );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVScanLinesIngested() */
|
|
/* */
|
|
/* Read the file scanline for lines where the key field equals */
|
|
/* the indicated value with the suggested comparison criteria. */
|
|
/* Return the first matching line split into fields. */
|
|
/************************************************************************/
|
|
|
|
static char **
|
|
CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
|
|
CSVCompareCriteria eCriteria )
|
|
|
|
{
|
|
char **papszFields = NULL;
|
|
int bSelected = FALSE, nTestValue;
|
|
|
|
CPLAssert( pszValue != NULL );
|
|
CPLAssert( iKeyField >= 0 );
|
|
|
|
nTestValue = atoi(pszValue);
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Short cut for indexed files. */
|
|
/* -------------------------------------------------------------------- */
|
|
if( iKeyField == 0 && eCriteria == CC_Integer
|
|
&& psTable->panLineIndex != NULL )
|
|
return CSVScanLinesIndexed( psTable, nTestValue );
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Scan from in-core lines. */
|
|
/* -------------------------------------------------------------------- */
|
|
while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
|
|
psTable->iLastLine++;
|
|
papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
|
|
|
|
if( CSLCount( papszFields ) < iKeyField+1 )
|
|
{
|
|
/* not selected */
|
|
}
|
|
else if( eCriteria == CC_Integer
|
|
&& atoi(papszFields[iKeyField]) == nTestValue )
|
|
{
|
|
bSelected = TRUE;
|
|
}
|
|
else
|
|
{
|
|
bSelected = CSVCompare( papszFields[iKeyField], pszValue,
|
|
eCriteria );
|
|
}
|
|
|
|
if( !bSelected )
|
|
{
|
|
CSLDestroy( papszFields );
|
|
papszFields = NULL;
|
|
}
|
|
}
|
|
|
|
return( papszFields );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVGetNextLine() */
|
|
/* */
|
|
/* Fetch the next line of a CSV file based on a passed in */
|
|
/* filename. Returns NULL at end of file, or if file is not */
|
|
/* really established. */
|
|
/************************************************************************/
|
|
|
|
char **CSVGetNextLine( const char *pszFilename )
|
|
|
|
{
|
|
CSVTable *psTable;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Get access to the table. */
|
|
/* -------------------------------------------------------------------- */
|
|
CPLAssert( pszFilename != NULL );
|
|
|
|
psTable = CSVAccess( pszFilename );
|
|
if( psTable == NULL )
|
|
return NULL;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* If we use CSVGetNextLine() we can pretty much assume we have */
|
|
/* a non-unique key. */
|
|
/* -------------------------------------------------------------------- */
|
|
psTable->bNonUniqueKey = TRUE;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Do we have a next line available? This only works for */
|
|
/* ingested tables I believe. */
|
|
/* -------------------------------------------------------------------- */
|
|
if( psTable->iLastLine+1 >= psTable->nLineCount )
|
|
return NULL;
|
|
|
|
psTable->iLastLine++;
|
|
CSLDestroy( psTable->papszRecFields );
|
|
psTable->papszRecFields =
|
|
CSVSplitLine( psTable->papszLines[psTable->iLastLine], ',' );
|
|
|
|
return psTable->papszRecFields;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVScanFile() */
|
|
/* */
|
|
/* Scan a whole file using criteria similar to above, but also */
|
|
/* taking care of file opening and closing. */
|
|
/************************************************************************/
|
|
|
|
char **CSVScanFile( const char * pszFilename, int iKeyField,
|
|
const char * pszValue, CSVCompareCriteria eCriteria )
|
|
|
|
{
|
|
CSVTable *psTable;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Get access to the table. */
|
|
/* -------------------------------------------------------------------- */
|
|
CPLAssert( pszFilename != NULL );
|
|
|
|
if( iKeyField < 0 )
|
|
return NULL;
|
|
|
|
psTable = CSVAccess( pszFilename );
|
|
if( psTable == NULL )
|
|
return NULL;
|
|
|
|
CSVIngest( pszFilename );
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Does the current record match the criteria? If so, just */
|
|
/* return it again. */
|
|
/* -------------------------------------------------------------------- */
|
|
if( iKeyField >= 0
|
|
&& iKeyField < CSLCount(psTable->papszRecFields)
|
|
&& CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria)
|
|
&& !psTable->bNonUniqueKey )
|
|
{
|
|
return psTable->papszRecFields;
|
|
}
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Scan the file from the beginning, replacing the ``current */
|
|
/* record'' in our structure with the one that is found. */
|
|
/* -------------------------------------------------------------------- */
|
|
psTable->iLastLine = -1;
|
|
CSLDestroy( psTable->papszRecFields );
|
|
|
|
if( psTable->pszRawData != NULL )
|
|
psTable->papszRecFields =
|
|
CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
|
|
else
|
|
{
|
|
VSIRewind( psTable->fp );
|
|
CPLReadLine( psTable->fp ); /* throw away the header line */
|
|
|
|
psTable->papszRecFields =
|
|
CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
|
|
}
|
|
|
|
return( psTable->papszRecFields );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVGetFieldId() */
|
|
/* */
|
|
/* Read the first record of a CSV file (rewinding to be sure), */
|
|
/* and find the field with the indicated name. Returns -1 if */
|
|
/* it fails to find the field name. Comparison is case */
|
|
/* insensitive, but otherwise exact. After this function has */
|
|
/* been called the file pointer will be positioned just after */
|
|
/* the first record. */
|
|
/************************************************************************/
|
|
|
|
int CSVGetFieldId( FILE * fp, const char * pszFieldName )
|
|
|
|
{
|
|
char **papszFields;
|
|
int i;
|
|
|
|
CPLAssert( fp != NULL && pszFieldName != NULL );
|
|
|
|
VSIRewind( fp );
|
|
|
|
papszFields = CSVReadParseLine( fp );
|
|
for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
|
|
{
|
|
if( EQUAL(papszFields[i],pszFieldName) )
|
|
{
|
|
CSLDestroy( papszFields );
|
|
return i;
|
|
}
|
|
}
|
|
|
|
CSLDestroy( papszFields );
|
|
|
|
return -1;
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVGetFileFieldId() */
|
|
/* */
|
|
/* Same as CSVGetFieldId(), except that we get the file based */
|
|
/* on filename, rather than having an existing handle. */
|
|
/************************************************************************/
|
|
|
|
int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
|
|
|
|
{
|
|
CSVTable *psTable;
|
|
int i;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Get access to the table. */
|
|
/* -------------------------------------------------------------------- */
|
|
CPLAssert( pszFilename != NULL );
|
|
|
|
psTable = CSVAccess( pszFilename );
|
|
if( psTable == NULL )
|
|
return -1;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Find the requested field. */
|
|
/* -------------------------------------------------------------------- */
|
|
for( i = 0;
|
|
psTable->papszFieldNames != NULL
|
|
&& psTable->papszFieldNames[i] != NULL;
|
|
i++ )
|
|
{
|
|
if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
|
|
{
|
|
return i;
|
|
}
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
|
|
/************************************************************************/
|
|
/* CSVScanFileByName() */
|
|
/* */
|
|
/* Same as CSVScanFile(), but using a field name instead of a */
|
|
/* field number. */
|
|
/************************************************************************/
|
|
|
|
char **CSVScanFileByName( const char * pszFilename,
|
|
const char * pszKeyFieldName,
|
|
const char * pszValue, CSVCompareCriteria eCriteria )
|
|
|
|
{
|
|
int iKeyField;
|
|
|
|
iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
|
|
if( iKeyField == -1 )
|
|
return NULL;
|
|
|
|
return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVGetField() */
|
|
/* */
|
|
/* The all-in-one function to fetch a particular field value */
|
|
/* from a CSV file. Note this function will return an empty */
|
|
/* string, rather than NULL if it fails to find the desired */
|
|
/* value for some reason. The caller can't establish that the */
|
|
/* fetch failed. */
|
|
/************************************************************************/
|
|
|
|
const char *CSVGetField( const char * pszFilename,
|
|
const char * pszKeyFieldName,
|
|
const char * pszKeyFieldValue,
|
|
CSVCompareCriteria eCriteria,
|
|
const char * pszTargetField )
|
|
|
|
{
|
|
CSVTable *psTable;
|
|
char **papszRecord;
|
|
int iTargetField;
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Find the table. */
|
|
/* -------------------------------------------------------------------- */
|
|
psTable = CSVAccess( pszFilename );
|
|
if( psTable == NULL )
|
|
return "";
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Find the correct record. */
|
|
/* -------------------------------------------------------------------- */
|
|
papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
|
|
pszKeyFieldValue, eCriteria );
|
|
|
|
if( papszRecord == NULL )
|
|
return "";
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Figure out which field we want out of this. */
|
|
/* -------------------------------------------------------------------- */
|
|
iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
|
|
if( iTargetField < 0 )
|
|
return "";
|
|
|
|
if( iTargetField >= CSLCount( papszRecord ) )
|
|
return "";
|
|
|
|
return( papszRecord[iTargetField] );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* GDALDefaultCSVFilename() */
|
|
/************************************************************************/
|
|
|
|
typedef struct
|
|
{
|
|
char szPath[512];
|
|
int bCSVFinderInitialized;
|
|
} DefaultCSVFileNameTLS;
|
|
|
|
|
|
const char * GDALDefaultCSVFilename( const char *pszBasename )
|
|
|
|
{
|
|
/* -------------------------------------------------------------------- */
|
|
/* Do we already have this file accessed? If so, just return */
|
|
/* the existing path without any further probing. */
|
|
/* -------------------------------------------------------------------- */
|
|
CSVTable **ppsCSVTableList;
|
|
|
|
ppsCSVTableList = (CSVTable **) CPLGetTLS( CTLS_CSVTABLEPTR );
|
|
if( ppsCSVTableList != NULL )
|
|
{
|
|
CSVTable *psTable;
|
|
int nBasenameLen = strlen(pszBasename);
|
|
|
|
for( psTable = *ppsCSVTableList;
|
|
psTable != NULL;
|
|
psTable = psTable->psNext )
|
|
{
|
|
int nFullLen = strlen(psTable->pszFilename);
|
|
|
|
if( nFullLen > nBasenameLen
|
|
&& strcmp(psTable->pszFilename+nFullLen-nBasenameLen,
|
|
pszBasename) == 0
|
|
&& strchr("/\\",psTable->pszFilename[+nFullLen-nBasenameLen-1])
|
|
!= NULL )
|
|
{
|
|
return psTable->pszFilename;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* -------------------------------------------------------------------- */
|
|
/* Otherwise we need to look harder for it. */
|
|
/* -------------------------------------------------------------------- */
|
|
DefaultCSVFileNameTLS* pTLSData =
|
|
(DefaultCSVFileNameTLS *) CPLGetTLS( CTLS_CSVDEFAULTFILENAME );
|
|
if (pTLSData == NULL)
|
|
{
|
|
pTLSData = (DefaultCSVFileNameTLS*) CPLCalloc(1, sizeof(DefaultCSVFileNameTLS));
|
|
CPLSetTLS( CTLS_CSVDEFAULTFILENAME, pTLSData, TRUE );
|
|
}
|
|
|
|
FILE *fp = NULL;
|
|
const char *pszResult;
|
|
|
|
pszResult = CPLFindFile( "epsg_csv", pszBasename );
|
|
|
|
if( pszResult != NULL )
|
|
return pszResult;
|
|
|
|
if( !pTLSData->bCSVFinderInitialized )
|
|
{
|
|
pTLSData->bCSVFinderInitialized = TRUE;
|
|
|
|
if( CPLGetConfigOption("GEOTIFF_CSV",NULL) != NULL )
|
|
CPLPushFinderLocation( CPLGetConfigOption("GEOTIFF_CSV",NULL));
|
|
|
|
if( CPLGetConfigOption("GDAL_DATA",NULL) != NULL )
|
|
CPLPushFinderLocation( CPLGetConfigOption("GDAL_DATA",NULL) );
|
|
|
|
pszResult = CPLFindFile( "epsg_csv", pszBasename );
|
|
|
|
if( pszResult != NULL )
|
|
return pszResult;
|
|
}
|
|
|
|
#ifdef GDAL_PREFIX
|
|
#ifdef MACOSX_FRAMEWORK
|
|
strcpy( pTLSData->szPath, GDAL_PREFIX "/Resources/epsg_csv/" );
|
|
CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
|
|
#else
|
|
strcpy( pTLSData->szPath, GDAL_PREFIX "/share/epsg_csv/" );
|
|
CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
|
|
#endif
|
|
#else
|
|
strcpy( pTLSData->szPath, "/usr/local/share/epsg_csv/" );
|
|
CPLStrlcat( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
|
|
#endif
|
|
if( (fp = fopen( pTLSData->szPath, "rt" )) == NULL )
|
|
CPLStrlcpy( pTLSData->szPath, pszBasename, sizeof(pTLSData->szPath) );
|
|
|
|
if( fp != NULL )
|
|
fclose( fp );
|
|
|
|
return( pTLSData->szPath );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* CSVFilename() */
|
|
/* */
|
|
/* Return the full path to a particular CSV file. This will */
|
|
/* eventually be something the application can override. */
|
|
/************************************************************************/
|
|
|
|
CPL_C_START
|
|
static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
|
|
CPL_C_END
|
|
|
|
const char * CSVFilename( const char *pszBasename )
|
|
|
|
{
|
|
if( pfnCSVFilenameHook == NULL )
|
|
return GDALDefaultCSVFilename( pszBasename );
|
|
else
|
|
return( pfnCSVFilenameHook( pszBasename ) );
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* SetCSVFilenameHook() */
|
|
/* */
|
|
/* Applications can use this to set a function that will */
|
|
/* massage CSV filenames. */
|
|
/************************************************************************/
|
|
|
|
/**
|
|
* Override CSV file search method.
|
|
*
|
|
* @param pfnNewHook The pointer to a function which will return the
|
|
* full path for a given filename.
|
|
*
|
|
|
|
This function allows an application to override how GTIFGetDefn() and related functions find the CSV (Comma Separated
Value) files they require. The pfnNewHook argument should be a pointer to a function that takes a CSV filename and returns the
full path to the file. The returned string should point to an internal static buffer so that the caller doesn't have to free the result.
|
|
|
|
<b>Example:</b><br>
|
|
|
|
The listgeo utility uses the following override function if the user
|
|
specified a CSV file directory with the -t commandline switch (argument
|
|
put into CSVDirName). <p>
|
|
|
|
<pre>
|
|
|
|
...
|
|
|
|
|
|
SetCSVFilenameHook( CSVFileOverride );
|
|
|
|
...
|
|
|
|
|
|
static const char *CSVFileOverride( const char * pszInput )
|
|
|
|
{
|
|
static char szPath[1024];
|
|
|
|
#ifdef WIN32
|
|
sprintf( szPath, "%s\\%s", CSVDirName, pszInput );
|
|
#else
|
|
sprintf( szPath, "%s/%s", CSVDirName, pszInput );
|
|
#endif
|
|
|
|
return( szPath );
|
|
}
|
|
</pre>
|
|
|
|
*/
|
|
|
|
CPL_C_START
|
|
void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
|
|
|
|
{
|
|
pfnCSVFilenameHook = pfnNewHook;
|
|
}
|
|
CPL_C_END
|