Developer's Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
Example 6 - Look-Ahead and Look-Behind
  • Examples of standard and negative look-ahead and look-behind
  • Methods used: compile(), search() and setTrace()
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory> // nothrow
#include "p6rgx.h"
using namespace P6R;
P6DECLARE_CID( p6WRegex );
P6DECLARE_CID( p6WSplit );
P6DECLARE_CID( p6Regex );
P6DECLARE_CID( p6Split );
namespace {
//
// Simple p6IDataStream implementation which outputs
// data to stdout (via printf). This implementation
// is not thread-safe, but is adequate for this
// example.
//
class CConsoleStream : public P6R::p6IDataStream
{
public:
//
// Standard [p6]COM queryInterface() method.
//
P6COMMETHOD queryInterface( const P6R::IID &iid, P6R::P6VOID **ppIface )
{
if (!ppIface) return eAccessFault;
*ppIface = NULL;
if(iid == IID_p6ICom) *ppIface = static_cast<p6IDataStream*>(this);
else if(iid == IID_p6IDataStream) *ppIface = static_cast<p6IDataStream*>(this);
else return eNoInterface;
reinterpret_cast<p6ICom*>(*ppIface)->addref();
return eOk;
}
//
// Standard [p6]COM addref() method.
//
{
return m_cRef++;
}
//
// Standard [p6]COM release() method.
//
{
P6INT32 tmp = 0;
if(0 == (tmp = (--m_cRef))) { delete this; }
return tmp;
}
//
// beginStream() method prepares the stream for use.
//
P6COMMETHOD beginStream()
{
return eOk;
}
//
// processStream() is call to pass data to the data stream.
// This method then performs it's stream specific
// operations on the data (in this case, writing it out
// to stdout).
//
// Since printf() requires an ASCIIZ string (NULL terminated string),
// this method first copies the data into a buffer and NULL
// terminates, then passes that to printf().
//
P6COMMETHOD processStream( const P6R::P6VOID* pData, P6R::P6UINT32 cData )
{
//
// Free the buffer if we do not have enough room
//
if (m_cBuffer && cData+1 > m_cBuffer)
{
free( m_pBuffer );
m_pBuffer = NULL;
m_cBuffer = 0;
}
//
// Allocate the buffer if not already allocated
//
if (!m_pBuffer)
{
m_pBuffer = (P6CHAR*) malloc(cData+1);
m_cBuffer = cData;
}
//
// Output the data to the console
//
if (m_pBuffer)
{
memcpy( m_pBuffer, pData, cData );
m_pBuffer[cData] = '\0';
printf( m_pBuffer );
}
return eOk;
}
//
// endStream() is called to notify the stream that there is
// no more data to be processed. In this simple case there is
// nothing to do.
//
P6COMMETHOD endStream() { return eOk; }
//
// This method creates a new uninitialized instance
// of our component and returns a pointer to the
// requested interface.
//
static P6R::P6ERR createInstance( P6R::p6ICom *pOuter, const P6R::IID &iid, P6R::P6VOID **ppIface )
{
P6ERR err = eNoMemory;
CConsoleStream *pObj = NULL;
if(NULL != pOuter) return eNoAgregation;
if(NULL == ppIface) return eAccessFault;
*ppIface = NULL;
if (NULL != (pObj = new (std::nothrow) CConsoleStream() ))
{
pObj->addref();
err = pObj->queryInterface( iid, ppIface );
pObj->release();
}
return err;
}
CConsoleStream() : m_cRef(0), m_pBuffer(NULL), m_cBuffer(0) {;}
virtual ~CConsoleStream() { if (m_pBuffer) free(m_pBuffer); }
protected:
P6INT32 m_cRef; // Holds the components reference count
P6CHAR *m_pBuffer; // A pointer to our output buffer.
P6UINT32 m_cBuffer; // The current size in characters of our output buffer
};
P6R::P6ERR runRegex( p6IDataStream *pStreamDebug )
{
P6CHAR szErr[64];
P6UINT32 offset = 0;
P6UINT32 strLength = 0;
P6ERR err = eOk;
if (P6FAILED( err = p6CreateInstance( NULL, CID_p6WRegex, VALIDATECOMPTR( p6IWRegex, cpWRegex )))) return err;
if (P6FAILED( err = cpWRegex->initialize( P6WREGEX_NOFLAGS, P6WREGEX_PERL ))) return err;
// [A] Look behind and look ahead examples
// -> look behind for a certain value
err = cpWRegex->compile( P6TEXT("(?<=\\$4400)\\bwasher\\b"), P6MOD_NULL );
if (P6FAILED( err )) printf( "1 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price $4400 for the washer brand"), (P6MOD_FASTGREEDY | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "2 - Expected 0 but got %x\n", err );
if (!(20 == offset && 6 == strLength)) printf( "3 - Expected 20,6 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price $4400 for washer brand"), (P6MOD_NULL | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "4 - Expected 0 but got %x\n", err );
if (!(16 == offset && 6 == strLength)) printf( "5 - Expected 16,6 but got %d,%d\n", offset, strLength );
// -> negative look behind for a certain value
err = cpWRegex->compile( P6TEXT("(?<!\\$4400)\\bwasher\\b"), P6MOD_NULL );
if (P6FAILED( err )) printf( "6 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price for the good washer"), (P6MOD_FASTGREEDY | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "7 - Expected 0 but got %x\n", err );
if (!(19 == offset && 6 == strLength)) printf( "8 - Expected 19,6 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price for the good washer"), (P6MOD_NULL | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "9 - Expected 0 but got %x\n", err );
if (!(19 == offset && 6 == strLength)) printf( "10 - Expected 19,6 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price $4400 good washer"), (P6MOD_FASTGREEDY | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "11 - Expected 0 but got %x\n", err );
if (!(0 == offset && 0 == strLength)) printf( "12 - Expected 0,0 but got %d,%d\n", offset, strLength );
// -> negative look ahead,
err = cpWRegex->compile( P6TEXT("\\b(?!Cat)\\w+"), P6MOD_NULL );
if (P6FAILED( err )) printf( "13 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Cattle are fine"), P6MOD_FASTGREEDY, &offset, &strLength );
if (P6FAILED( err )) printf( "14 - Expected 0 but got %x\n", err );
if (!(7 == offset && 3 == strLength)) printf( "15 - Expected 7,3 but got %d,%d\n", offset, strLength );
// -> STANDARD look behind for a certain value
err = cpWRegex->compile( P6TEXT("(?<=\\$4400 )\\bwasher\\b"), P6MOD_NULL );
if (P6FAILED( err )) printf( "16 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price $4400 washer brand"), P6MOD_FASTGREEDY, &offset, &strLength );
if (P6FAILED( err )) printf( "17 - Expected 0 but got %x\n", err );
if (!(12 == offset && 6 == strLength)) printf( "18 - Expected 12,6 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Price $4400 washer brand"), P6MOD_NULL, &offset, &strLength );
if (P6FAILED( err )) printf( "19 - Expected 0 but got %x\n", err );
if (!(12 == offset && 6 == strLength)) printf( "20 - Expected 12,6 but got %d,%d\n", offset, strLength );
// -> allow more readable regexes, as white space is ignored
err = cpWRegex->compile( P6TEXT("\\b( \\w+ ) \\s (?= \\1\\b (?! ' \\w | (?<=that) \\s))"), P6MOD_SKIPWHITESPACE );
if (P6FAILED( err )) printf( "21 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Now is the time time for all good men"), P6MOD_FASTGREEDY, &offset, &strLength );
if (P6FAILED( err )) printf( "22 - Expected 0 but got %x\n", err );
if (!(11 == offset && 5 == strLength)) printf( "23 - Expected 11,5 but got %d,%d\n", offset, strLength );
// -> is it an email header ?
// NOTE: we have no restriction in the expression in the lookbehind
err = cpWRegex->compile( P6TEXT("\\d+-\\d+-\\d+(?<=^\\w+:)"), P6MOD_NULL );
if (P6FAILED( err )) printf( "24 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Subject: unlimited 123-45-5 abc"), (P6MOD_NULL | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "25 - Expected 0 but got %x\n", err );
if (!(19 == offset && 8 == strLength)) printf( "26 - Expected 19,8 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Subject: unlimited 123-45-5 abc"), (P6MOD_FASTGREEDY | P6MOD_FULLLOOKBEHIND), &offset, &strLength );
if (P6FAILED( err )) printf( "27 - Expected 0 but got %x\n", err );
if (!(19 == offset && 8 == strLength)) printf( "28 - Expected 19,8 but got %d,%d\n", offset, strLength );
// [B] Find the first occurence of a double word
err = cpWRegex->compile( P6TEXT("\\b(\\w+)\\s(?=\\1\\b)"), P6MOD_NULL );
if (P6FAILED( err )) printf( "29 - Expected 0 but got %x\n", err );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Now is the time time for all good men"), P6MOD_FASTGREEDY, &offset, &strLength );
if (P6FAILED( err )) printf( "30 - Expected 0 but got %x\n", err );
if (!(11 == offset && 5 == strLength)) printf( "31 - Expected 11,5 but got %d,%d\n", offset, strLength );
offset = 0;
strLength = 0;
err = cpWRegex->search( P6TEXT("Now is the time time for all good men"), P6MOD_NULL, &offset, &strLength );
if (P6FAILED( err )) printf( "32 - Expected 0 but got %x\n", err );
if (!(11 == offset && 5 == strLength)) printf( "33 - Expected 11,5 but got %d,%d\n", offset, strLength );
return err;
}
} // namespace
int main(int argc,char *argv[])
{
P6ERR err = eOk;
P6CHAR szTmp[32];
if ( P6SUCCEEDED( err = CConsoleStream::createInstance( NULL, VALIDATECOMPTR( p6IDataStream, cpDataStream ))))
{
if ( P6SUCCEEDED( err = p6InitializeLoader( cpDataStream, 9, P6SCLF_NOFLAGS )))
{
err = runRegex( cpDataStream );
printf( "runRegex result: [ %s ]\n", p6ErrToStr(err, &szTmp[0], P6CHARCNT(szTmp)) );
}
else printf("ERROR: Failed to initialize the loader [ %x ]\n", err );
}
else printf( "ERROR: Failed to create CConsoleStream [ %x ]\n", err );
return err;
}