/******************************************************************************
 JRegex.h

	Interface for the JRegex class.

	Copyright (C) 1997 by Dustin Laurence.  All rights reserved.
		
	Base code generated by Codemill v0.1.0

 *****************************************************************************/

#ifndef _H_JRegex
#define _H_JRegex

//#define JRE_ALLOC_CHECK
//#define JRE_PRINT_COMPILE_ERRORS
#define JRE_MAGIC 0

#include <JString.h>
#include <JArray.h>
#include <JError.h>

	class JSubstitute;
	class JInterpolate;

// JRegex holds a regular expression pattern (itsPattern) together with its
// compiled form (itsRegex), separate sets of compile-time and execute-time
// option flags, and a replacement pattern.  The public interface is grouped
// into: setting/examining the pattern, the Match* family, the Replace
// family, and option accessors.  Errors are reported as JError values.
class JRegex
{
#if JRE_MAGIC
private:

		// This class gives JRegex its telepathic DWIM interface--it should
		// never be of direct concern to anyone else!
		//
		// It is only compiled when JRE_MAGIC is non-zero, in which case it
		// is the return type of the one-argument Match() and converts to
		// either bool or JIndexRange.
		class JRegexMagic
		{
			friend class JRegex;

		public:

			virtual ~JRegexMagic();

			operator bool() const;
			operator JIndexRange() const;

		private:

			const JRegex*     itsRegex;
			const JCharacter* itsCString;

		private:

			// Only JRegex may create
			JRegexMagic(const JRegex* regex, const JCharacter* string);

			// Not allowed
			JRegexMagic(const JRegexMagic& source);
			const JRegexMagic& operator=(const JRegexMagic& source);
		};
#endif

public:

	// Pattern dialects accepted by SetDialect() and reported by GetDialect().
	enum Dialect
	{
		kLiteral,
		kBasic,
		kExtended
	};

public:

	JRegex();
	JRegex(const JCharacter* pattern, const JBoolean useJExtended = kFalse);
	JRegex(const JCharacter* pattern, const JSize length, const JBoolean useJExtended = kFalse);

	virtual ~JRegex();

	JRegex(const JRegex& source);
	const JRegex& operator=(const JRegex& source);

// Setting and examining patterns

	// The SetPattern() overloads report failure through the returned JError;
	// the SetPatternOrDie() overloads return nothing.
	JError SetPattern(const JString& pattern);
	JError SetPattern(const JCharacter* pattern, const JSize length);
	JError SetPattern(const JCharacter* pattern);

	void SetPatternOrDie(const JCharacter* pattern);
	void SetPatternOrDie(const JCharacter* pattern, const JSize length);
	void SetPatternOrDie(const JString& pattern);

	void           GetPattern(const JCharacter** pattern, JSize* length) const;
	const JString& GetPattern() const;

	// JAFL 5/11/98
	static JBoolean	NeedsBackslashToBeLiteral(const JCharacter c);

// Pattern-related settings and statistics

	void     SetNoJExtended(const JBoolean yesNo = kTrue);
	JBoolean IsNoJExtended() const;

	JBoolean ContainsNULL() const;
	JSize    NULLCount() const;

	JSize    GetSubCount() const;

#if JRE_MAGIC
/******************************************************************************
 New Match and friends

 	This code is development only at the moment.

 *****************************************************************************/

	JRegexMagic Match(const JCharacter* string) const;
#endif

/******************************************************************************
 Match and friends

	possible future interface extensions included in comments

 *****************************************************************************/

// These versions simply determine whether there is a match

	// Return one match, yes/no
#if !JRE_MAGIC
	JBoolean Match(const JCharacter* string) const;
#endif
	JBoolean MatchFrom(const JCharacter* string, const JIndex index) const;
	JBoolean MatchAfter(const JCharacter* string, const JIndexRange& range) const;
	JBoolean MatchWithin(const JCharacter* string, const JIndexRange& range) const;

	// Count all matches
	JSize    MatchAll(const JCharacter* string) const;

// These versions return the overall match ranges

	// First match
#if !JRE_MAGIC
	JBoolean Match(const JCharacter* string, JIndexRange* match) const;
#endif
	JBoolean MatchFrom(const JCharacter* string, const JIndex index, JIndexRange* match) const;
	JBoolean MatchAfter(const JCharacter* string, const JIndexRange& range, JIndexRange* match) const;
	JBoolean MatchWithin(const JCharacter* string, const JIndexRange& range, JIndexRange* match) const;

	// Last match
	JSize    MatchLast(const JCharacter* string, JIndexRange* match) const;
//	JSize    MatchLastFrom(const JCharacter* string, const JIndex index, JIndexRange* match) const;
//	JSize    MatchLastAfter(const JCharacter* string, const JIndexRange& range, JIndexRange* match) const;
	JSize    MatchLastWithin(const JCharacter* string, const JIndexRange& range, JIndexRange* match) const;

	// All Matches
	JSize    MatchAll(const JCharacter* string, JIndexRange matchArray[], const JSize arraySize) const;

	JSize    MatchAll(const JCharacter* string, JArray<JIndexRange>* matchList) const;
//	JSize    MatchAllFrom(const JCharacter* string, const JIndex index, JArray<JIndexRange>* matchList) const;
//	JSize    MatchAllAfter(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* matchList) const;
	JSize    MatchAllWithin(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* matchList) const;

// These versions return a list of subexpression matches from the first match

	JBoolean Match(const JCharacter* string, JIndexRange subMatchArray[], const JSize size) const;

	// First match
	JBoolean Match(const JCharacter* string, JArray<JIndexRange>* subMatchList) const;
	JBoolean MatchFrom(const JCharacter* string, const JIndex index, JArray<JIndexRange>* subMatchList) const;
	JBoolean MatchAfter(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* subMatchList) const;
	JBoolean MatchWithin(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* subMatchList) const;

	// Last match
//	JSize    MatchLast(const JCharacter* string, JArray<JIndexRange>* subMatchList) const;
//	JSize    MatchLastFrom(const JCharacter* string, const JIndex index, JArray<JIndexRange>* subMatchList) const;
//	JSize    MatchLastAfter(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* subMatchList) const;
	JSize    MatchLastWithin(const JCharacter* string, const JIndexRange& range, JArray<JIndexRange>* subMatchList) const;

// Search backwards

	JBoolean MatchBackward(const JCharacter* string, const JIndex index, JIndexRange* match) const;
	JSize    MatchBackward(const JCharacter* string, const JIndex index, JArray<JIndexRange>* matchList) const;

// Split--a sort of inverse Match that can be very useful.

//	JSize Split(const JCharacter* string, JArray<JIndexRange>* splitList) const;

/******************************************************************************
 Replace and friends

 *****************************************************************************/

	JString InterpolateMatches(const JCharacter* sourceString, const JArray<JIndexRange>& matchList) const;

	void    Replace(JString* string, const JArray<JIndexRange>& matchList,
	                JIndexRange* newRange) const;
	void    Replace(JString* string, const JIndexRange& oldRange,
	                JIndexRange* newRange) const;

// Setting and testing options

	JError  SetDialect(const Dialect dialect);
	Dialect GetDialect() const;

	// Each Set...() below defaults its argument to kTrue, so calling it with
	// no argument turns the named behaviour on.
	void     SetCaseSensitive(const JBoolean yesNo = kTrue);
	JBoolean IsCaseSensitive() const;
	void     SetSingleLine(const JBoolean yesNo = kTrue);
	JBoolean IsSingleLine() const;
	void     SetMatchOnly(const JBoolean yesNo = kTrue);
	JBoolean IsMatchOnly() const;
	void     SetLineBegin(const JBoolean yesNo = kTrue);
	JBoolean IsLineBegin() const;
	void     SetLineEnd(const JBoolean yesNo = kTrue);
	JBoolean IsLineEnd() const;
	void     SetLiteralReplace(const JBoolean yesNo = kTrue);
	JBoolean IsLiteralReplace() const;
	void     SetMatchCase(const JBoolean yesNo = kTrue);
	JBoolean IsMatchCase() const;

	// errRange is optional (defaults to NULL) in all three overloads.
	JError SetReplacePattern(const JCharacter* pattern, JIndexRange* errRange = NULL);
	JError SetReplacePattern(const JString& pattern, JIndexRange* errRange = NULL);

	JError SetReplacePattern(const JCharacter* pattern, const JSize length,
							 JIndexRange* errRange = NULL);

	const JString& GetReplacePattern() const;

	JError RestoreDefaults();

	// Direct access to the internal escape and match substitution engines,
	// in case you wish to customize them.

	JSubstitute*  GetPatternEscapeEngine() const;
	JInterpolate* GetMatchInterpolator()   const;

private:

	enum PatternState
	{
		kEmpty,         // Do not have a pattern, itsRegex is freed
		kCannotCompile, // Have one, but it can't be compiled in this dialect, itsRegex is freed
		kRecompile,     // A parameter changed, recompile before next match, itsRegex is allocated
		kReady          // and rarin' to go, obviously itsRegex is allocated
	};

	// The pattern text handed back by both GetPattern() overloads, and the
	// count reported by NULLCount() / tested by ContainsNULL().
	JString itsPattern;
	JSize   itsNULLCount;

	// NOTE(review): regex_t, regmatch_t and the REG_* constants used by the
	// inline functions in this file are not declared by any header that is
	// visibly included here -- presumably <regex.h> arrives through one of
	// the J*.h includes; confirm.
	regex_t itsRegex;

	// itsCFlags is queried for REG_ICASE, REG_NEWLINE and REG_NOSUB;
	// itsEFlags is queried for REG_NOTBOL and REG_NOTEOL (see the inline
	// Is...() accessors in this file).
	int itsCFlags;
	int itsEFlags;

	// Dereferenced unconditionally by GetReplacePattern().
	// NOTE(review): presumably always allocated by the constructors -- confirm.
	JString*      itsReplacePattern;

	JInterpolate* itsInterpolator;
	JSubstitute*  itsEscapeEngine;

	#ifdef JRE_ALLOC_CHECK
	int numRegexAlloc;
	#endif

	// Data which could be stored in less than 32 bits and so are collected
	// together in case JRegex's memory usage is optimized someday

	PatternState itsState; // 2 bits

	JBoolean itsNoJExtendedFlag;       // 1 bit each
	JBoolean itsLiteralReplaceFlag;
	JBoolean itsMatchCaseFlag;

	// Static data

	// JAFL 5/11/98
	// Searched by NeedsBackslashToBeLiteral().
	static const JString theSpecialCharList;

private:

	void SetReplaceRegex() const;

	JBoolean MatchBase(const JCharacter* string, regmatch_t pmatch[], const JSize nmatch) const;
	JBoolean MatchWithinBase(const JCharacter* string, const JIndexRange& range,
	                         regmatch_t pmatch[], const JSize nmatch) const;

	void Allocate();
	void CopyPatternRegex(const JRegex& source);
	void CopyPattern(const JCharacter* pattern, const JSize length);
	void CopyPattern(const JRegex& source);

	// Option plumbing: the public Set...() inlines call SetCompileOption()
	// or SetExecuteOption(); the Is...() inlines call RawGetOption().
	void SetCompileOption(const int option, const JBoolean setClear);
	void SetExecuteOption(const int option, const JBoolean setClear);
	void RawSetOption(int* flags, const int option, const JBoolean setClear);

	JBoolean RawGetOption(const int flags, const int option) const;

	void CompileOrDie();

	// The basic regex library functions, translated
	JError   RegComp();
	JBoolean RegExec(const JCharacter* string, regmatch_t pmatch[], const JSize nmatch) const;
	void     RegFree();
	JError   RegError(int errorCode);

public:

	// JError stuff

	static const JCharacter* kError; // Break up into real types when we need them

	// Error type for JRegex.  The constructor is protected and JRegex is a
	// friend, so outside code cannot construct one directly.
	class JRegexError : public JError
	{
		friend class JRegex;
	protected:
		JRegexError(const JCharacter* type, const JCharacter* message)
			:
			JError(type, message, kTrue) // Always copy message!
		{ };
	};
};

/******************************************************************************
 IsNoJExtended

 *****************************************************************************/

// Reports the current value of the "no J-extended" flag.
inline JBoolean JRegex::IsNoJExtended() const
{
	const JBoolean noJExtended = itsNoJExtendedFlag;
	return noJExtended;
}

/******************************************************************************
 RawGetOption (private)

 *****************************************************************************/

// Tests whether any bit of `option` is present in `flags`.
inline JBoolean JRegex::RawGetOption(const int flags, const int option) const
{
	// Isolate the requested bit(s) first, then map zero/non-zero to JBoolean.
	const int maskedBits = flags & option;
	return JConvertToBoolean(maskedBits);
}

/******************************************************************************
 NeedsBackslashToBeLiteral (static)

	JAFL 5/11/98

	Returns kTrue if the given character needs to be backslashed
	in order to be treated as a literal by the regex compiler.

 *****************************************************************************/

// Looks the character up in theSpecialCharList.
inline JBoolean JRegex::NeedsBackslashToBeLiteral(const JCharacter c)
{
	// Contains() is handed a string, so wrap the single character in a
	// NUL-terminated two-element buffer.
	const JCharacter needle[2] = { c, '\0' };

	const JBoolean isSpecial = theSpecialCharList.Contains(needle);
	return isSpecial;
}

/******************************************************************************
 GetPattern

	Returns the pattern currently set.  This refers to the JRegex's
	internal pattern buffer; it will point to garbage after the next
	SetPattern.  As usual, use JString to make your own copy.

	If there is no pattern currently set, GetPattern returns the empty string.

 *****************************************************************************/

// Hands back a reference to the stored pattern string (no copy is made).
inline const JString& JRegex::GetPattern() const
{
	const JString& currentPattern = itsPattern;
	return currentPattern;
}


inline void
JRegex::GetPattern
	(
	const JCharacter** pattern,
	JSize*             length
	)
	const
{
	*pattern = itsPattern;
	*length = itsPattern.GetLength();
}

/******************************************************************************
 ContainsNULL

 *****************************************************************************/

// True when the stored NULL count is non-zero.
inline JBoolean JRegex::ContainsNULL() const
{
	const bool hasNulls = (itsNULLCount != 0);
	return JConvertToBoolean(hasNulls);
}

/******************************************************************************
 NULLCount

 *****************************************************************************/

// Returns the stored NULL count.
inline JSize JRegex::NULLCount() const
{
	const JSize nullCount = itsNULLCount;
	return nullCount;
}

/******************************************************************************
 CaseSensitive

	Controls whether matches will be case sensitive (the default) or case
	insensitive.

	Performance note: changing this option can cause a recompile before the
	next match.

 *****************************************************************************/

inline void
JRegex::SetCaseSensitive
	(
	const JBoolean yesNo // = kTrue
	)
{
	SetCompileOption(REG_ICASE, JNegate(yesNo) );
}

// Case sensitive exactly when REG_ICASE is absent from the compile flags.
inline JBoolean JRegex::IsCaseSensitive() const
{
	const JBoolean ignoreCase = RawGetOption(itsCFlags, REG_ICASE);
	return JNegate(ignoreCase);
}

/******************************************************************************
 SingleLine

	Controls whether the entire string is considered a single line or whether
	newlines are considered to indicate line boundaries (the default).  This
	option is independent of SetLineBegin() and SetLineEnd().

	Performance note: changing this option can cause a recompile before the
	next match.

 *****************************************************************************/

inline void
JRegex::SetSingleLine
	(
	const JBoolean yesNo // = kTrue
	)
{
	SetCompileOption(REG_NEWLINE, JNegate(yesNo) );
}

// Single-line exactly when REG_NEWLINE is absent from the compile flags.
inline JBoolean JRegex::IsSingleLine() const
{
	const JBoolean newlineSensitive = RawGetOption(itsCFlags, REG_NEWLINE);
	return JNegate(newlineSensitive);
}

/******************************************************************************
 MatchOnly

	Controls whether it is possible to determine the substring which was matched
	after a successful match (the default) or not.  SetMatchOnly(kFalse) allows
	determination of the substring(s) matched by the overall regular expression
	and/or by each subexpression.  SetMatchOnly(kTrue) allows (potentially)
	faster matches at the expense of *all* information about the match location
	including the location of the overall match; only whether a match took place
	or not can be determined.

	Performance note: changing this option can cause a recompile before the next
	match.

 *****************************************************************************/

// Match-only maps directly (no negation) onto the REG_NOSUB compile option.
inline void JRegex::SetMatchOnly(const JBoolean yesNo /* = kTrue */)
{
	const int noSubOption = REG_NOSUB;
	SetCompileOption(noSubOption, yesNo);
}

// Match-only exactly when REG_NOSUB is present in the compile flags.
inline JBoolean JRegex::IsMatchOnly() const
{
	const JBoolean noSub = RawGetOption(itsCFlags, REG_NOSUB);
	return noSub;
}

/******************************************************************************
 LineBegin

	Controls whether the beginning of the string to be matched is considered
	to begin a line for purposes of matching '^'.  Default is for '^' to match
	at the beginning of the string.  This option is independent of
	SetSingleLine().

 *****************************************************************************/

inline void
JRegex::SetLineBegin
	(
	const JBoolean yesNo // = kTrue
	)
{
	SetExecuteOption(REG_NOTBOL, JNegate(yesNo) );
}

// True exactly when REG_NOTBOL is absent from the execute flags.
inline JBoolean JRegex::IsLineBegin() const
{
	const JBoolean notBeginOfLine = RawGetOption(itsEFlags, REG_NOTBOL);
	return JNegate(notBeginOfLine);
}

/******************************************************************************
 LineEnd

	Controls whether the end of the string to be matched is considered to end a
	line for purposes of matching '$'.  Default is for '$' to match at the end
	of the string.  This option is independent of SetSingleLine().

 *****************************************************************************/

inline void
JRegex::SetLineEnd
	(
	const JBoolean yesNo // = kTrue
	)
{
	SetExecuteOption(REG_NOTEOL, JNegate(yesNo) );
}

// True exactly when REG_NOTEOL is absent from the execute flags.
inline JBoolean JRegex::IsLineEnd() const
{
	const JBoolean notEndOfLine = RawGetOption(itsEFlags, REG_NOTEOL);
	return JNegate(notEndOfLine);
}

/******************************************************************************
 LiteralReplace

	Controls whether one of the forms of Replace() does pattern substitution on
	the replacement pattern before interpolation (for the other two forms
	replace pattern substitution is controlled by their arguments).  The default
	setting is kFalse, so that replacement is done before interpolation, but the
	default argument is kTrue, so that SetLiteralReplace() changes to no pattern
	replacement.

 *****************************************************************************/

inline void
JRegex::SetLiteralReplace
	(
	const JBoolean yesNo // = kTrue
	)
{
	itsLiteralReplaceFlag = yesNo;
}

// Reports the stored literal-replace setting.
inline JBoolean JRegex::IsLiteralReplace() const
{
	const JBoolean literalReplace = itsLiteralReplaceFlag;
	return literalReplace;
}

/******************************************************************************
 MatchCase

	Controls whether Replace (and InterpolateMatches) attempts to adjust
	the case of the replacement string to match that of the string matched.
	Default is kFalse, so case is not adjusted.  If set to kTrue, the first
	character of the replacement string will match the first character of
	the match.  In addition, if the rest of the match (after the first
	character) is entirely one case or the other, the rest of the
	replacement string (after the first character) will be coerced to that
	case.

 *****************************************************************************/

inline void
JRegex::SetMatchCase
	(
	const JBoolean yesNo // = kTrue
	)
{
	itsMatchCaseFlag = yesNo;
}

// Reports the stored match-case setting.
inline JBoolean JRegex::IsMatchCase() const
{
	const JBoolean matchCase = itsMatchCaseFlag;
	return matchCase;
}

/******************************************************************************
 SetReplacePattern

 *****************************************************************************/

// Creating JStrings is clunky, but convenient for now.

// C-string convenience form: wraps the pattern in a JString and forwards
// to the JString overload, passing errRange through unchanged.
inline JError JRegex::SetReplacePattern(const JCharacter* pattern, JIndexRange* errRange)
{
	const JString patternStr(pattern);
	return SetReplacePattern(patternStr, errRange);
}


// Pointer-plus-length convenience form: builds a JString from the given
// characters and forwards to the JString overload, passing errRange
// through unchanged.
inline JError JRegex::SetReplacePattern(const JCharacter* pattern, const JSize length,
										JIndexRange* errRange)
{
	const JString patternStr(pattern, length);
	return SetReplacePattern(patternStr, errRange);
}

/******************************************************************************
 GetReplacePattern

	Returns the current replace pattern.  The default (if no previous replace
	pattern was set) is the empty string, "".

 *****************************************************************************/

// Returns a reference to the JString that itsReplacePattern points at.
// The pointer is dereferenced without a null check.
inline const JString& JRegex::GetReplacePattern() const
{
	const JString& replacePattern = *itsReplacePattern;
	return replacePattern;
}

/******************************************************************************
 GetPatternEscapeEngine

 *****************************************************************************/

// Exposes the stored escape-engine pointer as-is.
inline JSubstitute* JRegex::GetPatternEscapeEngine() const
{
	JSubstitute* const escapeEngine = itsEscapeEngine;
	return escapeEngine;
}

#endif
