#include "unicode/utypes.h"
#include "unicode/parseerr.h"
Go to the source code of this file.
Typedefs | |
typedef URegularExpression | URegularExpression |
Structure representing a compiled regular expression, plus the results of a match operation. | |
typedef enum URegexpFlag | URegexpFlag |
Constants for Regular Expression Match Modes. | |
Functions | |
U_DRAFT URegularExpression * | uregex_open (const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status) |
Open (compile) an ICU regular expression. | |
U_DRAFT URegularExpression * | uregex_openC (const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status) |
Open (compile) an ICU regular expression. | |
U_DRAFT void | uregex_close (URegularExpression *regexp) |
Close the regular expression, recovering all resources (memory) it was holding. | |
U_DRAFT URegularExpression * | uregex_clone (const URegularExpression *regexp, UErrorCode *status) |
Make a copy of a compiled regular expression. | |
U_DRAFT const UChar * | uregex_pattern (const URegularExpression *regexp, int32_t *patLength, UErrorCode *status) |
Return a pointer to the source form of the pattern for this regular expression. | |
U_DRAFT int32_t | uregex_flags (const URegularExpression *regexp, UErrorCode *status) |
Get the match mode flags that were specified when compiling this regular expression. | |
U_DRAFT void | uregex_setText (URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status) |
Set the subject text string upon which the regular expression will look for matches. | |
U_DRAFT const UChar * | uregex_getText (URegularExpression *regexp, int32_t *textLength, UErrorCode *status) |
Get the subject text that is currently associated with this regular expression object. | |
U_DRAFT UBool | uregex_matches (URegularExpression *regexp, int32_t startIndex, UErrorCode *status) |
Attempts to match the input string, beginning at startIndex, against the pattern. | |
U_DRAFT UBool | uregex_lookingAt (URegularExpression *regexp, int32_t startIndex, UErrorCode *status) |
Attempts to match the input string, starting from the specified index, against the pattern. | |
U_DRAFT UBool | uregex_find (URegularExpression *regexp, int32_t startIndex, UErrorCode *status) |
Find the first matching substring of the input string that matches the pattern. | |
U_DRAFT UBool | uregex_findNext (URegularExpression *regexp, UErrorCode *status) |
Find the next pattern match in the input string. | |
U_DRAFT int32_t | uregex_groupCount (URegularExpression *regexp, UErrorCode *status) |
Get the number of capturing groups in this regular expression's pattern. | |
U_DRAFT int32_t | uregex_group (URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status) |
Extract the string for the specified matching expression or subexpression. | |
U_DRAFT int32_t | uregex_start (URegularExpression *regexp, int32_t groupNum, UErrorCode *status) |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. | |
U_DRAFT int32_t | uregex_end (URegularExpression *regexp, int32_t groupNum, UErrorCode *status) |
Returns the index in the input string of the position following the end of the text matched by the specified capture group. | |
U_DRAFT void | uregex_reset (URegularExpression *regexp, int32_t index, UErrorCode *status) |
Reset any saved state from the previous match. | |
U_DRAFT int32_t | uregex_replaceAll (URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status) |
Replaces every substring of the input that matches the pattern with the given replacement string. | |
U_DRAFT int32_t | uregex_replaceFirst (URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status) |
Replaces the first substring of the input that matches the pattern with the given replacement string. | |
U_DRAFT int32_t | uregex_appendReplacement (URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status) |
Implements a replace operation intended to be used as part of an incremental find-and-replace. | |
U_DRAFT int32_t | uregex_appendTail (URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status) |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last match, to the destination string. | |
U_DRAFT int32_t | uregex_split (URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status) |
Split a string into fields. |
This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.
Definition in file uregex.h.
|
Constants for Regular Expression Match Modes.
|
|
Structure representing a compiled regular expression, plus the results of a match operation.
|
|
Implements a replace operation intended to be used as part of an incremental find-and-replace. The input string, starting from the end of the previous match and ending at the start of the current match, is appended to the destination string. Then the replacement string is appended to the output string, including handling any substitutions of captured text. A note on preflight computation of buffer size and error handling: Calls to uregex_appendReplacement() and uregex_appendTail() are designed to be chained, one after another, with the destination buffer pointer and buffer capacity updated after each in preparation for the next. If the destination buffer is exhausted partway through such a sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal ICU conventions are for a function to perform no action if it is called with an error status, but for this one case, uregex_appendReplacement() will operate normally so that buffer size computations will complete correctly. For simple, prepackaged, non-incremental find-and-replace operations, see uregex_replaceFirst() or uregex_replaceAll().
|
|
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last match, to the destination string.
|
|
Make a copy of a compiled regular expression. Cloning a regular expression is faster than opening a second instance from the source form of the expression, and requires less memory. Note that the current input string and the position of any matched text within it are not cloned; only the pattern itself and the match mode flags are copied. Cloning can be particularly useful to threaded applications that perform multiple match operations in parallel. Each concurrent RE operation requires its own instance of a URegularExpression.
|
|
Close the regular expression, recovering all resources (memory) it was holding.
|
|
Returns the index in the input string of the position following the end of the text matched by the specified capture group. Return -1 if the capture group was not part of the last match. Group #0 refers to the complete range of matched text. Group #1 refers to the text matched by the first set of capturing parentheses.
|
|
Find the first matching substring of the input string that matches the pattern.
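The search for a match begins at the specified index. If a match is found, uregex_start(), uregex_end(), and uregex_group() will provide more information regarding the match.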
|
|
Find the next pattern match in the input string.
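Begin searching the input at the location following the end of the previous match, or at the start of the string if there is no previous match. If a match is found, uregex_start(), uregex_end(), and uregex_group() will provide more information regarding the match.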
|
|
Get the match mode flags that were specified when compiling this regular expression.
|
|
Get the subject text that is currently associated with this regular expression object. This simply returns whatever string pointer was previously supplied via uregex_setText().
|
|
Extract the string for the specified matching expression or subexpression. Group #0 is the complete string of matched text. Group #1 is the text matched by the first set of capturing parentheses.
|
|
Get the number of capturing groups in this regular expression's pattern.
|
|
Attempts to match the input string, starting from the specified index, against the pattern. The match may be of any length, and is not required to extend to the end of the input string. Contrast with uregex_matches().
If the match succeeds then more information can be obtained via the uregex_start(), uregex_end(), and uregex_group() functions.
|
|
Attempts to match the input string, beginning at startIndex, against the pattern. To succeed, the match must extend to the end of the input string.
|
|
Open (compile) an ICU regular expression. Compiles the regular expression in string form into an internal representation using the specified match mode flags. The resulting regular expression handle can then be used to perform various matching operations.
|
|
Open (compile) an ICU regular expression. The resulting regular expression handle can then be used to perform various matching operations. This function is the same as uregex_open, except that the pattern is supplied as an 8 bit char * string in the default code page.
|
|
Return a pointer to the source form of the pattern for this regular expression.
|
|
Replaces every substring of the input that matches the pattern with the given replacement string. This is a convenience function that provides a complete find-and-replace-all operation. This method scans the input string looking for matches of the pattern. Input that is not part of any match is copied unchanged to the destination buffer. Matched regions are replaced in the output buffer by the replacement string. The replacement string may contain references to capture groups; these take the form of $1, $2, etc.
|
|
Replaces the first substring of the input that matches the pattern with the given replacement string. This is a convenience function that provides a complete find-and-replace operation. This method scans the input string looking for a match of the pattern. All input that is not part of the match is copied unchanged to the destination buffer. The matched region is replaced in the output buffer by the replacement string. The replacement string may contain references to capture groups; these take the form of $1, $2, etc.
|
|
Reset any saved state from the previous match. Has the effect of causing uregex_findNext() to begin at the specified index, and causing uregex_start(), uregex_end() and uregex_group() to return an error indicating that there is no match information available.
|
|
Set the subject text string upon which the regular expression will look for matches. This function may be called any number of times, allowing the regular expression pattern to be applied to different strings. Regular expression matching operations work directly on the application's string data. No copy is made. The subject string data must not be altered after calling this function until after all regular expression operations involving this string data are completed. Zero length strings are permitted. In this case, no subsequent match operation will dereference the text string pointer.
|
|
Split a string into fields. Somewhat like split() from Perl. The pattern matches identify delimiters that separate the input into fields. The input data between the matches becomes the fields themselves. Each of the fields is copied from the input string to the destination buffer, and then NUL terminated. The position of each field within the destination buffer is returned in the destFields array.
|
|
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. Return -1 if the capture group was not part of the last match. Group #0 refers to the complete range of matched text. Group #1 refers to the text matched by the first set of capturing parentheses.
|